In [2]:

from sklearn import metrics
from sklearn import svm
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.kernel_ridge import KernelRidge
from sklearn.multioutput import RegressorChain
from sklearn.manifold import TSNE
from sklearn.decomposition import PCA
from sklearn.model_selection import cross_val_score

In [3]:

from scipy.spatial import ConvexHull, convex_hull_plot_2d
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA

Machine Learning Model¶

Read training data¶

In [4]:

# Let's use ASE to read atomic structures as ase.Atoms objects.
structure_li2s1 = read("./FeN4C66-Li2S/fe1n4c66-li2s.2279str.xyz",":-50")

In [5]:

from numpy import loadtxt
lines = loadtxt("./FeN4C66-Li2S/fe1n4c66-li2s.2279energies.dat", comments="#", delimiter=" ", unpack=False)
binding_energies_li2s1 = []
for i in range(0,len(lines)-50):
    binding_energies_li2s1.append(lines[i][1])
    
binding_energies_li2s1 = np.asarray(binding_energies_li2s1)

In [6]:

esub = 12383.42949802
e_li2s1 = 708.7375253
binding_energies_li2s1 = binding_energies_li2s1 + esub + e_li2s1

In [7]:

# Let's use ASE to read atomic structures as ase.Atoms objects.
structure_li2s2 = read("./FeN4C66-Li2S2/fe1n4c66-li2s2.2220str.xyz",":-50")

In [8]:

from numpy import loadtxt
lines = loadtxt("./FeN4C66-Li2S2/fe1n4c66-li2s2.2220energies.dat", comments="#", delimiter=" ", unpack=False)

binding_energies_li2s2 = []
for i in range(0,len(lines)-50):
    binding_energies_li2s2.append(lines[i][1])
    
binding_energies_li2s2 = np.asarray(binding_energies_li2s2)

In [9]:

esub = 12383.42949802
e_li2s2 = 1013.31309
binding_energies_li2s2 = binding_energies_li2s2 + esub + e_li2s2

In [10]:

# Let's use ASE to read atomic structures as ase.Atoms objects.
structure_li2s4 = read("./FeN4C66-Li2S4/fe1n4c66-li2s4.2119str.xyz",":-50")

In [11]:

from numpy import loadtxt
lines = loadtxt("./FeN4C66-Li2S4/fe1n4c66-li2s4.2119energies.dat", comments="#", delimiter=" ", unpack=False)
binding_energies_li2s4 = []
for i in range(0,len(lines)-50):
    binding_energies_li2s4.append(lines[i][1])
    
binding_energies_li2s4 = np.asarray(binding_energies_li2s4)

In [12]:

esub = 12383.42949802
e_li2s4 = 1621.689849
binding_energies_li2s4 = binding_energies_li2s4 + esub + e_li2s4

In [13]:

# Let's use ASE to read atomic structures as ase.Atoms objects.
structure_li2s6 = read("./FeN4C66-Li2S6/fe1n4c66-li2s6.1594str.xyz",":-50")

In [14]:

from numpy import loadtxt
lines = loadtxt("./FeN4C66-Li2S6/fe1n4c66-li2s6.1594energies.dat", comments="#", delimiter=" ", unpack=False)
binding_energies_li2s6 = []
for i in range(0,len(lines)-50):
    binding_energies_li2s6.append(lines[i][1])
    
binding_energies_li2s6 = np.asarray(binding_energies_li2s6)

In [15]:

esub = 12383.42949802
e_li2s6 = 2229.344944
binding_energies_li2s6 = binding_energies_li2s6 + esub + e_li2s6

In [16]:

# Let's use ASE to read atomic structures as ase.Atoms objects.
structure_li2s8 = read("./FeN4C66-Li2S8/fe1n4c66-li2s8.1715str.xyz",":-50")

In [17]:

from numpy import loadtxt
lines = loadtxt("./FeN4C66-Li2S8/fe1n4c66-li2s8.1715energies.dat", comments="#", delimiter=" ", unpack=False)
binding_energies_li2s8 = []
for i in range(0,len(lines)-50):
    binding_energies_li2s8.append(lines[i][1])
    
binding_energies_li2s8 = np.asarray(binding_energies_li2s8)

In [18]:

esub = 12383.42949802
e_li2s8 = 2836.121134
binding_energies_li2s8 = binding_energies_li2s8 + esub + e_li2s8

start wrapping up stuff¶

In [19]:

binding_energies = []
for i in range(0,len(binding_energies_li2s1)):
    binding_energies.append(binding_energies_li2s1[i])
for i in range(0,len(binding_energies_li2s2)):
    binding_energies.append(binding_energies_li2s2[i])
for i in range(0,len(binding_energies_li2s4)):
    binding_energies.append(binding_energies_li2s4[i])
for i in range(0,len(binding_energies_li2s6)):
    binding_energies.append(binding_energies_li2s6[i])
for i in range(0,len(binding_energies_li2s8)):
    binding_energies.append(binding_energies_li2s8[i])

In [20]:

print(len(binding_energies))

binding_energies_untouched = binding_energies

purged = []
for i in range(0,len(binding_energies)):
    if 5 < binding_energies[i]  :
        purged.append(i)

for i in purged[::-1]:
    binding_energies = np.delete(binding_energies,i)

print(len(binding_energies_untouched))
print(len(binding_energies))

9677
9677
7793

In [21]:

print(len(binding_energies_untouched))

Get Soap representation for Training and Testing¶

In [22]:

# Let's create a list of structures and gather the chemical elements that are in all the structures.
species = set()
for structure in structure_li2s2:
    species.update(structure.get_chemical_symbols())

soap = SOAP(
    species=species,
    periodic=False,
    rcut=6.5,
    nmax=6,
    lmax=7,
    rbf="gto",
    average="outer",
    sparse=False
)

In [23]:

feature_vectors = []
feature_vectorss = []
counter = -1

for structure in structure_li2s1:
    # Let's create SOAP feature vectors for each structure
    counter = counter + 1
    if 5 > binding_energies_untouched[counter]  :
        feature_vectors.append(soap.create(structure, n_jobs=1))

for structure in structure_li2s2:
    # Let's create SOAP feature vectors for each structure
    counter = counter + 1
    if 5 > binding_energies_untouched[counter]  :
        feature_vectors.append(soap.create(structure, n_jobs=1))

for structure in structure_li2s4:
    # Let's create SOAP feature vectors for each structure
    counter = counter + 1
    if 5 > binding_energies_untouched[counter]  :
        feature_vectors.append(soap.create(structure, n_jobs=1))
    
for structure in structure_li2s6:
    # Let's create SOAP feature vectors for each structure
    counter = counter + 1
    if 5 > binding_energies_untouched[counter]  :
        feature_vectors.append(soap.create(structure, n_jobs=1))
    
for structure in structure_li2s8:
    # Let's create SOAP feature vectors for each structure
    counter = counter + 1
    if 5 > binding_energies_untouched[counter]  :
        feature_vectors.append(soap.create(structure, n_jobs=1))

In [24]:

feature_vectorss = np.vstack(feature_vectors)
print(len(feature_vectorss))

In [25]:

binding_energies_m = binding_energies.mean()
binding_energies_centered = binding_energies - binding_energies_m
binding_energies_variance = binding_energies.std()
binding_energies_norm = binding_energies_centered / binding_energies_variance

In [26]:

print(binding_energies_m)

1.2583438033998016

In [27]:

# here we reshape stuff to make it more friendly for coding
feature_vectors = np.asarray(feature_vectors)
print(feature_vectors.shape)
feature_vectorss = np.asarray(feature_vectorss)
print(feature_vectorss.shape)

(7793, 3720)
(7793, 3720)

Get Kernel Ridge Regressor for Training¶

In [28]:

x = feature_vectorss
yy = np.asarray(binding_energies_centered)

counter = 0

kf = KFold(n_splits=8,random_state=1010, shuffle=True)
kf.get_n_splits(x)

for train_index, test_index in kf.split(x):
    
    counter = counter + 1 
    X_train, X_test = x[train_index], x[test_index]
    y_train, y_test = yy[train_index], yy[test_index]
    
    xx = []
    for i in range(0,len(X_train)):
        xx.append(X_train[i])
    for i in range(0,len(X_test)):
        xx.append(X_test[i])

    scaler = StandardScaler()
    scaled_x = scaler.fit_transform(X_train)
    scaled_y = scaler.transform(X_test)
 
  #  alphas = [1,0.5,0.25,0.125,0.0625,0.03125,0.0155,0.0008,0.0004,0.0002,0.0001]
  #  alphas = [0.015,0.014,0.013,0.012,0.011,0.01,0.009,0.008,0.007]
  #  alphas = [0.02,0.019,0.018,0.017,0.016,0.015]

  #  for alphaz in alphas: 

    tmaee = []
    trmsee = []
    tmaee2 = []
    trmsee2 = []
    r22 = []
    r222 = []

    regr = KernelRidge(alpha=0.02, kernel='rbf')

    y_pred = regr.fit(scaled_x[0:len(X_train)],y_train).predict(scaled_x[0:len(X_train)])
    y_pred2 = regr.fit(scaled_x[0:len(X_train)],y_train).predict(scaled_y[0:len(X_test)])

    aa = []
    aa2 = []

    for i in range(0,len(y_pred)):
        aa.append(y_pred[i])
    for i in range(0,len(y_pred2)):
        aa2.append(y_pred2[i])

    tmaee.append(metrics.mean_absolute_error(y_train, aa))
    trmsee.append(metrics.mean_squared_error(y_train, aa))
    r22.append(metrics.r2_score(y_train, aa))
    tmaee2.append(metrics.mean_absolute_error(y_test, aa2))
    trmsee2.append(metrics.mean_squared_error(y_test, aa2))
    r222.append(metrics.r2_score(y_test, aa2))

    print()
    print()
    print(tmaee)
    #print(trmsee)
    #print(r22)
    print(tmaee2)
    #print(trmsee2)
    #print(r222)

    print(binding_energies_variance)


[0.07236551567713013]
[0.10985851935138376]
1.2802963593751946


[0.07176517591885477]
[0.11869038095670066]
1.2802963593751946


[0.07294055592847262]
[0.1083920070386096]
1.2802963593751946


[0.07200605819421883]
[0.12250150596865342]
1.2802963593751946


[0.07085929706715868]
[0.12252927952427305]
1.2802963593751946


[0.0703655956411123]
[0.12098043636709455]
1.2802963593751946


[0.07094210364407319]
[0.11887967270596825]
1.2802963593751946


[0.07264472021745924]
[0.1139855281913159]
1.2802963593751946

GET PREDICTIONS FOR FULL SETS¶

In [29]:

# Let's use ASE to read atomic structures as ase.Atoms objects.
structure_li2s1 = read("./FeN4C66-Li2S/fe1n4c66.fin.mod_li2s_23499.xyz",":")

In [ ]:

feature_vectors_val = []
counter = -1 

for structure in structure_li2s1:
    # Let's create SOAP feature vectors for each structure
    counter = counter + 1
    feature_vectors_val.append(soap.create(structure, n_jobs=1))

In [ ]:

feature_vectorss_val = np.vstack(feature_vectors_val)
print(len(feature_vectorss_val))

In [ ]:

x = feature_vectorss
yy = np.asarray(binding_energies_centered)

counter = 0
ii = []

kf = KFold(n_splits=8,random_state=1010, shuffle=True)
kf.get_n_splits(x)

for train_index, test_index in kf.split(x):
    
    counter = counter + 1 
    X_train, X_test = x[train_index], x[test_index]
    y_train, y_test = yy[train_index], yy[test_index]
    
    xx = []
    for i in range(0,len(X_train)):
        xx.append(X_train[i])
    for i in range(0,len(X_test)):
        xx.append(X_test[i])

    scaler = StandardScaler()
    scaled_x = scaler.fit_transform(X_train)
    scaled_y = scaler.transform(feature_vectorss_val)

    regr = KernelRidge(alpha=0.02, kernel='rbf')

    y_pred2 = regr.fit(scaled_x[0:len(X_train)],y_train).predict(scaled_y[0:len(feature_vectors_val)])
    
    for i in range(0,len(y_pred2)):
        ii.append(y_pred2[i])
        
bb = []
cc = []

for j in range (0,len(y_pred2)):
    aa = []
    for i in range(0,counter):
        aa.append(ii[j+(i*len(y_pred2))])

    aa = np.asarray(aa)
    bb.append(aa.mean(0)) 
    cc.append(aa.std(0))

In [ ]:

bb = bb + binding_energies_m

In [ ]:

fig, ax = plt.subplots()
plt.errorbar(x=bb, y=cc, c='orange', markersize=4., label='', fmt='o')
ax.set_xlabel('Predicted')
ax.set_ylabel('Uncertainty')
plt.savefig('parity1.png', dpi=None, facecolor='w', edgecolor='w', 
            orientation='landscape', format=None, transparent=False, bbox_inches=None, pad_inches=0.1)
plt.show()

In [ ]:

# here we do the pca of the feature vectors

from sklearn.decomposition import PCA

pca = PCA(n_components=2)
pca.fit(feature_vectorss_val)
pc = pca.fit_transform(feature_vectorss_val)

In [ ]:

# here we plot the pca and the chosen structs

import matplotlib.pyplot as plt

plt.scatter(pc[:,0],pc[:,1],s=0.1,c=bb,cmap=plt.cm.get_cmap('magma'))
#plt.scatter(pc_proj[:,0],pc_proj[:,1],s=0.1,c=farthest_pts_idx,cmap=plt.cm.get_cmap('viridis'))
plt.colorbar(label='energy')
plt.ylabel('dim 2')
plt.xlabel('dim 1')
plt.savefig('pca-predicted-e.png', dpi=None, facecolor='w', edgecolor='w', 
            orientation='landscape', format=None, transparent=False, bbox_inches=None, pad_inches=0.1)
plt.show()

In [ ]:

idx =  []
for i in range(0,len(structure_li2s1)):
    idx.append(i)
idx = np.array(idx)

In [ ]:

# here we do the pca of the feature vectors
pca = PCA(n_components=2)
pca.fit(feature_vectorss_val)
pc = pca.fit_transform(feature_vectorss_val)

In [ ]:

ene = []
pcf = []

for i in range(0,len(feature_vectorss_val)):
    ene.append(bb[i])
    pcf.append(pc[i,0])

ene = np.array(ene)
pcf = np.array(pcf)

len(ene)
len(pcf)

ene=np.vstack((ene,pcf))

ene = ene.T

print(ene.shape)

hull = ConvexHull(ene)

In [ ]:

plt.scatter(pc[:,0],bb,s=1,c=(pc[:,1]),cmap=plt.cm.get_cmap('viridis'))
plt.scatter(pc[5486,0],bb[5486],s=100,cmap=plt.cm.get_cmap('viridis'))
for simplex in hull.simplices:
    plt.plot(ene[simplex, 1], ene[simplex, 0],  'k-')
#plt.scatter(pc_proj[:,0],pc_proj[:,1],s=0.1,c=farthest_pts_idx,cmap=plt.cm.get_cmap('viridis'))
plt.colorbar(label='idx')
plt.ylabel('dim 2')
plt.xlabel('dim 1')
plt.savefig('pca1-energy-pca2.png', dpi=None, facecolor='w', edgecolor='w', 
            orientation='landscape', format=None, transparent=False, bbox_inches=None, pad_inches=0.1)
plt.show()

In [ ]:

print([hull.vertices])
a = ([-3, 70])
enenew = []
enenew = np.vstack((bb ,a))
print(len(enenew ))
print(enenew .shape)
hull = ConvexHull(points=enenew,qhull_options='QG22680')
print(hull.vertices[hull.good])

In [ ]:

font = {'family': 'DejaVu Sans',
        'weight': 'normal',
        'size': 16,
        }

plt.rc('font',**font)

plt.scatter(pc[:,0],bb,s=0.25,c='orange')
#plt.scatter(pc[22480,0],bb[22480],s=100,cmap=plt.cm.get_cmap('viridis'))
for simplex in hull.simplices:
    if (ene[simplex, 0][0] < 5):
        if (ene[simplex, 0][1] < 5):
            plt.plot(ene[simplex, 1], ene[simplex, 0], 'P-', c='red', linewidth=0.5)
plt.scatter(pc[:,0],enerelax)

plt.ylabel('ML E$_{b}$ (eV)')
plt.xlabel('1st PCA')
plt.ylim(-2,9)

plt.text(-40, 9, 'C', fontsize=18)


plt.tight_layout()

plt.savefig('pca1-energy-pca111.png', dpi=600, facecolor='w', edgecolor='w', 
            orientation='landscape', format=None, transparent=False, bbox_inches=None, pad_inches=0.1)
plt.show()

In [ ]:

enerelax = []
for i in range(0,len(feature_vectorss_val)):
    enerelax.append(100)

enerelax[20006]= -2.49
enerelax[19492]= -2.40
enerelax[20510]= -2.51
enerelax[22543]= -2.55
enerelax[23047]= -2.68
enerelax[10108]= -2.74
enerelax[9595]= -2.73
enerelax[5486]= -0.12

enerelax[13957]=-2.62
enerelax[10107]=-2.55
enerelax[9586]=-2.80

font = {'family': 'DejaVu Sans',
        'weight': 'normal',
        'size': 16,
        }

plt.rc('font',**font)
            
plt.scatter(pc[:,0],bb,s=0.25,c='orange')
#plt.scatter(pc[22480,0],bb[22480],s=100,cmap=plt.cm.get_cmap('viridis'))
for simplex in hull.simplices:
    if (ene[simplex, 0][0] < 5):
        if (ene[simplex, 0][1] < 5):
            plt.plot(ene[simplex, 1], ene[simplex, 0], 'P-', c='red', linewidth=0.5)
plt.scatter(pc[:,0],enerelax)

x_values=[pc[20006,0], pc[20006,0]]
y_values=[bb[20006], enerelax[20006]]
plt.plot(x_values, y_values, '-', c='red')

x_values=[pc[19492,0], pc[19492,0]]
y_values=[bb[19492], enerelax[19492]]
plt.plot(x_values, y_values, '-', c='red')

x_values=[pc[20510,0], pc[20510,0]]
y_values=[bb[20510], enerelax[20510]]
plt.plot(x_values, y_values, '-', c='red')

x_values=[pc[22543,0], pc[22543,0]]
y_values=[bb[22543], enerelax[22543]]
plt.plot(x_values, y_values, '-', c='red')

x_values=[pc[23047,0], pc[23047,0]]
y_values=[bb[23047], enerelax[23047]]
plt.plot(x_values, y_values, '-', c='red')

x_values=[pc[10108,0], pc[10108,0]]
y_values=[bb[10108], enerelax[10108]]
plt.plot(x_values, y_values, '-', c='red')

x_values=[pc[9595,0], pc[9595,0]]
y_values=[bb[9595], enerelax[9595]]
plt.plot(x_values, y_values, '-', c='red')

x_values=[pc[5486,0], pc[5486,0]]
y_values=[bb[5486], enerelax[5486]]
plt.plot(x_values, y_values, '-', c='red')

x_values=[pc[9586,0], pc[9586,0]]
y_values=[bb[9586], enerelax[9586]]
plt.plot(x_values, y_values, '-', c='red')

x_values=[pc[10107,0], pc[10107,0]]
y_values=[bb[10107], enerelax[10107]]
plt.plot(x_values, y_values, '-', c='red')

x_values=[pc[13957,0], pc[13957,0]]
y_values=[bb[13957], enerelax[13957]]
plt.plot(x_values, y_values, '-', c='red')

#x_values=[pc[13958,0], pc[13958,0]]
#y_values=[bb[13958], enerelax[13958]]
#plt.plot(x_values, y_values, '-', c='green')

#plt.scatter(pc_proj[:,0],pc_proj[:,1],s=0.1,c=farthest_pts_idx,cmap=plt.cm.get_cmap('viridis'))
#plt.colorbar(label='2nd PCA')
plt.ylabel('ML E$_{b}$ (eV)')
plt.xlabel('1st PCA')
plt.ylim(-3,11)

plt.text(-40, 9, 'Li$_{2}$S', fontsize=18)

plt.tight_layout()

plt.savefig('pca1-energy-pca11.png', dpi=600, facecolor='w', edgecolor='w', 
            orientation='landscape', format=None, transparent=False, bbox_inches=None, pad_inches=0.1)
plt.show()

In [ ]:

enerelax = []
for i in range(0,len(feature_vectorss_val)):
    enerelax.append(100)

enerelax[20006]= -2.49
enerelax[19492]= -2.40
enerelax[20510]= -2.51
enerelax[22543]= -2.55
enerelax[23047]= -2.68
enerelax[10108]= -2.74
enerelax[9595]= -2.73
enerelax[5486]= -0.12

enerelax[13957]=-2.62
enerelax[10107]=-2.55
enerelax[9586]=-2.80

font = {'family': 'DejaVu Sans',
        'weight': 'normal',
        'size': 16,
        }

plt.rc('font',**font)
            
plt.scatter(pc[:,0],bb,s=0.25,c='orange')
#plt.scatter(pc[22480,0],bb[22480],s=100,cmap=plt.cm.get_cmap('viridis'))
for simplex in hull.simplices:
    if (ene[simplex, 0][0] < 5):
        if (ene[simplex, 0][1] < 5):
            plt.plot(ene[simplex, 1], ene[simplex, 0], 'P-', c='red', linewidth=0.5)
plt.scatter(pc[:,0],enerelax)

x_values=[pc[20006,0], pc[20006,0]]
y_values=[bb[20006], enerelax[20006]]
plt.plot(x_values, y_values, '-', c='red')

x_values=[pc[19492,0], pc[19492,0]]
y_values=[bb[19492], enerelax[19492]]
plt.plot(x_values, y_values, '-', c='red')

x_values=[pc[20510,0], pc[20510,0]]
y_values=[bb[20510], enerelax[20510]]
plt.plot(x_values, y_values, '-', c='red')

x_values=[pc[22543,0], pc[22543,0]]
y_values=[bb[22543], enerelax[22543]]
plt.plot(x_values, y_values, '-', c='red')

x_values=[pc[23047,0], pc[23047,0]]
y_values=[bb[23047], enerelax[23047]]
plt.plot(x_values, y_values, '-', c='red')

x_values=[pc[10108,0], pc[10108,0]]
y_values=[bb[10108], enerelax[10108]]
plt.plot(x_values, y_values, '-', c='red')

x_values=[pc[9595,0], pc[9595,0]]
y_values=[bb[9595], enerelax[9595]]
plt.plot(x_values, y_values, '-', c='red')

x_values=[pc[5486,0], pc[5486,0]]
y_values=[bb[5486], enerelax[5486]]
plt.plot(x_values, y_values, '-', c='red')

x_values=[pc[9586,0], pc[9586,0]]
y_values=[bb[9586], enerelax[9586]]
plt.plot(x_values, y_values, '-', c='red')

x_values=[pc[10107,0], pc[10107,0]]
y_values=[bb[10107], enerelax[10107]]
plt.plot(x_values, y_values, '-', c='red')

x_values=[pc[13957,0], pc[13957,0]]
y_values=[bb[13957], enerelax[13957]]
plt.plot(x_values, y_values, '-', c='red')

#x_values=[pc[13958,0], pc[13958,0]]
#y_values=[bb[13958], enerelax[13958]]
#plt.plot(x_values, y_values, '-', c='green')

#plt.scatter(pc_proj[:,0],pc_proj[:,1],s=0.1,c=farthest_pts_idx,cmap=plt.cm.get_cmap('viridis'))
#plt.colorbar(label='2nd PCA')
plt.ylabel('ML E$_{b}$ (eV)')
plt.xlabel('1st PCA')
plt.ylim(-3,11)

plt.text(-40, 9, 'D', fontsize=18)

plt.tight_layout()

plt.savefig('pca1-energy-pca1.png', dpi=600, facecolor='w', edgecolor='w', 
            orientation='landscape', format=None, transparent=False, bbox_inches=None, pad_inches=0.1)
plt.show()

In [ ]:

[12796, 13120, 13496, 13948, 13958, 13957, 10107, 10108, 10098,
       10099,  9595,  9586, 20006, 19492, 20510, 19996, 22543, 23047,
        5486,  5485,  4981,  4476,  4475,  4474,  4473,  4482]

In [ ]:

enerelax = []
for i in range(0,len(feature_vectorss_val)):
    enerelax.append(100)

enerelax[20006]= -2.49
enerelax[19492]= -2.40
enerelax[20510]= -2.51
enerelax[22543]= -2.55
enerelax[23047]= -2.68
enerelax[10108]= -2.74
enerelax[9595]= -2.73
enerelax[5486]= -0.12

enerelax[13957]=-2.62
enerelax[10107]=-2.55
enerelax[9586]=-2.80

font = {'family': 'DejaVu Sans',
        'weight': 'normal',
        'size': 16,
        }

plt.rc('font',**font)
            
plt.scatter(pc[:,0],bb,s=0.25,c='orange')
#plt.scatter(pc[22480,0],bb[22480],s=100,cmap=plt.cm.get_cmap('viridis'))
for simplex in hull.simplices:
    if (ene[simplex, 0][0] < 5):
        if (ene[simplex, 0][1] < 5):
            plt.plot(ene[simplex, 1], ene[simplex, 0], 'P-', c='red', linewidth=0.5)
plt.scatter(pc[:,0],enerelax)

x_values=[pc[20006,0], pc[20006,0]]
y_values=[bb[20006], enerelax[20006]]
plt.plot(x_values, y_values, '-', c='red')

x_values=[pc[19492,0], pc[19492,0]]
y_values=[bb[19492], enerelax[19492]]
plt.plot(x_values, y_values, '-', c='red')

x_values=[pc[20510,0], pc[20510,0]]
y_values=[bb[20510], enerelax[20510]]
plt.plot(x_values, y_values, '-', c='red')

x_values=[pc[22543,0], pc[22543,0]]
y_values=[bb[22543], enerelax[22543]]
plt.plot(x_values, y_values, '-', c='red')

x_values=[pc[23047,0], pc[23047,0]]
y_values=[bb[23047], enerelax[23047]]
plt.plot(x_values, y_values, '-', c='red')

x_values=[pc[10108,0], pc[10108,0]]
y_values=[bb[10108], enerelax[10108]]
plt.plot(x_values, y_values, '-', c='red')

x_values=[pc[9595,0], pc[9595,0]]
y_values=[bb[9595], enerelax[9595]]
plt.plot(x_values, y_values, '-', c='red')

x_values=[pc[5486,0], pc[5486,0]]
y_values=[bb[5486], enerelax[5486]]
plt.plot(x_values, y_values, '-', c='red')

x_values=[pc[9586,0], pc[9586,0]]
y_values=[bb[9586], enerelax[9586]]
plt.plot(x_values, y_values, '-', c='red')

x_values=[pc[10107,0], pc[10107,0]]
y_values=[bb[10107], enerelax[10107]]
plt.plot(x_values, y_values, '-', c='red')

x_values=[pc[13957,0], pc[13957,0]]
y_values=[bb[13957], enerelax[13957]]
plt.plot(x_values, y_values, '-', c='red')

#x_values=[pc[13958,0], pc[13958,0]]
#y_values=[bb[13958], enerelax[13958]]
#plt.plot(x_values, y_values, '-', c='green')

#plt.scatter(pc_proj[:,0],pc_proj[:,1],s=0.1,c=farthest_pts_idx,cmap=plt.cm.get_cmap('viridis'))
#plt.colorbar(label='2nd PCA')
plt.ylabel('ML E$_{b}$ (eV)')
plt.xlabel('1st PCA')
plt.ylim(-3,11)

plt.text(-40, 9, 'C', fontsize=18)

plt.tight_layout()

plt.savefig('pca1-energy-pca111.png', dpi=600, facecolor='w', edgecolor='w', 
            orientation='landscape', format=None, transparent=False, bbox_inches=None, pad_inches=0.1)
plt.show()

Machine Learning Model¶

Read training data¶

Li2S1¶

Li2S2¶

li2s4¶

li2s6¶

li2s8¶

start wrapping up stuff¶

Get Soap representation for Training and Testing¶

Get Kernel Ridge Regressor for Training¶

GET PREDICTIONS FOR FULL SETS¶

GCH Energy¶