In [1]:
import numpy as np

from ase.io import read
from ase.build import molecule
from ase import Atoms

from sklearn.metrics.pairwise import pairwise_distances

from dscribe.descriptors import SOAP
In [ ]:
 
In [2]:
from sklearn import metrics
from sklearn import svm
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.kernel_ridge import KernelRidge
from sklearn.multioutput import RegressorChain
from sklearn.manifold import TSNE
from sklearn.decomposition import PCA
from sklearn.model_selection import cross_val_score
In [3]:
from scipy.spatial import ConvexHull, convex_hull_plot_2d
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA

Machine Learning Model¶

Read training data¶

Li2S1¶

In [4]:
# Let's use ASE to read atomic structures as ase.Atoms objects.
structure_li2s1 = read("./FeN4C66-Li2S/fe1n4c66-li2s.2279str.xyz",":-50")
In [5]:
from numpy import loadtxt
lines = loadtxt("./FeN4C66-Li2S/fe1n4c66-li2s.2279energies.dat", comments="#", delimiter=" ", unpack=False)
binding_energies_li2s1 = []
for i in range(0,len(lines)-50):
    binding_energies_li2s1.append(lines[i][1])
    
binding_energies_li2s1 = np.asarray(binding_energies_li2s1)
In [6]:
esub = 12383.42949802
e_li2s1 = 708.7375253
binding_energies_li2s1 = binding_energies_li2s1 + esub + e_li2s1

Li2S2¶

In [7]:
# Let's use ASE to read atomic structures as ase.Atoms objects.
structure_li2s2 = read("./FeN4C66-Li2S2/fe1n4c66-li2s2.2220str.xyz",":-50")
In [8]:
from numpy import loadtxt
lines = loadtxt("./FeN4C66-Li2S2/fe1n4c66-li2s2.2220energies.dat", comments="#", delimiter=" ", unpack=False)

binding_energies_li2s2 = []
for i in range(0,len(lines)-50):
    binding_energies_li2s2.append(lines[i][1])
    
binding_energies_li2s2 = np.asarray(binding_energies_li2s2)
In [9]:
esub = 12383.42949802
e_li2s2 = 1013.31309
binding_energies_li2s2 = binding_energies_li2s2 + esub + e_li2s2

li2s4¶

In [10]:
# Let's use ASE to read atomic structures as ase.Atoms objects.
structure_li2s4 = read("./FeN4C66-Li2S4/fe1n4c66-li2s4.2119str.xyz",":-50")
In [11]:
from numpy import loadtxt
lines = loadtxt("./FeN4C66-Li2S4/fe1n4c66-li2s4.2119energies.dat", comments="#", delimiter=" ", unpack=False)
binding_energies_li2s4 = []
for i in range(0,len(lines)-50):
    binding_energies_li2s4.append(lines[i][1])
    
binding_energies_li2s4 = np.asarray(binding_energies_li2s4)
In [12]:
esub = 12383.42949802
e_li2s4 = 1621.689849
binding_energies_li2s4 = binding_energies_li2s4 + esub + e_li2s4

li2s6¶

In [13]:
# Let's use ASE to read atomic structures as ase.Atoms objects.
structure_li2s6 = read("./FeN4C66-Li2S6/fe1n4c66-li2s6.1594str.xyz",":-50")
In [14]:
from numpy import loadtxt
lines = loadtxt("./FeN4C66-Li2S6/fe1n4c66-li2s6.1594energies.dat", comments="#", delimiter=" ", unpack=False)
binding_energies_li2s6 = []
for i in range(0,len(lines)-50):
    binding_energies_li2s6.append(lines[i][1])
    
binding_energies_li2s6 = np.asarray(binding_energies_li2s6)
In [15]:
esub = 12383.42949802
e_li2s6 = 2229.344944
binding_energies_li2s6 = binding_energies_li2s6 + esub + e_li2s6

li2s8¶

In [16]:
# Let's use ASE to read atomic structures as ase.Atoms objects.
structure_li2s8 = read("./FeN4C66-Li2S8/fe1n4c66-li2s8.1715str.xyz",":-50")
In [17]:
from numpy import loadtxt
lines = loadtxt("./FeN4C66-Li2S8/fe1n4c66-li2s8.1715energies.dat", comments="#", delimiter=" ", unpack=False)
binding_energies_li2s8 = []
for i in range(0,len(lines)-50):
    binding_energies_li2s8.append(lines[i][1])
    
binding_energies_li2s8 = np.asarray(binding_energies_li2s8)
In [18]:
esub = 12383.42949802
e_li2s8 = 2836.121134
binding_energies_li2s8 = binding_energies_li2s8 + esub + e_li2s8

start wrapping up stuff¶

In [19]:
binding_energies = []
for i in range(0,len(binding_energies_li2s1)):
    binding_energies.append(binding_energies_li2s1[i])
for i in range(0,len(binding_energies_li2s2)):
    binding_energies.append(binding_energies_li2s2[i])
for i in range(0,len(binding_energies_li2s4)):
    binding_energies.append(binding_energies_li2s4[i])
for i in range(0,len(binding_energies_li2s6)):
    binding_energies.append(binding_energies_li2s6[i])
for i in range(0,len(binding_energies_li2s8)):
    binding_energies.append(binding_energies_li2s8[i])
In [20]:
print(len(binding_energies))

binding_energies_untouched = binding_energies

purged = []
for i in range(0,len(binding_energies)):
    if 5 < binding_energies[i]  :
        purged.append(i)

for i in purged[::-1]:
    binding_energies = np.delete(binding_energies,i)

print(len(binding_energies_untouched))
print(len(binding_energies))
9677
9677
7793
In [21]:
print(len(binding_energies_untouched))
9677

Get Soap representation for Training and Testing¶

In [22]:
# Let's create a list of structures and gather the chemical elements that are in all the structures.
species = set()
for structure in structure_li2s2:
    species.update(structure.get_chemical_symbols())

soap = SOAP(
    species=species,
    periodic=False,
    rcut=6.5,
    nmax=6,
    lmax=7,
    rbf="gto",
    average="outer",
    sparse=False
)
In [23]:
feature_vectors = []
feature_vectorss = []
counter = -1

for structure in structure_li2s1:
    # Let's create SOAP feature vectors for each structure
    counter = counter + 1
    if 5 > binding_energies_untouched[counter]  :
        feature_vectors.append(soap.create(structure, n_jobs=1))

for structure in structure_li2s2:
    # Let's create SOAP feature vectors for each structure
    counter = counter + 1
    if 5 > binding_energies_untouched[counter]  :
        feature_vectors.append(soap.create(structure, n_jobs=1))

for structure in structure_li2s4:
    # Let's create SOAP feature vectors for each structure
    counter = counter + 1
    if 5 > binding_energies_untouched[counter]  :
        feature_vectors.append(soap.create(structure, n_jobs=1))
    
for structure in structure_li2s6:
    # Let's create SOAP feature vectors for each structure
    counter = counter + 1
    if 5 > binding_energies_untouched[counter]  :
        feature_vectors.append(soap.create(structure, n_jobs=1))
    
for structure in structure_li2s8:
    # Let's create SOAP feature vectors for each structure
    counter = counter + 1
    if 5 > binding_energies_untouched[counter]  :
        feature_vectors.append(soap.create(structure, n_jobs=1))
In [24]:
feature_vectorss = np.vstack(feature_vectors)
print(len(feature_vectorss))
7793
In [25]:
binding_energies_m = binding_energies.mean()
binding_energies_centered = binding_energies - binding_energies_m
binding_energies_variance = binding_energies.std()
binding_energies_norm = binding_energies_centered / binding_energies_variance 
In [26]:
print(binding_energies_m)
1.2583438033998016
In [27]:
# here we reshape stuff to make it more friendly for coding
feature_vectors = np.asarray(feature_vectors)
print(feature_vectors.shape)
feature_vectorss = np.asarray(feature_vectorss)
print(feature_vectorss.shape)
(7793, 3720)
(7793, 3720)

Get Kernel Ridge Regressor for Training¶

In [28]:
x = feature_vectorss
yy = np.asarray(binding_energies_centered)

counter = 0

kf = KFold(n_splits=8,random_state=1010, shuffle=True)
kf.get_n_splits(x)

for train_index, test_index in kf.split(x):
    
    counter = counter + 1 
    X_train, X_test = x[train_index], x[test_index]
    y_train, y_test = yy[train_index], yy[test_index]
    
    xx = []
    for i in range(0,len(X_train)):
        xx.append(X_train[i])
    for i in range(0,len(X_test)):
        xx.append(X_test[i])

    scaler = StandardScaler()
    scaled_x = scaler.fit_transform(X_train)
    scaled_y = scaler.transform(X_test)
 
  #  alphas = [1,0.5,0.25,0.125,0.0625,0.03125,0.0155,0.0008,0.0004,0.0002,0.0001]
  #  alphas = [0.015,0.014,0.013,0.012,0.011,0.01,0.009,0.008,0.007]
  #  alphas = [0.02,0.019,0.018,0.017,0.016,0.015]

  #  for alphaz in alphas: 

    tmaee = []
    trmsee = []
    tmaee2 = []
    trmsee2 = []
    r22 = []
    r222 = []

    regr = KernelRidge(alpha=0.02, kernel='rbf')

    y_pred = regr.fit(scaled_x[0:len(X_train)],y_train).predict(scaled_x[0:len(X_train)])
    y_pred2 = regr.fit(scaled_x[0:len(X_train)],y_train).predict(scaled_y[0:len(X_test)])

    aa = []
    aa2 = []

    for i in range(0,len(y_pred)):
        aa.append(y_pred[i])
    for i in range(0,len(y_pred2)):
        aa2.append(y_pred2[i])

    tmaee.append(metrics.mean_absolute_error(y_train, aa))
    trmsee.append(metrics.mean_squared_error(y_train, aa))
    r22.append(metrics.r2_score(y_train, aa))
    tmaee2.append(metrics.mean_absolute_error(y_test, aa2))
    trmsee2.append(metrics.mean_squared_error(y_test, aa2))
    r222.append(metrics.r2_score(y_test, aa2))

    print()
    print()
    print(tmaee)
    #print(trmsee)
    #print(r22)
    print(tmaee2)
    #print(trmsee2)
    #print(r222)

    print(binding_energies_variance)

[0.07236551567713013]
[0.10985851935138376]
1.2802963593751946


[0.07176517591885477]
[0.11869038095670066]
1.2802963593751946


[0.07294055592847262]
[0.1083920070386096]
1.2802963593751946


[0.07200605819421883]
[0.12250150596865342]
1.2802963593751946


[0.07085929706715868]
[0.12252927952427305]
1.2802963593751946


[0.0703655956411123]
[0.12098043636709455]
1.2802963593751946


[0.07094210364407319]
[0.11887967270596825]
1.2802963593751946


[0.07264472021745924]
[0.1139855281913159]
1.2802963593751946

GET PREDICTIONS FOR FULL SETS¶

In [29]:
# Let's use ASE to read atomic structures as ase.Atoms objects.
structure_li2s1 = read("./FeN4C66-Li2S/fe1n4c66.fin.mod_li2s_23499.xyz",":")
In [ ]:
feature_vectors_val = []
counter = -1 

for structure in structure_li2s1:
    # Let's create SOAP feature vectors for each structure
    counter = counter + 1
    feature_vectors_val.append(soap.create(structure, n_jobs=1))
In [ ]:
feature_vectorss_val = np.vstack(feature_vectors_val)
print(len(feature_vectorss_val))
In [ ]:
x = feature_vectorss
yy = np.asarray(binding_energies_centered)

counter = 0
ii = []

kf = KFold(n_splits=8,random_state=1010, shuffle=True)
kf.get_n_splits(x)

for train_index, test_index in kf.split(x):
    
    counter = counter + 1 
    X_train, X_test = x[train_index], x[test_index]
    y_train, y_test = yy[train_index], yy[test_index]
    
    xx = []
    for i in range(0,len(X_train)):
        xx.append(X_train[i])
    for i in range(0,len(X_test)):
        xx.append(X_test[i])

    scaler = StandardScaler()
    scaled_x = scaler.fit_transform(X_train)
    scaled_y = scaler.transform(feature_vectorss_val)

    regr = KernelRidge(alpha=0.02, kernel='rbf')

    y_pred2 = regr.fit(scaled_x[0:len(X_train)],y_train).predict(scaled_y[0:len(feature_vectors_val)])
    
    for i in range(0,len(y_pred2)):
        ii.append(y_pred2[i])
        
bb = []
cc = []

for j in range (0,len(y_pred2)):
    aa = []
    for i in range(0,counter):
        aa.append(ii[j+(i*len(y_pred2))])

    aa = np.asarray(aa)
    bb.append(aa.mean(0)) 
    cc.append(aa.std(0))
In [ ]:
bb = bb + binding_energies_m
In [ ]:
fig, ax = plt.subplots()
plt.errorbar(x=bb, y=cc, c='orange', markersize=4., label='', fmt='o')
ax.set_xlabel('Predicted')
ax.set_ylabel('Uncertainty')
plt.savefig('parity1.png', dpi=None, facecolor='w', edgecolor='w', 
            orientation='landscape', format=None, transparent=False, bbox_inches=None, pad_inches=0.1)
plt.show()
In [ ]:
# here we do the pca of the feature vectors

from sklearn.decomposition import PCA

pca = PCA(n_components=2)
pca.fit(feature_vectorss_val)
pc = pca.fit_transform(feature_vectorss_val)
In [ ]:
# here we plot the pca and the chosen structs

import matplotlib.pyplot as plt

plt.scatter(pc[:,0],pc[:,1],s=0.1,c=bb,cmap=plt.cm.get_cmap('magma'))
#plt.scatter(pc_proj[:,0],pc_proj[:,1],s=0.1,c=farthest_pts_idx,cmap=plt.cm.get_cmap('viridis'))
plt.colorbar(label='energy')
plt.ylabel('dim 2')
plt.xlabel('dim 1')
plt.savefig('pca-predicted-e.png', dpi=None, facecolor='w', edgecolor='w', 
            orientation='landscape', format=None, transparent=False, bbox_inches=None, pad_inches=0.1)
plt.show()
In [ ]:
idx =  []
for i in range(0,len(structure_li2s1)):
    idx.append(i)
idx = np.array(idx)

GCH Energy¶

In [ ]:
# here we do the pca of the feature vectors
pca = PCA(n_components=2)
pca.fit(feature_vectorss_val)
pc = pca.fit_transform(feature_vectorss_val)
In [ ]:
ene = []
pcf = []

for i in range(0,len(feature_vectorss_val)):
    ene.append(bb[i])
    pcf.append(pc[i,0])

ene = np.array(ene)
pcf = np.array(pcf)

len(ene)
len(pcf)

ene=np.vstack((ene,pcf))

ene = ene.T

print(ene.shape)

hull = ConvexHull(ene)
In [ ]:
plt.scatter(pc[:,0],bb,s=1,c=(pc[:,1]),cmap=plt.cm.get_cmap('viridis'))
plt.scatter(pc[5486,0],bb[5486],s=100,cmap=plt.cm.get_cmap('viridis'))
for simplex in hull.simplices:
    plt.plot(ene[simplex, 1], ene[simplex, 0],  'k-')
#plt.scatter(pc_proj[:,0],pc_proj[:,1],s=0.1,c=farthest_pts_idx,cmap=plt.cm.get_cmap('viridis'))
plt.colorbar(label='idx')
plt.ylabel('dim 2')
plt.xlabel('dim 1')
plt.savefig('pca1-energy-pca2.png', dpi=None, facecolor='w', edgecolor='w', 
            orientation='landscape', format=None, transparent=False, bbox_inches=None, pad_inches=0.1)
plt.show()
In [ ]:
print([hull.vertices])
a = ([-3, 70])
enenew = []
enenew = np.vstack((bb ,a))
print(len(enenew ))
print(enenew .shape)
hull = ConvexHull(points=enenew,qhull_options='QG22680')
print(hull.vertices[hull.good])
In [ ]:
font = {'family': 'DejaVu Sans',
        'weight': 'normal',
        'size': 16,
        }

plt.rc('font',**font)

plt.scatter(pc[:,0],bb,s=0.25,c='orange')
#plt.scatter(pc[22480,0],bb[22480],s=100,cmap=plt.cm.get_cmap('viridis'))
for simplex in hull.simplices:
    if (ene[simplex, 0][0] < 5):
        if (ene[simplex, 0][1] < 5):
            plt.plot(ene[simplex, 1], ene[simplex, 0], 'P-', c='red', linewidth=0.5)
plt.scatter(pc[:,0],enerelax)

plt.ylabel('ML E$_{b}$ (eV)')
plt.xlabel('1st PCA')
plt.ylim(-2,9)

plt.text(-40, 9, 'C', fontsize=18)


plt.tight_layout()

plt.savefig('pca1-energy-pca111.png', dpi=600, facecolor='w', edgecolor='w', 
            orientation='landscape', format=None, transparent=False, bbox_inches=None, pad_inches=0.1)
plt.show()
In [ ]:
enerelax = []
for i in range(0,len(feature_vectorss_val)):
    enerelax.append(100)

enerelax[20006]= -2.49
enerelax[19492]= -2.40
enerelax[20510]= -2.51
enerelax[22543]= -2.55
enerelax[23047]= -2.68
enerelax[10108]= -2.74
enerelax[9595]= -2.73
enerelax[5486]= -0.12

enerelax[13957]=-2.62
enerelax[10107]=-2.55
enerelax[9586]=-2.80

font = {'family': 'DejaVu Sans',
        'weight': 'normal',
        'size': 16,
        }

plt.rc('font',**font)
            
plt.scatter(pc[:,0],bb,s=0.25,c='orange')
#plt.scatter(pc[22480,0],bb[22480],s=100,cmap=plt.cm.get_cmap('viridis'))
for simplex in hull.simplices:
    if (ene[simplex, 0][0] < 5):
        if (ene[simplex, 0][1] < 5):
            plt.plot(ene[simplex, 1], ene[simplex, 0], 'P-', c='red', linewidth=0.5)
plt.scatter(pc[:,0],enerelax)

x_values=[pc[20006,0], pc[20006,0]]
y_values=[bb[20006], enerelax[20006]]
plt.plot(x_values, y_values, '-', c='red')

x_values=[pc[19492,0], pc[19492,0]]
y_values=[bb[19492], enerelax[19492]]
plt.plot(x_values, y_values, '-', c='red')

x_values=[pc[20510,0], pc[20510,0]]
y_values=[bb[20510], enerelax[20510]]
plt.plot(x_values, y_values, '-', c='red')

x_values=[pc[22543,0], pc[22543,0]]
y_values=[bb[22543], enerelax[22543]]
plt.plot(x_values, y_values, '-', c='red')

x_values=[pc[23047,0], pc[23047,0]]
y_values=[bb[23047], enerelax[23047]]
plt.plot(x_values, y_values, '-', c='red')

x_values=[pc[10108,0], pc[10108,0]]
y_values=[bb[10108], enerelax[10108]]
plt.plot(x_values, y_values, '-', c='red')

x_values=[pc[9595,0], pc[9595,0]]
y_values=[bb[9595], enerelax[9595]]
plt.plot(x_values, y_values, '-', c='red')

x_values=[pc[5486,0], pc[5486,0]]
y_values=[bb[5486], enerelax[5486]]
plt.plot(x_values, y_values, '-', c='red')

x_values=[pc[9586,0], pc[9586,0]]
y_values=[bb[9586], enerelax[9586]]
plt.plot(x_values, y_values, '-', c='red')

x_values=[pc[10107,0], pc[10107,0]]
y_values=[bb[10107], enerelax[10107]]
plt.plot(x_values, y_values, '-', c='red')

x_values=[pc[13957,0], pc[13957,0]]
y_values=[bb[13957], enerelax[13957]]
plt.plot(x_values, y_values, '-', c='red')

#x_values=[pc[13958,0], pc[13958,0]]
#y_values=[bb[13958], enerelax[13958]]
#plt.plot(x_values, y_values, '-', c='green')

#plt.scatter(pc_proj[:,0],pc_proj[:,1],s=0.1,c=farthest_pts_idx,cmap=plt.cm.get_cmap('viridis'))
#plt.colorbar(label='2nd PCA')
plt.ylabel('ML E$_{b}$ (eV)')
plt.xlabel('1st PCA')
plt.ylim(-3,11)

plt.text(-40, 9, 'Li$_{2}$S', fontsize=18)

plt.tight_layout()

plt.savefig('pca1-energy-pca11.png', dpi=600, facecolor='w', edgecolor='w', 
            orientation='landscape', format=None, transparent=False, bbox_inches=None, pad_inches=0.1)
plt.show()
In [ ]:
enerelax = []
for i in range(0,len(feature_vectorss_val)):
    enerelax.append(100)

enerelax[20006]= -2.49
enerelax[19492]= -2.40
enerelax[20510]= -2.51
enerelax[22543]= -2.55
enerelax[23047]= -2.68
enerelax[10108]= -2.74
enerelax[9595]= -2.73
enerelax[5486]= -0.12

enerelax[13957]=-2.62
enerelax[10107]=-2.55
enerelax[9586]=-2.80

font = {'family': 'DejaVu Sans',
        'weight': 'normal',
        'size': 16,
        }

plt.rc('font',**font)
            
plt.scatter(pc[:,0],bb,s=0.25,c='orange')
#plt.scatter(pc[22480,0],bb[22480],s=100,cmap=plt.cm.get_cmap('viridis'))
for simplex in hull.simplices:
    if (ene[simplex, 0][0] < 5):
        if (ene[simplex, 0][1] < 5):
            plt.plot(ene[simplex, 1], ene[simplex, 0], 'P-', c='red', linewidth=0.5)
plt.scatter(pc[:,0],enerelax)

x_values=[pc[20006,0], pc[20006,0]]
y_values=[bb[20006], enerelax[20006]]
plt.plot(x_values, y_values, '-', c='red')

x_values=[pc[19492,0], pc[19492,0]]
y_values=[bb[19492], enerelax[19492]]
plt.plot(x_values, y_values, '-', c='red')

x_values=[pc[20510,0], pc[20510,0]]
y_values=[bb[20510], enerelax[20510]]
plt.plot(x_values, y_values, '-', c='red')

x_values=[pc[22543,0], pc[22543,0]]
y_values=[bb[22543], enerelax[22543]]
plt.plot(x_values, y_values, '-', c='red')

x_values=[pc[23047,0], pc[23047,0]]
y_values=[bb[23047], enerelax[23047]]
plt.plot(x_values, y_values, '-', c='red')

x_values=[pc[10108,0], pc[10108,0]]
y_values=[bb[10108], enerelax[10108]]
plt.plot(x_values, y_values, '-', c='red')

x_values=[pc[9595,0], pc[9595,0]]
y_values=[bb[9595], enerelax[9595]]
plt.plot(x_values, y_values, '-', c='red')

x_values=[pc[5486,0], pc[5486,0]]
y_values=[bb[5486], enerelax[5486]]
plt.plot(x_values, y_values, '-', c='red')

x_values=[pc[9586,0], pc[9586,0]]
y_values=[bb[9586], enerelax[9586]]
plt.plot(x_values, y_values, '-', c='red')

x_values=[pc[10107,0], pc[10107,0]]
y_values=[bb[10107], enerelax[10107]]
plt.plot(x_values, y_values, '-', c='red')

x_values=[pc[13957,0], pc[13957,0]]
y_values=[bb[13957], enerelax[13957]]
plt.plot(x_values, y_values, '-', c='red')

#x_values=[pc[13958,0], pc[13958,0]]
#y_values=[bb[13958], enerelax[13958]]
#plt.plot(x_values, y_values, '-', c='green')

#plt.scatter(pc_proj[:,0],pc_proj[:,1],s=0.1,c=farthest_pts_idx,cmap=plt.cm.get_cmap('viridis'))
#plt.colorbar(label='2nd PCA')
plt.ylabel('ML E$_{b}$ (eV)')
plt.xlabel('1st PCA')
plt.ylim(-3,11)

plt.text(-40, 9, 'D', fontsize=18)

plt.tight_layout()

plt.savefig('pca1-energy-pca1.png', dpi=600, facecolor='w', edgecolor='w', 
            orientation='landscape', format=None, transparent=False, bbox_inches=None, pad_inches=0.1)
plt.show()
In [ ]:
[12796, 13120, 13496, 13948, 13958, 13957, 10107, 10108, 10098,
       10099,  9595,  9586, 20006, 19492, 20510, 19996, 22543, 23047,
        5486,  5485,  4981,  4476,  4475,  4474,  4473,  4482]
In [ ]:
enerelax = []
for i in range(0,len(feature_vectorss_val)):
    enerelax.append(100)

enerelax[20006]= -2.49
enerelax[19492]= -2.40
enerelax[20510]= -2.51
enerelax[22543]= -2.55
enerelax[23047]= -2.68
enerelax[10108]= -2.74
enerelax[9595]= -2.73
enerelax[5486]= -0.12

enerelax[13957]=-2.62
enerelax[10107]=-2.55
enerelax[9586]=-2.80

font = {'family': 'DejaVu Sans',
        'weight': 'normal',
        'size': 16,
        }

plt.rc('font',**font)
            
plt.scatter(pc[:,0],bb,s=0.25,c='orange')
#plt.scatter(pc[22480,0],bb[22480],s=100,cmap=plt.cm.get_cmap('viridis'))
for simplex in hull.simplices:
    if (ene[simplex, 0][0] < 5):
        if (ene[simplex, 0][1] < 5):
            plt.plot(ene[simplex, 1], ene[simplex, 0], 'P-', c='red', linewidth=0.5)
plt.scatter(pc[:,0],enerelax)

x_values=[pc[20006,0], pc[20006,0]]
y_values=[bb[20006], enerelax[20006]]
plt.plot(x_values, y_values, '-', c='red')

x_values=[pc[19492,0], pc[19492,0]]
y_values=[bb[19492], enerelax[19492]]
plt.plot(x_values, y_values, '-', c='red')

x_values=[pc[20510,0], pc[20510,0]]
y_values=[bb[20510], enerelax[20510]]
plt.plot(x_values, y_values, '-', c='red')

x_values=[pc[22543,0], pc[22543,0]]
y_values=[bb[22543], enerelax[22543]]
plt.plot(x_values, y_values, '-', c='red')

x_values=[pc[23047,0], pc[23047,0]]
y_values=[bb[23047], enerelax[23047]]
plt.plot(x_values, y_values, '-', c='red')

x_values=[pc[10108,0], pc[10108,0]]
y_values=[bb[10108], enerelax[10108]]
plt.plot(x_values, y_values, '-', c='red')

x_values=[pc[9595,0], pc[9595,0]]
y_values=[bb[9595], enerelax[9595]]
plt.plot(x_values, y_values, '-', c='red')

x_values=[pc[5486,0], pc[5486,0]]
y_values=[bb[5486], enerelax[5486]]
plt.plot(x_values, y_values, '-', c='red')

x_values=[pc[9586,0], pc[9586,0]]
y_values=[bb[9586], enerelax[9586]]
plt.plot(x_values, y_values, '-', c='red')

x_values=[pc[10107,0], pc[10107,0]]
y_values=[bb[10107], enerelax[10107]]
plt.plot(x_values, y_values, '-', c='red')

x_values=[pc[13957,0], pc[13957,0]]
y_values=[bb[13957], enerelax[13957]]
plt.plot(x_values, y_values, '-', c='red')

#x_values=[pc[13958,0], pc[13958,0]]
#y_values=[bb[13958], enerelax[13958]]
#plt.plot(x_values, y_values, '-', c='green')

#plt.scatter(pc_proj[:,0],pc_proj[:,1],s=0.1,c=farthest_pts_idx,cmap=plt.cm.get_cmap('viridis'))
#plt.colorbar(label='2nd PCA')
plt.ylabel('ML E$_{b}$ (eV)')
plt.xlabel('1st PCA')
plt.ylim(-3,11)

plt.text(-40, 9, 'C', fontsize=18)

plt.tight_layout()

plt.savefig('pca1-energy-pca111.png', dpi=600, facecolor='w', edgecolor='w', 
            orientation='landscape', format=None, transparent=False, bbox_inches=None, pad_inches=0.1)
plt.show()
In [ ]: