import ase
from ase import io
import chemiscope
import pandas as pd
# Load "mc_lit.csv" into df_1 and display its first rows.
df_1 = pd.read_csv("mc_lit.csv")
df_1.head()
name | c_smiles | pc_1 | pc_2 | pc_3 | class | nbo1 | nbo2 | onbo1 | onbo2 | ... | foh2 | foh3 | foh4 | foh5 | foh6 | bonhr | onhr | or | pb_vol | pb_vol_nh | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 003_lig.xyz | CC(C)(C)[C@H]1COC(C2(C3=N[C@@H](C(C)(C)C)CO3)C... | -6.743490 | -0.921418 | 1.448254 | 7 | -0.32702 | -0.32700 | 1.90651 | 1.90650 | ... | 0.499996 | 0.224905 | 0.500006 | 0.500004 | 0.775095 | 0.588768 | 0.503643 | 0.495704 | 0.255202 | 0.269287 |
1 | 001_lig.xyz | CC(C)(C)[C@H]1COC(C2(C3=N[C@@H](C(C)(C)C)CO3)C... | -3.871104 | 0.201727 | 1.844392 | 7 | -0.32664 | -0.32596 | 1.90664 | 1.90604 | ... | 0.492119 | 0.202600 | 0.471320 | 0.507881 | 0.797400 | 0.619431 | 0.533248 | 0.532310 | 0.232958 | 0.246693 |
2 | 071_lig.xyz | CC(C)(C)[C@H]1COC(CC2=N[C@@H](C(C)(C)C)CO2)=N1 | -8.856419 | 3.626561 | -0.040138 | 7 | -0.33127 | -0.33126 | 1.90751 | 1.90752 | ... | 0.500242 | 0.246414 | 0.500000 | 0.499758 | 0.753586 | 0.598942 | 0.492743 | 0.483465 | 0.276956 | 0.293493 |
3 | 045_lig.xyz | CC(C)(C1=N[C@@H](C(C)(C)C)CO1)C1=N[C@@H](C(C)(... | -6.396528 | 2.966455 | 0.802578 | 7 | -0.32671 | -0.32670 | 1.90647 | 1.90647 | ... | 0.500051 | 0.222372 | 0.499997 | 0.499949 | 0.777628 | 0.596814 | 0.516015 | 0.510199 | 0.250626 | 0.265893 |
4 | 008_lig.xyz | CC(C)(C1=N[C@@H](c2ccccc2)[C@@H](c2ccccc2)O1)C... | 12.880947 | -3.747163 | -5.190745 | 7 | -0.33394 | -0.33394 | 1.90009 | 1.90008 | ... | 0.499974 | 0.224356 | 0.499997 | 0.500026 | 0.775644 | 0.556734 | 0.271177 | 0.293158 | 0.156265 | 0.163763 |
5 rows × 238 columns
# Dataset 1, lit_ligs
# Write a chemiscope input file with one structure per row of df_1 and every
# remaining column of df_1 exposed as a per-structure property.
name = "lit_ligs"

# Read the geometries in row order so that frame i matches row i of df_1.
mols_1 = [ase.io.read(f"lit_xyz/{structure}") for structure in df_1["name"]]

# "name" and "c_smiles" are skipped and not exported as properties.
# No "units" entry is written: the table carries no unit information, and
# formatting None into a string would label every property with the literal
# unit "None".
properties_1 = {
    key: {
        "target": "structure",
        "values": df_1[key].to_list(),
    }
    for key in df_1.keys()
    if key not in ("name", "c_smiles")
}

chemiscope.write_input(
    path=f"{name}-chemiscope.json.gz",
    frames=mols_1,
    properties=properties_1,
)
# Load "mc_csd.csv" into df_2 and display its first rows.
df_2 = pd.read_csv("mc_csd.csv")
df_2.head()
name | c_smiles | pc_1 | pc_2 | pc_3 | nbo1 | nbo2 | onbo1 | onbo2 | cnbo1 | ... | foh2 | foh3 | foh4 | foh5 | foh6 | bonhr | onhr | or | pb_vol | pb_vol_nh | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | ADALIB_Complex_23_Ligand_0_ON_Cu.xyz | [H]N([H])C([H])(C(=O)[O-])C([H])([H])C(=O)[O-] | -38.355599 | 13.393779 | -31.300485 | -0.27722 | 0.02803 | 1.97867 | 1.96293 | -0.82745 | ... | 0.428158 | 0.007569 | 0.368579 | 0.571842 | 0.992431 | 0.978093 | 0.688112 | 0.731148 | 0.412376 | 0.412979 |
1 | AFEHUP_Complex_1_Ligand_0_ON_Cu.xyz | [H]c1c([H])c([H])c(C2=NC([H])(C([H])([H])C([H]... | -10.718583 | 1.751250 | 15.576961 | -0.46124 | -0.17937 | 1.98022 | 1.90831 | -0.72905 | ... | 0.525384 | 0.088731 | 0.403101 | 0.474616 | 0.911269 | 0.841914 | 0.572820 | 0.559357 | 0.289816 | 0.302757 |
2 | AGOROE_Complex_18_Ligand_1_OO_Cu.xyz | [H]c1c([H])c([H])c(C([H])([O-])C(=O)[O-])c([H]... | -20.514131 | 3.477669 | -0.729662 | -0.26508 | -0.29251 | 1.98011 | 1.98642 | -0.80905 | ... | 0.335946 | 0.009630 | 0.269329 | 0.664054 | 0.990370 | 0.977005 | 0.535290 | 0.549776 | 0.318634 | 0.329300 |
3 | AKEXUJ_Complex_0_Ligand_2_NN_Cu.xyz | [H]c1nc2c(N=S(=O)([O-])c3c([H])c([H])c(C([H])(... | -6.360036 | 5.150151 | 11.742726 | -0.19325 | -0.20986 | 1.92629 | 1.88851 | -0.43034 | ... | 0.671608 | 0.037289 | 0.331732 | 0.328392 | 0.962711 | 0.963514 | 1.613252 | 1.637060 | 0.212371 | 0.218249 |
4 | ALIPER_Complex_2_Ligand_0_NN_Cu.xyz | [H]N([H])C(C([H])([H])[H])(C([H])([H])[H])C([H... | -22.124139 | 6.105218 | 6.281952 | -0.26595 | -0.31697 | 1.95726 | 1.93270 | -0.82367 | ... | 0.504936 | 0.063942 | 0.393897 | 0.495064 | 0.936058 | 0.889532 | 0.669182 | 0.683628 | 0.372106 | 0.390437 |
5 rows × 237 columns
# Dataset 2, csd_ligs
# Write a chemiscope input file with one structure per row of df_2 and every
# remaining column of df_2 exposed as a per-structure property.
name = "csd_ligs"

# Read the geometries in row order so that frame i matches row i of df_2.
mols_2 = [ase.io.read(f"csd_xyz/{structure}") for structure in df_2["name"]]

# "name" and "c_smiles" are skipped and not exported as properties.
# No "units" entry is written: the table carries no unit information, and
# formatting None into a string would label every property with the literal
# unit "None".
properties_2 = {
    key: {
        "target": "structure",
        "values": df_2[key].to_list(),
    }
    for key in df_2.keys()
    if key not in ("name", "c_smiles")
}

chemiscope.write_input(
    path=f"{name}-chemiscope.json.gz",
    frames=mols_2,
    properties=properties_2,
)
# Build a chemiscope widget from mols_2 and properties_2; the bare `widget`
# expression on the last line makes the notebook display it.
widget = chemiscope.show(mols_2, properties_2)
widget
ChemiscopeWidget(value='{"meta": {"name": " "}, "structures": [{"size": 14, "names": ["C", "C", "C", "C", "H",…
# Load "mc_preds_oa.csv" into df_oa and display its first rows.
df_oa = pd.read_csv("mc_preds_oa.csv")
df_oa.head()
name | c_smiles | ddg | prediction_train | uncertainty_train | prediction_loo | uncertainty_loo | |
---|---|---|---|---|---|---|---|
0 | 003_lig.xyz | CC(C)(C)[C@H]1COC(C2(C3=N[C@@H](C(C)(C)C)CO3)C... | 0.732764 | 0.749866 | 0.128353 | 0.752067 | 0.133444 |
1 | 001_lig.xyz | CC(C)(C)[C@H]1COC(C2(C3=N[C@@H](C(C)(C)C)CO3)C... | 0.715907 | 0.667394 | 0.124937 | 0.662763 | 0.128766 |
2 | 071_lig.xyz | CC(C)(C)[C@H]1COC(CC2=N[C@@H](C(C)(C)C)CO2)=N1 | 0.290050 | 0.459142 | 0.123971 | 0.470306 | 0.120008 |
3 | 045_lig.xyz | CC(C)(C1=N[C@@H](C(C)(C)C)CO1)C1=N[C@@H](C(C)(... | 0.749889 | 0.619766 | 0.125303 | 0.607693 | 0.125047 |
4 | 008_lig.xyz | CC(C)(C1=N[C@@H](c2ccccc2)[C@@H](c2ccccc2)O1)C... | 0.666819 | 0.390119 | 0.124535 | 0.366863 | 0.103764 |
# Predictions, oa
# Write a chemiscope input file with one structure per row of df_oa and every
# remaining column of df_oa exposed as a per-structure property.
name = "oa_preds"

# Read the geometries in row order so that frame i matches row i of df_oa.
mols_oa = [ase.io.read(f"lit_xyz/{structure}") for structure in df_oa["name"]]

# "name" and "c_smiles" are skipped and not exported as properties.
# No "units" entry is written: the table carries no unit information, and
# formatting None into a string would label every property with the literal
# unit "None".
properties_oa = {
    key: {
        "target": "structure",
        "values": df_oa[key].to_list(),
    }
    for key in df_oa.keys()
    if key not in ("name", "c_smiles")
}

chemiscope.write_input(
    path=f"{name}-chemiscope.json.gz",
    frames=mols_oa,
    properties=properties_oa,
)
# Load "mc_preds_cp.csv" into df_cp and display its first rows.
df_cp = pd.read_csv("mc_preds_cp.csv")
df_cp.head()
name | c_smiles | ddg | prediction_train | uncertainty_train | prediction_loo | uncertainty_loo | |
---|---|---|---|---|---|---|---|
0 | 045_lig.xyz | CC(C)(C1=N[C@@H](C(C)(C)C)CO1)C1=N[C@@H](C(C)(... | 2.059538 | 2.071310 | 0.326476 | 2.074841 | 0.340917 |
1 | 047_lig.xyz | CC(C)[C@H]1COC(C(C)(C)C2=N[C@@H](C(C)C)CO2)=N1 | 1.075533 | 1.230937 | 0.306142 | 1.243840 | 0.310797 |
2 | 044_lig.xyz | CC(C)(C1=N[C@@H](c2ccccc2)CO1)C1=N[C@@H](c2ccc... | 0.821359 | 0.915003 | 0.311781 | 0.927545 | 0.319021 |
3 | 018_lig.xyz | CC(C)(C1=NCCO1)C1=N[C@@H](c2ccccc2)CO1 | 0.240232 | 0.153533 | 0.307419 | 0.145710 | 0.313557 |
4 | 019_lig.xyz | CC(C)(C1=NC[C@@H](c2ccccc2)O1)C1=NC[C@@H](c2cc... | 0.142885 | 0.815728 | 0.307219 | 0.877433 | 0.280022 |
# Predictions, cp
# Write a chemiscope input file with one structure per row of df_cp and every
# remaining column of df_cp exposed as a per-structure property.
name = "cp_preds"

# Read the geometries in row order so that frame i matches row i of df_cp.
mols_cp = [ase.io.read(f"lit_xyz/{structure}") for structure in df_cp["name"]]

# "name" and "c_smiles" are skipped and not exported as properties.
# No "units" entry is written: the table carries no unit information, and
# formatting None into a string would label every property with the literal
# unit "None".
properties_cp = {
    key: {
        "target": "structure",
        "values": df_cp[key].to_list(),
    }
    for key in df_cp.keys()
    if key not in ("name", "c_smiles")
}

chemiscope.write_input(
    path=f"{name}-chemiscope.json.gz",
    frames=mols_cp,
    properties=properties_cp,
)
# Load "mc_preds_cc.csv" into df_cc and display its first rows.
df_cc = pd.read_csv("mc_preds_cc.csv")
df_cc.head()
name | c_smiles | ddg | prediction_train | uncertainty_train | prediction_loo | uncertainty_loo | |
---|---|---|---|---|---|---|---|
0 | 081_lig.xyz | c1ccc(C[C@H]2COC(C3=N[C@@H](Cc4ccccc4)CO3)=N2)cc1 | 0.827255 | 0.832734 | 0.191842 | 0.833625 | 0.196477 |
1 | 072_lig.xyz | C[C@H]1COC(C2=N[C@@H](C)CO2)=N1 | 0.946439 | 1.011898 | 0.189420 | 1.017055 | 0.193161 |
2 | 073_lig.xyz | CCCCCC[C@H]1COC(C2=N[C@@H](CCCCCC)CO2)=N1 | 0.925534 | 0.789815 | 0.191780 | 0.775290 | 0.193762 |
3 | 074_lig.xyz | CC(C)[C@H]1COC(C2=N[C@@H](C(C)C)CO2)=N1 | 1.679456 | 1.870811 | 0.199996 | 1.919626 | 0.200915 |
4 | 075_lig.xyz | CC(C)C[C@H]1COC(C2=N[C@@H](CC(C)C)CO2)=N1 | 0.967820 | 1.106218 | 0.190640 | 1.118827 | 0.192910 |
# Predictions, cc
# Write a chemiscope input file with one structure per row of df_cc and every
# remaining column of df_cc exposed as a per-structure property.
name = "cc_preds"

# Read the geometries in row order so that frame i matches row i of df_cc.
mols_cc = [ase.io.read(f"lit_xyz/{structure}") for structure in df_cc["name"]]

# "name" and "c_smiles" are skipped and not exported as properties.
# No "units" entry is written: the table carries no unit information, and
# formatting None into a string would label every property with the literal
# unit "None".
properties_cc = {
    key: {
        "target": "structure",
        "values": df_cc[key].to_list(),
    }
    for key in df_cc.keys()
    if key not in ("name", "c_smiles")
}

chemiscope.write_input(
    path=f"{name}-chemiscope.json.gz",
    frames=mols_cc,
    properties=properties_cc,
)
# Load "mc_preds_da_f.csv" into df_da_f and display its first rows.
df_da_f = pd.read_csv("mc_preds_da_f.csv")
df_da_f.head()
name | c_smiles | ddg | prediction_train | uncertainty_train | prediction_loo | uncertainty_loo | |
---|---|---|---|---|---|---|---|
0 | 071_lig.xyz | CC(C)(C)[C@H]1COC(CC2=N[C@@H](C(C)(C)C)CO2)=N1 | 0.601015 | 0.722513 | 0.337461 | 0.729974 | 0.343626 |
1 | 045_lig.xyz | CC(C)(C1=N[C@@H](C(C)(C)C)CO1)C1=N[C@@H](C(C)(... | 1.782003 | 0.852742 | 0.336311 | 0.808930 | 0.286655 |
2 | 061_lig.xyz | Clc1cccc(Cl)c1/C=N/[C@@H]1CCCC[C@H]1/N=C/c1c(C... | 1.232460 | 1.161735 | 0.343226 | 1.151536 | 0.351172 |
3 | 047_lig.xyz | CC(C)[C@H]1COC(C(C)(C)C2=N[C@@H](C(C)C)CO2)=N1 | 0.587532 | 0.651846 | 0.341726 | 0.659048 | 0.349261 |
4 | 044_lig.xyz | CC(C)(C1=N[C@@H](c2ccccc2)CO1)C1=N[C@@H](c2ccc... | 0.274510 | 0.684712 | 0.338584 | 0.711307 | 0.335931 |
# Predictions, da_f
# Write a chemiscope input file with one structure per row of df_da_f and
# every remaining column of df_da_f exposed as a per-structure property.
name = "da_f_preds"

# Read the geometries in row order so that frame i matches row i of df_da_f.
mols_da_f = [
    ase.io.read(f"lit_xyz/{structure}") for structure in df_da_f["name"]
]

# "name" and "c_smiles" are skipped and not exported as properties.
# No "units" entry is written: the table carries no unit information, and
# formatting None into a string would label every property with the literal
# unit "None".
properties_da_f = {
    key: {
        "target": "structure",
        "values": df_da_f[key].to_list(),
    }
    for key in df_da_f.keys()
    if key not in ("name", "c_smiles")
}

chemiscope.write_input(
    path=f"{name}-chemiscope.json.gz",
    frames=mols_da_f,
    properties=properties_da_f,
)
# Build a chemiscope widget from mols_da_f and properties_da_f; the bare
# `widget` expression on the last line makes the notebook display it.
widget = chemiscope.show(mols_da_f, properties_da_f)
widget
ChemiscopeWidget(value='{"meta": {"name": " "}, "structures": [{"size": 45, "names": ["C", "N", "C", "H", "C",…