import ase
from ase import io
import chemiscope
import pandas as pd
# Literature ligand descriptor table; each row's "name" column is later used
# as an xyz file name when the structures are loaded.
df_1 = pd.read_csv(filepath_or_buffer="mc_lit.csv")
df_1.head(n=5)
name | tsne1 | tsne2 | class | c_smiles | nbo1 | nbo2 | onbo1 | onbo2 | cnbo1 | ... | foh2 | foh3 | foh4 | foh5 | foh6 | bonhr | onhr | or | pb_vol | pb_vol_nh | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 001_lig.xyz | 1.552168 | 1.878821 | 13-BOX | CC(C)(C)[C@H]1COC(C2(C3=N[C@@H](C(C)(C)C)CO3)C... | -0.32664 | -0.32596 | 1.90664 | 1.90604 | -0.53385 | ... | 0.492119 | 0.202600 | 0.471320 | 0.507881 | 0.797400 | 0.619431 | 0.533248 | 0.532310 | 0.232958 | 0.246693 |
1 | 002_lig.xyz | -2.956746 | 1.180600 | IndaBOX | c1ccc2c(c1)C[C@H]1OC(CC3=N[C@H]4c5ccccc5C[C@H]... | -0.33453 | -0.33453 | 1.90711 | 1.90711 | -0.52714 | ... | 0.499996 | 0.262131 | 0.499996 | 0.500004 | 0.737869 | 1.641139 | 3.322038 | 3.216236 | 0.229697 | 0.239004 |
2 | 003_lig.xyz | 1.400345 | 2.463032 | 13-BOX | CC(C)(C)[C@H]1COC(C2(C3=N[C@@H](C(C)(C)C)CO3)C... | -0.32702 | -0.32700 | 1.90651 | 1.90650 | -0.53782 | ... | 0.499996 | 0.224905 | 0.500006 | 0.500004 | 0.775095 | 0.588768 | 0.503643 | 0.495704 | 0.255202 | 0.269287 |
3 | 004_lig.xyz | 3.257014 | 1.687758 | 13-BOX | CCC(CC)(C1=N[C@@H](C(C)C)CO1)C1=N[C@@H](C(C)C)CO1 | -0.32393 | -0.32386 | 1.90309 | 1.90389 | -0.54841 | ... | 0.455473 | 0.151901 | 0.542472 | 0.544527 | 0.848099 | 0.758725 | 0.899208 | 0.878992 | 0.247297 | 0.262024 |
4 | 005_lig.xyz | 2.307831 | 2.299776 | 13-BOX | CC(C1=N[C@@H](C(C)(C)C)CO1)C1=N[C@@H](C(C)(C)C... | -0.32938 | -0.32914 | 1.90717 | 1.90717 | -0.53902 | ... | 0.485473 | 0.231617 | 0.495657 | 0.514527 | 0.768383 | 0.601081 | 0.507730 | 0.500018 | 0.262485 | 0.278259 |
5 rows × 237 columns
# Dataset 1, lit_ligs
name = "lit_ligs"

# Load one structure per table row; the "name" column holds the xyz file name
# relative to the lit_xyz/ directory.
mols_1 = [ase.io.read(f"lit_xyz/{structure}") for structure in df_1["name"]]

# Every column except the file-name and SMILES columns becomes a
# per-structure property.
# No "units" entry is written: no unit is known for these columns, and
# formatting None into a string would store the literal text "None" as the
# unit of every property.
properties_1 = {
    key: {
        "target": "structure",
        "values": df_1[key].to_list(),
    }
    for key in df_1.columns
    if key not in ("name", "c_smiles")
}

# Write the structures and their properties to a gzipped chemiscope input.
chemiscope.write_input(
    path=f"Chemiscope_{name}.json.gz",
    frames=mols_1,
    properties=properties_1,
)
# CSD ligand descriptor table; each row's "name" column is later used as an
# xyz file name when the structures are loaded.
df_2 = pd.read_csv(filepath_or_buffer="mc_csd.csv")
df_2.head(n=5)
name | tsne1 | tsne2 | c_smiles | nbo1 | nbo2 | onbo1 | onbo2 | cnbo1 | cnbo2 | ... | foh2 | foh3 | foh4 | foh5 | foh6 | bonhr | onhr | or | pb_vol | pb_vol_nh | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | ADALIB_Complex_23_Ligand_0_ON_Cu.xyz | 2.300118 | 5.822061 | [H]N([H])C([H])(C(=O)[O-])C([H])([H])C(=O)[O-] | -0.27722 | 0.02803 | 1.97867 | 1.96293 | -0.82745 | -0.75102 | ... | 0.428158 | 0.007569 | 0.368579 | 0.571842 | 0.992431 | 0.978093 | 0.688112 | 0.731148 | 0.412376 | 0.412979 |
1 | AFEHUP_Complex_1_Ligand_0_ON_Cu.xyz | 4.482296 | 4.553890 | [H]c1c([H])c([H])c(C2=NC([H])(C([H])([H])C([H]... | -0.46124 | -0.17937 | 1.98022 | 1.90831 | -0.72905 | -0.57050 | ... | 0.525384 | 0.088731 | 0.403101 | 0.474616 | 0.911269 | 0.841914 | 0.572820 | 0.559357 | 0.289816 | 0.302757 |
2 | AGOROE_Complex_18_Ligand_1_OO_Cu.xyz | 4.027641 | 4.484736 | [H]c1c([H])c([H])c(C([H])([O-])C(=O)[O-])c([H]... | -0.26508 | -0.29251 | 1.98011 | 1.98642 | -0.80905 | -0.92312 | ... | 0.335946 | 0.009630 | 0.269329 | 0.664054 | 0.990370 | 0.977005 | 0.535290 | 0.549776 | 0.318634 | 0.329300 |
3 | AKEXUJ_Complex_0_Ligand_2_NN_Cu.xyz | 4.372118 | 4.152382 | [H]c1nc2c(N=S(=O)([O-])c3c([H])c([H])c(C([H])(... | -0.19325 | -0.20986 | 1.92629 | 1.88851 | -0.43034 | -0.93295 | ... | 0.671608 | 0.037289 | 0.331732 | 0.328392 | 0.962711 | 0.963514 | 1.613252 | 1.637060 | 0.212371 | 0.218249 |
4 | ALIPER_Complex_2_Ligand_0_NN_Cu.xyz | 3.347076 | 4.930252 | [H]N([H])C(C([H])([H])[H])(C([H])([H])[H])C([H... | -0.26595 | -0.31697 | 1.95726 | 1.93270 | -0.82367 | -0.53245 | ... | 0.504936 | 0.063942 | 0.393897 | 0.495064 | 0.936058 | 0.889532 | 0.669182 | 0.683628 | 0.372106 | 0.390437 |
5 rows × 236 columns
# Dataset 2, csd_ligs
name = "csd_ligs"

# Load one structure per table row; the "name" column holds the xyz file name
# relative to the csd_xyz/ directory.
mols_2 = [ase.io.read(f"csd_xyz/{structure}") for structure in df_2["name"]]

# Every column except the file-name and SMILES columns becomes a
# per-structure property.
# No "units" entry is written: no unit is known for these columns, and
# formatting None into a string would store the literal text "None" as the
# unit of every property.
properties_2 = {
    key: {
        "target": "structure",
        "values": df_2[key].to_list(),
    }
    for key in df_2.columns
    if key not in ("name", "c_smiles")
}

# Write the structures and their properties to a gzipped chemiscope input.
chemiscope.write_input(
    path=f"Chemiscope_{name}.json.gz",
    frames=mols_2,
    properties=properties_2,
)
# Build an interactive chemiscope viewer from the dataset 2 structures and
# their per-structure properties.
widget = chemiscope.show(mols_2, properties_2)
# Bare expression at the end of the cell so the notebook renders the widget.
widget
ChemiscopeWidget(value='{"meta": {"name": " "}, "structures": [{"size": 14, "names": ["C", "C", "C", "C", "H",…