In [15]:
import ase
from ase import io
import chemiscope
import pandas as pd
In [16]:
# Load the descriptor table for the lit_ligs dataset. Its "name" column is
# used further down to locate each structure file.
df_1 = pd.read_csv("mc_lit.csv")
# Preview the first five rows as the cell output.
df_1.head(n=5)
Out[16]:
.dataframe tbody tr th:only-of-type { vertical-align: middle; } .dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; }
name tsne1 tsne2 class c_smiles nbo1 nbo2 onbo1 onbo2 cnbo1 ... foh2 foh3 foh4 foh5 foh6 bonhr onhr or pb_vol pb_vol_nh
0 001_lig.xyz 1.552168 1.878821 13-BOX CC(C)(C)[C@H]1COC(C2(C3=N[C@@H](C(C)(C)C)CO3)C... -0.32664 -0.32596 1.90664 1.90604 -0.53385 ... 0.492119 0.202600 0.471320 0.507881 0.797400 0.619431 0.533248 0.532310 0.232958 0.246693
1 002_lig.xyz -2.956746 1.180600 IndaBOX c1ccc2c(c1)C[C@H]1OC(CC3=N[C@H]4c5ccccc5C[C@H]... -0.33453 -0.33453 1.90711 1.90711 -0.52714 ... 0.499996 0.262131 0.499996 0.500004 0.737869 1.641139 3.322038 3.216236 0.229697 0.239004
2 003_lig.xyz 1.400345 2.463032 13-BOX CC(C)(C)[C@H]1COC(C2(C3=N[C@@H](C(C)(C)C)CO3)C... -0.32702 -0.32700 1.90651 1.90650 -0.53782 ... 0.499996 0.224905 0.500006 0.500004 0.775095 0.588768 0.503643 0.495704 0.255202 0.269287
3 004_lig.xyz 3.257014 1.687758 13-BOX CCC(CC)(C1=N[C@@H](C(C)C)CO1)C1=N[C@@H](C(C)C)CO1 -0.32393 -0.32386 1.90309 1.90389 -0.54841 ... 0.455473 0.151901 0.542472 0.544527 0.848099 0.758725 0.899208 0.878992 0.247297 0.262024
4 005_lig.xyz 2.307831 2.299776 13-BOX CC(C1=N[C@@H](C(C)(C)C)CO1)C1=N[C@@H](C(C)(C)C... -0.32938 -0.32914 1.90717 1.90717 -0.53902 ... 0.485473 0.231617 0.495657 0.514527 0.768383 0.601081 0.507730 0.500018 0.262485 0.278259

5 rows × 237 columns

In [17]:
# Dataset 1, lit_ligs
name = "lit_ligs"

# Read one structure per row of df_1; the "name" column holds the file name
# of each structure inside the lit_xyz/ directory.
mols_1 = [ase.io.read(f"lit_xyz/{structure}") for structure in df_1["name"]]

# Every column except the identifier columns ("name", "c_smiles") becomes a
# per-structure chemiscope property. No units are known for these columns, so
# the optional "units" entry is left out: the previous f"{units}" with
# units = None stored the literal string "None" as the unit of every property.
properties_1 = {
    f"{key}": {
        "target": "structure",
        "values": df_1[key].to_list(),
    }
    for key in df_1.keys()
    if key not in ("name", "c_smiles")
}

# Write the structures and their properties to a gzipped chemiscope input file.
chemiscope.write_input(
    path=f"Chemiscope_{name}.json.gz",
    frames=mols_1,
    properties=properties_1,
)
In [18]:
# Load the descriptor table for the csd_ligs dataset. Its "name" column is
# used further down to locate each structure file.
df_2 = pd.read_csv("mc_csd.csv")
# Preview the first five rows as the cell output.
df_2.head(n=5)
Out[18]:
.dataframe tbody tr th:only-of-type { vertical-align: middle; } .dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; }
name tsne1 tsne2 c_smiles nbo1 nbo2 onbo1 onbo2 cnbo1 cnbo2 ... foh2 foh3 foh4 foh5 foh6 bonhr onhr or pb_vol pb_vol_nh
0 ADALIB_Complex_23_Ligand_0_ON_Cu.xyz 2.300118 5.822061 [H]N([H])C([H])(C(=O)[O-])C([H])([H])C(=O)[O-] -0.27722 0.02803 1.97867 1.96293 -0.82745 -0.75102 ... 0.428158 0.007569 0.368579 0.571842 0.992431 0.978093 0.688112 0.731148 0.412376 0.412979
1 AFEHUP_Complex_1_Ligand_0_ON_Cu.xyz 4.482296 4.553890 [H]c1c([H])c([H])c(C2=NC([H])(C([H])([H])C([H]... -0.46124 -0.17937 1.98022 1.90831 -0.72905 -0.57050 ... 0.525384 0.088731 0.403101 0.474616 0.911269 0.841914 0.572820 0.559357 0.289816 0.302757
2 AGOROE_Complex_18_Ligand_1_OO_Cu.xyz 4.027641 4.484736 [H]c1c([H])c([H])c(C([H])([O-])C(=O)[O-])c([H]... -0.26508 -0.29251 1.98011 1.98642 -0.80905 -0.92312 ... 0.335946 0.009630 0.269329 0.664054 0.990370 0.977005 0.535290 0.549776 0.318634 0.329300
3 AKEXUJ_Complex_0_Ligand_2_NN_Cu.xyz 4.372118 4.152382 [H]c1nc2c(N=S(=O)([O-])c3c([H])c([H])c(C([H])(... -0.19325 -0.20986 1.92629 1.88851 -0.43034 -0.93295 ... 0.671608 0.037289 0.331732 0.328392 0.962711 0.963514 1.613252 1.637060 0.212371 0.218249
4 ALIPER_Complex_2_Ligand_0_NN_Cu.xyz 3.347076 4.930252 [H]N([H])C(C([H])([H])[H])(C([H])([H])[H])C([H... -0.26595 -0.31697 1.95726 1.93270 -0.82367 -0.53245 ... 0.504936 0.063942 0.393897 0.495064 0.936058 0.889532 0.669182 0.683628 0.372106 0.390437

5 rows × 236 columns

In [19]:
# Dataset 2, csd_ligs
name = "csd_ligs"

# Read one structure per row of df_2; the "name" column holds the file name
# of each structure inside the csd_xyz/ directory.
mols_2 = [ase.io.read(f"csd_xyz/{structure}") for structure in df_2["name"]]

# Every column except the identifier columns ("name", "c_smiles") becomes a
# per-structure chemiscope property. No units are known for these columns, so
# the optional "units" entry is left out: the previous f"{units}" with
# units = None stored the literal string "None" as the unit of every property.
properties_2 = {
    f"{key}": {
        "target": "structure",
        "values": df_2[key].to_list(),
    }
    for key in df_2.keys()
    if key not in ("name", "c_smiles")
}

# Write the structures and their properties to a gzipped chemiscope input file.
chemiscope.write_input(
    path=f"Chemiscope_{name}.json.gz",
    frames=mols_2,
    properties=properties_2,
)
In [20]:
# Build a chemiscope widget from the csd_ligs structures (mols_2) and their
# per-structure properties (properties_2).
widget = chemiscope.show(mols_2, properties_2)
In [21]:
# Bare expression on the last line of the cell so the notebook renders the widget.
widget
Out[21]:
ChemiscopeWidget(value='{"meta": {"name": " "}, "structures": [{"size": 14, "names": ["C", "C", "C", "C", "H",…
In [ ]: