In [1]:
import ase
from ase import io
import chemiscope
import pandas as pd
In [3]:
# Load the literature-ligand table and preview it. The preview length is
# spelled out explicitly; 5 is also what a bare head() call returns.
df_1 = pd.read_csv("mc_lit.csv")
df_1.head(n=5)
Out[3]:
.dataframe tbody tr th:only-of-type { vertical-align: middle; } .dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; }
name c_smiles pc_1 pc_2 pc_3 class nbo1 nbo2 onbo1 onbo2 ... foh2 foh3 foh4 foh5 foh6 bonhr onhr or pb_vol pb_vol_nh
0 003_lig.xyz CC(C)(C)[C@H]1COC(C2(C3=N[C@@H](C(C)(C)C)CO3)C... -6.743490 -0.921418 1.448254 7 -0.32702 -0.32700 1.90651 1.90650 ... 0.499996 0.224905 0.500006 0.500004 0.775095 0.588768 0.503643 0.495704 0.255202 0.269287
1 001_lig.xyz CC(C)(C)[C@H]1COC(C2(C3=N[C@@H](C(C)(C)C)CO3)C... -3.871104 0.201727 1.844392 7 -0.32664 -0.32596 1.90664 1.90604 ... 0.492119 0.202600 0.471320 0.507881 0.797400 0.619431 0.533248 0.532310 0.232958 0.246693
2 071_lig.xyz CC(C)(C)[C@H]1COC(CC2=N[C@@H](C(C)(C)C)CO2)=N1 -8.856419 3.626561 -0.040138 7 -0.33127 -0.33126 1.90751 1.90752 ... 0.500242 0.246414 0.500000 0.499758 0.753586 0.598942 0.492743 0.483465 0.276956 0.293493
3 045_lig.xyz CC(C)(C1=N[C@@H](C(C)(C)C)CO1)C1=N[C@@H](C(C)(... -6.396528 2.966455 0.802578 7 -0.32671 -0.32670 1.90647 1.90647 ... 0.500051 0.222372 0.499997 0.499949 0.777628 0.596814 0.516015 0.510199 0.250626 0.265893
4 008_lig.xyz CC(C)(C1=N[C@@H](c2ccccc2)[C@@H](c2ccccc2)O1)C... 12.880947 -3.747163 -5.190745 7 -0.33394 -0.33394 1.90009 1.90008 ... 0.499974 0.224356 0.499997 0.500026 0.775644 0.556734 0.271177 0.293158 0.156265 0.163763

5 rows × 238 columns

In [7]:
# Dataset 1, lit_ligs
name = "lit_ligs"

# Read one structure per table row, iterating in row order so that
# structure i corresponds to entry i of every property list below.
mols_1 = [ase.io.read(f"lit_xyz/{structure}") for structure in df_1["name"]]

# Every column except the two identifier columns becomes a per-structure
# property. No "units" entry is written: the units are not known here, and
# formatting None into that field would label every property with the
# literal text "None".
properties_1 = {
    key: {"target": "structure", "values": df_1[key].to_list()}
    for key in df_1.columns
    if key not in ("name", "c_smiles")
}

# Write the gzipped chemiscope input file for this dataset.
chemiscope.write_input(
    path=f"{name}-chemiscope.json.gz",
    frames=mols_1,
    properties=properties_1,
)
In [8]:
# Load the CSD-ligand table and preview it. The preview length is spelled
# out explicitly; 5 is also what a bare head() call returns.
df_2 = pd.read_csv("mc_csd.csv")
df_2.head(n=5)
Out[8]:
.dataframe tbody tr th:only-of-type { vertical-align: middle; } .dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; }
name c_smiles pc_1 pc_2 pc_3 nbo1 nbo2 onbo1 onbo2 cnbo1 ... foh2 foh3 foh4 foh5 foh6 bonhr onhr or pb_vol pb_vol_nh
0 ADALIB_Complex_23_Ligand_0_ON_Cu.xyz [H]N([H])C([H])(C(=O)[O-])C([H])([H])C(=O)[O-] -38.355599 13.393779 -31.300485 -0.27722 0.02803 1.97867 1.96293 -0.82745 ... 0.428158 0.007569 0.368579 0.571842 0.992431 0.978093 0.688112 0.731148 0.412376 0.412979
1 AFEHUP_Complex_1_Ligand_0_ON_Cu.xyz [H]c1c([H])c([H])c(C2=NC([H])(C([H])([H])C([H]... -10.718583 1.751250 15.576961 -0.46124 -0.17937 1.98022 1.90831 -0.72905 ... 0.525384 0.088731 0.403101 0.474616 0.911269 0.841914 0.572820 0.559357 0.289816 0.302757
2 AGOROE_Complex_18_Ligand_1_OO_Cu.xyz [H]c1c([H])c([H])c(C([H])([O-])C(=O)[O-])c([H]... -20.514131 3.477669 -0.729662 -0.26508 -0.29251 1.98011 1.98642 -0.80905 ... 0.335946 0.009630 0.269329 0.664054 0.990370 0.977005 0.535290 0.549776 0.318634 0.329300
3 AKEXUJ_Complex_0_Ligand_2_NN_Cu.xyz [H]c1nc2c(N=S(=O)([O-])c3c([H])c([H])c(C([H])(... -6.360036 5.150151 11.742726 -0.19325 -0.20986 1.92629 1.88851 -0.43034 ... 0.671608 0.037289 0.331732 0.328392 0.962711 0.963514 1.613252 1.637060 0.212371 0.218249
4 ALIPER_Complex_2_Ligand_0_NN_Cu.xyz [H]N([H])C(C([H])([H])[H])(C([H])([H])[H])C([H... -22.124139 6.105218 6.281952 -0.26595 -0.31697 1.95726 1.93270 -0.82367 ... 0.504936 0.063942 0.393897 0.495064 0.936058 0.889532 0.669182 0.683628 0.372106 0.390437

5 rows × 237 columns

In [9]:
# Dataset 2, csd_ligs
name = "csd_ligs"

# Read one structure per table row, iterating in row order so that
# structure i corresponds to entry i of every property list below.
mols_2 = [ase.io.read(f"csd_xyz/{structure}") for structure in df_2["name"]]

# Every column except the two identifier columns becomes a per-structure
# property. No "units" entry is written: the units are not known here, and
# formatting None into that field would label every property with the
# literal text "None".
properties_2 = {
    key: {"target": "structure", "values": df_2[key].to_list()}
    for key in df_2.columns
    if key not in ("name", "c_smiles")
}

# Write the gzipped chemiscope input file for this dataset.
chemiscope.write_input(
    path=f"{name}-chemiscope.json.gz",
    frames=mols_2,
    properties=properties_2,
)
In [12]:
# Build an in-notebook chemiscope widget from the Dataset 2 structures and
# properties (both are defined in the csd_ligs cell, which must run first).
widget = chemiscope.show(mols_2, properties_2)
In [13]:
# Bare expression: the widget is rendered as this cell's output.
widget
Out[13]:
ChemiscopeWidget(value='{"meta": {"name": " "}, "structures": [{"size": 14, "names": ["C", "C", "C", "C", "H",…
In [26]:
# Load the "oa" prediction table and preview it. The preview length is
# spelled out explicitly; 5 is also what a bare head() call returns.
df_oa = pd.read_csv("mc_preds_oa.csv")
df_oa.head(n=5)
Out[26]:
.dataframe tbody tr th:only-of-type { vertical-align: middle; } .dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; }
name c_smiles ddg prediction_train uncertainty_train prediction_loo uncertainty_loo
0 003_lig.xyz CC(C)(C)[C@H]1COC(C2(C3=N[C@@H](C(C)(C)C)CO3)C... 0.732764 0.749866 0.128353 0.752067 0.133444
1 001_lig.xyz CC(C)(C)[C@H]1COC(C2(C3=N[C@@H](C(C)(C)C)CO3)C... 0.715907 0.667394 0.124937 0.662763 0.128766
2 071_lig.xyz CC(C)(C)[C@H]1COC(CC2=N[C@@H](C(C)(C)C)CO2)=N1 0.290050 0.459142 0.123971 0.470306 0.120008
3 045_lig.xyz CC(C)(C1=N[C@@H](C(C)(C)C)CO1)C1=N[C@@H](C(C)(... 0.749889 0.619766 0.125303 0.607693 0.125047
4 008_lig.xyz CC(C)(C1=N[C@@H](c2ccccc2)[C@@H](c2ccccc2)O1)C... 0.666819 0.390119 0.124535 0.366863 0.103764
In [27]:
# Predictions, oa
name = "oa_preds"

# Read one structure per table row, iterating in row order so that
# structure i corresponds to entry i of every property list below.
mols_oa = [ase.io.read(f"lit_xyz/{structure}") for structure in df_oa["name"]]

# Every column except the two identifier columns becomes a per-structure
# property. No "units" entry is written: the units are not known here, and
# formatting None into that field would label every property with the
# literal text "None".
properties_oa = {
    key: {"target": "structure", "values": df_oa[key].to_list()}
    for key in df_oa.columns
    if key not in ("name", "c_smiles")
}

# Write the gzipped chemiscope input file for this prediction set.
chemiscope.write_input(
    path=f"{name}-chemiscope.json.gz",
    frames=mols_oa,
    properties=properties_oa,
)
In [28]:
# Load the "cp" prediction table and preview it. The preview length is
# spelled out explicitly; 5 is also what a bare head() call returns.
df_cp = pd.read_csv("mc_preds_cp.csv")
df_cp.head(n=5)
Out[28]:
.dataframe tbody tr th:only-of-type { vertical-align: middle; } .dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; }
name c_smiles ddg prediction_train uncertainty_train prediction_loo uncertainty_loo
0 045_lig.xyz CC(C)(C1=N[C@@H](C(C)(C)C)CO1)C1=N[C@@H](C(C)(... 2.059538 2.071310 0.326476 2.074841 0.340917
1 047_lig.xyz CC(C)[C@H]1COC(C(C)(C)C2=N[C@@H](C(C)C)CO2)=N1 1.075533 1.230937 0.306142 1.243840 0.310797
2 044_lig.xyz CC(C)(C1=N[C@@H](c2ccccc2)CO1)C1=N[C@@H](c2ccc... 0.821359 0.915003 0.311781 0.927545 0.319021
3 018_lig.xyz CC(C)(C1=NCCO1)C1=N[C@@H](c2ccccc2)CO1 0.240232 0.153533 0.307419 0.145710 0.313557
4 019_lig.xyz CC(C)(C1=NC[C@@H](c2ccccc2)O1)C1=NC[C@@H](c2cc... 0.142885 0.815728 0.307219 0.877433 0.280022
In [29]:
# Predictions, cp
name = "cp_preds"

# Read one structure per table row, iterating in row order so that
# structure i corresponds to entry i of every property list below.
mols_cp = [ase.io.read(f"lit_xyz/{structure}") for structure in df_cp["name"]]

# Every column except the two identifier columns becomes a per-structure
# property. No "units" entry is written: the units are not known here, and
# formatting None into that field would label every property with the
# literal text "None".
properties_cp = {
    key: {"target": "structure", "values": df_cp[key].to_list()}
    for key in df_cp.columns
    if key not in ("name", "c_smiles")
}

# Write the gzipped chemiscope input file for this prediction set.
chemiscope.write_input(
    path=f"{name}-chemiscope.json.gz",
    frames=mols_cp,
    properties=properties_cp,
)
In [32]:
# Load the "cc" prediction table and preview it. The preview length is
# spelled out explicitly; 5 is also what a bare head() call returns.
df_cc = pd.read_csv("mc_preds_cc.csv")
df_cc.head(n=5)
Out[32]:
.dataframe tbody tr th:only-of-type { vertical-align: middle; } .dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; }
name c_smiles ddg prediction_train uncertainty_train prediction_loo uncertainty_loo
0 081_lig.xyz c1ccc(C[C@H]2COC(C3=N[C@@H](Cc4ccccc4)CO3)=N2)cc1 0.827255 0.832734 0.191842 0.833625 0.196477
1 072_lig.xyz C[C@H]1COC(C2=N[C@@H](C)CO2)=N1 0.946439 1.011898 0.189420 1.017055 0.193161
2 073_lig.xyz CCCCCC[C@H]1COC(C2=N[C@@H](CCCCCC)CO2)=N1 0.925534 0.789815 0.191780 0.775290 0.193762
3 074_lig.xyz CC(C)[C@H]1COC(C2=N[C@@H](C(C)C)CO2)=N1 1.679456 1.870811 0.199996 1.919626 0.200915
4 075_lig.xyz CC(C)C[C@H]1COC(C2=N[C@@H](CC(C)C)CO2)=N1 0.967820 1.106218 0.190640 1.118827 0.192910
In [33]:
# Predictions, cc
name = "cc_preds"

# Read one structure per table row, iterating in row order so that
# structure i corresponds to entry i of every property list below.
mols_cc = [ase.io.read(f"lit_xyz/{structure}") for structure in df_cc["name"]]

# Every column except the two identifier columns becomes a per-structure
# property. No "units" entry is written: the units are not known here, and
# formatting None into that field would label every property with the
# literal text "None".
properties_cc = {
    key: {"target": "structure", "values": df_cc[key].to_list()}
    for key in df_cc.columns
    if key not in ("name", "c_smiles")
}

# Write the gzipped chemiscope input file for this prediction set.
chemiscope.write_input(
    path=f"{name}-chemiscope.json.gz",
    frames=mols_cc,
    properties=properties_cc,
)
In [37]:
# Load the "da_f" prediction table and preview it. The preview length is
# spelled out explicitly; 5 is also what a bare head() call returns.
df_da_f = pd.read_csv("mc_preds_da_f.csv")
df_da_f.head(n=5)
Out[37]:
.dataframe tbody tr th:only-of-type { vertical-align: middle; } .dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; }
name c_smiles ddg prediction_train uncertainty_train prediction_loo uncertainty_loo
0 071_lig.xyz CC(C)(C)[C@H]1COC(CC2=N[C@@H](C(C)(C)C)CO2)=N1 0.601015 0.722513 0.337461 0.729974 0.343626
1 045_lig.xyz CC(C)(C1=N[C@@H](C(C)(C)C)CO1)C1=N[C@@H](C(C)(... 1.782003 0.852742 0.336311 0.808930 0.286655
2 061_lig.xyz Clc1cccc(Cl)c1/C=N/[C@@H]1CCCC[C@H]1/N=C/c1c(C... 1.232460 1.161735 0.343226 1.151536 0.351172
3 047_lig.xyz CC(C)[C@H]1COC(C(C)(C)C2=N[C@@H](C(C)C)CO2)=N1 0.587532 0.651846 0.341726 0.659048 0.349261
4 044_lig.xyz CC(C)(C1=N[C@@H](c2ccccc2)CO1)C1=N[C@@H](c2ccc... 0.274510 0.684712 0.338584 0.711307 0.335931
In [38]:
# Predictions, da_f
name = "da_f_preds"

# Read one structure per table row, iterating in row order so that
# structure i corresponds to entry i of every property list below.
mols_da_f = [
    ase.io.read(f"lit_xyz/{structure}") for structure in df_da_f["name"]
]

# Every column except the two identifier columns becomes a per-structure
# property. No "units" entry is written: the units are not known here, and
# formatting None into that field would label every property with the
# literal text "None".
properties_da_f = {
    key: {"target": "structure", "values": df_da_f[key].to_list()}
    for key in df_da_f.columns
    if key not in ("name", "c_smiles")
}

# Write the gzipped chemiscope input file for this prediction set.
chemiscope.write_input(
    path=f"{name}-chemiscope.json.gz",
    frames=mols_da_f,
    properties=properties_da_f,
)
In [39]:
# Build an in-notebook chemiscope widget from the da_f structures and
# properties (both are defined in the da_f_preds cell, which must run first).
# Note: this rebinds `widget`, replacing the Dataset 2 widget created earlier.
widget = chemiscope.show(mols_da_f, properties_da_f)
In [40]:
# Bare expression: the widget is rendered as this cell's output.
widget
Out[40]:
ChemiscopeWidget(value='{"meta": {"name": " "}, "structures": [{"size": 45, "names": ["C", "N", "C", "H", "C",…
In [ ]: