In [15]:

import ase
from ase import io
import chemiscope
import pandas as pd

In [16]:

# Load the descriptor table for dataset 1 from CSV and preview its first rows.
df_1 = pd.read_csv(filepath_or_buffer="mc_lit.csv")
df_1.head(n=5)

Out[16]:

.dataframe tbody tr th:only-of-type { vertical-align: middle; } .dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; }

	name	tsne1	tsne2	class	c_smiles	nbo1	nbo2	onbo1	onbo2	cnbo1	...	foh2	foh3	foh4	foh5	foh6	bonhr	onhr	or	pb_vol	pb_vol_nh
0	001_lig.xyz	1.552168	1.878821	13-BOX	CC(C)(C)[C@H]1COC(C2(C3=N[C@@H](C(C)(C)C)CO3)C...	-0.32664	-0.32596	1.90664	1.90604	-0.53385	...	0.492119	0.202600	0.471320	0.507881	0.797400	0.619431	0.533248	0.532310	0.232958	0.246693
1	002_lig.xyz	-2.956746	1.180600	IndaBOX	c1ccc2c(c1)C[C@H]1OC(CC3=N[C@H]4c5ccccc5C[C@H]...	-0.33453	-0.33453	1.90711	1.90711	-0.52714	...	0.499996	0.262131	0.499996	0.500004	0.737869	1.641139	3.322038	3.216236	0.229697	0.239004
2	003_lig.xyz	1.400345	2.463032	13-BOX	CC(C)(C)[C@H]1COC(C2(C3=N[C@@H](C(C)(C)C)CO3)C...	-0.32702	-0.32700	1.90651	1.90650	-0.53782	...	0.499996	0.224905	0.500006	0.500004	0.775095	0.588768	0.503643	0.495704	0.255202	0.269287
3	004_lig.xyz	3.257014	1.687758	13-BOX	CCC(CC)(C1=N[C@@H](C(C)C)CO1)C1=N[C@@H](C(C)C)CO1	-0.32393	-0.32386	1.90309	1.90389	-0.54841	...	0.455473	0.151901	0.542472	0.544527	0.848099	0.758725	0.899208	0.878992	0.247297	0.262024
4	005_lig.xyz	2.307831	2.299776	13-BOX	CC(C1=N[C@@H](C(C)(C)C)CO1)C1=N[C@@H](C(C)(C)C...	-0.32938	-0.32914	1.90717	1.90717	-0.53902	...	0.485473	0.231617	0.495657	0.514527	0.768383	0.601081	0.507730	0.500018	0.262485	0.278259

5 rows × 237 columns

In [17]:

# Dataset 1, lit_ligs
# Build a chemiscope input file from df_1: one structure per row, with the
# remaining columns attached as per-structure properties.
name = "lit_ligs"

# Read the geometries in row order so that structure i lines up with the i-th
# value of every property list below.
mols_1 = [ase.io.read(f"lit_xyz/{structure}") for structure in df_1["name"]]

# "name" and "c_smiles" are not exported as properties; every other column is.
# No "units" entry is written: the original code always had units = None and
# formatted it with an f-string, which stored the literal string "None" as the
# unit of every property.
skipped_columns = ("name", "c_smiles")
properties_1 = {
    f"{key}": {
        "target": "structure",
        "values": df_1[f"{key}"].to_list(),
    }
    for key in df_1.keys()
    if key not in skipped_columns
}

chemiscope.write_input(
    path=f"Chemiscope_{name}.json.gz",
    frames=mols_1,
    properties=properties_1,
)

In [18]:

# Load the descriptor table for dataset 2 from CSV and preview its first rows.
df_2 = pd.read_csv(filepath_or_buffer="mc_csd.csv")
df_2.head(n=5)

Out[18]:

.dataframe tbody tr th:only-of-type { vertical-align: middle; } .dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; }

	name	tsne1	tsne2	c_smiles	nbo1	nbo2	onbo1	onbo2	cnbo1	cnbo2	...	foh2	foh3	foh4	foh5	foh6	bonhr	onhr	or	pb_vol	pb_vol_nh
0	ADALIB_Complex_23_Ligand_0_ON_Cu.xyz	2.300118	5.822061	[H]N([H])C([H])(C(=O)[O-])C([H])([H])C(=O)[O-]	-0.27722	0.02803	1.97867	1.96293	-0.82745	-0.75102	...	0.428158	0.007569	0.368579	0.571842	0.992431	0.978093	0.688112	0.731148	0.412376	0.412979
1	AFEHUP_Complex_1_Ligand_0_ON_Cu.xyz	4.482296	4.553890	[H]c1c([H])c([H])c(C2=NC([H])(C([H])([H])C([H]...	-0.46124	-0.17937	1.98022	1.90831	-0.72905	-0.57050	...	0.525384	0.088731	0.403101	0.474616	0.911269	0.841914	0.572820	0.559357	0.289816	0.302757
2	AGOROE_Complex_18_Ligand_1_OO_Cu.xyz	4.027641	4.484736	[H]c1c([H])c([H])c(C([H])([O-])C(=O)[O-])c([H]...	-0.26508	-0.29251	1.98011	1.98642	-0.80905	-0.92312	...	0.335946	0.009630	0.269329	0.664054	0.990370	0.977005	0.535290	0.549776	0.318634	0.329300
3	AKEXUJ_Complex_0_Ligand_2_NN_Cu.xyz	4.372118	4.152382	[H]c1nc2c(N=S(=O)([O-])c3c([H])c([H])c(C([H])(...	-0.19325	-0.20986	1.92629	1.88851	-0.43034	-0.93295	...	0.671608	0.037289	0.331732	0.328392	0.962711	0.963514	1.613252	1.637060	0.212371	0.218249
4	ALIPER_Complex_2_Ligand_0_NN_Cu.xyz	3.347076	4.930252	[H]N([H])C(C([H])([H])[H])(C([H])([H])[H])C([H...	-0.26595	-0.31697	1.95726	1.93270	-0.82367	-0.53245	...	0.504936	0.063942	0.393897	0.495064	0.936058	0.889532	0.669182	0.683628	0.372106	0.390437

5 rows × 236 columns

In [19]:

# Dataset 2, csd_ligs
# Build a chemiscope input file from df_2: one structure per row, with the
# remaining columns attached as per-structure properties.
name = "csd_ligs"

# Read the geometries in row order so that structure i lines up with the i-th
# value of every property list below.
mols_2 = [ase.io.read(f"csd_xyz/{structure}") for structure in df_2["name"]]

# "name" and "c_smiles" are not exported as properties; every other column is.
# No "units" entry is written: the original code always had units = None and
# formatted it with an f-string, which stored the literal string "None" as the
# unit of every property.
skipped_columns = ("name", "c_smiles")
properties_2 = {
    f"{key}": {
        "target": "structure",
        "values": df_2[f"{key}"].to_list(),
    }
    for key in df_2.keys()
    if key not in skipped_columns
}

chemiscope.write_input(
    path=f"Chemiscope_{name}.json.gz",
    frames=mols_2,
    properties=properties_2,
)

In [20]:

# Create a chemiscope viewer for dataset 2 from the structures (mols_2) and
# the per-structure properties (properties_2) built in the previous cell.
widget = chemiscope.show(mols_2, properties_2)

In [21]:

# Bare last expression: renders the chemiscope widget as this cell's output.
widget

Out[21]:

ChemiscopeWidget(value='{"meta": {"name": " "}, "structures": [{"size": 14, "names": ["C", "C", "C", "C", "H",…

In [ ]: