In [1]:

import ase
from ase import io
import numpy as np
import chemiscope
import pandas as pd

In [2]:

# Read the three per-dataset property tables. Each table is checked right
# after loading so that a file with missing entries stops the notebook early.
df_1 = pd.read_csv('I2-BBN-final.csv')
assert not df_1.isna().to_numpy().any()

df_2 = pd.read_csv('I2-BMe2-final.csv')
assert not df_2.isna().to_numpy().any()

df_3 = pd.read_csv('I2-BPh2-final.csv')
assert not df_3.isna().to_numpy().any()

# Before running the cells below, decompress the I2.tar.gz archive so that the
# structure files inside it can be accessed, e.g. by running: tar -zxvf I2.tar.gz
# (This can also be done from Python, of course.)

In [3]:

# Dataset 1, LPAS-BBN-FINAL: structures are read from I2/BBN/final/ and the
# chemiscope input is written to I2-BBN-final_chemiscope.json.gz.
name = "I2-BBN-final"

# One ase.io.read call per CSV row; each XYZ file is named after the row's
# 'name' entry, so the structures follow the same row order as the property
# values collected below.
mols_1 = [ase.io.read(f"I2/BBN/final/{structure}.xyz") for structure in df_1['name']]

# Build the chemiscope property dictionary: one per-structure entry for every
# column except the identifier/label columns.
properties_1 = {}
for key in df_1.keys():
    if key in ("Structure", "SMILES", "name"):
        continue

    # The checks run in this order, so a later match overrides an earlier one.
    units = None
    if "d" in key:
        # NOTE(review): substring test, so any column whose name contains a
        # lowercase "d" is labelled as a distance - confirm against the CSV header.
        units = "Å"
    if key == "Phi":
        units = "°"
    if "FEHA" in key or "FEPA" in key:
        units = "kcal/mol"

    entry = {"target": "structure", "values": df_1[key].to_list()}
    if units is not None:
        # Columns that match none of the rules above are written without units.
        entry["units"] = units
    properties_1[key] = entry

chemiscope.write_input(
    path=f"{name}_chemiscope.json.gz",
    frames=mols_1,
    properties=properties_1,
)

In [4]:

# Dataset 2, LPAS-BMe2-FINAL: structures are read from I2/BMe2/final/ and the
# chemiscope input is written to I2-BMe2-final_chemiscope.json.gz.
name = "I2-BMe2-final"

# One ase.io.read call per CSV row; each XYZ file is named after the row's
# 'name' entry, so the structures follow the same row order as the property
# values collected below.
mols_2 = [ase.io.read(f"I2/BMe2/final/{structure}.xyz") for structure in df_2['name']]

# Build the chemiscope property dictionary: one per-structure entry for every
# column except the identifier/label columns.
properties_2 = {}
for key in df_2.keys():
    if key in ("Structure", "SMILES", "name"):
        continue

    # The checks run in this order, so a later match overrides an earlier one.
    units = None
    if "d" in key:
        # NOTE(review): substring test, so any column whose name contains a
        # lowercase "d" is labelled as a distance - confirm against the CSV header.
        units = "Å"
    if key == "Phi":
        units = "°"
    if "FEHA" in key or "FEPA" in key:
        units = "kcal/mol"

    entry = {"target": "structure", "values": df_2[key].to_list()}
    if units is not None:
        # Columns that match none of the rules above are written without units.
        entry["units"] = units
    properties_2[key] = entry

chemiscope.write_input(
    path=f"{name}_chemiscope.json.gz",
    frames=mols_2,
    properties=properties_2,
)

In [5]:

# Dataset 3, LPAS-BPh2-FINAL: structures are read from I2/BPh2/final/ and the
# chemiscope input is written to I2-BPh2-final_chemiscope.json.gz.
name = "I2-BPh2-final"

# One ase.io.read call per CSV row; each XYZ file is named after the row's
# 'name' entry, so the structures follow the same row order as the property
# values collected below.
mols_3 = [ase.io.read(f"I2/BPh2/final/{structure}.xyz") for structure in df_3['name']]

# Build the chemiscope property dictionary: one per-structure entry for every
# column except the identifier/label columns.
properties_3 = {}
for key in df_3.keys():
    if key in ("Structure", "SMILES", "name"):
        continue

    # The checks run in this order, so a later match overrides an earlier one.
    units = None
    if "d" in key:
        # NOTE(review): substring test, so any column whose name contains a
        # lowercase "d" is labelled as a distance - confirm against the CSV header.
        units = "Å"
    if key == "Phi":
        units = "°"
    if "FEHA" in key or "FEPA" in key:
        units = "kcal/mol"

    entry = {"target": "structure", "values": df_3[key].to_list()}
    if units is not None:
        # Columns that match none of the rules above are written without units.
        entry["units"] = units
    properties_3[key] = entry

chemiscope.write_input(
    path=f"{name}_chemiscope.json.gz",
    frames=mols_3,
    properties=properties_3,
)

In [6]:

# Build an in-notebook chemiscope viewer from the dataset 1 structures and
# properties (mols_1 / properties_1 come from the dataset 1 cell above).
widget = chemiscope.show(mols_1, properties_1)

In [7]:

# Bare last expression: Jupyter renders the widget as this cell's output.
widget

Out[7]:

ChemiscopeWidget(value=None, data='{"meta": {"name": " "}, "structures": [{"size": 62, "names": ["O", "C", "O"…