In [1]:
import numpy as np
import pandas as pd
from sklearn import metrics
from sklearn.manifold import TSNE
from sklearn.preprocessing import StandardScaler
import ase.io
import chemiscope
from pymatgen.core import Structure, Lattice, Composition

import warnings
warnings.filterwarnings('ignore')
In [2]:
# Read the gzip-compressed JSON dataset into a DataFrame.
dataset_path = 'topoclass.json.gz'
df = pd.read_json(dataset_path)
In [3]:
# Show the loaded frame; the notebook repr truncates it to the first/last
# rows and columns (see the "..." markers in the output below).
df
Out[3]:
.dataframe tbody tr th:only-of-type { vertical-align: middle; } .dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; }
ID subset MAT_TYPE TQC_TYPE structure MP_ID ICSD_ID AtomicOrbitals|HOMO_character AtomicOrbitals|HOMO_element AtomicOrbitals|HOMO_energy ... BondFractions|Ho - La bond frac. BondFractions|Au - Pd bond frac. BondFractions|Dy - Hg bond frac. BondFractions|Er - Yb bond frac. BondFractions|Hg - Tm bond frac. BondFractions|Er - Hg bond frac. BondFractions|Be - Sc bond frac. BondFractions|Hg - Tb bond frac. BondFractions|Be - Lu bond frac. BondFractions|Er - Y bond frac.
0 MAT00026906 M/T TI None {'@module': 'pymatgen.core.structure', '@class... mp-10008 NaN 1 20 -0.141411 ... 0 0 0 0 0 0 0.0 0 0.0 0
1 MAT00020270 M/T TrI None {'@module': 'pymatgen.core.structure', '@class... mp-1001604 NaN 2 16 -0.261676 ... 0 0 0 0 0 0 0.0 0 0.0 0
2 MAT00015322 M/T HSLSM None {'@module': 'pymatgen.core.structure', '@class... mp-1001605 NaN 4 59 -0.155138 ... 0 0 0 0 0 0 0.0 0 0.0 0
3 MAT00025094 M/T TCI None {'@module': 'pymatgen.core.structure', '@class... mp-1001844 NaN 2 7 -0.266297 ... 0 0 0 0 0 0 0.0 0 0.0 0
4 MAT00025479 M/T HSLSM None {'@module': 'pymatgen.core.structure', '@class... mp-1002220 NaN 2 51 -0.185623 ... 0 0 0 0 0 0 0.0 0 0.0 0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
35603 MAT00038674 M&T TrI LCEBR {'@module': 'pymatgen.core.structure', '@class... mp-696736 28541.0 2 9 -0.415606 ... 0 0 0 0 0 0 0.0 0 0.0 0
35604 MAT00038675 M&T TrI LCEBR {'@module': 'pymatgen.core.structure', '@class... mp-707276 20730.0 2 7 -0.266297 ... 0 0 0 0 0 0 0.0 0 0.0 0
35605 MAT00038676 M&T TrI LCEBR {'@module': 'pymatgen.core.structure', '@class... mp-707334 67509.0 2 8 -0.338381 ... 0 0 0 0 0 0 0.0 0 0.0 0
35606 MAT00038677 M&T TrI LCEBR {'@module': 'pymatgen.core.structure', '@class... mp-707897 59327.0 2 8 -0.338381 ... 0 0 0 0 0 0 0.0 0 0.0 0
35607 MAT00038678 M&T TrI LCEBR {'@module': 'pymatgen.core.structure', '@class... mp-9198 79005.0 2 15 -0.206080 ... 0 0 0 0 0 0 0.0 0 0.0 0

35608 rows × 4717 columns

In [4]:
# Column holding the material-type label.
mat_type = "MAT_TYPE"

# Integer code assigned to each material-type label; "TrI" is 0, so a
# `code > 0` test singles out every other label.
classes = {
    label: code
    for code, label in enumerate(("TrI", "NTM", "TI", "TCI", "HSPSM", "HSLSM"))
}

# Names of the descriptor columns used further down.
mpe = "MaximumPackingEfficiency|max packing efficiency"
fpv = "ValenceOrbital|frac p valence electrons"
deltaH = "Miedema|Miedema_deltaH_inter"
In [5]:
# Feature matrix built from the two descriptor columns `mpe` and `fpv`.
X = df[[mpe, fpv]].to_numpy()

# Standardise each feature before embedding.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# 2-D t-SNE embedding; PCA initialisation and a fixed seed keep the result
# reproducible across runs.
tsne = TSNE(
    n_components=2,
    init='pca',
    random_state=0,
)
X_tsne = tsne.fit_transform(X_scaled)
In [6]:
# Store the two embedding coordinates as new columns of the frame.
df["tSNE_1"], df["tSNE_2"] = X_tsne[:, 0], X_tsne[:, 1]
In [7]:
# Binary task: is a material labelled "TrI" (positive class, True) or not?
actual = (df[mat_type] == "TrI")
# Threshold classifier: a positive first t-SNE coordinate is predicted "TrI".
predicted = (df["tSNE_1"] > 0)

# Rows are the true labels and columns the predicted ones. Pinning
# labels=[False, True] keeps the matrix 2x2 and in the order that
# display_labels below relies on, even if one class happens to be absent.
confusion_matrix = metrics.confusion_matrix(actual, predicted, labels=[False, True])

# With True ("TrI") as the positive class the quadrants are
#   [0, 0] = TN   [0, 1] = FP
#   [1, 0] = FN   [1, 1] = TP
TN, FP, FN, TP = confusion_matrix.ravel()

# "NTM" labels the False row/column, i.e. every material that is not "TrI".
metrics.ConfusionMatrixDisplay(confusion_matrix = confusion_matrix, display_labels = ["NTM", "TrI"]).plot(values_format='')
Out[7]:
<sklearn.metrics._plot.confusion_matrix.ConfusionMatrixDisplay at 0x10deab7d0>
No description has been provided for this image
In [8]:
# Report the standard binary-classification scores for the threshold
# classifier; the labels are padded so the values line up in the output.
for score_label, score_fn in (
    ("Accuracy: ", metrics.accuracy_score),
    ("Precision:", metrics.precision_score),
    ("Recall:   ", metrics.recall_score),
    ("F1:       ", metrics.f1_score),
):
    print(score_label, score_fn(actual, predicted))
Accuracy:  0.7575544821388452
Precision: 0.9218279209909608
Recall:    0.6955331958068455
F1:        0.7928494301139772
In [9]:
# Build one ASE Atoms object per material, attaching the scalar properties
# that are later extracted for the chemiscope map.
#
# Only the columns that are actually read are iterated: iterrows() on the
# full frame would box every one of its thousands of columns for each row.
export_columns = ["structure", mpe, fpv, deltaH, "tSNE_1", "tSNE_2", mat_type]

frames = []
for (
    ind,
    struc_dict,
    mpe_val,
    fpv_val,
    deltaH_val,
    tsne_1,
    tsne_2,
    label,
) in df[export_columns].itertuples(index=True, name=None):
    struc = Structure.from_dict(struc_dict)
    atoms = struc.to_ase_atoms()
    # Raises KeyError for a label that is missing from `classes`.
    class_code = classes[label]
    atoms.info = {
        # DataFrame index label of the row (not the "ID" column).
        "mat_id": ind,
        "mpe": mpe_val,
        "fpv": fpv_val,
        "deltaH": deltaH_val,
        # NOTE(review): exported with the opposite sign of df["tSNE_1"], the
        # column used by the `> 0` threshold classifier - presumably to
        # orient the map; confirm this is intended.
        "tSNE_1": -tsne_1,
        "tSNE_2": tsne_2,
        # Binary flag: 0 for "TrI" (code 0), 1 for every other label.
        "mat_type_2": int(class_code > 0),
        "mat_type_5": class_code,
        }
    frames.append(atoms)
In [10]:
# Map settings: embedding coordinates on the axes, points coloured by the
# binary material-type flag, drawn with the viridis palette.
settings = chemiscope.quick_settings(
    x="tSNE_1",
    y="tSNE_2",
    color="mat_type_2",
)
settings["map"]["palette"] = "viridis"

# Render the interactive viewer as this cell's output.
viewer_kwargs = {
    "frames": frames,
    "properties": chemiscope.extract_properties(frames),
    "meta": {"name": "Topological classification"},
    "settings": settings,
}
chemiscope.show(**viewer_kwargs)
Out[10]:
In [11]:
# Save the same dataset and settings to a standalone chemiscope input file.
output_path = "topoclass.chemiscope.json"
export_kwargs = {
    "frames": frames,
    "properties": chemiscope.extract_properties(frames),
    "meta": {"name": "Topological classification"},
    "settings": settings,
}
chemiscope.write_input(output_path, **export_kwargs)
In [ ]: