import numpy as np
import pandas as pd
from sklearn import metrics
from sklearn.manifold import TSNE
from sklearn.preprocessing import StandardScaler
import ase.io
import chemiscope
from pymatgen.core import Structure, Lattice, Composition
import warnings
warnings.filterwarnings('ignore')
# Read the gzip-compressed JSON dataset into a DataFrame, then echo it so the
# notebook renders a preview of the table.
dataset_path = 'topoclass.json.gz'
df = pd.read_json(dataset_path)
df
ID | subset | MAT_TYPE | TQC_TYPE | structure | MP_ID | ICSD_ID | AtomicOrbitals|HOMO_character | AtomicOrbitals|HOMO_element | AtomicOrbitals|HOMO_energy | ... | BondFractions|Ho - La bond frac. | BondFractions|Au - Pd bond frac. | BondFractions|Dy - Hg bond frac. | BondFractions|Er - Yb bond frac. | BondFractions|Hg - Tm bond frac. | BondFractions|Er - Hg bond frac. | BondFractions|Be - Sc bond frac. | BondFractions|Hg - Tb bond frac. | BondFractions|Be - Lu bond frac. | BondFractions|Er - Y bond frac. | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | MAT00026906 | M/T | TI | None | {'@module': 'pymatgen.core.structure', '@class... | mp-10008 | NaN | 1 | 20 | -0.141411 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0.0 | 0 | 0.0 | 0 |
1 | MAT00020270 | M/T | TrI | None | {'@module': 'pymatgen.core.structure', '@class... | mp-1001604 | NaN | 2 | 16 | -0.261676 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0.0 | 0 | 0.0 | 0 |
2 | MAT00015322 | M/T | HSLSM | None | {'@module': 'pymatgen.core.structure', '@class... | mp-1001605 | NaN | 4 | 59 | -0.155138 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0.0 | 0 | 0.0 | 0 |
3 | MAT00025094 | M/T | TCI | None | {'@module': 'pymatgen.core.structure', '@class... | mp-1001844 | NaN | 2 | 7 | -0.266297 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0.0 | 0 | 0.0 | 0 |
4 | MAT00025479 | M/T | HSLSM | None | {'@module': 'pymatgen.core.structure', '@class... | mp-1002220 | NaN | 2 | 51 | -0.185623 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0.0 | 0 | 0.0 | 0 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
35603 | MAT00038674 | M&T | TrI | LCEBR | {'@module': 'pymatgen.core.structure', '@class... | mp-696736 | 28541.0 | 2 | 9 | -0.415606 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0.0 | 0 | 0.0 | 0 |
35604 | MAT00038675 | M&T | TrI | LCEBR | {'@module': 'pymatgen.core.structure', '@class... | mp-707276 | 20730.0 | 2 | 7 | -0.266297 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0.0 | 0 | 0.0 | 0 |
35605 | MAT00038676 | M&T | TrI | LCEBR | {'@module': 'pymatgen.core.structure', '@class... | mp-707334 | 67509.0 | 2 | 8 | -0.338381 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0.0 | 0 | 0.0 | 0 |
35606 | MAT00038677 | M&T | TrI | LCEBR | {'@module': 'pymatgen.core.structure', '@class... | mp-707897 | 59327.0 | 2 | 8 | -0.338381 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0.0 | 0 | 0.0 | 0 |
35607 | MAT00038678 | M&T | TrI | LCEBR | {'@module': 'pymatgen.core.structure', '@class... | mp-9198 | 79005.0 | 2 | 15 | -0.206080 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0.0 | 0 | 0.0 | 0 |
35608 rows × 4717 columns
# Integer code assigned to each material-type label (used for colouring and
# for the binary "TrI vs. everything else" split further down).
classes = {"TrI": 0, "NTM": 1, "TI": 2, "TCI": 3, "HSPSM": 4, "HSLSM": 5}

# Names of the DataFrame columns used in this analysis.
mpe = "MaximumPackingEfficiency|max packing efficiency"
fpv = "ValenceOrbital|frac p valence electrons"
deltaH = "Miedema|Miedema_deltaH_inter"
mat_type = "MAT_TYPE"

# Standardise the two descriptors, then embed them in two dimensions with
# t-SNE. random_state is fixed so the embedding is reproducible.
X = df[[mpe, fpv]].values
X_scaled = StandardScaler().fit_transform(X)
tsne = TSNE(n_components=2, init='pca', random_state=0)
X_tsne = tsne.fit_transform(X_scaled)
df["tSNE_1"] = X_tsne[:, 0]
df["tSNE_2"] = X_tsne[:, 1]

# Binary task: is the material labelled "TrI"?  The "classifier" is simply
# the sign of the first t-SNE coordinate.
actual = (df[mat_type] == "TrI")
predicted = (df["tSNE_1"] > 0)

# Pin the label order explicitly so the matrix is always 2x2 and row/column 0
# is the negative class (False), matching the two display labels below even
# if one class happens to be absent.
confusion_matrix = metrics.confusion_matrix(
    actual, predicted, labels=[False, True]
)

# scikit-learn convention: rows are the true labels, columns the predictions,
# both ordered [False, True].  With "TrI" as the positive class:
#   TN = confusion_matrix[0, 0]
#   FP = confusion_matrix[0, 1]
#   FN = confusion_matrix[1, 0]
#   TP = confusion_matrix[1, 1]
# or equivalently: TN, FP, FN, TP = confusion_matrix.ravel()

# NOTE(review): the negative class contains every label other than "TrI"
# (e.g. "TI", "TCI", "HSLSM"), not only "NTM" -- confirm that "NTM" is the
# intended display label for it.
metrics.ConfusionMatrixDisplay(
    confusion_matrix=confusion_matrix,
    display_labels=["NTM", "TrI"],
).plot(values_format='')
<sklearn.metrics._plot.confusion_matrix.ConfusionMatrixDisplay at 0x10deab7d0>
# Report the standard binary-classification scores for the t-SNE sign rule,
# in a fixed order: accuracy, precision, recall, F1.
for score_label, score_fn in (
    ("Accuracy: ", metrics.accuracy_score),
    ("Precision:", metrics.precision_score),
    ("Recall: ", metrics.recall_score),
    ("F1: ", metrics.f1_score),
):
    print(score_label, score_fn(actual, predicted))
Accuracy: 0.7575544821388452 Precision: 0.9218279209909608 Recall: 0.6955331958068455 F1: 0.7928494301139772
# Build one ASE Atoms object per material, attaching the descriptors, the
# t-SNE coordinates and the class codes as per-structure properties in
# `atoms.info`.
#
# Only the columns that are actually read are iterated: iterrows() builds a
# Series for every row, and the full table has several thousand columns, so
# iterating the whole frame would materialise all of them for each row.
frame_columns = ["structure", mpe, fpv, deltaH, "tSNE_1", "tSNE_2", mat_type]

frames = []
for ind, row in df[frame_columns].iterrows():
    # The "structure" column holds the dict form of a pymatgen Structure.
    struc = Structure.from_dict(row["structure"])
    atoms = struc.to_ase_atoms()
    atoms.info = {
        "mat_id": ind,
        "mpe": row[mpe],
        "fpv": row[fpv],
        "deltaH": row[deltaH],
        # NOTE(review): the sign of tSNE_1 is flipped relative to the
        # DataFrame column (tSNE_2 is not) -- presumably to mirror the map
        # horizontally; confirm this is intentional.
        "tSNE_1": -row["tSNE_1"],
        "tSNE_2": row["tSNE_2"],
        # Binary code: 0 for "TrI", 1 for every other class.
        "mat_type_2": int(classes[row[mat_type]] > 0),
        # Full multi-class code as defined in `classes`.
        "mat_type_5": classes[row[mat_type]],
    }
    frames.append(atoms)
# Default view: t-SNE coordinates on the axes, coloured by the binary class
# code, using the viridis palette.
settings = chemiscope.quick_settings(x="tSNE_1", y="tSNE_2", color="mat_type_2")
settings["map"]["palette"] = "viridis"

# Extract the per-structure properties once and reuse them for both the
# inline viewer and the exported file instead of recomputing them.
properties = chemiscope.extract_properties(frames)

# Interactive viewer inside the notebook.
chemiscope.show(
    frames=frames,
    properties=properties,
    meta=dict(name="Topological classification"),
    settings=settings,
)

# Same dataset written to disk so it can be opened in the standalone viewer.
chemiscope.write_input(
    "topoclass.chemiscope.json",
    frames=frames,
    properties=properties,
    meta=dict(name="Topological classification"),
    settings=settings,
)