Extract statistically significant information about effective interactions between Mo and Re atoms
Notebook setup
%matplotlib inline
import os
import sys
import copy
import pickle
from collections import Counter
from itertools import product, combinations
import numpy as np
import matplotlib.pyplot as plt
import h5py
from scipy.stats import norm, chi2
from scipy.optimize import fmin
from subprocess import Popen, PIPE
import multiprocessing as mp
%load_ext autoreload
%autoreload 2
sys.path.append('../src/utils')
from utils import *
# Directories (relative to the notebook) for the raw input data and for
# intermediate files written by this notebook.
path_raw = '../data/raw'
path_tmp = '../data/tmp'
Extract Mo and Re positions for each image
# Select the relevant atomic coordinates for further analysis.
# Different files store different atoms in centroids_1 and centroids_2 -> sort this out.
names = ['re05', 're55', 're78', 're95']  # hdf5 file name roots

# Separate atoms into Mo and Re based on an image-specific intensity threshold
# (one threshold per entry of `names`, in the same order).
intensity_thresh = [0.80, 0.45, 0.45, 0.45]
atom_pos, atom_ids, intensities = get_atom_positions(path_raw, names, intensity_thresh)
show_images(names, atom_pos, atom_ids, intensities)

# Save atom positions and ids for further spatial analysis.
# np.save does not create missing directories, so make sure the target exists.
os.makedirs(path_tmp, exist_ok=True)
for i, name in enumerate(names):
    # atom_pos / atom_ids are indexed by image position, matching `names`.
    np.save(os.path.join(path_tmp, name + "_pos.npy"), atom_pos[i])
    np.save(os.path.join(path_tmp, name + "_ids.npy"), atom_ids[i])
# Per-image distance thresholds (same order as `names`) used when collecting
# the target histograms.
distance_thresh = [45, 32, 30, 26]
target_hist, num_samples = collect_target_histograms(
    names, atom_pos, atom_ids, distance_thresh
)

# Sorted union of every configuration type found in any target histogram.
cfg_types = sorted({cfg for hist in target_hist.values() for cfg in hist})

# Symmetry number for each configuration.
conf_counts, conf_dict, trans_hist = config_symmetry_numbers(cfg_types)
# Alternative kept from the original notebook:
#trans_hist = make_transform_matrix(cfg_types, conf_dict)

# Probability of each symmetrized configuration for a given Re concentration.
x_re = rhenium_concentration(target_hist)

# Random configurations serve as the null hypothesis.
prob_conf = random_config_proba(names, conf_counts, x_re)

# Apply an uninformative multinomial (Jeffreys) prior to deal with zero counts
# caused by low sampling.
target_hist_adjusted = apply_prior(
    names, conf_counts, cfg_types, target_hist, prob_conf
)

# Estimated probability distribution of the target, used for model optimization.
gsv = get_probability_histogram(names, cfg_types, target_hist_adjusted)
# Variant using the raw (unadjusted) histograms:
#gsv = get_probability_histogram(names, cfg_types, target_hist)
Comparison of target (adjusted) and random (null) histograms of local configurations
# Plot the raw and prior-adjusted target histograms together with the
# random (null) configuration probabilities.
plot_histograms(
    names,
    num_samples,
    target_hist,
    target_hist_adjusted,
    prob_conf,
    cfg_types,
    x_re,
)