###############################################################################
# Author : Arijit Dutta
# Date : 07-03-2020 
#
# Extended & modified by Prasenjit Sen
# Date : 1 May, 2020
#
# This script reads json files for a set of materials, and
# calculates features that are configuration average atomic properties,
# stoichiometric features etc.
###############################################################################

import statistics
import sys
from math import sqrt, log

import ase.db # ASE library: https://wiki.fysik.dtu.dk/ase/about.html
import matplotlib.pyplot as plt # For plotting things
import numpy as np
from ase import Atoms
from ase.formula import Formula

# The formula of every compound that yields a feature row is collected in
# `compounds_list`; the list is written to the "MAE-compounds" file at the
# end of the script. The output file is opened (and truncated) here.

compounds_list = list()
compounds = open("MAE-compounds", mode='w')

def chkList(lst):
    """Tell whether all entries of a sequence are equal.

    Parameters
    ----------
    lst : sequence
        Values to compare; every entry is compared with the first one
        using ``==``.

    Returns
    -------
    bool
        True if ``lst`` is empty or all of its entries equal ``lst[0]``,
        False otherwise.
    """
    # An empty sequence has no differing entries.
    if len(lst) == 0:
        return True
    first = lst[0]
    return all(ele == first for ele in lst)

def Group(z):
    """Return the periodic-table group number assigned to atomic number z.

    Parameters
    ----------
    z : int
        Atomic number; values 1 to 86 are supported.

    Returns
    -------
    int
        Group number between 1 and 18. Atomic numbers 57 to 71 are all
        assigned to group 3.

    Raises
    ------
    ValueError
        If ``z`` is outside the supported range.
    """
    # Elements whose group does not follow from the arithmetic rules below.
    if z in (1, 3, 11, 55):
        return 1
    if z in (4, 12, 56):
        return 2
    if z in (2, 36, 54):
        return 18
    # Period 2 p-block: Z = 5..10 map onto groups 13..18.
    if 5 <= z <= 10:
        return int(z + 8)
    # Period 3 p-block: the group number coincides with Z.
    if 13 <= z <= 18:
        return int(z)
    # Periods 4 and 5 (their last elements, 36 and 54, are handled above).
    if (18 < z <= 35) or (37 <= z <= 53):
        return int(z % 18)
    if 57 <= z <= 71:
        return 3
    # Remainder of period 6, offset by the block of elements mapped to group 3.
    if 72 <= z <= 86:
        return int(z % 18 + 4)
    raise ValueError("Group() supports atomic numbers 1-86, got %r" % (z,))

def Period(z):
    """Return the periodic-table period (row) of atomic number z.

    Parameters
    ----------
    z : int
        Atomic number; values 1 to 86 are supported.

    Returns
    -------
    int
        Period number between 1 and 6.

    Raises
    ------
    ValueError
        If ``z`` is outside the supported range.
    """
    # (first Z, last Z) of periods 1..6, in order.
    period_bounds = ((1, 2), (3, 10), (11, 18), (19, 36), (37, 54), (55, 86))
    for period, (first, last) in enumerate(period_bounds, start=1):
        if first <= z <= last:
            return period
    raise ValueError("Period() supports atomic numbers 1-86, got %r" % (z,))

def Valence(group):
    """Return the valence-electron count assigned to a group number.

    Parameters
    ----------
    group : int
        Periodic-table group number, 1 to 18.

    Returns
    -------
    int
        ``group`` itself for groups 1 to 12, ``group % 10`` for groups
        13 to 17, and 0 for group 18.

    Raises
    ------
    ValueError
        If ``group`` is outside 1 to 18.
    """
    if 1 <= group <= 12:
        return int(group)
    if 13 <= group <= 17:
        return int(group % 10)
    if group == 18:
        return 0
    raise ValueError("Valence() supports groups 1-18, got %r" % (group,))


# Pauling electronegativity values, keyed by element symbol.
# The table runs from H to Rn; He, Ne and Ar have no entry.
electronegativity = {
    # Period 1
    'H': 2.20,
    # Period 2
    'Li': 0.98, 'Be': 1.57, 'B': 2.04, 'C': 2.55, 'N': 3.04, 'O': 3.44,
    'F': 3.98,
    # Period 3
    'Na': 0.93, 'Mg': 1.31, 'Al': 1.61, 'Si': 1.90, 'P': 2.19, 'S': 2.58,
    'Cl': 3.16,
    # Period 4
    'K': 0.82, 'Ca': 1.00, 'Sc': 1.36, 'Ti': 1.54, 'V': 1.63, 'Cr': 1.66,
    'Mn': 1.55, 'Fe': 1.83, 'Co': 1.88, 'Ni': 1.91, 'Cu': 1.90, 'Zn': 1.65,
    'Ga': 1.81, 'Ge': 2.01, 'As': 2.18, 'Se': 2.55, 'Br': 2.96, 'Kr': 3.00,
    # Period 5
    'Rb': 0.82, 'Sr': 0.95, 'Y': 1.22, 'Zr': 1.33, 'Nb': 1.6, 'Mo': 2.16,
    'Tc': 1.9, 'Ru': 2.2, 'Rh': 2.28, 'Pd': 2.20, 'Ag': 1.93, 'Cd': 1.69,
    'In': 1.78, 'Sn': 1.96, 'Sb': 2.05, 'Te': 2.1, 'I': 2.66, 'Xe': 2.60,
    # Period 6: Cs, Ba, La
    'Cs': 0.79, 'Ba': 0.89, 'La': 1.1,
    # Period 6: Ce to Lu
    'Ce': 1.12, 'Pr': 1.13, 'Nd': 1.14, 'Pm': 1.13, 'Sm': 1.17, 'Eu': 1.2,
    'Gd': 1.2, 'Tb': 1.1, 'Dy': 1.22, 'Ho': 1.23, 'Er': 1.24, 'Tm': 1.25,
    'Yb': 1.1, 'Lu': 1.27,
    # Period 6: Hf to Rn
    'Hf': 1.3, 'Ta': 1.5, 'W': 2.36, 'Re': 1.9, 'Os': 2.2, 'Ir': 2.2,
    'Pt': 2.28, 'Au': 2.54, 'Hg': 2.0, 'Tl': 1.62, 'Pb': 1.87, 'Bi': 2.02,
    'Po': 2.0, 'At': 2.2, 'Rn': 2.2,
}

# d-electron counts for metal atoms, keyed by element symbol; the remaining
# elements are set to zero. The table runs from H to Rn; He, Ne and Ar have
# no entry.
# NOTE(review): Tl, Pb, Bi and Po carry 10 here, whereas the lighter p-block
# elements (Ga, Ge, In, Sn, ...) carry 0 -- confirm this is intended.
d_electron_cnt = {
    # Period 1
    'H': 0,
    # Period 2
    'Li': 0, 'Be': 0, 'B': 0, 'C': 0, 'N': 0, 'O': 0, 'F': 0,
    # Period 3
    'Na': 0, 'Mg': 0, 'Al': 0, 'Si': 0, 'P': 0, 'S': 0, 'Cl': 0,
    # Period 4
    'K': 0, 'Ca': 0,
    'Sc': 1, 'Ti': 2, 'V': 3, 'Cr': 5, 'Mn': 5, 'Fe': 6, 'Co': 7, 'Ni': 8,
    'Cu': 10, 'Zn': 10,
    'Ga': 0, 'Ge': 0, 'As': 0, 'Se': 0, 'Br': 0, 'Kr': 0,
    # Period 5
    'Rb': 0, 'Sr': 0,
    'Y': 1, 'Zr': 2, 'Nb': 4, 'Mo': 5, 'Tc': 5, 'Ru': 7, 'Rh': 8, 'Pd': 10,
    'Ag': 10, 'Cd': 10,
    'In': 0, 'Sn': 0, 'Sb': 0, 'Te': 0, 'I': 0, 'Xe': 0,
    # Period 6: Cs, Ba, La
    'Cs': 0, 'Ba': 0, 'La': 1,
    # Period 6: Ce to Lu
    'Ce': 0, 'Pr': 0, 'Nd': 0, 'Pm': 0, 'Sm': 0, 'Eu': 0, 'Gd': 0, 'Tb': 0,
    'Dy': 0, 'Ho': 0, 'Er': 0, 'Tm': 0, 'Yb': 0, 'Lu': 0,
    # Period 6: Hf to Rn
    'Hf': 2, 'Ta': 3, 'W': 4, 'Re': 5, 'Os': 6, 'Ir': 7, 'Pt': 9, 'Au': 10,
    'Hg': 10,
    'Tl': 10, 'Pb': 10, 'Bi': 10, 'Po': 10, 'At': 0, 'Rn': 0,
}


# Connect to database: open the ASE database stored in the file 'c2db.db'.
# The path is relative, so the script has to be run from the directory that
# contains the file. `db` is queried further down to select the rows.
db = ase.db.connect('c2db.db')

############################################################################
# Select rows from the database based on certain criteria, e.g., to select
# stable magnetic compounds we pass 'hform<0, is_magnetic=True' as the first
# argument. The rows can be sorted with respect to some key, which is passed
# through the `sort` argument, e.g. sort='gap'.
#
# List of keys: Visit https://cmr.fysik.dtu.dk/c2db/c2db.html for more details
# ---------------------------------------------------------------------------- 
# COD_id
# D_cbm
# D_cbm_nosoc
# D_vbm
# D_vbm_nosoc
# ICSD_id
# age	Time since creation
# alphax
# alphay
# alphaz
# bse_binding
# c_11
# c_12
# c_22
# calculator	ASE-calculator name
# cbm
# cbm_gllbsc
# cbm_gllbsc_nosoc
# cbm_gw
# cbm_hse
# cbm_hse_nosoc
# cbm_nosoc
# cell_area
# charge	Charge
# class
# dE_NM
# deltaxc_gllbsc_nosoc
# dipz
# dir_gap
# dir_gap_gllbsc
# dir_gap_gllbsc_nosoc
# dir_gap_gw
# dir_gap_hse
# dir_gap_hse_nosoc
# dir_gap_nosoc
# dosef_nosoc
# dosef_soc
# dynamic_stability_level
# efermi_gw
# efermi_hse
# efermi_hse_nosoc
# ehull
# emass1
# emass1_nosoc
# emass2
# emass2_nosoc
# energy	Total energy	eV
# evac
# evac_gllbsc_nosoc
# evacdiff
# evacmean
# excitonmass1
# excitonmass2
# fmax	Maximum force	eV/Ang
# folder
# formula	Chemical formula
# gap
# gap_gllbsc
# gap_gllbsc_nosoc
# gap_gw
# gap_hse
# gap_hse_nosoc
# gap_nosoc
# has_invsymm
# hform
# hmass1
# hmass1_nosoc
# hmass2
# hmass2_nosoc
# id	Unique row ID
# is_dir_gap
# is_dir_gap_nosoc
# is_magnetic
# is_metallic
# is_metallic_nosoc
# dE_zx		Magnetic anisotropy energy between x and z axis 	meV/unit cell
# dE_zy		Magnetic anisotropy energy between y and z axis 	meV/unit cell
# magmom	Magnetic moment	au
# magstate
# mass	Mass	au
# minhessianeig
# monolayer_doi
# nkinds
# plasmafrequency_x
# plasmafrequency_y
# prototype
# smax	Maximum stress	eV/Ang^3
# smaxinplane
# spacegroup
# speed_of_sound_x
# speed_of_sound_y
# spin_orientation
# stoichiometry
# thermodynamic_stability_level
# uid
# unique_id	Random (unique) ID
# user	Username
# vbm
# vbm_gllbsc
# vbm_gllbsc_nosoc
# vbm_gw
# vbm_hse
# vbm_hse_nosoc
# vbm_nosoc
# volume	Volume of unit-cell	Ang^3
# warning
# work_function
# work_function_nosoc
# ------------------------------------------------------------------------- 

# Row selection. The active query picks AFM compounds with negative heat of
# formation for which magnetic-anisotropy data is flagged as available,
# sorted by 'gap'. The commented-out query is the FM variant.
#rows = db.select('hform<0, is_magnetic=True,  magstate=FM', sort='gap')
rows = db.select(
    'hform<0, is_magnetic=True,  magstate=AFM',
    has_asr_magnetic_anisotropy='True',
    sort='gap',
)

compound = {}

cnt = 0 # Counts the rows for which the relevant queries are available
tot = 0 # Counts the total no. of rows in the database slice

# Header line of the feature table (AFM layout).
# For FM compounds the table carries one extra column, 'mag_mom', directly
# after 'id': add a "%s \t " field after the leading "%s" of the format and
# 'mag_mom' after 'id' in the tuple.
print(
    (
        "%s  \t %s \t %s \t %s \t %s \t %s \t %s"
        "\t %s \t %s \t %s \t %s \t %s \t %s \t %s \t %s \t %s "
        "\t %s \t %s \t %s"
        "\t %s \t %s \t %s \t %s"
    ) % (
        'id', 'd_count', 'mean_Z', 'del_Z', 'mode_Z', 'L2_norm', 'L3_norm',
        'mean_grp', 'del_grp', 'mode_grp',
        'mean_period', 'del_period', 'mode_period',
        'mean_valelec', 'del_valelec', 'mode_valelec',
        'mean_electroneg', 'del_electroneg', 'mode_electroneg',
        'stoichio_entropy', 'E_zx', 'E_zy', 'cell_area',
    ),
    sep='\t',
)

def _weighted_mean(values, weights):
    """Return the sum of value*weight over the paired entries."""
    acc = 0.0
    for value, weight in zip(values, weights):
        acc += value*weight
    return acc


def _weighted_abs_deviation(values, weights, centre):
    """Return the sum of |value - centre|*weight over the paired entries."""
    acc = 0.0
    for value, weight in zip(values, weights):
        acc += abs(value - centre)*weight
    return acc


# Layout of one feature row (AFM variant); the field order matches the header
# printed before the loop.
# For FM compounds insert " \t %7.3f" after the leading "%s" and pass
# row.magmom right after row.uid.
ROW_FORMAT = (
    "%s \t %6d \t %7.3f \t %7.3f \t %7.3f \t %7.3f \t %7.3f "
    "\t %7.3f \t %7.3f \t %7.3f \t %7.3f \t %7.3f \t %7.3f \t %7.3f \t %7.3f \t %7.3f "
    "\t %7.3f \t %7.3f \t %7.3f"
    "\t %7.3f \t %10.5f \t %10.5f \t %10.5f"
)

for row in rows:
    # Build the features of one database row and print them.
    # If the data for some of the keys is not available, or an element is
    # missing from the lookup tables, the row is skipped; the reason is
    # reported on stderr so that stdout carries only the feature table.
    try:
        compound = Formula(row.formula).count()

        # Per-element quantities, all in the iteration order of `compound`.
        count = []
        Z_list = []
        group_list = []
        period_list = []
        valelec_list = []
        electroneg_list = []
        d_elect = []  # Number of d electrons (non-zero only in the table's metal entries)

        for element, n_atoms in compound.items():
            count.append(n_atoms)
            # get_atomic_numbers() returns an array with one entry for the
            # single atom; index it explicitly instead of calling int() on
            # the whole array (deprecated in NumPy).
            Z = int(Atoms(element, [(0, 0, 0)]).get_atomic_numbers()[0])
            grp = Group(Z)
            Z_list.append(Z)
            group_list.append(grp)
            period_list.append(Period(Z))
            valelec_list.append(Valence(grp))
            electroneg_list.append(electronegativity[element])
            d_elect.append(d_electron_cnt[element])

        # Atomic fraction of every element in the formula.
        total = sum(count)
        x = [n_atoms/total for n_atoms in count]

        # Stoichiometric features: sums of powers of the fractions and the
        # entropy -sum(x*log(x)).
        sum_x1 = 0.0
        sum_x2 = 0.0
        sum_x3 = 0.0
        stoichio_entropy = 0.0
        for frac in x:
            sum_x1 += frac
            sum_x2 += pow(frac, 2)
            sum_x3 += pow(frac, 3)
            stoichio_entropy += -frac*log(frac)
        Lp_norm = [sum_x1, sqrt(sum_x2), np.cbrt(sum_x3)]
        # NOTE(review): this line goes to stdout in between the feature rows
        # and looks like leftover debugging output; kept so that the printed
        # output is unchanged -- confirm whether it can be removed.
        print(x, sqrt(sum_x2))

        # d electron count: the largest table value among the elements.
        d_count = max(d_elect)

        # Fraction-weighted means.
        mean_Z = _weighted_mean(Z_list, x)
        mean_grp = _weighted_mean(group_list, x)
        mean_period = _weighted_mean(period_list, x)
        mean_valelec = _weighted_mean(valelec_list, x)
        mean_electroneg = _weighted_mean(electroneg_list, x)

        # Fraction-weighted mean absolute deviations from those means.
        # Each property is accumulated exactly once per element.
        del_Z = _weighted_abs_deviation(Z_list, x, mean_Z)
        del_grp = _weighted_abs_deviation(group_list, x, mean_grp)
        del_period = _weighted_abs_deviation(period_list, x, mean_period)
        del_valelec = _weighted_abs_deviation(valelec_list, x, mean_valelec)
        del_electroneg = _weighted_abs_deviation(
            electroneg_list, x, mean_electroneg)

        # Create the v_chem vector here: slot Z-1 holds the fraction of the
        # element with atomic number Z. The 85 slots cover Z = 1..85; a
        # larger Z raises IndexError and the row is skipped.
        v_chem = [0.0 for col in range(85)]
        for Z, frac in zip(Z_list, x):
            v_chem[Z - 1] = frac

        # "Mode" features: the property of the most abundant element. When
        # all elements are equally abundant there is no single mode and the
        # mean is used instead.
        if chkList(lst=x):
            mode_Z = mean_Z
            mode_grp = mean_grp
            mode_period = mean_period
            mode_valelec = mean_valelec
            mode_electroneg = mean_electroneg
        else:
            # Index of the largest fraction (the first one on ties).
            i_max = max(range(len(x)), key=x.__getitem__)
            mode_Z = Z_list[i_max]
            mode_grp = group_list[i_max]
            mode_period = period_list[i_max]
            mode_valelec = valelec_list[i_max]
            mode_electroneg = electroneg_list[i_max]

        print(ROW_FORMAT % (
            row.uid, d_count, mean_Z, del_Z, mode_Z, Lp_norm[1], Lp_norm[2],
            mean_grp, del_grp, mode_grp,
            mean_period, del_period, mode_period,
            mean_valelec, del_valelec, mode_valelec,
            mean_electroneg, del_electroneg, mode_electroneg,
            stoichio_entropy, row.dE_zx, row.dE_zy, row.cell_area))

        print(*v_chem, sep='\t')

        compounds_list.append(row.formula)
        cnt += 1
    except Exception as exc:
        # Best effort: skip the row, but say why instead of hiding the error.
        # KeyboardInterrupt/SystemExit are deliberately not caught.
        print("Skipping %s (%s: %s)"
              % (row.formula, type(exc).__name__, exc), file=sys.stderr)
    tot += 1

# Summary of how many rows were selected and how many produced a feature row.
print('\nNo. of stable magnetic compounds:\t',tot)
print('\nNo. of compounds with magnetic anisotropy data:\t',cnt)

# Write the collected formulae, one per line. Using the file object as a
# context manager closes it afterwards, so the data is flushed to disk even
# if a write fails part-way.
with compounds:
    for item in compounds_list:
        compounds.write("%s\n" % item)
###############################################################################
