###################################
# P. Sen, April 2021
#
# This script reads the structure and composition
# of a set of compounds given in json format
# as reported in c2db, and uses DScribe to calculate
# ACSF and the sine matrix. All transition metals are replaced by
# Ra (Z=88). All group 1, 13, 14, 15, 16 and 17 elements are replaced
# by H, B, C, N, O and F respectively. ACSF is calculated
# on the TM elements. The relevant lines will make clear which
# components of ACSF are calculated.
# NOTE: in this version the ACSF part is commented out; only the
# eigenvalues of the sine matrix are printed.
#####################################

from ase import Atoms
from ase.io import read
from ase.formula import Formula

from numpy import linalg as LA

from pathlib import Path

# NumPy and DScribe descriptor imports
import numpy as np
from ase.build import molecule
from dscribe.descriptors import ACSF
from dscribe.descriptors import SineMatrix
# Array print format: fixed-point floats, 9 wide with 5 decimals, and no
# size threshold above which arrays are summarised.
np.set_printoptions(threshold=np.inf, formatter={'float': lambda x: "{0:9.5f}".format(x)})


def header_line(n_sine=12, n_acsf=0):
    """Return the tab-separated header row of the output table.

    The row is 'id', then ``n_acsf`` columns 'acsf1', 'acsf2', ... and then
    ``n_sine`` columns 'smeig1', 'smeig2', ...  There is no trailing tab and
    no newline.

    n_sine -- number of sine-matrix eigenvalue columns (12 by default).
    n_acsf -- number of ACSF columns; 0 by default because the ACSF output
              is disabled (the disabled header loop used 825).
    """
    columns = ['id']
    columns.extend('acsf' + str(i + 1) for i in range(n_acsf))
    columns.extend('smeig' + str(i + 1) for i in range(n_sine))
    return '\t'.join(columns)


# Emit the header as one complete line.  Previously 'id' was followed by a
# newline and the column names were left unterminated, so the first data row
# was appended to the header line.
print(header_line())

# Input files.
# For compounds from Mounet et al. use the pattern '*conv.json' instead:
#pathlist = Path("./").glob('*conv.json')
# For compounds from cmr.  Sorted so that the output rows come out in a
# reproducible order (glob itself does not promise any ordering).
pathlist = sorted(Path("./").glob('*.json'))

# Size the sine matrix is built for; larger cells are skipped below.
# The header printed above has 12 'smeig' columns.
N_ATOMS_MAX = 12

# The descriptor does not depend on the compound, so it is built once
# instead of once per file.
sm_desc = SineMatrix(n_atoms_max=N_ATOMS_MAX, permutation="sorted_l2", sparse=False, flatten=False)

for item in pathlist:
    filename = str(item)
    # Identifier printed in the first column: the file name without the
    # '.json' suffix.  For the output json files from gpaw for compounds
    # taken from Mounet et al. use `name = filename[:-10]` instead.
    name = item.stem

    atoms = read(filename)
    Z_numbers = atoms.numbers

    n_atoms = len(Z_numbers)
    if n_atoms > N_ATOMS_MAX:
        # DScribe rejects systems with more atoms than n_atoms_max, which
        # used to abort the whole run right after this message.  Report the
        # compound (as a '#' line) and move on to the next file instead.
        print('# atoms= ', n_atoms, '- skipping', name)
        continue

    # Sine matrix of the (unmodified) structure and its eigenvalues.
    sine_matrix = sm_desc.create(atoms)
    eigenvalues = LA.eigvalsh(sine_matrix)

    # ----------------------------------------------------------------------
    # Disabled ACSF pipeline, kept for reference.  Prose is flush with the
    # '#'; disabled code is indented by four extra spaces.
    # ----------------------------------------------------------------------
    #
    # Composition, from the part of the file name before the first '-':
    #
    #     formula = name.split("-")[0]
    #     compound = Formula(formula).count()
    #
    # The list positions stores positions (indices in atoms.numbers) of the
    # 'metal' atoms in the compound:
    #
    #     positions = []
    #     for i in range(len(Z_numbers)):
    #         if ((Z_numbers[i] >= 11) and (Z_numbers[i] <= 13)) \
    #                 or ((Z_numbers[i] >= 19) and (Z_numbers[i] <= 31)) \
    #                 or ((Z_numbers[i] >= 37) and (Z_numbers[i] <= 51)) \
    #                 or (Z_numbers[i] == 55) or (Z_numbers[i] == 56) \
    #                 or ((Z_numbers[i] >= 72) and (Z_numbers[i] <= 83)):
    #             positions.append(i)
    #
    # temp_set will contain all the Z values appearing in the compound only
    # once.  It is made into a list, and a non-physical element is added so
    # that the index remains in range while removing elements from temp_list:
    #
    #     temp_set = set(Z_numbers)
    #     temp_list = list(temp_set)
    #     temp_list = temp_list + [1000]
    #
    # Replace all the metal atoms by Ra (Z=88); Z = 15, 33 become 7,
    # Z = 16, 34, 52 become 8 and Z = 17, 35, 53 become 9.
    # Restrict species to
    # species = [1,5,6,7,8,9,15,16,17,33,34,35,52,53,88]
    #
    #     Z_changed = Z_numbers
    #     for i in range(len(Z_numbers)):
    #         if ((Z_numbers[i] >= 11) and (Z_numbers[i] <= 13)) \
    #                 or ((Z_numbers[i] >= 19) and (Z_numbers[i] <= 31)) \
    #                 or ((Z_numbers[i] >= 37) and (Z_numbers[i] <= 51)) \
    #                 or (Z_numbers[i] == 55) or (Z_numbers[i] == 56) \
    #                 or ((Z_numbers[i] >= 72) and (Z_numbers[i] <= 83)):
    #             Z_changed[i] = 88
    #         elif (Z_numbers[i] == 7) or (Z_numbers[i] == 15) or \
    #                 (Z_numbers[i] == 33):
    #             Z_changed[i] = 7
    #         elif (Z_numbers[i] == 8) or (Z_numbers[i] == 16) or \
    #                 (Z_numbers[i] == 34) or (Z_numbers[i] == 52):
    #             Z_changed[i] = 8
    #         elif (Z_numbers[i] == 9) or (Z_numbers[i] == 17) or \
    #                 (Z_numbers[i] == 35) or (Z_numbers[i] == 53):
    #             Z_changed[i] = 9
    #     atoms.numbers = Z_changed
    #
    # Species handed to the ACSF descriptor (alternatives that were tried,
    # the last one being the most recent):
    #
    #     species = ["H", "C", "O", "N", "F", "Al", "P", "S", "Cl",
    #                "Ca", "Sc", "Ti", "V", "Cr", "Mn", "Fe", "Co", "Ni", "Cu", "Ga", "Se", "Br",
    #                "Sr", "Y", "Zr", "Nb", "Mo", "Ru", "Rh", "Pd", "Ag", "In", "Sb", "Te", "I",
    #                "Cs", "Hf", "Ta", "W", "Re", "Os", "Ir", "Pt", "Au", "Hg", "Pb", "Bi"]
    #     species = [1,5,6,7,8,9,15,16,17,33,34,35,52,53,88]
    #     species = [1,5,6,7,8,9,88]
    #
    # Parameters for ACSF:
    #
    #     g2_params=[[0.01, 2], [0.1,2], [1,2], [0.01,6], [0.1,6], [1,6]] # eta/Rs
    #     g3_params=[1] #kappa
    #     g4_params=[[0.01, 2, 1], [0.1, 2, 1], [1, 2, 1], [0.01, 2, -1], [0.1, 2, -1], [1, 2, -1]] # eta/zeta/lambda
    #     g5_params=[[1, 1, 0.111111]] # eta/zeta/lambda
    #
    # Create the descriptor and evaluate it on the 'metal' atoms only:
    #
    #     acsf_desc = ACSF(species=species, g2_params=g2_params,
    #                      g3_params=g3_params,
    #                      g4_params=g4_params,
    #                      g5_params=g5_params,
    #                      rcut=7.0, periodic=True)
    #     length = acsf_desc.get_number_of_features()
    #     acsf = acsf_desc.create(atoms, positions=positions)
    #
    # Average the ACSF vectors over the 'metal' atoms:
    #
    #     sum = [0.0 for col in range(length)]
    #     for i in range(len(acsf)):
    #         tmp = acsf[i]
    #         sum += tmp
    #     acsf_av = sum/(len(acsf))
    #
    # Output row of the ACSF variant:
    #
    #     print(name, len(positions), *acsf_av, *eigenvalues, sep= '\t')
    # ----------------------------------------------------------------------

    # One tab-separated row per compound: id followed by the eigenvalues.
    # NOTE(review): the eigenvalues are unpacked into individual NumPy
    # scalars here, and print options presumably only apply to arrays, so
    # precision=5 may have no effect on this output - confirm whether
    # 5-digit rounding was intended before relying on it.
    with np.printoptions(precision=5):
        print(name, *eigenvalues, sep='\t')

