
import numpy as np
import matplotlib.pyplot as plt
import math
from scipy import linalg
import sys
import os


def getnumberofbasisfunctions(file):

    """
    This function extracts the number of basis functions from
    a gaussian output file given the path of the file.

    The file is scanned from the top; the first line containing the text
    'basis functions,' whose leading token is a non-zero integer supplies
    the result.

    Parameters
    ----------
    file : str
        Path of the Gaussian output file.

    Returns
    -------
    int
        The number of basis functions read from the file.

    Raises
    ------
    ValueError
        If the end of the file is reached without finding a usable
        'basis functions,' line, or if the leading token of a matching
        line is not an integer.
    """

    with open(file, 'r') as handle:
        for line in handle:
            if 'basis functions,' not in line:
                continue
            N_basis_functions = int(line.split()[0])
            # A zero count is treated as "not found yet" and the scan goes on.
            if N_basis_functions != 0:
                return N_basis_functions

    # Reaching the end of the file used to spin forever on empty reads.
    raise ValueError(
        "no 'basis functions,' line with a non-zero count found in %r" % (file,)
    )

def getnumberofbasisfunctionperfraction(file, N_atoms_per_fraction, N_basis_functions):

    """
    This function extracts the number of basis functions for each fraction
    of the molecule and outputs it as a list.

    The first 'Gross orbital populations:' section of the file is used.
    One line after the section title is skipped, then N_basis_functions
    rows are read. The second whitespace-separated token of a row is
    interpreted as an atom index when it parses as an integer; a row whose
    atom index is one past the last atom of the current fraction starts the
    next fraction.
    NOTE(review): this assumes rows that do not start a new atom carry a
    non-integer second token (e.g. an orbital label) -- confirm against the
    Gaussian output being parsed.

    Parameters
    ----------
    file : str
        Path of the Gaussian output file.
    N_atoms_per_fraction : sequence of int
        Number of atoms in each fraction, in the order the atoms appear.
    N_basis_functions : int
        Total number of rows to read from the section.

    Returns
    -------
    list of int
        Number of rows (basis functions) counted for each fraction.

    Raises
    ------
    ValueError
        If the file contains no 'Gross orbital populations:' section.
    IndexError
        If a row has fewer than two tokens, or more fraction boundaries are
        encountered than N_atoms_per_fraction describes.
    """

    with open(file, 'r') as handle:
        # iter(readline, '') stops at EOF and can be mixed safely with the
        # explicit readline() calls below.
        for line in iter(handle.readline, ''):
            if 'Gross orbital populations:' not in line:
                continue

            handle.readline()  # skip the line following the section title

            N_basis_functions_per_fraction = []
            fraction = 0
            last_atom_of_fraction = N_atoms_per_fraction[fraction]
            N_basis_fraction = 0

            for _ in range(N_basis_functions):
                fields = handle.readline().split()
                try:
                    atom_index = int(fields[1])
                except ValueError:
                    atom_index = None  # row does not start a new atom

                if atom_index == last_atom_of_fraction + 1:
                    # First row of the next fraction: close the current one.
                    N_basis_functions_per_fraction.append(N_basis_fraction)
                    N_basis_fraction = 1
                    fraction += 1
                    last_atom_of_fraction += N_atoms_per_fraction[fraction]
                else:
                    N_basis_fraction += 1

            N_basis_functions_per_fraction.append(N_basis_fraction)
            return N_basis_functions_per_fraction

    # Reaching the end of the file used to spin forever on empty reads.
    raise ValueError(
        "no 'Gross orbital populations:' section found in %r" % (file,)
    )



def getfockmatrix(file):

    """
    This function extracts the Fock matrix in atomic basis functions
    from a Gaussian output file (file). When multiple Fock matrices
    are printed only the last one will be returned.
    To print the Fock matrix in atomic base, the keyword iop(5/33=3)
    must have been included in the Gaussian input file.

    The matrix is read as a lower triangle printed in blocks of five
    columns: each block starts with one header line, followed by one row
    per basis function from the block's first column index onwards. The
    first token of each row is skipped and the remaining tokens are the
    values; Fortran 'D' exponents are accepted. Every value is mirrored
    into the upper triangle.

    Parameters
    ----------
    file : str
        Path of the Gaussian output file.

    Returns
    -------
    numpy.ndarray
        Symmetric (N, N) float64 matrix multiplied by the hartree-to-eV
        factor, where N is the value returned by
        getnumberofbasisfunctions(file).

    Raises
    ------
    ValueError
        If the file contains no 'Fock matrix (alpha)' section, or a value
        cannot be parsed as a float.
    """

    # eV per hartree.
    # NOTE(review): the CODATA value is about 27.2114; the factor is kept
    # as-is so results do not change -- confirm the intended precision.
    hartree_to_eV = 27.212

    N_basis_functions = getnumberofbasisfunctions(file)
    n_column_blocks = int(math.ceil(N_basis_functions/5.0))

    F_mat = None

    with open(file, 'r') as handle:
        line = handle.readline()
        while line:
            if 'Fock matrix (alpha)' in line:
                # A later section overwrites an earlier one (last one wins).
                F_mat = np.zeros((N_basis_functions, N_basis_functions), np.float64)
                for block in range(n_column_blocks):
                    first_column = block*5
                    handle.readline()  # header row, carries no data
                    for row in range(first_column, N_basis_functions):
                        fields = handle.readline().split()
                        for k, token in enumerate(fields[1:]):
                            # builtin float replaces np.float, which was
                            # removed from NumPy in 1.24
                            value = float(token.replace('D', 'E'))
                            F_mat[row, first_column+k] = value
                            F_mat[first_column+k, row] = value
            line = handle.readline()

    if F_mat is None:
        # Previously this fell through to "None *= float" and a TypeError.
        raise ValueError(
            "no 'Fock matrix (alpha)' section found in %r; "
            "was iop(5/33=3) set in the Gaussian input?" % (file,)
        )

    F_mat *= hartree_to_eV

    return F_mat


# In[5]:


def getoverlapmatrix(file):

    """
    This function extracts the overlap matrix (S) in atomic orbital basis
    from a gaussian output file (file). When multiple overlap matrices exist
    only the last one will be returned.
    To print the overlap matrix, the keyword: iop(3/33=4) and pop=full
    must have been included in the gaussian input file.

    The matrix is read as a lower triangle printed in blocks of five
    columns: each block starts with one header line, followed by one row
    per basis function from the block's first column index onwards. The
    first token of each row is skipped and the remaining tokens are the
    values; Fortran 'D' exponents are accepted. Every value is mirrored
    into the upper triangle.

    Parameters
    ----------
    file : str
        Path of the Gaussian output file.

    Returns
    -------
    numpy.ndarray or None
        Symmetric (N, N) float64 matrix, where N is the value returned by
        getnumberofbasisfunctions(file); None if the file contains no
        '*** Overlap ***' section.

    Raises
    ------
    ValueError
        If a value cannot be parsed as a float.
    """

    N_basis_functions = getnumberofbasisfunctions(file)
    n_column_blocks = int(math.ceil(N_basis_functions/5.0))

    S_mat = None

    with open(file, 'r') as handle:
        line = handle.readline()
        while line:
            if '*** Overlap ***' in line:
                # A later section overwrites an earlier one (last one wins).
                S_mat = np.zeros((N_basis_functions, N_basis_functions), np.float64)
                for block in range(n_column_blocks):
                    first_column = block*5
                    handle.readline()  # header row, carries no data
                    for row in range(first_column, N_basis_functions):
                        fields = handle.readline().split()
                        for k, token in enumerate(fields[1:]):
                            # builtin float replaces np.float, which was
                            # removed from NumPy in 1.24
                            value = float(token.replace('D', 'E'))
                            S_mat[row, first_column+k] = value
                            S_mat[first_column+k, row] = value
            line = handle.readline()

    return S_mat


# In[6]:


def lowdin_transformed_S(F_mat, S_mat):

    """
    Lowdin symmetric transformation of a Fock matrix given in the
    atomic basis.

    The result is S^(-1/2) F S^(-1/2), where S^(-1/2) is obtained by
    inverting the matrix square root of the overlap matrix.

    Parameters
    ----------
    F_mat : array_like
        Fock matrix in the atomic basis.
    S_mat : array_like
        Overlap matrix of the same basis.

    Returns
    -------
    numpy.ndarray
        The transformed Fock matrix.
    """

    overlap_sqrt = linalg.sqrtm(S_mat)
    overlap_inv_sqrt = linalg.inv(overlap_sqrt)

    # Right-multiply first, then left-multiply.
    fock_times_inv_sqrt = np.dot(F_mat, overlap_inv_sqrt)
    return np.dot(overlap_inv_sqrt, fock_times_inv_sqrt)



def block_diagonalization_with_bridge_new(fock_matrix, DCA_basis):

    """
    Diagonalize the D-B1, B1-C-B2 and B2-A sub-blocks of the Fock matrix
    separately to form the diabatic states.

    DCA_basis holds the number of basis functions of the five fragments in
    the order (D, B1, C, B2, A). The three sub-blocks overlap on the bridge
    fragments, so the diabatic basis has more vectors than the original
    basis and is not orthonormal.

    Output:
    C_full: matrix whose columns hold the coefficients that transform the
    AOs to the diabatic (non-orthonormal) basis; columns are ordered
    D-B1 states, then B1-C-B2 states, then B2-A states.
    f_diabatic: C_full^T . fock_matrix . C_full
    """

    n_D, n_B1, n_C, n_B2, n_A = (DCA_basis[i] for i in range(5))

    size_DB1 = n_D + n_B1
    size_B1CB2 = n_B1 + n_C + n_B2
    size_B2A = n_B2 + n_A

    # (rows of the AO basis covered by the site, columns of C_full, block size)
    sites = [
        (slice(None, size_DB1),
         slice(None, size_DB1),
         size_DB1),
        (slice(n_D, n_D + size_B1CB2),
         slice(size_DB1, size_DB1 + size_B1CB2),
         size_B1CB2),
        (slice(n_D + n_B1 + n_C, None),
         slice(size_DB1 + size_B1CB2, None),
         size_B2A),
    ]

    C_full = np.zeros([sum(DCA_basis), size_DB1 + size_B1CB2 + size_B2A])

    for ao_rows, diabatic_columns, size in sites:
        # Copy the sub-block into a fresh float array of the expected shape.
        site_fock = np.zeros([size, size])
        site_fock[:, :] = fock_matrix[ao_rows, ao_rows]

        # Only the eigenvectors are needed; eigenvalues are discarded.
        _, site_vectors = np.linalg.eigh(site_fock)
        C_full[ao_rows, diabatic_columns] = site_vectors

    fock_times_C = np.dot(fock_matrix, C_full)
    f_diabatic = np.dot(C_full.T, fock_times_C)

    return C_full, f_diabatic
    
    


def getnumberofoccmos(file):

    """
    This function extracts the number of electrons in the given molecule, to
    get the number of occupied MOs assuming a ground state configuration.

    The first line containing 'alpha electrons' with a non-zero electron
    total is used: its first token is read as the alpha count and its
    fourth token as the beta count.

    Parameters
    ----------
    file : str
        Path of the Gaussian output file.

    Returns
    -------
    int
        Half of the total electron count, rounded up.

    Raises
    ------
    ValueError
        If the end of the file is reached without finding a usable
        'alpha electrons' line, or if a count is not an integer.
    IndexError
        If a matching line has fewer than four tokens.
    """

    with open(file, 'r') as handle:
        for line in handle:
            if 'alpha electrons' not in line:
                continue
            fields = line.split()
            N_electrons = int(fields[0]) + int(fields[3])
            # A zero total is treated as "not found yet" and the scan goes on.
            if N_electrons != 0:
                return math.ceil(N_electrons/2)

    # Reaching the end of the file used to spin forever on empty reads.
    raise ValueError(
        "no 'alpha electrons' line with a non-zero count found in %r" % (file,)
    )


def donor_acceptor_orthorgonalize(C_full, f_diabatic, orbital_indexes):

    """
    Build the 2 by 2 Fock and overlap matrices of the two diabatic orbitals
    named in "orbital_indexes" (initial orbital first, final orbital second)
    and Lowdin-orthogonalize that two-state Fock matrix.

    The overlap between the two orbitals is the dot product of their
    coefficient columns in C_full; the diagonal of the overlap matrix is
    taken as one. The Fock elements are picked from f_diabatic.

    Returns the 2 by 2 matrix S^(-1/2) f S^(-1/2).
    """

    orbital_pair = (orbital_indexes[0], orbital_indexes[1])
    first, second = orbital_pair

    # Two-state Fock matrix picked out of the diabatic Fock matrix.
    pair_fock = np.zeros([2, 2])
    for a, row_orbital in enumerate(orbital_pair):
        for b, column_orbital in enumerate(orbital_pair):
            pair_fock[a, b] = f_diabatic[row_orbital, column_orbital]

    # Two-state overlap matrix with unit diagonal.
    first_coefficients = C_full[:, first]
    second_coefficients = C_full[:, second]
    pair_overlap = np.identity(2)
    pair_overlap[0, 1] = np.dot(first_coefficients.T, second_coefficients)
    pair_overlap[1, 0] = np.dot(second_coefficients.T, first_coefficients)

    overlap_inv_sqrt = linalg.inv(linalg.sqrtm(pair_overlap))

    fock_times_inv_sqrt = np.dot(pair_fock, overlap_inv_sqrt)
    return np.dot(overlap_inv_sqrt, fock_times_inv_sqrt)


# In[37]:


def getelectronsdonoracceptor(file, N_atoms_per_fragment):

    """
    This function calculates the number of electrons in each fragment based
    on the atomic numbers of the atoms in the fragment.

    The first 'Standard orientation:' table of the file is used. The four
    lines after the title are skipped, then one row per atom is read and the
    second token of each row is summed as that atom's atomic number. Atoms
    are assigned to fragments consecutively in the order they are listed.

    Parameters
    ----------
    file : str
        Path of the Gaussian output file.
    N_atoms_per_fragment : sequence of int
        Number of atoms in each fragment, in the order the atoms appear.

    Returns
    -------
    list of int
        Sum of atomic numbers for each fragment.

    Raises
    ------
    ValueError
        If the file contains no 'Standard orientation:' table, or an atomic
        number is not an integer.
    IndexError
        If an atom row has fewer than two tokens (e.g. truncated file).
    """

    with open(file, 'r') as handle:
        # iter(readline, '') stops at EOF and can be mixed safely with the
        # explicit readline() calls below.
        for line in iter(handle.readline, ''):
            if 'Standard orientation:' not in line:
                continue

            # Skip the four lines between the title and the first atom row.
            for _ in range(4):
                handle.readline()

            electrons_per_fragment = []
            for n_atoms in N_atoms_per_fragment:
                N_electrons_fragment = 0
                for _ in range(n_atoms):
                    N_electrons_fragment += int(handle.readline().split()[1])
                electrons_per_fragment.append(N_electrons_fragment)
            return electrons_per_fragment

    # Reaching the end of the file used to spin forever on empty reads.
    raise ValueError(
        "no 'Standard orientation:' table found in %r" % (file,)
    )

if __name__ == '__main__':

    # Number of atoms in each fragment, in the order:
    # donor (without bridge), bridge1, chromophore (without bridge),
    # bridge2, acceptor (without bridge).
    N_atoms_per_fragment = [22, 10, 38, 10, 30]

    # Path of the Gaussian output file: first command-line argument.
    file = sys.argv[1]

    N_basis_functions = getnumberofbasisfunctions(file)
    N_basis_per_fragment = getnumberofbasisfunctionperfraction(
        file, N_atoms_per_fragment, N_basis_functions)
    N_occ_mos = getnumberofoccmos(file)

    # Read the Fock and overlap matrices, then replace the Fock matrix by
    # its Lowdin-transformed counterpart.
    fock_mat = getfockmatrix(file)
    S_mat = getoverlapmatrix(file)
    f_trans = lowdin_transformed_S(fock_mat, S_mat)
    fock_mat = f_trans

    C_full, f_diabatic = block_diagonalization_with_bridge_new(
        fock_mat, N_basis_per_fragment)

    # Diabatic orbital energies are the diagonal of the diabatic Fock matrix.
    N_eigenvalues = f_diabatic.shape[0]
    diabatic_eigenvalues = list(np.diagonal(f_diabatic))

    electrons_per_fragment = getelectronsdonoracceptor(file, N_atoms_per_fragment)

    # This section determines the number of electrons to be put in each
    # charge site (donor, chromophore and acceptor).
    # "electrons_per_fragment" holds the number of electrons of each fragment
    # (donor (without bridge), bridge1, chromophore (without bridge), bridge2,
    # acceptor (without bridge)) based on the atomic number of each atom in
    # the fragment. The electrons of a charge site are then the sum of the
    # electrons of the fragments included in that site, with one electron
    # withdrawn for each bond that is broken to form the site.
    N_donor_electrons = sum(electrons_per_fragment[0:2]) - 1
    N_chromophore_electrons = sum(electrons_per_fragment[1:4]) - 2
    N_acceptor_electrons = sum(electrons_per_fragment[3:5]) - 1

    # Index of the first diabatic orbital of each site inside f_diabatic:
    # donor states come first, then the B1-C-B2 states, then the B2-A states.
    site_offsets = [
        0,
        N_basis_per_fragment[0] + N_basis_per_fragment[1],
        (N_basis_per_fragment[0] + 2*N_basis_per_fragment[1]
         + N_basis_per_fragment[2] + N_basis_per_fragment[3]),
    ]
    occupied_per_site = [
        int(N_donor_electrons/2),
        int(N_chromophore_electrons/2),
        int(N_acceptor_electrons/2),
    ]

    lumo_indexes = [offset + occupied
                    for offset, occupied in zip(site_offsets, occupied_per_site)]
    homo_indexes = [index - 1 for index in lumo_indexes]

    homo_enegies = [diabatic_eigenvalues[index] for index in homo_indexes]
    lumo_energies = [diabatic_eigenvalues[index] for index in lumo_indexes]

    # This section determines which orbital couplings are calculated, through
    # the third argument passed to donor_acceptor_orthorgonalize.
    # E.g. "[homo_indexes[0], homo_indexes[1]]" couples the HOMO of site 1
    # (donor) with the HOMO of site 2 (chromophore), while
    # "[lumo_indexes[1], lumo_indexes[2]]" couples the LUMO of site 2
    # (chromophore) with the LUMO of site 3 (acceptor).
    coupled_orbital_pairs = [
        [homo_indexes[0], homo_indexes[1]],
        [lumo_indexes[1], lumo_indexes[2]],
        [homo_indexes[0], lumo_indexes[1]],
        [homo_indexes[1], lumo_indexes[2]],
    ]

    # Compute every coupling matrix first, then print them in order.
    coupling_matrices = [
        donor_acceptor_orthorgonalize(C_full, f_diabatic, pair)
        for pair in coupled_orbital_pairs
    ]
    for coupling_matrix in coupling_matrices:
        print(coupling_matrix)
    

    







