import re

# Value returned when no usable NBO summary could be read from the log file.
_NBO_FAILURE_SENTINEL = 10000000000.0

# Header that opens every NBO summary table in the log.
_NBO_SUMMARY_HEADER = re.compile(r"Natural Bond Orbitals \(Summary\):")

# First line of a table entry, e.g. "  12. BD*( 1) C   1 - H   2 ...".
_NBO_ENTRY_START = re.compile(r"\s*\d+\.\s*\w{2}\*?\s*\(")

# Full entry: orbital type, first atom (symbol + optional index), optional
# second atom, occupancy, energy.  Element symbols may be one or two letters
# (C, H, Cl, Br, ...), so the symbol groups accept an optional lowercase letter.
_NBO_ENTRY = re.compile(
    r"\s*\d+\.\s*(\w{2}\*?)\s*\(\s*\d+\)\s*([A-Z][a-z]?)\s*(\d+)?\s*-?\s*([A-Z][a-z]?)?\s*(\d+)?"
    r"\s+([1-9]\d*\.\d+|\d\.\d+)\s+([\-\d.]+)"
)


def _parse_nbo_section(section):
    """Return the orbital records found in one NBO summary section.

    Each record is a dict with the keys ``"orbital_type"``, ``"atoms"``
    (a ``(atom1, atom2)`` tuple, ``atom2`` being ``None`` when only one atom
    is listed) and ``"occupancy"`` (float).
    """
    # An entry may wrap over several physical lines: glue every continuation
    # line onto the entry that precedes it.  Text before the first entry
    # (table header, separators) cannot be an orbital and is ignored.
    entries = []
    current = None
    for raw_line in section.split("\n"):
        if _NBO_ENTRY_START.match(raw_line):
            if current is not None:
                entries.append(current)
            current = raw_line.strip()
        elif current is not None:
            current += " " + raw_line.strip()
    if current is not None:
        entries.append(current)

    orbitals = []
    for entry in entries:
        found = _NBO_ENTRY.match(entry)
        if found is None:
            continue
        orbital_type, symbol1, index1, symbol2, index2, occupancy = found.group(1, 2, 3, 4, 5, 6)
        atom1 = f"{symbol1}{index1}" if index1 else symbol1
        atom2 = f"{symbol2}{index2}" if symbol2 and index2 else None
        orbitals.append({
            "orbital_type": orbital_type,
            "atoms": (atom1, atom2),
            "occupancy": float(occupancy),
        })
    return orbitals


def _antibonding_occupancy(orbitals):
    """Return the summed occupancy of every orbital type whose label contains "*"."""
    # Accumulate per orbital type first and combine the starred types
    # afterwards; this fixes the floating-point summation order.
    totals = {}
    for orbital in orbitals:
        label = orbital["orbital_type"]
        totals[label] = totals.get(label, 0) + orbital["occupancy"]
    return sum(total for label, total in totals.items() if "*" in label)


def extract_bond_orders(logfile):
    """Return the largest gain in starred-orbital occupancy over the first NBO section.

    The log file is scanned for "Natural Bond Orbitals (Summary):" sections,
    each one running up to the next "Charge unit" text.  The first section is
    treated as the ground state and every later one as an excited state.  For
    each section the occupancies of all orbital types whose label contains
    "*" (e.g. ``BD*``) are summed.

    Args:
        logfile: Path of the log file to read.

    Returns:
        * ``10000000000.0`` when the file holds no NBO summary, or when no
          summary is terminated by "Charge unit" (a message is printed).
        * ``None`` when only one (ground-state) section is present, since
          there is nothing to compare it with.
        * Otherwise the maximum, over the excited-state sections, of
          ``starred occupancy(excited) - starred occupancy(ground)``.
          The full list of differences is printed before returning.
    """
    with open(logfile, 'r') as file:
        data = file.read()

    section_starts = [found.start() for found in _NBO_SUMMARY_HEADER.finditer(data)]
    if not section_starts:
        print("No NBO analysis found.")
        return _NBO_FAILURE_SENTINEL

    states = []
    for start_idx in section_starts:
        section_end = data.find("Charge unit", start_idx)
        if section_end == -1:
            break  # Truncated section: nothing after this point is usable.
        states.append(_parse_nbo_section(data[start_idx:section_end]))

    if not states:
        # A summary header exists but its table is never closed (e.g. the job
        # was cut short); report it as a failure instead of raising IndexError.
        print("NBO summary found but incomplete: no 'Charge unit' terminator.")
        return _NBO_FAILURE_SENTINEL

    ground_antibonding_occ = _antibonding_occupancy(states[0])
    excited_states = states[1:]
    if not excited_states:
        return None

    d_anti_occupancy = [
        _antibonding_occupancy(state) - ground_antibonding_occ
        for state in excited_states
    ]
    print("d_anti_occupancy", d_anti_occupancy)
    return max(d_anti_occupancy)

# Example usage: run this file with an optional log-file path as the first
# command-line argument (e.g. "molecule.logNBO"); without one, "molecule.log"
# in the current directory is read.
if __name__ == "__main__":
    import sys

    logfile = sys.argv[1] if len(sys.argv) > 1 else "molecule.log"
    d_anti_occupancy = extract_bond_orders(logfile)
    print(d_anti_occupancy)
