# -*- coding: utf-8 -*-
"""
Created on Fri Oct  1 18:13:35 2021
@author: Thomas Schultz

--------------------------------------------------------------------------
Python Code for Molecular Structure Analysis from Rotational Constants (I)
--------------------------------------------------------------------------

Script to fit benzene r0 structure parameters to isotopologue inertial moments (or rotational
constants). The goal is to test whether it is possible to determine the r0 geometry difference
between C-H and C-D bonds in benzene.

Written by Thomas Schultz, 2021 (schultz@unist.ac.kr)

I kept the code as general as possible to allow the fitting of other molecular structures
in the future. The code is written for readability and not compactness or efficiency.
The output of most function tests is routed into variables (no screen output), for better
readability in Jupyter. I usually give a comment with the output values.

Abbreviations
COM: center of mass
MOI: moment of inertia
PMI: principal moments of inertia

Dependencies (version number):
numpy (1.21.2)
scipy (1.7.3)
tqdm  (4.62.3)
matplotlib (3.5.0)

To run the code (if unfamiliar with Python):
Install Miniconda [https://docs.conda.io/en/latest/miniconda.html].
Open an 'Anaconda prompt' and install dependencies:
>>> conda install numpy scipy matplotlib tdqm ipython
Then run 'ipython' and execute the script by copy/pasting code from below.

The code should be run sequentially (top to bottom), e.g., in an iPython or Jupyter environment.
The sections are organized as follows:
SECTION (1): Imports, natural constants and conversion functions
SECTION (2): Experimental / theoretical data for benzene
SECTION (3): Explicit derivation of benzene MOI and r_0 bond lengths
SECTION (4): Principal moments of inertia (MOI,PMI) calculation
SECTION (5): Fitting of r_0 geometric parameters from MOI
SECTION (6): Monte-Carlo analysis (r_0 error propagation)
SECTION (7): Fitting and Monte Carlo simulations for synthetic data
"""

# SECTION (1)
#-----------------------------------------------------------#
#--- Imports, natural constants, conversion functions    ---#
#-----------------------------------------------------------#
# (no output)
# (1.1)  Natural constants and functions from scipy, numpy
import numpy as np                  # Numerical python
npa = np.array                      # Shortcut for converting list to numpy array
import scipy.constants as sc        # Scientific python
from scipy.optimize import leastsq  # Wrapper for MINPACK Levenberg Marquard minimization algorithm
# Other imports
from tqdm.auto import tqdm               # Progressbar; wrap around iterable: > for tqdm(iterable): ...
import matplotlib.pyplot as plt     # Plotting of simulation results
plt.style.use('seaborn-whitegrid')  # Plot with grid, sensible color scheme,...
# Test environment to set inline plots in Jupyter or non-blocking graphs in iPython
env = get_ipython().__class__.__name__
if env == 'TerminalInteractiveShell':       # Ipython environment
    mgc = get_ipython().magic
    mgc(u'%matplotlib qt')                  # Enable interactive, non-blocking plots
if env == 'ZMQInteractiveShell':            # Jupyter environment
    mgc = get_ipython().magic
    mgc(u'%pylab inline')                   # Enable inline plots

# (1.2)  Atomic masses from NIST:
#---------------------------------------------------------------
# Coursey, J.S., Schwab, D.J., Tsai, J.J., and Dragoset, R.A. (2015),
# Atomic Weights and Isotopic Compositions (version 4.1). [Online]
# Available: http://physics.nist.gov/Comp [accessed Oct. 16 2020].
# National Institute of Standards and Technology, Gaithersburg, MD.
# For compactness, I substitute 'D' for H(2) and 'X' for C(13) heavy isotopologues
NISTmass = {'H':1.00782503223, 'D':2.01410177812, 'C':12.0, 'X':13.00335483507}
mH,mD,mC,m13C = NISTmass['H'],NISTmass['D'],NISTmass['C'],NISTmass['X']

# (1.3)  Utility functions for unit conversion
#---------------------------------------------------------------
# Conversion between rotational frequency units (nu in Hz, MHz, cm-1)
# and corresponding moments-of-inertia (I in amu*Angstrom**2)
# Moments of inertia are: I = h/(8*pi**2*c*nu)
M2c = lambda value: value/sc.c*1e4                        # Converter function MHz to cm-1
c2M = lambda value: value*sc.c*1e-4                       # Converter function cm-1 to MHz

#MHz2I = lambda nu: sc.h/(8*sc.pi**2 *nu*1e6) /(1e-20 * sc.atomic_mass)       # MHz to amu*A**2
MHz2I  = lambda nu: 505379.00914143625/nu                                     # MHz to amu*A**2
#I2MHz = lambda I: sc.h/(8*sc.pi**2 *I*1e6)   /(1e-20 * sc.atomic_mass)       # amu*A**2 to MHz
I2MHz  = lambda I: 505379.00914143625/I                                       # amu*A**2 to MHz
#MHz2cm = lambda nu: nu*1e6/sc.c/100                                          # MHz to wavenumbers
#cm2I  = lambda nu: sc.h/(8*sc.pi**2 *nu*sc.c*100) /(1e-20 * sc.atomic_mass)  # wavenumbers to amu*A**2
cm2I  = lambda nu: 16.857629191640175/nu                                      # wavenumbers to amu*A**2
#I2cm  = lambda I: 1/16.857629191640175/I                                     # amu*A**2 to wavenumbers

# define cos(30 deg), sin(30 deg) for calculation of benzene atom positions
cos30 = np.sqrt(3/4)
sin30 = 0.5

# sqrt(sum(squares(list))) for error propagation
gSum = lambda l: np.sqrt(np.sum(npa(l)**2))


# SECTION (2)
#---------------------------------------------------#
#--- Experimental / theoretical data for benzene ---#
#---------------------------------------------------#
# (no output)
# (2.1)  Ab intio structure of benzene for testing our code.
#---------------------------------------------------------------
# The geometry was optimized using an ORCA Hartree Fock calculation with electron correlation from
# resolution-of-identity, second-order Moller-Plesset pertubation theory and aug-cc-pVTZ basis.
# The geometry was obtained from a RI-MP2 geometry optimization with aug-cc-pVTZ basis.
# The calculation was performed using the Orca ab initio package.
# (keywords: ! RI-MP2 aug-cc-pVTZ aug-cc-pVTZ/C Opt TIGHTOPT TIGHTSCF Usesym PAL4)
atoms = np.array([['C',  '1.394208', '-0.000000', '0.000000'],
                  ['C',  '0.697104', '-1.207420', '0.000000'],
                  ['C',  '0.697104',  '1.207420', '0.000000'],
                  ['C', '-0.697104', '-1.207420', '0.000000'],
                  ['C', '-1.394208',  '0.000000', '0.000000'],
                  ['C', '-0.697104',  '1.207420', '0.000000'],
                  ['H',  '2.476418', '-0.000000', '0.000000'],
                  ['H',  '1.238209', '-2.144641', '0.000000'],
                  ['H',  '1.238209',  '2.144641', '0.000000'],
                  ['H', '-1.238209', '-2.144641', '0.000000'],
                  ['H', '-2.476418',  '0.000000', '0.000000'],
                  ['H', '-1.238209',  '2.144641', '0.000000']], dtype='<U9')
rC = float(atoms[0,1])                              # Distance of carbon from COM
rH = float(atoms[6,1])                              # Distance of hydrogen from COM

# (2.2)  List of experimental benzene isotopologue rotational constants.
#--------------------------------------------------------------------------
# The isotopologue string describes the benzene atoms clockwise with carbons atoms
# (isotopologue[0:5]) and hydrogen atoms (isotopologue[6:11]). I.e., 'CCCCCCDHDHHH' is m-C6H4D2.
# To calculate MIM, isotope characters will be converted to the NIST isotope masses based on
# the values stored in the NISTmass dictionary (2.3).
# For easy comparison of literature values, I converted all frequencies to MHz.
# Values in fitABC can be used to mask outliers or data with high uncertainty.
# E.g., we masked out the 'Heo2021' C-constants in 13C isotopologues due to excessive uncertainty.
#[  Isotopologue, [        (rot._constants_A,B,C),    (uncertainty_A,B,C)],[fit_A,B,C],       Author ],
cList = [ # List of isotopologe composition, rotational constants (MHz) and 1-sigma uncertainties
 ['CCCCCCHHHHHH', [(     0   ,5688.921 ,    0    ), (0     ,0.060 ,0      )], [0,1,0],      'Pliva1990'], #!!!
 ['CCCCCCHHHHHH', [(     0   ,5689.2664,    0    ), (0     ,0.0060,0      )], [0,1,0],'Hollenstein1990'],
 ['CCCCCCHHHHHH', [(     0   ,5689.2124,    0    ), (0     ,0.0090,0      )], [0,1,0],        'Doi2004'],
 ['CCCCCCHHHHHH', [(     0   ,5689.2671,    0    ), (0     ,0.0052,0      )], [0,1,0],        'Lee2019'],
 ['CCCCCCHHHHHH', [(     0   ,5689.2855,    0    ), (0     ,0.0054,0      )], [0,1,0],        'Heo2021'],
 ['XCCCCCHHHHHH', [(5689.474 ,5568.473 ,2868.6   ), (0.018 ,0.023 ,7.3    )], [1,1,0],        'Heo2021'],
 ['CCCCCCDHHHHH', [(5689.144 ,5323.934 ,2749.674 ), (0.006 ,0.006 ,0.006  )], [1,1,1],     'Oldani1984'],
 ['CCCCCCDHHHHH', [(5689.1435,5323.9333,2749.6754), (0.0060,0.0060,0.0060 )], [1,1,1],  'Kunishige2015'],
 ['CCCCCCDDHHHH', [(5498.062 ,5164.242 ,2662.496 ), (0.004 ,0.004 ,0.004  )], [1,1,1],     'Oldani1988'],
 ['CCCCCCDDHHHH', [(5498.0318,5164.2129,2662.4658), (0.0090,0.0090,0.0060 )], [1,1,1],  'Kunishige2015'],
 ['CCCCCCDHDHHH', [(5502.669 ,5152.057 ,2660.358 ), (0.007 ,0.006 ,0.006  )], [1,1,1],     'Oldani1988'],
 ['CCCCCCDHDHHH', [(5502.6666,5152.0533,2660.3523), (0.0090,0.0090,0.0090 )], [1,1,1],  'Kunishige2015'],
 ['CCCCCCDDDHHH', [(5168.017 ,5151.933 ,2579.5792), (0.015 ,0.060 ,0.0060 )], [1,1,1],  'Kunishige2015'],
 ['CCCCCCDDDDHH', [(5163.7152,4846.8136,2499.7924), (0.0060,0.0060,0.0030 )], [1,1,1],  'Kunishige2015'],
 ['CCCCCCDDDHDH', [(5151.99  ,4850.31  ,2497.901 ), (0.12  ,0.12  ,0.030  )], [1,1,1],  'Kunishige2015'],
 ['CCCCCCDDDDDH', [(4998.170 ,4707.221 ,2423.972 ), (0.15  ,0.15  ,0.060  )], [0,1,0],  'Kunishige2015'],
 ['CCCCCCDDDDDD', [(     0   ,4707.1253,    0    ), (0     ,0.0060,0      )], [0,1,0],        'Doi2004'], #!!!
 ['CCCCCCDDDDDD', [(     0   ,4707.312 ,    0    ), (0     , 0.052,0      )], [0,1,0],      'Pliva1989'],
 ['CCCCCCDDDDDD', [(     0   ,4707.3277,    0    ), (0     ,0.0060,0      )], [0,1,0],      'Snels2002'],
 ['CCCCCCDDDDDD', [(     0   ,4707.3175,    0    ), (0     ,0.0034,0      )], [0,1,0],        'Heo2021'],
 ['XCCCCCDDDDDD', [(4707.541 ,4624.1884,2332.    ), (0.036 ,0.031 ,16     )], [1,1,0],        'Heo2021'],
 ['XXXXXXHHHHHH', [(     0   ,5337.925 ,    0    ), (0     ,0.060 ,0      )], [0,1,0],      'Pliva1990'],
 ['XXXXXXHHHHHH', [(     0   ,5337.884 ,    0    ), (0     ,0.051 ,0      )], [0,1,0],        'Heo2021'],
 ['XXXXXXDDDDDD', [(     0   ,4464.371 ,    0    ), (0     ,0.024 ,0      )], [0,1,0],      'Pliva1991'],
]
# To use the table for fitting, I modify the table structure a bit and add fit weights.
# If the exp.uncertainties would dominate the fit errors, we should set the weights to uncert.**(-2).
# If the model errors dominate, then all fit weights should be equal.
# Eijck [J. Mol. Spectr. 91, 348 (1982).] proposed the use of w = 1/(b**2 + uncert.**2) with
# b = 0.0005 uA**2, as (empirically derived) compromise to weight experimental data.
# Here, I set the fit weights to 1/uncertainty, so uncertain values won't skew the fit too much.
# For r0 fits, the inertial defect (Ia+Ib != Ic) leads to large residuals for Ic versus Ia,Ib
# because there are no rovib. corrections. --> I do not fit c-axis values (fitAxis=[1,1,0])
def expandFitList(isotopeList, fitAxis=[1,1,1], fitWeights=2):
    """ Create fitting list from isotopeList with separate entries for each inertial axis.
        Concurrently convert from B [MHz] to I [amu*A**2] and calculate fit-weights.
        Expanded list: [       isotope, axis, (       MIM, uncertainty), fit weight,    author],
        e.g.:          ['XCCCCCHHHHHH',    1, (90.7579155, 0.000293375),  309357.16,  'Heo2021'],
    """
    fitList = []
    for row in isotopeList:                             # Cycle through rows in isotopeList
        name, [const,sigma], fitABC, author = row       # Parse row
        _i_ = [0 if c==0 else MHz2I(c) for c in const]                 # MOI from rot. constant
        _s_ = [0 if c==0 else s/c*i for c,s,i in zip(const,sigma,_i_)] # MOI uncertainty
        weights = [1 if w>0 else 0 for w in npa(sigma)*npa(fitABC)*npa(fitAxis)]    # Deltas as fit weights [0/1]
        if fitWeights == 1:                             # Fit weights MOI/sigma
            weights = [0 if s==0 else c/s for c,s in zip(const,npa(_s_)*fitABC*fitAxis)]
        if fitWeights == 2:                             # Eijck1982 fit weights 1/(sigma**2+0.005**2)
            weights = npa(weights)/(npa(_s_)**2 + npa([0.0005]*3)**2)
        for axis in [0,1,2]:                            # Cycle through A,B,C axes
            if weights[axis]:                           # Drop non-existent / 0-weight isotope constants
                fitList.append([name, axis, (_i_[axis], _s_[axis]), weights[axis], author])
    return fitList

# The Pliva1990 C6H6 and the Doi2004 C6H6, C6D6 values are outliers and could be masked out.
# Note that there is an abundance of contradictory rot,. constants in the benzene literature!
mList = [row for row in cList if not ('Doi2004' in row[3] or ('Pliva1990'in row[3] and 'CCCCCCHHHHHH' in row[0]))]

# A few subsets of data that we want to fit:
InList_AB = expandFitList([row for row in cList if row[3] in ['Heo2021']], fitAxis=[1,1,0])  # Our measurements (A,B)
InList_ABC = expandFitList([row for row in cList if row[3] in ['Heo2021']], fitAxis=[1,1,1]) # Our measurements (A,B,C)
fullList_AB = expandFitList(cList, fitAxis=[1,1,0])                                         # All experimental data in cList (A,B)
fullList_ABC = expandFitList(cList, fitAxis=[1,1,1])                                        # All experimental data in cList
maskedList_AB = expandFitList(mList, fitAxis=[1,1,1])                                       # All experimental data but without outliers
maskedList_ABC = expandFitList(mList, fitAxis=[1,1,1])                                      # All experimental data but without outliers
fullList_ABC_w0 = expandFitList(cList, fitAxis=[1,1,1], fitWeights=0)                       # All experimental data with equal fit weights
KunishigeList = expandFitList([row for row in cList if row[3] in ['Kunishige2015','Doi2004']])  # Only data in Kunishige2015
noKunishigeList = expandFitList([row for row in cList if not row[3] in ['Kunishige2015','Doi2004']])  # No data from Kunishige2015
# The 'best' data list for fitting of r0 values should be maskedList_AB:
# - The C constant is not available for all species and may therefore systematically distort the analysis
#   (Note that inertial defect will lead to a systematic model error in the description of C versus A and B constants).
# - Outliers should not be included in the fit if there is reason to beleiev that they are due to non-statistical deviations.
#   Both Doi2004 values stem from the same experiment (same calibration method) and are significantly lower than other reference values.
#   The Pliva1990 value for benzene is significantly below modern reference values (see discussion in Lee2019).
# We will explore the robustness of the fitting results with


# SECTION (3)
#---------------------------------------------------------------#
#--- Explicit derivation of benzene MOI and r_0 bond lengths ---#
#---------------------------------------------------------------#
# This section derives analytical functions for the benzene moment of inertia.
# This is to check the numerical accuracy of the general MOI functions defined
# below and to clarify the assumptions made for the benzene geometry

# (3.1) Manually derived MOI for benzene and D6-benzene (identical results for A and B axes)
#--------------------------------------------------------------------------------------------
#   C4                  # Consider symmetry unique atoms in 1-13C-benzene (D6h symmetry).
#  /  \                 # Atom distance rC to COM equals C-C bond length.
# C3   C3               # Distances to B-axis (unsubtituted):
# |    |  ---- B axis   # rC1 = rC4 = rC
# C2   C2               # rC2 = rC3 = rC * sin(30) = rC/2
#  \  /                 # MOI_C = mC* (rC1**2 + rC4**2 + 2*rC2**2  + 2*rC3**2)
#   C1                  #       = mC* (2*rC**2         + 1/2*rC**2 + 1/2*rC**2)
#                       #       = mC * 3rC**3
# Equivalent for H:       MOI_H = mH * 3rH**3
# rH describes distance of H atoms to COM; C-H bond length is: rCH = rH-rC
# the molecular MOI is the sum of all atomic contributions, MOI = sum_i(m_i * r_i**2)
benzeneMOI = 3* mC*rC**2 + 3* mH*rH**2                # 88.51928 amu*Angstrom**2

# For D6-benzene, 13C6-benzene, just replace atomic masses and bond lengths.
rD = rH; r13C = rC
D6benzeneMOI = 3* mC*rC**2 + 3* mD*rD**2              # 107.0327 amu*Angstrom**2
C13_6benzeneMOI = 3* m13C*rC**2 + 3* mH*rH**2         # 94.37028 amu*Angstrom**2

# (3.2) Manually derived MOI for 13C-benzene (mass mC13 = mC + dmC), the B-axis is shifted by dr,
#-------------------------------------------------------------------------------------------------
#   C4                  # increasing the MOI contribution of C3, C4
#  /  \                 # and reducing that of C2.
# C3   C3
# |    |  ____ B axis   # --- ___ (shift of axis by dr)
# C2   C2               # distances to B-axis (subtituted):
#  \  /                 # rC1' = rC   - dr
#   C1                  # rC2' = rC/2 - dr
#                       # rC3' = rC/2 + dr
#                       # rC4' = rC   + dr
# MOI = mC* (rC4'**2      + 2*rC3'**2        + 2*rC2'**2)        + (mC13   )* rC1'**2
#     = mC* ((rC + dr)**2 + (1/2*rC + dr)**2 + (1/2*rC - dr)**2) + (mC + dmC)*(1/2*rC - dr)**2
#     = mC* ((rC**2 + 2*rC*dr + dr**2) + (1/2*rC**2 + rc*dr + 2*dr**2)
#             + (1/2*rC**2 - rc*dr + 2*dr**2))                   + (mC + dmC)*(rC**2 - 2*rC*dr + dr**2)
#     = mC* (2*rC**2 + 2*rC*dr + 5*dr**2) + mC*(rC**2 - 2*rC*dr + dr**2) + dmC*(rC**2 - 2*rC*dr + dr**2)
#     = mC* (3*rC**2 + 6*dr**2) + dmC*(rC - dr)**2
# For the molecular MOI, we need to add the corresponding hydrogen contributions.
dmC = m13C-mC           # mass difference 12C to 13C
dr = 0.017696           # The axis displacement [A] according to the PMI function in (4);
                        # It can be formulated as function of masses and positions of the atoms (not done here).
benzene13MOI   = mC* (3*rC**2 + 6*dr**2)  + dmC*(rC - dr)**2  + mH* (3*rH**2 + 6*dr**2)   #  90.44486 amu*A**2

# For D6-13C-benzene, just replace rH with rD and mH with mD
dr2 = 0.016440          # The axis displacement according to the PMI function in (4)
D6benzene13MOI = mC* (3*rC**2 + 6*dr2**2) + dmC*(rC - dr2)**2 + mD* (3*rD**2 + 6*dr2**2)  #  108.96003 amu*A**2
# All calculated MOI are identical to those obtained via PMI() in (4)
# Note that dr and dr2 are fully determined by the geometry parameters rH, rC and rD, rC.
# This is inherently implemented in the PMI() calculation in section (4).

# (3.3) Comments on the MOI for asymmetrically substituted benzenes:
#--------------------------------------------------------------------
# with the approximation of rH = rD, r13C = rC the MOI are easily derived.
# If these approximations are removed, the equations become tedious (terms do not cancel)
# and additional independent parameters are required. We'll use the general PMI calculation
# in section (4) to handle all isotopologue cases.

# (3.4) r_0 Structure analysis using analytical MOI equations
#---------------------------------------------------------------
# Assuming rH == rD and rC == r13C, we can easily derive the benzene structure from the
# equations given above.
# D6benzeneMOI - benzeneMOI = 3*(mC*rC**2) + 3*(mD*rH**2) -  3*(mC*rC**2) - 3*(mH*rH**2)
# D6benzeneMOI - benzeneMOI = 3*(mH-mD)*rH**2
rH_ = np.sqrt( (D6benzeneMOI-benzeneMOI) / (3*(mD-mH)) )
rC_ = np.sqrt( (benzeneMOI - 3*(mH*rH_**2)) / (3*mC) )
# Results return ab initio values for rH, rC.
# Note that these structure parameters are 'r0' structure parameters. The Kraitchman equations
# for the substituted atom are given in eq. 13 of [Kraitchman, Am.J.Phys.21,17(1953)] and are based on the
# rsH = (D6benzeneMOI-benzeneMOI)/3 * ((mC+mH)+(mD-mH)) / ((mC+mH)*(mD-mH))

# (3.5) r_0 Structure analysis with Doi2004, values for rotational constants
#------------------------------------------------------------------------------------
# Reproduce values from Doi2004 Table 1 [J.Mol.Spec. 227, 180 (2004)]
expBz = cm2I(0.1897717)      # Doi2004 Table 1, col. 3; B-axis, cm-1 to MOI (amu*A**2)
expD6Bz = cm2I(0.1570128)    # Doi2004 Table 1, col. 2; B-axis, cm-1 to MOI (amu*A**2)
rH_Kun = np.sqrt( (KunBz - KunD6Bz) / (3*(mH-mD)))          # rH = 2.4777 A, r(C-H) = 1.0807 A
rC_Kun = np.sqrt( (KunBz - 3*(mH*rH_Kun**2)) / (3*mC))      # rC = 1.3971 A

# (3.6) r_0 structure analysis with our values (Heo2021) for rotational constants
#--------------------------------------------------------------------------------
# Our B-Values, 1-sigma errors for symmetric isotopologues
B_Bz,  sB_Bz    = 5689.2855, 0.0054                   # [Heo2021], C6H6, B-axis, MHz to MOI (amu*A**2)
B_D6Bz,sB_D6Bz  = 4707.3175, 0.0034                   # [Heo2021], C6D6, B-axis, MHz to MOI (amu*A**2)
B_X6Bz,sB_X6Bz  = 5337.884, 0.051                     # [Heo2021], 13-C6H6, B-axis, MHz to MOI (amu*A**2)
# Conversion MHz to MOI:
I_Bz, sI_Bz     = MHz2I(B_Bz)  , sB_Bz/B_Bz *MHz2I(B_Bz)
I_D6Bz, sI_D6Bz = MHz2I(B_D6Bz), sB_D6Bz/B_D6Bz *MHz2I(B_D6Bz)
I_X6Bz, sI_X6Bz = MHz2I(B_X6Bz), sB_X6Bz/B_X6Bz *MHz2I(B_X6Bz)
# Solve equation (D6benzeneMOI - benzeneMOI) to get r0 bond length.
# Error propagation to estimate 1-sigma uncertainties in s_rX from uncertainties in B.
# (The propagated errors are far below the uncertainties due to our approximations. We therefore
# also give Costain errors, which should encompass the range of r_0, r_s, and r_e values.)
# --- Solve equations with C6H6, C6D6 values
_rH_ = np.sqrt( (I_Bz - I_D6Bz) / (3*(mH-mD)))                      # 2.4775 A
s_rH_ = 0.5* np.sqrt( (sI_Bz/I_Bz)**2 + (sI_D6Bz/I_D6Bz)**2 ) *_rH_ # 0.000002 A
CostainSigma_rH_ = np.sqrt(s_rH_**2 + (0.00150/_rH_)**2)            # 0.0012 A
_rC_ = np.sqrt( (I_Bz/(3*mC) - mH/mC*_rH_**2) )                     # 1.3971 A
s_rC_ = 0.5* np.sqrt( (sI_Bz/(3*mC))**2 + (2*mH/mC*s_rH_)**2 )      # 0.000001 A
CostainSigma_rC_ = np.sqrt(s_rC_**2 + (0.00150/_rC_)**2)            # 0.0011 A
print(f'Analytical Calculation of benzene bond length based on C6H6 and C6D6 isoptopologue constants:')
print(f'    rCC = {_rC_:.6f} +/- {s_rC_:.6f}; Costain uncertainty: {CostainSigma_rC_:.6f}')
print(f'    rCH = {_rH_ - _rC_:.6f} +/- {gSum([s_rH_,s_rC_]):.6f}; Costain uncertainty: {gSum([CostainSigma_rH_,CostainSigma_rC_]):.6f}')
# Results (with Costain error in brackets):
# rC = 1.3971(11) A, rH = 2.47758(61) A, r(C-H) = 1.0804(12) A.     <==== Values manuscript text
# --- Solve equations with C6H6, 13-C6H6 values
_rC_ = np.sqrt( (I_Bz - I_X6Bz) / (3*(mC-m13C)))                    # 1.394208
s_rC_ = 0.5* np.sqrt( (sI_Bz/I_Bz)**2 + (sI_X6Bz/I_X6Bz)**2 ) *_rC_ # 0.000007
CostainSigma_rC_ = np.sqrt(s_rC_**2 + (0.00150/_rC_)**2)            # 0.0011
_rH_ = np.sqrt( (I_Bz/(3*mH) - mC/mH*_rC_**2) )                     # 2.499606
s_rH_ = 0.5* np.sqrt( (sI_Bz/(3*mH))**2 + (2*mC/mH*s_rC_)**2 )      # 0.000081
CostainSigma_rH_ = np.sqrt(s_rH_**2 + (0.00150/_rH_)**2)            # 0.0006
print(f'Analytical Calculation of benzene bond length based on C6H6 and 13-C6H6 isoptopologue constants:')
print(f'    rCC = {_rC_:.6f} +/- {s_rC_:.6f}; Costain uncertainty: {CostainSigma_rC_:.6f}')
print(f'    rCH = {_rH_ - _rC_:.6f} +/- {gSum([s_rH_,s_rC_]):.6f}; Costain uncertainty: {gSum([CostainSigma_rH_,CostainSigma_rC_]):.6f}')
# Results (with Costain error in brackets):
# rCC = 1.393828(1076), rCH = 1.105778 (1235)                   <==== Values in manuscript text


# SECTION (4)
#----------------------------------------------------------#
#--- Principal moments of inertia (PMI,MOI) calculation ---#
#----------------------------------------------------------#
# For a fit of structural r_0 parameters to experimental MOI, we need to relate the
# structure parameters to calculated MOI_calc. We can then minimize (MOI_exp - MOI_calc)
# to find the optimum r_0 parameters corresponding (see section (5)).
# This is implemented in the PMI function (section 4.1) and is not specific for benzene.
#
# We also need a function to map a minimal set of internal coordinates onto Cartesian
# coordinates. This function will account for symmetry and implement desired approximations
# (e.g., I assume C6 symmetry for all benzenes).
# The code in (4.2)-(4.4) defines three different internal -> Cartesian mappings for
# benzene, based on 2, 3, or 4 structural parameters.

# (4.1) General function for principle moments of inertia calculation
#----------------------------------------------------------------------
def PMI(m_i, r_i , printResults=False):
    """ Calculation of molecular principal moments of inertia, based on
        atomic masses _m and coordinates r_i.
        For a description of the MOI calculation, see Gordy and Cook,
        chapter 13, or the publication from J. Kraitchman (1953)
        [Am. J. Phys. 21, 17 (1953); doi: 10.1119/1.1933338].
        I refer to equations in Kraitchmans as KRA(equation number)
    """
    COM = np.dot(m_i, r_i ) / np.sum(m_i)  # center of mass coordinates (x,y,z) (COM)
    x,y,z = r_i [:,0]-COM[0], r_i [:,1]-COM[1], r_i [:,2]-COM[2]     # KRA(5)
                                          # Centered atomic coordinates on COM
                                          # Calculate Moments of Inertia I (eqs. 4a, 4b, or 10b, 10c)
                                          # Corrections for non-COM coordinates would be: msum = np.sum(_m)
                                          # and correction terms are given as comments
    I_xx = np.sum(m_i*(y**2 + z**2))      # - (np.sum(_m*y))**2 / msum - (np.sum(_m*z))**2 / msum
    I_yy = np.sum(m_i*(z**2 + x**2))      # - (np.sum(_m*z))**2 / msum - (np.sum(_m*x))**2 / msum
    I_zz = np.sum(m_i*(x**2 + y**2))      # - (np.sum(_m*x))**2 / msum - (np.sum(_m*y))**2 / msum
    I_xy = - np.sum(m_i*x*y)              # + np.sum(_m*x) * np.sum(_m*y) / msum
    I_xz = - np.sum(m_i*x*z)              # + np.sum(_m*x) * np.sum(_m*z) / msum
    I_yz = - np.sum(m_i*y*z)              # + np.sum(_m*y) * np.sum(_m*z) / msum
    I = np.array([[I_xx, I_xy, I_xz],     # MOI tensor as matrix    KRA(2)
                  [I_xy, I_yy, I_yz],     # Eigenvalues give the principal moments of inertia (PMI)
                  [I_xz, I_yz, I_zz]])    # in amu*Angs**2
    _PMI, e_vectors = np.linalg.eigh(I)   # Principal Moments of Inertia in amu*Angstrom**2
    _PMI = np.sort(_PMI)                  # Ensure order Ia < Ib < Ic (redundant for eigh)
    if printResults:
        print(f'PMI calculation:')
        print(f'Center-of-mass [A]:           COM = np.{repr(np.round(COM,7))}')
        print(f'Inertial moment [amu*A**2]:   PMI = np.{repr(np.round(_PMI,5))}')
        print(f'Rot. Constants [MHz]):      A,B,C = {I2MHz(_PMI[0]):10.6f}, {I2MHz(_PMI[1]):10.6f}, {I2MHz(_PMI[2]):10.6f}')
    return _PMI

# Test of PMI function versus explicit values from section (3)
m_i = [NISTmass[atom[0]] for atom in atoms]    # Values for C6H6 based on ab initio bond lengths
r_i = atoms[:,1:].astype(float)                # ...
PMI(m_i,r_i , printResults=True)               # [88.51927,88.51933,177.03860]; agrees with benzeneMOI (in 3)
                                               # A,B are not identical (slight asymmetry of ab initio coord.)
atoms[0][0] = 'X'                              # Same geometry but for 13C-C5H6 (replace 1st atom with 13C)
m_i = [NISTmass[atom[0]] for atom in atoms]    # ...
PMI(m_i,r_i )                                  # [88.51933,90.44485,178.96419]; agrees with benzene13MOI (in 3)

# (4.2) Function to calculate benzene isotopologue MOI from rC and rH parameters.
#---------------------------------------------------------------------------------
# 2-parameter function assumes identical rH = rD and rC = r13C for all atoms.
# This first-order approximation (isotope-independent geometry) corresponds to the
# approximation used for the r_0 structure determination in section (3.5).
def bzMOI_2(isotopologue, rC,rCH, printResults=False):
    """ Benzene MOI from isotopic composition and rH,rC distance to COM.
        'isotopologue' lists isotopes (C,X=13C,H,D) for all
        12 atoms in clockwise direction, e.g. ['CCCCCCDHDHHH'] for m-2D-benzene.
        I substituted sin(30/180*pi)=1/2 and cos(30/180*pi)=sqrt(3/4).  """
    global NISTmass
    global cos30
    rH = rCH + rC
    _m = [NISTmass[atom] for atom in isotopologue]  # masses from NIST table
    C1 = [   rC,         0, 0]  # x,y,z coordinates for C1
    C2 = [ rC/2,  rC*cos30, 0]  # x,y,z coordinates for C2
    C3 = [-rC/2,  rC*cos30, 0]  # ...
    C4 = [  -rC,         0, 0]
    C5 = [-rC/2, -rC*cos30, 0]
    C6 = [ rC/2, -rC*cos30, 0]
    H1 = [   rH,         0, 0]  # x,y,z coordinates for H1
    H2 = [ rH/2,  rH*cos30, 0]  # x,y,z coordinates for H2
    H3 = [-rH/2,  rH*cos30, 0]  # ...
    H4 = [  -rH,         0, 0]
    H5 = [-rH/2, -rH*cos30, 0]
    H6 = [ rH/2, -rH*cos30, 0]
    r_i = np.array([C1,C2,C3,C4,C5,C6,H1,H2,H3,H4,H5,H6])
    _PMI = PMI(_m,r_i , printResults=printResults) # Calculate principal moments (MOI)
    return np.array(_PMI)                          # Return A,B,C-axis MOI for fit

# Test against benzeneMOI in section (3):
moi2 = bzMOI_2('CCCCCCHHHHHH', rC,rH-rC)                  # [ 88.51927, 88.51927, 177.03855]; agrees

# (4.3) Function to calculate benzene isotopologue MOI from rCC, rCH,and rCD parameters.
#----------------------------------------------------------------------------------------
# The 3-parameter function assumes identical rC = r13C and that r(C-H/D) = r(13C-H/D).
# This second-order approximation could be useful because the heavy carbon has a small
# zero-point vibration amplitude and a small 12C/13C mass difference and therefore
# shows a small vibrational effect. This corresponds to the assumptions in Kunishige2015.
def bzMOI_3(isotopologue, rC,rCH,rCD, printResults=False):
    """ Benzene MOI from isotopic composition and geometry parameters.
        'isotopologue' lists isotopes (C,X=13C,H,D) for all
        12 atoms in clockwise direction, e.g. ['CCCCCCDHDHHH'] for m-2D-benzene.
        I substituted sin(30/180*pi)=1/2 and cos(30/180*pi)=sqrt(3/4).  """
    global NISTmass
    global cos30
    iso = isotopologue
    rH = rCH + rC
    rD = rCD + rC
    rHD = lambda atom: rH if iso[atom]=="H" else rD # Distinguish rH and rD
    _m = [NISTmass[atom] for atom in isotopologue]  # masses from NIST table
    C1 = [        rC,              0, 0]  # x,y,z coordinates for C1
    C2 = [      rC/2,       rC*cos30, 0]  # x,y,z coordinates for C2
    C3 = [     -rC/2,       rC*cos30, 0]  # ...
    C4 = [       -rC,              0, 0]
    C5 = [     -rC/2,      -rC*cos30, 0]
    C6 = [      rC/2,      -rC*cos30, 0]
    H1 = [    rHD(6),              0, 0]  # x,y,z coordinates for H1
    H2 = [  rHD(7)/2,   rHD(7)*cos30, 0]  # x,y,z coordinates for H2
    H3 = [ -rHD(8)/2,   rHD(8)*cos30, 0]  # ...
    H4 = [   -rHD(9),              0, 0]
    H5 = [-rHD(10)/2, -rHD(10)*cos30, 0]
    H6 = [ rHD(11)/2, -rHD(11)*cos30, 0]
    r_i = np.array([C1,C2,C3,C4,C5,C6,H1,H2,H3,H4,H5,H6])
    _PMI = PMI(_m,r_i , printResults=printResults) # Calculate principal moments (MOI)
    return _PMI                                    # Return A,B,C-axis MOI for fit

# Test against D6benzeneMOI in section (3):
moi3 = bzMOI_3('CCCCCCDDDDDD', rC,rH-rC,rD-rC)             # [107.03269, 107.03269, 214.06538]; agrees

# (4.4) Function to calculate benzene isotopologue MOI from rC, rH, rD, and rC13 parameters.
#--------------------------------------------------------------------------------------------
# A 5th parameter fHC determines how much a H/D atom position is affected by the
# position of the 12C / 13C isotopologue to which it is bonded (The H/D atom
# moves along by fHC with the carbon atom to which it is bound). We may expect fHC close to 1
# if the C/13C isotope effect is dominated by the potential anharmonicity (corresponds to
# an effective C-atom displacement).
def bzMOI_4(isotopologue, rC,rCH,rCD,r13C,fHC=0.5, printResults=False):
    """ Take benzene isotope composition and geometry parameters (r),
        generate atom coordinates for benzene
        and calculate principal moments of inertia.
        Return the B moment of inertia for fitting.
        'isotopologue' lists isotopes (C,X=13C,H,D) for all
        12 atoms in clockwise direction, e.g. ['CCCCCCDHDHHH'] for m-2D-benzene.
        Note that sin(30/180*pi)=1/2 and cos(30/180*pi)=sqrt(3/4).
        fHC defines to which degree (0..1) the hydrogen position is affected by the C/13C
        position difference (fHC=0: H position unchanged upon C/13C substitution).
    """
    iso = isotopologue
    rCC13 = lambda atom: rC if iso[atom]=="C" else r13C         # Distinguish r12C and r13C
    rH = lambda atom: (1-fHC)*rC+fHC*rCC13(atom-6)+rCH          # H-distance from COM, adjusted for C/C13
    rD = lambda atom: (1-fHC)*rC+fHC*rCC13(atom-6)+rCD          # D-distance from COM, adjusted for C/C13
    rHD = lambda atom: rH(atom) if iso[atom]=="H" else rD(atom) # Distinguish rH and rD
    global NISTmass
    _m = [NISTmass[atom] for atom in isotopologue]              # masses from NIST table
    cos30 = np.sqrt(3/4)
    C1 = [   rCC13(0),               0, 0]  # x,y,z coordinates for C1
    C2 = [ rCC13(1)/2,  rCC13(1)*cos30, 0]  # x,y,z coordinates for C2
    C3 = [-rCC13(2)/2,  rCC13(2)*cos30, 0]  # ...
    C4 = [  -rCC13(3),               0, 0]
    C5 = [-rCC13(4)/2, -rCC13(4)*cos30, 0]
    C6 = [ rCC13(5)/2, -rCC13(5)*cos30, 0]
    H1 = [     rHD(6),               0, 0]  # x,y,z coordinates for H1
    H2 = [   rHD(7)/2,    rHD(7)*cos30, 0]  # x,y,z coordinates for H2
    H3 = [  -rHD(8)/2,    rHD(8)*cos30, 0]  # ...
    H4 = [    -rHD(9),               0, 0]
    H4 = [    -rHD(9),               0, 0]
    H5 = [ -rHD(10)/2,  -rHD(10)*cos30, 0]
    H6 = [  rHD(11)/2,  -rHD(11)*cos30, 0]
    r_i = np.array([C1,C2,C3,C4,C5,C6,H1,H2,H3,H4,H5,H6])
    _PMI = PMI(_m,r_i , printResults=printResults)  # Calculate principal moments (MOI)
    return _PMI                                     # Return A,B,C-axis MOI for fit

# Test against D6benzene13MOI in section (3)
moi4 = bzMOI_4('XCCCCCDDDDDD', rC,rH-rC,rD-rC,r13C)      # [107.03269, 108.96003, 215.99272]; agrees


# SECTION (5)
#------------------------------------------------------#
#--- Fitting of r_0 geometric parameters from MOI   ---#
#------------------------------------------------------#
# There are three advantages to fitting r_0 geometry parameters to experimental MIM, as opposed to
# using analytical equations as given in section (3.5) and (3.6):
# (1) We are no longer limited to isotopologue pairs, but we can use arbitrary amounts of data.
# (2) We obtain fit uncertainties and deviations, which help to diagnose the quality of data and fit.
# (3) The fit can be easily adjusted to account for different fit assumptions.
# Just as we need a sufficient number of isotopologue MIM equations in Section 3 to solve for atomic
# positions, the fits require sufficient isotopologue data to solve for a given number of structure
# parameters. We expect that two distinct types of isotopologues (e.g., ordinary and deuterated benzenes)
# are required to solve for 2 parameters, 3 types (e.g., ordinary, deuterated and 13C benzenes) to solve
# for 3 parameters, ....

# The definition of Residuals and Fit is not specific for benzene and the code (5.2)-(5.4)
# differs only by the number of fitted parameters and the presentation of fit results. Functions
# (5.2) - (5.4) use the corresponding benzene MOI functions (4.2) - (4.4).

# (5.1) Function to plot fit results
#-------------------------------------
# Plotting the fit results helps to spot outliers in the experimental data.
plt.rcParams["figure.figsize"] = (12,4) # Wide plots to fit all the x-axis labels
def plotFitResults(isotopeList, fitResults, fitResiduals, title='Fit results'):
    sigmaI = npa([i[2][1] for i in isotopeList])                 # MOI error bars (plot on residuals)
    weight = npa([i[3] for i in isotopeList])
    Residuals = fitResiduals/weight #/fitResiduals.max() * sigmaI.max()   # Y-axis scaling
    plt.errorbar(np.arange(len(isotopeList)), Residuals, yerr=sigmaI, capsize=3, fmt='o', ecolor='b', label=f"{title}")
    plt.xticks(np.arange(len(isotopeList)), [f"{iso[4]}\n{iso[0]}, {['A','B','C'][iso[1]]}" for iso in isotopeList], rotation=85)
    plt.ylabel('Fit residuals [amu*A**2]\n(error bars [sigma])', fontsize = 11)
    plt.get_current_fig_manager().canvas.manager.set_window_title(title)
    plt.gca().yaxis.set_major_formatter(plt.FormatStrFormatter('%.0e'))
    #plt.title('\n'.join(fitResults), fontsize = 11, y=1.0, x=0.98, pad=-40, loc='right')
    plt.axhline(y = 0, color = 'r', linestyle = '-')
    plt.legend(loc='lower right')
    plt.tight_layout()
    plt.show()

# (5.2) Fit function for 2 structural parameters: rC, rCH
#----------------------------------------------------------
def fitBz_2(isotopeList, parameters={'rC':1.4,'rCH':1.}, printResults='unspecified data'):
    """Fit rCC, rCH bond length to benzene isotopologue moments-of-inertia (MOI).
    """
    pNames = list(parameters.keys())                          # Unpack parameter names
    startValues = list(parameters.values())                   # ... and start values
  # The Residuals function will be minimized; (the difference between measured and calculated MOI)
  # Residuals =                     I_exp     -  I_calc(atoms,  r)[axis]    * weight
    Residuals = lambda r,isoList: [(iso[2][0] - bzMOI_2(iso[0],*r)[iso[1]]) * iso[3] for iso in isoList]
    Fit = leastsq(Residuals, startValues, args=(isotopeList), full_output=1)  # Fit optimization of parameters
    (popt, pcov, infodict, errmsg, ier) = Fit                 # Unpack fit results
    if printResults:                                          # Optional: print and plot results
        title = f'\n2 parameter fit of benzene inertial moments, {printResults}'
        print(title + '\n' + '-'*len(title))
        fitResiduals = npa(Residuals(popt,isotopeList))       # Uncertainty calculation ...
        #fitResiduals = infodict['fvec'] # -- Same residual data
        s_sq = (fitResiduals**2).sum() / (len(isotopeList)-len(popt)) # ... reduced chi**2
        rcov = pcov * s_sq                                    # ... covariances (cf. scipy.optimize.curve_fit)
        pSigma = np.sqrt(np.diag(rcov))                       # ... estimated 1-sigma parameter uncertainties
        pCostain = [np.sqrt((0.0015/r)**2 + u**2) for u,r in zip(pSigma,popt)]  # Costain errors
        fitResults = [f'{n:>8} = {p:.4f}({s:.4f}) [{sC:.4f}]' for n,p,s,sC in zip(pNames,popt,pSigma,pCostain)]
        print('Fitted parameters [A] (1-sigma uncertainties) [Costain uncertainties]:\n'+'\n'.join(fitResults))
        plotFitResults(isotopeList, fitResults[:2], fitResiduals, title=f'2-Parameter fit, {printResults}')
    return popt
# Fit of our data (A,B constants) with 2-parameter fit:
f= fitBz_2(InList_AB, printResults='Heo2021 A,B constants')          # rC = 1.3971(0.0001) [0.0011]; rCH = 1.0806(0.0003) [0.0014]     <==== Values in Table 4, Col. 1
# Fit of all data (A,B constants, including literature values) with 2-parameter fit:
f= fitBz_2(maskedList_AB, printResults='All Data A,B constants')      # rC = 1.3971(0.0000) [0.0011]; rCH = 1.0803(0.0002) [0.0014]     <==== Values in Table 4, Col. 2
# Fit of all data (A,B constants, including literature values) with 2-parameter fit:
f= fitBz_2(maskedList_ABC, printResults='All Data A,B,C constants')   # rC = 1.3972(0.0001) [0.0011]; rCH = 1.0803(0.0004) [0.0014]  # Note the large C-axis residuals due to inertial defect
# Fit of all data (A,B,C constants) with 2-parameter fit, with equal weight for all data:
f= fitBz_2(fullList_ABC_w0, printResults='All Data A,B,C constants, equal weight') # The values with large uncertainty might distort the fit!
                                                  # Strong distortion is observed if C-constants for Heo2021 13C isotopologues are included.
# Fit of data without outliers (A,B,C constants, including literature values) with 2-parameter fit:
f= fitBz_2(maskedList_ABC, printResults='No outliers, A,B,C constants')   # Results are identical to those for all data. Fit is quite robust.
# Fit of data used in Kunishige2015:
f= fitBz_2(KunishigeList, printResults='Kunishige A,B,C constants') # Good agreement with values given by Kunishige.
# As expected, we obtain robust results for rH, rC all cases. Because we assume rH=rD and
# rC=r13C, the resulting bond lengths should fall between the isotope-specific r0 values.

# (5.3) Fit function for 3 structural parameters rC, rCH, rCD
#--------------------------------------------------------------
def fitBz_3(isotopeList, parameters={'rC':1.4,'rCH':1.,'rCD':1.}, printResults='unspecified data'):
    """Fit benzene isotopologue moments-of-inertia listed in isotopeList.
    """
    pNames = list(parameters.keys())                          # Unpack parameter names
    startValues = list(parameters.values())                   # ... and start values
    Residuals = lambda r,isoList: [(iso[2][0] - bzMOI_3(iso[0],*r)[iso[1]]) * iso[3] for iso in isoList]
    Fit = leastsq(Residuals, startValues, args=(isotopeList), full_output=1)  # Fit optimization of parameters
    (popt, pcov, infodict, errmsg, ier) = Fit
    if printResults:                                          # Optional: print and plot results
        title = f'\n3 parameter fit of benzene inertial moments, {printResults}'
        print(title + '\n' + '-'*len(title))
        fitResiduals = npa(Residuals(popt,isotopeList))       # Uncertainty calculation ...z
        s_sq = (fitResiduals**2).sum() / (len(isotopeList)-len(popt)) # ... reduced chi**2
        rcov = pcov * s_sq                                    # ... covariances (cf. scipy.optimize.curve_fit)
        pSigma = np.sqrt(np.diag(rcov))                       # ... estimated 1-sigma parameter uncertainties
        pCostain = [np.sqrt((0.0015/r)**2 + u**2) for u,r in zip(pSigma,popt)]  # Costain errors
        fitResults = [f'{n:>8} = {p:.4f}({s:.4f}) [{sC:.4f}]' for n,p,s,sC in zip(pNames,popt,pSigma,pCostain)]
        fitResults.append(f'   -------------------------------------')
        fitResults.append(f'{"rH-rD":>8} = {popt[1]-popt[2]:.4f}({np.sqrt(pSigma[1]**2+pSigma[2]**2):.4f}) [{np.sqrt(pCostain[1]**2 + pCostain[2]**2):.4f}]')
        print('Fitted parameters [A] (1-sigma uncertainties) [Costain uncertainties]:\n'+'\n'.join(fitResults))
        plotFitResults(isotopeList, fitResults[:3], fitResiduals, title=f'3-Parameter fit, {printResults}')
    return popt
# Fit of our data / full data-set with 3-parameter fit:
f= fitBz_3(InList_AB, printResults='Heo2021 A,B constants')          # rC = 1.3945(6)[12]; rCH = 1.1003(43)[45]; rCD = 1.0917(24)[28]   <===== Values in Tab.4, Col.3
f= fitBz_3(maskedList_AB, printResults='Masked Data A,B constants')  # rC = 1.3937(3)[11]; rCH = 1.1069(24)[28]; rCD = 1.0953(14)[19]    <===== Values in Tab.4, Col.4
#f=fitBz_3(fullList_ABC, printResults='All Data A,B,C constants')   # Slight differences when C constants and outliers are included
# --- Fit only data referenced in Kunishige2015 (deuterated benzenes) to see if we reproduce their results of rH=rD
f= fitBz_3(KunishigeList, printResults='Kunishige A,B,C constants') # rC = 1.397(141.053]; rCH = 1.080(1088.150]; rCD = 1.080(614.973]  <===== Values in Tab.4, Col.4
# Fit Results:
# As expected, we can fit 3 parameters if we fit data from 3 distinct types of isotopologues
# (benzene, deuterated benzenes, 13C-benzenes), but the fit becomes badly determined when we
# only fit 2 types of isotopologues (i.e., KunishigeList).
# If we vary fit axis and isotopologues, we obtain a range of rH-rD results between 7 mA and 16 mA.
# This value is larger than the expected 'typical' 3-5 mA value (see: Laurie1962). Ignoring the
# 12C,13C isotope effect clearly introduces a significant error. Differences between 12C and 13C bond
# Lengths may be small but, because the mass of c is so much larger that of H,D, this has a
# large effect on the fitted inertial moments!

# (5.4) Fit function for 4 structural parameters rC, rCH, rCD, r13C. The number of relevant parameters
#------------------------------------------------------------------------------------------------------
# is large (distinct bond lengths for rCC, rC13C, r13C13C, rCH, r13CH, rCD, r13CD and possible
# variations in the bond angles in C/13C isotopologues). We reduce the parameters with the following
# assumptions to obtain a viable fit: (a) C6 symmetry; we fit rC and r13C versus COM. (b) We assume
# that H,D atom positions shift with C,13C positions by a factor fCH. Note that harmonic vibration leads
# to an effective shift of fitted r0 positions (because we fit <r**2>, not <r>*2), which does not correspond
# to an actual atom displacement.
def fitBz_4(isotopeList, parameters={'rC':1.4,'rCH':1,'rCD':1,'r13C':1.4}, fHC=0.5, printResults='unspecified data'):
    """Fit benzene isotopologue moments-of-inertia listed in isotope.
    """
    pNames = list(parameters.keys())                          # Unpack parameter names
    startValues = list(parameters.values())                   # ... and start values
    Residuals = lambda r,isoList: [(iso[2][0] - bzMOI_4(iso[0],*r,fHC=fHC)[iso[1]]) * iso[3] for iso in isoList]
    #   sigmaRC, sigmaRCH, sigmaRCD, sigmaR13C = np.NAN,np.NAN,np.NAN,np.NAN
    Fit = leastsq(Residuals, startValues, args=(isotopeList), full_output=1)  # Fit optimization of parameters
    (popt, pcov, infodict, errmsg, ier) = Fit
    if printResults:                                          # Optional: print and plot results
        title = f'\n4 parameter fit of benzene inertial moments, {printResults}'
        print(title + '\n' + '-'*len(title))
        fitResiduals = npa(Residuals(popt,isotopeList))       # Uncertainty calculation ...
        s_sq = (fitResiduals**2).sum() / (len(isotopeList)-len(popt)) # ... reduced chi**2
        try:
            rcov = pcov * s_sq                               # ... covariances (cf. scipy.optimize.curve_fit)
            pSigma = np.sqrt(np.diag(rcov))                       # ... estimated 1-sigma parameter uncertainties
        except:
            pSigma = npa([np.nan]*len(startValues))
        pCostain = [np.sqrt((0.0015/r)**2 + u**2) for u,r in zip(pSigma,popt)]  # Costain errors
        fitResults = [f'{n:>8} = {p:.4f}({s:.4f}) [{sC:.4f}]' for n,p,s,sC in zip(pNames,popt,pSigma,pCostain)]
        fitResults.append(f' -----------------------------------')
        fitResults.append(f'{"rH-rD":>8} = {popt[1]-popt[2]:.4f}({np.sqrt(pSigma[1]**2+pSigma[2]**2):.4f}) [{np.sqrt(pCostain[1]**2 + pCostain[2]**2):.4f}]')
        fitResults.append(f'{"r13C-rC":>8} = {popt[3]-popt[0]:.4f}({np.sqrt(pSigma[3]**2+pSigma[0]**2):.4f}) [{np.sqrt(pCostain[3]**2 + pCostain[0]**2):.4f}]')
        print('Fitted parameters [A] (1-sigma uncertainties) [Costain uncertainties]:\n'+'\n'.join(fitResults))
        plotFitResults(isotopeList, fitResults[:3], fitResiduals, title=f'4-Parameter fit, {printResults}')
    return popt
# Fit of our data / full data-set with 4-parameter fit:
fitBz_4(InList_AB, fHC=0.5, printResults='Heo2021 A,B constants, fHC=0.5')     #   rC = 1.381(0.030) [0.030];  rH = 1.203(0.226) [0.226];
                                                                              # r13C = 1.382(0.028) [0.028]; rD = 1.150(0.130) [0.130];
fitBz_4(maskedList_AB, fHC=0.5, printResults='All Data A,B constants, fHC=0.5') #   rC = 1.402(0.011) [0.011]; rCH = 1.047(0.082) [0.082];
                                                                              # r13C = 1.401(0.010) [0.010]; rCD = 1.061(0.046) [0.046];
fitBz_4(maskedList_AB, fHC=0.9, printResults='All Data A,B constants, fHC=0.9')#  rC = 1.399(0.006) [0.007]; rCH = 1.068(0.048) [0.048]
                                                                              # r13C = 1.399(0.006) [0.006]; rCD = 1.073(0.028) [0.028]
# Same fit as above, but with fHD as a fit parameter
def fitBz_41(isotopeList, parameters={'rC':1.4,'rCH':1,'rCD':1,'r13C':1.4,'fHC':0.5}, printResults='unspecified data'):
    """Fit benzene isotopologue moments-of-inertia listed in isotope.
       Here, I also try to fit fHC; the parameter determining how much an H atom moves upon
       12C/13C substitution.
    """
    pNames = list(parameters.keys())                          # Unpack parameter names
    startValues = list(parameters.values())                   # ... and start values
    Residuals = lambda r,isoList: [(iso[2][0] - bzMOI_4(iso[0],*r)[iso[1]]) * iso[3] for iso in isoList]
    #   sigmaRC, sigmaRCH, sigmaRCD, sigmaR13C = np.NAN,np.NAN,np.NAN,np.NAN
    Fit = leastsq(Residuals, startValues, args=(isotopeList), full_output=1)  # Fit optimization of parameters
    (popt, pcov, infodict, errmsg, ier) = Fit
    if printResults:                                          # Optional: print and plot results
        title = f'\n4 parameter fit of benzene inertial moments, {printResults}'
        print(title + '\n' + '-'*len(title))
        fitResiduals = npa(Residuals(popt,isotopeList))       # Uncertainty calculation ...
        s_sq = (fitResiduals**2).sum() / (len(isotopeList)-len(popt)) # ... reduced chi**2
        try:
            rcov = pcov * s_sq                               # ... covariances (cf. scipy.optimize.curve_fit)
            pSigma = np.sqrt(np.diag(rcov))                       # ... estimated 1-sigma parameter uncertainties
        except:
            pSigma = npa([np.nan]*len(startValues))
        pCostain = [np.sqrt((0.0015/r)**2 + u**2) for u,r in zip(pSigma,popt)]  # Costain errors
        fitResults = [f'{n:>8} = {p:.4f}({s:.4f}) [{sC:.4f}]' for n,p,s,sC in zip(pNames,popt,pSigma,pCostain)]
        fitResults.append(f' -----------------------------------')
        fitResults.append(f'{"rH-rD":>8} = {popt[1]-popt[2]:.4f}({np.sqrt(pSigma[1]**2+pSigma[2]**2):.4f}) [{np.sqrt(pCostain[1]**2 + pCostain[2]**2):.4f}]')
        fitResults.append(f'{"r13C-rC":>8} = {popt[3]-popt[0]:.4f}({np.sqrt(pSigma[3]**2+pSigma[0]**2):.4f}) [{np.sqrt(pCostain[3]**2 + pCostain[0]**2):.4f}]')
        print('Fitted parameters [A] (1-sigma uncertainties) [Costain uncertainties]:\n'+'\n'.join(fitResults))
        plotFitResults(isotopeList, fitResults[:3], fitResiduals, title=f'4.1-Parameter fit, {printResults}')
    return popt
# Fit of our data / full data-set with 4-parameter fit:
fitBz_41(InList_AB, printResults='Our Data A,B constants')
fitBz_41(maskedList_AB, printResults='All Data A,B constants') #   rC = 1.397(2.732); rCH = 1.081(21.0653);   rH-rD = 0.0005
                                                             # r13C = 1.397(2.521); rCD = 1.081(11.9070); r13C-rC = -0.0002; fHC = 0.3995(4951.6532)
# Fit Results:
# Uncertainties in the 4 parameter fit are excessive, indicating an under-determined fit.
# We also have the problem that there is no good way to choose a 'correct' fHC.
# As expected, it is not possible to fit isotope-specific r0 bond parameters for all species.


# SECTION (6)
#-----------------------------#
#--- Monte-Carlo analysis  ---#
#-----------------------------#
# Determine how the measurement uncertainties affect the fit result (error estimation).
# The MC simulation samples MOI values within the given experimental uncertainties to
# determine how the uncertainties affect fitted parameters (bond lengths).
#
# I follow the concepts outlined at: """http://www-personal.umd.umich.edu/~wiclarks/AstroLab/HOWTOs/NotebookStuff/MonteCarloHOWTO.html"""
# The Monte Carlo analysis repeats the following steps:
# -> Generate random MOI values within the experimental (+/- 1-sigma) error bounds
# -> Fit structure parameters for MOI
# The statistical distribution of the fit results (structure parameters rC,rH,rD) is analyzed
# to characterize the uncertainty arising from the MOI uncertainties.
#
# The code for Monte-Carlo simmulations is not specific for benzene and the code (6.2)-(6.4)
# differs only by the number of fitted parameters. Functions (6.2) - (6.4) use the corresponding
# fit functions (5.2) - (5.4).

# (6.1) Imports and function to plot Monte Carlo results
#---------------------------------------------------------
rng = np.random.default_rng()       # Default numpy pseudo-RNG (PCG algorithm)
                                    # rng.normal(value,sigma) gives normal-distributed sample
                                    # with np.mean = value and np.std = sigma.

# Print / plot how data uncertainties affect parameter values and correlations.
def plotMonteCarloResults(mcSim, name='Monte Carlo Simulation'):
    """ Calculate and plot results for Monte Carlo Simulation.
    """
    nSamples, nCoeff = mcSim.shape
    normalDist = lambda x,x0,sigma: (1 / (np.sqrt(2 * np.pi) * sigma)) *np.exp(-0.5 *((x-x0)/sigma)**2)
    rC,rH,*rest = mcSim.transpose()
    mrC,arC,srC = np.median(rC),np.average(rC),np.std(rC)     # Results for 0-C distance (equivalent to C-C distance)
    mrH,arH,srH = np.median(rH),np.average(rH),np.std(rH)     # Results for C-H distance
    print(f'Monte-Carlo analysis results for {name}')
    print('--------------------------------------------------')
    print(f'Parameters [A]: Average,  Median  (+/- 1-sigma)')
    print(f'            rC: {arC:7.4f}, {mrC:7.4f}  (+/-{srC:.5f})')
    print(f'            rH: {arH:7.4f}, {mrH:7.4f}  (+/-{srH:.5f})')
    if nCoeff > 2:
        rD,*rest = rest
        mrD,arD,srD = np.median(rD),np.average(rD),np.std(rD)           # Results for C-D distance
        print(f'            rD: {arD:7.4f}, {mrD:7.4f}  (+/-{srD:.5f})')
    if nCoeff > 3:
        r13C,*rest = rest
        mr13C,ar13C,sr13C = np.median(r13C),np.average(r13C),np.std(r13C)   # Results for 0-13C distance
        print(f'          r13C: {ar13C:7.4f}, {mr13C:7.4f}  (+/-{sr13C:.5f})')
    if nCoeff > 2:
        print('--------------------------------------------')
        rHD = rH - rD
        mrHD,arHD,srHD = np.median(rHD),np.average(rHD),np.std(rHD)      # Difference of rH and rD distance
        print(f'         rH-rD: {arHD:7.4f}, {mrHD:7.4f}  (+/-{srHD:.5f})')
    if nCoeff > 3:
        rCX = rC - r13C
        mrCX,arCX,srCX = np.median(rCX),np.average(rCX),np.std(rCX)      # Difference of rH and rD distance
        print(f'       rC-r13C: {arCX:7.4f}, {mrCX:7.4f}  (+/-{srCX:.5f})')
    # ----------- Plot distribution of bond lengths
    fig1, axs1 = plt.subplots(2, 2)
    fig1.canvas.manager.set_window_title(name + ' - Parameter distribution')
    n,bins,_ = axs1[0,0].hist((rC-mrC)*1e3,bins=50, density=1)               # Distribution of fit results for r(0-C) in mA
    axs1[0,0].plot(bins, normalDist(bins,0,srC*1000), 'k--', linewidth=2)
    axs1[0,0].set(xlabel=f'r(C-C) [mA]  - {mrC:.4f} A', ylabel='histogram')
    n,bins,_ = axs1[0,1].hist((rH-mrH)*1000,bins=50, density=1)              # Distribution of fit results for r(C-H) in mA
    axs1[0,1].plot(bins, normalDist(bins,0,srH*1000), 'k--', linewidth=2)
    axs1[0,1].set(xlabel=f'r(C-H) [mA]  - {mrH:.4f} A', ylabel='histogram')
    if nCoeff > 2:
        n,bins,_ =  axs1[1,0].hist((rD-mrD)*1000,bins=50, density=1)         # Distribution of fit results for r(C-D) in mA
        axs1[1,0].plot(bins, normalDist(bins,0,srD*1000), 'k--', linewidth=2)
        axs1[1,0].set(xlabel=f'r(C-D) [mA]  - {mrD:.4f} A', ylabel='histogram')
    if nCoeff ==3:
        n,bins,_ =  axs1[1,1].hist((rHD)*1000,bins=50, density=1)            # Distribution of fit results for r(H)-r(D) in mA
        axs1[1,1].plot(bins, normalDist(bins,mrHD*1000,srHD*1000), 'k--', linewidth=2)
        axs1[1,1].set(xlabel=f'r(H) - r(D) [mA]', ylabel='histogram')
    if nCoeff > 4:
        n,bins,_ =  axs1[1,1].hist((r13C-mr13C)*1000,bins=50, density=1)     # Distribution of fit results for r(13C) in mA
        axs1[1,1].plot(bins, normalDist(bins,0,sr13C*1000), 'k--', linewidth=2)
        axs1[1,1].set(xlabel=f'r(13C) [mA]  - {mr13C:.4f} A', ylabel='histogram')
    # --------------- Plot correlation between bond lengths as scatter plots
    fig2, axs2 = plt.subplots(2, 2)
    fig2.canvas.manager.set_window_title(name+' - Parameter correlation')
                            #--- Plot rCH versus rCC
    axs2[0,0].set(xlabel=f'r(C-C) [mA]  - {mrC:.4f} A', ylabel=f'displacement [mA]')
    axs2[0,0].scatter((rC-mrC)*1000, (rH-mrH)*1000, alpha=0.5, s=9, edgecolor='none',label=f'rCH - {mrH:.4f}')
    axs2[0,1].set(xlabel=f'r(C-H) [mA]  - {mrH:.4f} A', ylabel=f'displacement [mA]')
    axs2[0,1].scatter((rH-mrH)*1000, (rC-mrC)*1000, alpha=0.5, s=9, edgecolor='none',label=f'rCC - {mrC:.4f}')
    if nCoeff > 2:          #--- Add plots versus rCD, rCH-rCD
        axs2[0,0].scatter((rC-mrC)*1000, (rD-mrD)*1000, alpha=0.5, s=9, edgecolor='none',label=f'rCD - {mrD:.4f}')
        axs2[0,1].scatter((rH-mrH)*1000, (rD-mrD)*1000, alpha=0.5, s=9, edgecolor='none',label=f'rCD - {mrD:.4f}')
        axs2[1,0].set(xlabel=f'r(C-D) [mA] - {mrD:.4f} A', ylabel=f'displacement [mA]')
        axs2[1,0].scatter((rD-mrD)*1000, (rC-mrC)*1000, alpha=0.5, s=9, edgecolor='none',label=f'rCC - {mrC:.4f}')
        axs2[1,0].scatter((rD-mrD)*1000, (rH-mrH)*1000, alpha=0.5, s=9, edgecolor='none',label=f'rCH - {mrH:.4f}')
    if nCoeff ==3:
        axs2[1,1].set(xlabel=f'r(H) - rD [mA]', ylabel=f'displacement [mA]')
        axs2[1,1].scatter((rH- rD)*1000, (rC-mrC)*1000, alpha=0.5, s=9, edgecolor='none',label=f'rCC - {mrC:.4f}')
        axs2[1,1].scatter((rH- rD)*1000, (rH-mrH)*1000, alpha=0.5, s=9, edgecolor='none',label=f'rCH - {mrH:.4f}')
        axs2[1,1].scatter((rH- rD)*1000, (rD-mrD)*1000, alpha=0.5, s=9, edgecolor='none',label=f'rCD - {mrD:.4f}')
        axs2[1,0].legend()
        axs2[1,1].legend()
    if nCoeff > 3:         #--- Add data for r13C to plots
        axs2[0,0].scatter((rC-mrC)*1000, (r13C-mr13C)*1000, alpha=0.5, s=9, edgecolor='none',label=f'r13C - {mr13C:.4f}')
        axs2[0,1].scatter((rH-mrH)*1000, (r13C-mr13C)*1000, alpha=0.5, s=9, edgecolor='none',label=f'r13C - {mr13C:.4f}')
        axs2[1,0].scatter((rD-mrD)*1000, (r13C-mr13C)*1000, alpha=0.5, s=9, edgecolor='none',label=f'r13C - {mr13C:.4f}')
        axs2[1,1].set(xlabel=f'r(13C) [mA] - {mr13C:.4f} A', ylabel=f'displacement [mA]')
        axs2[1,1].scatter((r13C-mr13C)*1000, (rC-mrC)*1000, alpha=0.5, s=9, edgecolor='none',label=f'rCC - {mrC:.4f}')
        axs2[1,1].scatter((r13C-mr13C)*1000, (rH-mrH)*1000, alpha=0.5, s=9, edgecolor='none',label=f'rCH - {mrH:.4f}')
        axs2[1,1].scatter((r13C-mr13C)*1000, (rD-mrD)*1000, alpha=0.5, s=9, edgecolor='none',label=f'rCD - {mrD:.4f}')
        axs2[1,0].legend()
        axs2[1,1].legend()
        fig3,ax3 = plt.subplots()
        fig3.canvas.manager.set_window_title(name+' - dHD versus dC13C Parameter correlation')
        ax3.scatter((rH-rD)*1000, (rC-r13C)*1000, alpha=0.5, s=9, edgecolor='none')
        ax3.set(xlabel=f'r(C-H) - r(C-D) [mA]', ylabel=f'r(C-C) - r(C-13C) [mA]')
        fig3.tight_layout()
    axs2[0,0].legend()
    axs2[0,1].legend()
    fig1.tight_layout()
    fig2.tight_layout()
    if nCoeff == 2: fig1.axes[3].remove();fig1.axes[2].remove();fig2.axes[3].remove();fig2.axes[2].remove()

# (6.2) Monte-Carlo Simulation with 2-Parameter (rH, rC) fit function
#-----------------------------------------------------------------------
def bzMonteCarlo_2(isotopeList, samples = 100, printResults='unspecified data'):
    """Perform a Monte Carlo analysis of benzene rCC and rCH rD bond lengths
       based on a list of isotopologues and their rotational constants.
       The format for the isotopologueList is the same as fot the fits above.
       Numpy random.normal(value,sigma) gives the proper value distribution
       based on 1-sigma uncertainties.
    """
    plt.figure()                        # Create new figure for fit residuals
    popt = fitBz_2(isotopeList, printResults=printResults)         # Start values from fit of data
    startValues = {'rH':popt[0], 'rC':popt[1]}
    print(f'\nMonte-Carlo analysis')
    mcSim = []                          # List to hold Monte-Carlo results
    for i in tqdm(range(samples), ascii=' 2468#'):
        fitList_ = []                   # List of isotopologues to fit
        for isotope in isotopeList:     # Generate randomized input values within uncertainties
            fitList_.append((isotope[:2]+ [(rng.normal(isotope[2][0],isotope[2][1]),isotope[2][1])]+isotope[3:]))
        popt = fitBz_2(fitList_, parameters=startValues, printResults=False)  # Perform fit with current values
        mcSim.append(popt)              # Store results in mcSim
    mcSim_ = np.array(mcSim)            # Convert results to numpy array
    plotMonteCarloResults(mcSim_, name=printResults)
    return mcSim_
# Run on our data and the full data list
mc = bzMonteCarlo_2(InList_AB, samples=500, printResults='Heo2021 A,B constants')           # Excellent agreement between direct fit and MC results.
mc = bzMonteCarlo_2(fullList_AB, samples=500, printResults='All Data A,B constants')       # The propagated uncertainties are in the micro-A range.
mc = bzMonteCarlo_2(KunishigeList, samples=500, printResults='Kunishige A,B,C constants')

# (6.3) 3-Parameter (rCC,rCH,rC) function to perform Monte-Carlo Simulation
#----------------------------------------------------------------------------
def bzMonteCarlo_3(isotopeList, samples = 100, printResults='unspecified data'):
    """Perform a Monte Carlo analysis of benzene rCC, rCH, and rCD bond lengths
       based on a list of isotopologues and their rotational constants.
    """
    plt.figure()                        # Create new figure for fit residuals
    popt = fitBz_3(isotopeList, printResults=printResults)         # Start values from fit of data
    startValues = {'rH':popt[0], 'rC':popt[1], 'rD':popt[2]}
    print(f'\nMonte-Carlo analysis')
    mcSim = []                          # List to hold Monte-Carlo results
    for i in tqdm(range(samples), ascii=True):
        fitList_ = []                   # List of isotopologues to fit
        for isotope in isotopeList:     # Generate randomized input values within uncertainties
            fitList_.append((isotope[:2]+ [(rng.normal(isotope[2][0],isotope[2][1]),isotope[2][1])]+isotope[3:]))
        popt = fitBz_3(fitList_, parameters=startValues, printResults=False)  # Perform fit with current values
        mcSim.append(popt)              # Store results in mcSim
    mcSim_ = np.array(mcSim)            # Convert results to numpy array
    plotMonteCarloResults(mcSim_, name=printResults)
    return mcSim_
mc = bzMonteCarlo_3(InList_AB, samples=500, printResults='Heo2021 A,B constants')           # Note the strong linear correlation between parameters.
mc = bzMonteCarlo_3(fullList_AB, samples=500, printResults='All Data A,B constants')       #
mc = bzMonteCarlo_3(fullList_ABC_w0, samples=500, printResults='All Data A,B,C constants')      #
mc = bzMonteCarlo_3(KunishigeList, samples=500, printResults='Kunishige A,B,C constants')

# (6.4) 4-Parameter (rCC,rCH,rC,r13C) function to perform Monte-Carlo Simulation
#---------------------------------------------------------------------------------
def bzMonteCarlo_4(isotopeList, fHC=0.5, samples=100, printResults='unspecified data'):
    """Perform a Monte Carlo analysis of benzene rC, rH, and rD distance-to-COM
       based on a list of isotopologues and their rotational constants.
       The format for the isotopologueList corresponds to 'iList', above.
       Numpy random.normal(value,sigma) gives the proper value distribution
       based on 1-sigma uncertainties.
    """
    plt.figure()                        # Create new figure for fit residuals
    if fHC:                             # Fit with fixed fHC
        popt = fitBz_4(isotopeList, fHC=fHC, printResults=printResults)         # Start values from fit of data
        startValues = {'rH':popt[0], 'rC':popt[1], 'rD':popt[2], 'r13C':popt[3]}
    else:                               # Fit with fHC as fit parameter
        popt = fitBz_41(isotopeList, printResults=printResults)         # Start values from fit of data
        startValues = {'rH':popt[0], 'rC':popt[1], 'rD':popt[2], 'r13C':popt[3], 'fHC':popt[4]}
    print(f'\nMonte-Carlo analysis')
    print(f'\nMonte-Carlo analysis')
    mcSim = []                          # List to hold Monte-Carlo results
    failed = 0
    for i in tqdm(range(samples), ascii=True):
        fitList_ = []                    # List of isotopologues to fit
        for isotope in isotopeList:     # Generate randomized input values within uncertainties
            fitList_.append((isotope[:2]+ [(rng.normal(isotope[2][0],isotope[2][1]),isotope[2][1])]+isotope[3:]))
        try:
          if fHC:                       # Fit with fixed fHC
            popt = fitBz_4(fitList_, parameters=startValues, printResults=False, fHC=fHC)  # Perform fit with current values
          else:
            popt = fitBz_41(fitList_, parameters=startValues, printResults=False)  # Perform fit with current values
          mcSim.append(popt)           # Store results in mcSim            mcSim.append(popt)        # Store results in mcSim
        except: failed +=1
    mcSim_ = np.array(mcSim)            # Convert results to numpy array
    print(f'Failed fit attempts: {failed}')
    plotMonteCarloResults(mcSim_, name=printResults)
    return mcSim_
mc = bzMonteCarlo_4(InList_AB, samples=300, fHC=0.9, printResults='Heo2021 A,B constants')        # Note the strong linear correlation between parameters.
mc = bzMonteCarlo_4(fullList_AB, samples=200, fHC=0.9, printResults='All Data A,B constants')      #
mc = bzMonteCarlo_4(fullList_AB, samples=200, fHC=False, printResults='All Data A,B constants')      #
# Results of the Monte Carlo Anlysis:
# In most cases, the propagation of experimental 1-sigma uncertainties gives significantly smaller
# uncertainties than the fit uncertainties. This indicates that the experimental uncertainties are
# small as compared to model errors or systematic errors in the measurements. Note that the errors
# are much closer when much less precise C-axis constants are included in the fit (fullList_ABC_w0 fit).
# We see a very strong linear correlation between the fitted bond lengths.
# The uncertainty distribution in the fits is close to normal.


# SECTION (7)
#----------------------------------------------#
#--- Monte-Carlo analysis of synthetic data ---#
#----------------------------------------------#
# To illustrate the limitation to retrieve a number of structural parameters exceeding
# the number of 'unique' isotopologues, I fit synthetic data. I expect:
# Data for isotopologues containing C,H,D will allow to reliably fit 2 geometry parameters
# but will not reliably retrieve 3 geometry parameters.
# Data for isotopologues containing C,H,D,13C will allow to reliably fit 3 geometry parameters
# but will not reliably retrieve 4 geometry parameters.
# (Within C6 symmetry, any deuterated benzene isotopologue contributes the same
# information and adding more isotopologue data does not help to fit additional parameters.)

# (7.1) Synthetic data for the fits
#------------------------------------
# Start with ab initio structure parameters from section (2.1)
rCC = 1.394208                  # Distance of carbon from COM (ab initio value from section 2)
rH = 2.476418                   # Distance of hydrogen from COM (ab initio value from section 2)
rCH = rH-rCC                    # C-H bond length rCH = 1.08221 A
rCD = rCH-0.004                 # assume 4 mA contracted C-D bond, rCD = 1.07821 A
rC13C = rCC-0.0002              # assume 0.2 mA contracted C-13C bonds with rCC = 1.394008 A
_I2M  = lambda I: [505379.00914143625/i for i in I]    # amu*A**2 to MHz
sigmas = (1e-2,1e-2,1e-2)       # Assume experimental uncertainties of 0.01 MHz for MC simulation
# Synthetic data including H,D and C,13C isotopologues
synListA = [         # List for H/D, C/13C isotopologues, MOI, based on the assumed bond lengths.
 ['CCCCCCHHHHHH',[_I2M(bzMOI_4('CCCCCCHHHHHH', rCC,rCH,rCD,rC13C)), sigmas], [1,1,1], 'synthetic'],
 ['CCCCCCDDDDDD',[_I2M(bzMOI_4('CCCCCCDDDDDD', rCC,rCH,rCD,rC13C)), sigmas], [1,1,1], 'synthetic'],
 ['CCCCCCDHHHHH',[_I2M(bzMOI_4('CCCCCCDHHHHH', rCC,rCH,rCD,rC13C)), sigmas], [1,1,1], 'synthetic'],
 ['CCCCCCDHDHHH',[_I2M(bzMOI_4('CCCCCCDHDHHH', rCC,rCH,rCD,rC13C)), sigmas], [1,1,1], 'synthetic'],
 ['CCCCCCDDDHHH',[_I2M(bzMOI_4('CCCCCCDDDHHH', rCC,rCH,rCD,rC13C)), sigmas], [1,1,1], 'synthetic'],
 ['XCCCCCHHHHHH',[_I2M(bzMOI_4('XCCCCCHHHHHH', rCC,rCH,rCD,rC13C)), sigmas], [1,1,1], 'synthetic'],
 ['XCCCCCDDDDDD',[_I2M(bzMOI_4('XCCCCCDDDDDD', rCC,rCH,rCD,rC13C)), sigmas], [1,1,1], 'synthetic']]
synListA_e= expandFitList(synListA)
# Synthetic data including H,D isotopologues only
synListB = [         # List for H/D isotopologues, MOI, based on the assumed bond lengths.
 ['CCCCCCHHHHHH',[_I2M(bzMOI_4('CCCCCCHHHHHH', rCC,rCH,rCD,rC13C)), sigmas], [1,1,1], 'synthetic'],
 ['CCCCCCDDDDDD',[_I2M(bzMOI_4('CCCCCCDDDDDD', rCC,rCH,rCD,rC13C)), sigmas], [1,1,1], 'synthetic'],
 ['CCCCCCDHHHHH',[_I2M(bzMOI_4('CCCCCCDHHHHH', rCC,rCH,rCD,rC13C)), sigmas], [1,1,1], 'synthetic'],
 ['CCCCCCDHDHHH',[_I2M(bzMOI_4('CCCCCCDHDHHH', rCC,rCH,rCD,rC13C)), sigmas], [1,1,1], 'synthetic'],
 ['CCCCCCDDDHHH',[_I2M(bzMOI_4('CCCCCCDDDHHH', rCC,rCH,rCD,rC13C)), sigmas], [1,1,1], 'synthetic'],
 ['CCCCCCDDDDHH',[_I2M(bzMOI_4('CCCCCCDDDDHH', rCC,rCH,rCD,rC13C)), sigmas], [1,1,1], 'synthetic'],
 ['CCCCCCDDDDDD',[_I2M(bzMOI_4('CCCCCCDDDDDD', rCC,rCH,rCD,rC13C)), sigmas], [1,1,1], 'synthetic'],]
synListB_e= expandFitList(synListB)

# (7.2) Two parameter (rCC,rCH) fits and Monte Carlo simulations
#-----------------------------------------------------------------
mcSim = bzMonteCarlo_2(synListA_e, samples=100, printResults='synListA with H/D, C/13C isotopologues')
mcSim = bzMonteCarlo_2(synListB_e, samples=100, printResults='synListB with H/D isotopologues')

# (7.3) Three parameter (rCC,rCH,rC) fits and Monte Carlo simulations
#----------------------------------------------------------------------
mcSim = bzMonteCarlo_3(synListA_e, samples=100, printResults='synListA with H/D, C/13C isotopologues')
mcSim = bzMonteCarlo_3(synListB_e, samples=100, printResults='synListB with H/D isotopologues')

# (7.4) Four parameter (rCC,rCH,rC,r13C) fits and Monte Carlo simulations
#--------------------------------------------------------------------------
mcSim = bzMonteCarlo_4(synListA_e, fHC=0.5, samples=100, printResults='synListA with H/D, C/13C isotopologues, fHD=0.5')
# Isotope list A (4 distinct isotopes in molecules) allows to retrieve up to 3 parameters (rC,rH,rD),
# But small 12C/13C isotope effect leads to significant errors in H/D bond lengths when rC=r13C is presumed!
# Convergence becomes problematic and parameter retrieval unreliable for 4 parameters; result also depends on
# assumption of fHC (which really should be 0 for retrieval of the assumed values - but then
# Isotope list B (3 distinct isotopes in molecules) allows to retrieve 2 parameters (rC,rH),
# but convergence becomes problematic and parameter retrieval unreliable for 3 parameters (rC,rH,rD).
# Note that the fit retrieves the çorrect'values, but the Monte Carlo simulation reveals
# that the fit is not robust.


# SECTION (8)
#----------------------------------------------------------------#
#--- Example code for mass-weighted rovibrational corrections ---#
#----------------------------------------------------------------#
# The general concepts used for fitting r0 geometries above is readily
# adapted for other types of fit, such as the fit of rs subsitution structures,
# and Watson's first order (rm(1)) and second order (rm(2)) mass-weighted
# rovibrational corections. All fits minimize (Iexp_g-Icalc_g) for axes g=a,b,c.
# These fits differ in the definition of the molecular inertial moment Icalc:
# r0 fits:  Ir = sum(m*r_g**2) (rigid-rotor inertial moment).
# rs fits:  Is = Ir + c_g  (constant correction factor c_g).
# rm1 fits: Im1 = Ir + c_g*sqrt(Ir)  (1st-order mass-weighed correction).
# rm2 fits: Im2 = Im1 + d_g*prod(m)/sum(m))**(1/(2*N-2))  (2nd-order mass-weighed correction).
# These fits are implemented more efficiently, with a simple graphical user
# interface, with symmetry-dependent choice of c,d correction terms, and with
# corrections for isotopologue rotational axis rotations in 'FitMOI.py'. Here
# I just give an example on how to implement such a fit.

# Inertial moment calculation for Watson's rm2 fit.
def bzMOI_rm2(isotopologue, rC,rCH,ca,cc,da,dc, printResults=False):
    """ Benzene MOI from isotopic composition and rCH,rC, using Watson's
        first order (c-Term) and second order (d-Term) mass-weighted corrections.
        Here, I ignore principal axis rotation and, because benzene is a planar
        oblate top, I set cb=ca and db=da.
        'isotopologue' lists isotopes (C,X=13C,H,D) for all
        12 atoms in clockwise direction, e.g. ['CCCCCCDHDHHH'] for m-2D-benzene.
        I substituted sin(30/180*pi)=1/2."""
    global NISTmass
    global cos30
    rH = rCH + rC               # rH is distance of H to center of molecule
    _m = [NISTmass[atom] for atom in isotopologue]  # masses from NIST table
    C1 = [   rC,         0, 0]  # x,y,z coordinates for C1
    C2 = [ rC/2,  rC*cos30, 0]  # x,y,z coordinates for C2
    C3 = [-rC/2,  rC*cos30, 0]  # ...
    C4 = [  -rC,         0, 0]
    C5 = [-rC/2, -rC*cos30, 0]
    C6 = [ rC/2, -rC*cos30, 0]
    H1 = [   rH,         0, 0]  # x,y,z coordinates for H1
    H2 = [ rH/2,  rH*cos30, 0]  # x,y,z coordinates for H2
    H3 = [-rH/2,  rH*cos30, 0]  # ...
    H4 = [  -rH,         0, 0]
    H5 = [-rH/2, -rH*cos30, 0]
    H6 = [ rH/2, -rH*cos30, 0]
    r_i = np.array([C1,C2,C3,C4,C5,C6,H1,H2,H3,H4,H5,H6])
    _PMI = PMI(_m,r_i , printResults=printResults)  # Calculate principal moments (MOI)
    cTerm = npa([ca,ca,cc])*np.sqrt(_PMI)           # First order (rmI) correction term (only diag. elements)
                                                    # For rs fit, replace with cTerm = npa([ca,ca,cc])
    dTerm = npa([da,da,dc])*(np.prod(_m)/np.sum(_m))**(1/(2*len(_m)-2)) # Second order (rmII) correction terms for a,b,c axes
                                                    # For rs, rm1 fits, remove dTerm and da,dc parameters.
    Irm2 = _PMI + cTerm + dTerm                     # Inertial moment with rm1 and rm2 correction terms
    return np.array(Irm2)                           # Return A,B,C-axis MOI for fit
# Watson's rm2 fit.
def fit_rm2(isotopeList, parameters={'rC':1.4,'rCH':1.,'ca':0.01,'cc':0.01,'da':0.01,'dc':0.01}, printResults='unspecified data'):
    """Fit of rCC, rCH bond length and mass-weighted c,d rovibrational correction terms.
    """
    pNames = list(parameters.keys())                          # Unpack parameter names
    startValues = list(parameters.values())                   # ... and start values
  # The Residuals function will be minimized; (the difference between measured and calculated MOI)
  # Residuals =                     I_exp     -  I_calc(atoms,  r)[axis]    * weight
    Residuals = lambda r,isoList: [(iso[2][0] - bzMOI_rm2(iso[0],*r)[iso[1]]) * iso[3] for iso in isoList]
    Fit = leastsq(Residuals, startValues, args=(isotopeList), full_output=1)  # Fit optimization of parameters
    (popt, pcov, infodict, errmsg, ier) = Fit                 # Unpack fit results
    if printResults:                                          # Optional: print and plot results
        title = f"\nWatson's rs2 fit of benzene inertial moments, {printResults}"
        print(title + '\n' + '-'*len(title))
        fitResiduals = npa(Residuals(popt,isotopeList))       # Uncertainty calculation ...
        #fitResiduals = infodict['fvec'] # -- Same residual data
        s_sq = (fitResiduals**2).sum() / (len(isotopeList)-len(popt)) # ... reduced chi**2
        rcov = pcov * s_sq                                    # ... covariances (cf. scipy.optimize.curve_fit)
        pSigma = np.sqrt(np.diag(rcov))                       # ... estimated 1-sigma parameter uncertainties
        pCostain = [np.sqrt((0.0015/r)**2 + u**2) for u,r in zip(pSigma,popt)]  # Costain errors
        fitResults = [f'{n:>8} = {p:.4f}({s:.4f}) [{sC:.4f}]' for n,p,s,sC in zip(pNames,popt,pSigma,pCostain)]
        print('Fitted parameters [A] (1-sigma uncertainties) [Costain uncertainties]:\n'+'\n'.join(fitResults))
        plotFitResults(isotopeList, fitResults[:2], fitResiduals, title=f"Watson's rs2 fit, {printResults}")
    return popt
f= fit_rm2(fullList_ABC, printResults='All Data A,B constants')   # Need to fit a,b,c, axis to obtain cc,dc parameters