# =============================================================================
# begin of file *script4.py
# =============================================================================
"""Micro plastic analysis.
"""

# Changes:
# - pep8 editing: structure, blank lines, spaces, line length,
#                 line continuation
# - unified file names
# - some python: x < i <= y, for instead of while
# - little rearranging

# script3:
# - build command line tool
# - precalc image size
# - define common fcns
# - split analysis and main fcn for gui use

# script4:
# - use os.path for file name manipulations
# - renaming: global->bulk, local->particle
# - create analyse_data fcn
# - write images as 8bit inverted

# module imports
# =============================================================================

# standard library imports
import os.path                     # for path operations

# external third party imports
import numpy as np                 # for scientific computing
import pandas as pd                # for data anlysis
import SimpleITK as sitk           # for image analysis

# local imports
from tools_for_MP_analysis import closing4x4pixel

# constants, global vars
# =============================================================================

# Template data frame for the size analysis: one zero-initialised counter
# per size class ('single', the edge lengths '25' ... '500', and 'large')
_C = ['single', *(str(edge) for edge in range(25, 525, 25)), 'large']
_S = [0] * len(_C)
EMPTY = pd.DataFrame({'s': _S}, index=_C)

# Polymer class dependent grouping (polymer, bio or other).
# NOTE: the class numbers below depend on the database in use!
# The database design is the reason for all the `if` cases in analyse_data.

# all class numbers
BULK_SET = set(range(1, 60))
# class numbers counted as 'other'
OTHER_SET = {5, 19, 33}
# class numbers counted as 'bio'
BIO_SET = {13, 14, 15, 16, 17, 18, 21, 51, 53, 57}
# everything that is neither 'other' nor 'bio' is counted as polymer
POLYMER_SET = BULK_SET.difference(OTHER_SET, BIO_SET)

# fcn defs
# =============================================================================

def data2image(data, size=None):
    """Create gray value image from data frame.

    data: data frame with columns 'x', 'y', 'g'
    size: tuple of width and height; if None, the size is derived from the
          largest 'x' and 'y' value in data

    Setting pixels is best effort: a pixel that cannot be set (e.g. because
    its coordinates lie outside of the image) is skipped. The number of
    skipped pixels is reported in a single RuntimeWarning.

    image: size width x height image
    """
    import warnings  # only needed for the skipped pixel report

    if size is None:
        width, height = max(data.x) + 1, max(data.y) + 1
    else:
        width, height = size

    # create empty image of given size and data type (enough for 11-bit)
    image = sitk.Image(int(width), int(height), sitk.sitkUInt16)

    # Iterate over the values directly instead of looking every value up by
    # its index label: faster, and rows with duplicate labels are not lost.
    skipped = 0
    for x, y, g in zip(data.x, data.y, data.g):
        try:
            image[int(x), int(y)] = int(g)
        except Exception:
            # Keep going, but do not swallow KeyboardInterrupt/SystemExit
            # the way a bare except would.
            skipped += 1

    if skipped:
        warnings.warn(
            "data2image: %d of %d pixels could not be set and were skipped"
            % (skipped, len(data.x)),
            RuntimeWarning,
            stacklevel=2,
        )

    return image


def write_image(img, name):
    """Write an image to file as 8-bit gray value image.

    img:  SimpleITK image
    name: name of the image file to write

    The gray values are rescaled with the default output range of
    sitk.RescaleIntensity and then cast to unsigned 8-bit.
    NOTE: intensity inversion is currently not applied.

    Return None.
    """
    rescaled = sitk.RescaleIntensity(img)
    eight_bit = sitk.Cast(rescaled, sitk.sitkUInt8)
    sitk.WriteImage(eight_bit, name)


def analyse_bulk_data(data, size, name, save=False):
    """Analyse bulk data.

    Write the gray value image and the polymer histogram of the complete
    data frame; optionally dump the data frame itself.

    data: data frame with columns 'x', 'y', 'g' and 'p'
    size: tuple of width and height
    name: name of data, used as prefix of all output file names
    save: if True, also write data to '<name>_image.csv'

    Return None.
    """
    if save:
        # Raw data as csv file
        data.to_csv(name+"_image.csv", sep=';')

    # Gray value image of all pixels as png file
    bulk_image = data2image(data, size=size)
    image_file = name+"_all.png"
    print(image_file)
    write_image(bulk_image, image_file)

    # Polymer histogram (number of pixels per class, ordered by class)
    # as csv file
    pixels_per_class = data.p.value_counts()
    pixels_per_class.sort_index().to_csv(name+"_histogram.csv", sep=';')


def analyse_particle_data(data, size, name, p):
    """Analyse particle data.

    Write the gray value image of the given pixels, label their connected
    components and write the labelled pixels and the component histogram.

    data: data frame with columns 'x', 'y' and 'g'
    size: tuple of width and height
    name: name of data, used as prefix of all output file names
    p:    integer class number, used as two-digit suffix of the file names

    Return component histogram: index is the component size in pixels,
    value is the number of components of that size.
    """
    # Gray value image as png file
    particle_image = data2image(data, size=size)
    write_image(particle_image, "%s%02d.png" % (name, p))

    # Connected components with 8 pixel neighbourhood, relabelled
    labelled = sitk.RelabelComponent(
        sitk.ConnectedComponent(particle_image, True))

    # Component label of every pixel as additional float column 'c'
    component = pd.Series(
        [labelled[int(data.x[row]), int(data.y[row])] for row in data.index],
        index=data.index, name='c', dtype=float)
    data_c = pd.concat([data, component], axis=1)
    data_c.to_csv("%s_image%02d.csv" % (name, p), sep=';')

    # Pixels per component -> number of components per size, as csv file
    pixels_per_component = data_c.c.value_counts()
    hist_c = pixels_per_component.value_counts().sort_index()
    hist_c.to_csv("%s_particle_histogram%02d.csv" % (name, p), sep=';')
    return hist_c


def size_analysis(hist_c, name, p):
    """Sort the particles of one class into size classes.

    hist_c: component histogram as returned by analyse_particle_data; the
            index is the particle size in pixels, the value the number of
            particles of that size
    name:   name of data, used as prefix of the output file name
    p:      integer class number, used as two-digit suffix of the file name

    Size classes (rows of EMPTY, in this order):
      'single'        particles of exactly one pixel
      '25' ... '500'  particles with an area of at most (edge length)**2,
                      counted in the first class they fit into
      'large'         all particles with an area above 500**2

    The area of a particle is its pixel count times 10.74**2 (presumably
    the pixel edge length in the unit of the class edges -- TODO confirm).

    The table is written to '<name>_sizeclasses<pp>.csv'.

    Return the size class table.
    """
    pixel_area = 10.74*10.74     # area of one pixel
    edge_step = 25               # edge length increment between classes
    n_edges = 20                 # number of edge classes '25' ... '500'
    large_pos = n_edges + 1      # row position of the 'large' class

    sb = EMPTY.copy()
    labels = list(sb.index)
    for sidx, count in hist_c.items():
        if sidx == 1:
            # exactly one pixel
            pos = 0
        else:
            area = pixel_area*sidx
            # Falls through to 'large' if no edge class fits. All edge
            # classes up to '500' have to be tested here, otherwise '500'
            # and 'large' can never be reached.
            pos = large_pos
            for k in range(1, n_edges + 1):
                if area <= (edge_step*k)**2:
                    pos = k
                    break
        # label based scalar access instead of chained positional
        # assignment on the string labelled column
        sb.at[labels[pos], 's'] += count

    # TODO: Still to introduce Blank correction!
    # Data output
    save_name = "%s_sizeclasses%02d.csv" % (name, p)
    sb.to_csv(save_name, sep=';')
    return sb
    
    
def analyse_data(data, img_size, name):
    """Run the bulk analysis and the particle analysis of every class.

    The size class tables of the single classes are summed up per group
    (bulk, polymer, bio, other) and written to one csv file per group.

    data:     data frame with columns 'x', 'y', 'g' and 'p'
    img_size: tuple of width and height
    name:     name of data, used as prefix of all output file names

    Return None.
    """
    analyse_bulk_data(data, img_size, name)

    print("  -- Analyse particles")

    # The different countings: output file suffix and the classes that
    # contribute to it (None: every class contributes)
    countings = (
        ("_sizeclasses_bulk.csv", None),
        ("_sizeclasses_polymer.csv", POLYMER_SET),
        ("_sizeclasses_bio.csv", BIO_SET),
        ("_sizeclasses_other.csv", OTHER_SET),
    )
    totals = {suffix: EMPTY.copy() for suffix, _ in countings}

    for p in range(1, 60):
        data_p = data[data.p == p]
        if len(data_p.x) == 0:
            # no pixels of this class; for later studies
            continue

        hist_c = analyse_particle_data(data_p, img_size, name, p)
        sb = size_analysis(hist_c, name, p)

        for suffix, members in countings:
            if members is None or p in members:
                totals[suffix] = totals[suffix] + sb

    print("- ...done")

    # Data output
    for suffix, _ in countings:
        totals[suffix].to_csv(name+suffix, sep=';')
    

def get_base_name(file_name):
    """Return the base name of a file, i.e. without dir and ext.

    file_name: a name of a file in the system

    base_name: last path component of file_name with its last extension
               removed; empty if file_name ends with a path separator
    """
    last_component = os.path.basename(file_name)
    return os.path.splitext(last_component)[0]


# class defs
# =============================================================================


# main fcn
# =============================================================================
def analyse(file_name, dir_name):
    """Analyse a data file, once as measured and once after closing.

    file_name: name of a ';' separated csv file with the columns
               'x', 'y', 'g' and 'p'
    dir_name:  directory the result files are written to; it is created
               if it does not exist

    All result files are named '<dir_name>/<base>_Polymer...' for the
    original data and '<dir_name>/<base>_closed_Polymer...' for the closed
    data, where <base> is file_name without dir and ext.

    Return None.
    """
    # The directory is used as given. Reducing it to its base name would
    # yield '' for a trailing separator ('results2/') and send all results
    # to the file system root.
    if dir_name:
        os.makedirs(dir_name, exist_ok=True)
    base_name = os.path.join(dir_name, get_base_name(file_name))

    # open data file as pandas data frame
    original_data = pd.read_csv(file_name, delimiter=';')

    # get maximum image size, shared by both passes
    img_size = original_data.x.max() + 1, original_data.y.max() + 1

    # -------------------------------------------------------------------------
    print("- Analyse original data")
    name = os.path.normcase(base_name + "_Polymer")
    analyse_data(original_data, img_size, name)

    # -------------------------------------------------------------------------
    print("- Closing data")
    closed_data = closing4x4pixel(original_data, 5)  # with 5 iterations
    print("- ... done")

    # -------------------------------------------------------------------------
    print("- Analyse closed data")
    name = os.path.normcase(base_name + "_closed_Polymer")
    analyse_data(closed_data, img_size, name)

    return


# main fcn
# =============================================================================
def main():
    """Run the analysis with the preset data file and result directory.
    """
    # preset name of the data file (could be asked for with input() instead)
    source_csv = "./36x36binning16.csv"
    # preset result directory
    target_dir = "results2/"

    analyse(source_csv, target_dir)


# script entry
# =============================================================================
if __name__ == "__main__":
    # run the analysis only when executed as a script, not on import
    main()

# =============================================================================
# end of file *script4.py
# =============================================================================
