import matplotlib.dates as mdates
from scipy.interpolate import griddata
import pandas as pd
import pdb
import numpy as np
import matplotlib.pyplot as plt 
#from datetime import datetime, timedelta
import datetime
import math
import argparse
import re
import xml.etree.ElementTree as ET
import gsw
import sys


def read_eng(input_filename, level0_filename):
   
   # Read the engineering file into a dataframe
   df = pd.read_csv(
       input_filename,
       skiprows=[0],
       names=['Time','Temperature','Conductivity','Pressure','V1','V2','V3','V4'],
       delimiter=r'[;,]',  # Specify delimiter if needed
       na_values='NAN',  # Handle missing values
       dtype={'RECORD': int},  # Specify data types if necessary
       engine='python'
   )
   
   if df['Time'].str.contains('T').all():
      df.rename(columns={'Time': 'DateTime'}, inplace=True)
   else: 
      # Extract the date from the filename
      # Use regular expression to find the date pattern (YYYY-MM-DD)
      match = re.search(r'\d{4}-\d{2}-\d{2}', input_filename)

      if match:
         date_str = match.group(0)
         print("Extracted date:", date_str)
         df['Date'] = date_str
         df['DateTime'] = pd.to_datetime(df['Date'] + "T" + df['Time'])
      else:
         print("Date not found in filename.")   
   
   df_eng = df[['DateTime','Temperature','Conductivity','Pressure','V1','V2','V3','V4']]
   
   # Write output to csv file
   df_eng.to_csv(level0_filename, sep=",", index=False)

   return df_eng

def find_elements_by_tag(data, tag):
    matches = []
    if data['tag'] == tag:
        matches.append(data)
    for child in data['children']:
        matches.extend(find_elements_by_tag(child, tag))
    return matches

# Recursive function to process each element
def parse_element(element):
    data = {
        "tag": element.tag,
        "attributes": element.attrib,
        "text": element.text.strip() if element.text else "",
        "children": []
    }
    # Process child elements recursively
    for child in element:
        data["children"].append(parse_element(child))
    return data

# Oxygen solubility function (Garcia and Gordon, 1992)
def oxsat(T, S):
    # Coefficients for oxygen solubility in seawater
    A0 = 2.00907
    A1 = 3.22014
    A2 = 4.05010
    A3 = 4.94457
    A4 = -0.256847
    A5 = 3.88767
    B0 = -0.00624523
    B1 = -0.00737614
    B2 = -0.0103410
    B3 = -0.00817083
    C0 = -0.000000488682
    
    Ts = np.log((298.15 - T) / (273.15 + T))  # Scaled temperature
    O2sol = np.exp(
        A0 + A1*Ts + A2*Ts**2 + A3*Ts**3 + A4*Ts**4 + A5*Ts**5 +
        S * (B0 + B1*Ts + B2*Ts**2 + B3*Ts**3) +
        C0 * S**2
    )
    return O2sol

def read_xmlcon(xmlcon_filename):
   # Parse the XML file
   tree = ET.parse(xmlcon_filename)
   root = tree.getroot()

   # Parse the root element
   xml_data = parse_element(root)

   # initialise a list to store the results
   sensor_cals = []

   file_version = xml_data['attributes'].get('SB_ConfigCTD_FileVersion')
   instrument_name = xml_data['children'][0]['children'][0]['text']

   for sensor in xml_data['children'][0]['children'][-1]['children']:
      sensor_type = sensor['children'][0]['tag']
      sensor_id = sensor['attributes'].get('SensorID')
      sensor_index = sensor['attributes'].get('index')
      calibration_date = next(
          (child['text'] for child in sensor['children'][0]['children'] if child['tag'] == 'CalibrationDate'), None
      )
      #print(f"Sensor Type: {sensor_type}, Sensor ID: {sensor_id}, Calibration Date: {calibration_date}")
      ScaleFactor = next(
          (child['text'] for child in sensor['children'][0]['children'] if child['tag'] == 'ScaleFactor'), None
      )
      #print(f"Sensor Type: {sensor_type}, Sensor ID: {sensor_id}, ScaleFactor: {ScaleFactor}")
      DarkVoltage = next(
          (child['text'] for child in sensor['children'][0]['children'] if child['tag'] == 'DarkVoltage'), None
      )
      Vblank = next(        
          (child['text'] for child in sensor['children'][0]['children'] if child['tag'] == 'Vblank'), None
      )
      #print(f"Sensor Type: {sensor_type}, Sensor ID: {sensor_id}, DarkVoltage: {DarkVoltage}")
      calibration_coefficients = next(        
          (child['text'] for child in sensor['children'][0]['children'] if child['tag'] == 'CalibrationCoefficients'), None
      )

      # Find Calibration Coefficients
      calibration_coefficients = None
      soc_values = []  # Initialize an empty lists 
      offset_values = []
      A_values = []
      B_values = []
      C_values = []
      D0_values =[]
      D1_values =[]
      D2_values =[]
      E_values =[] 
      Tau20_values = []
      H1_values =[] 
      H2_values =[] 
      H3_values =[] 
    
      # Loop through the children to find CalibrationCoefficients elements
      for child in sensor['children'][0]['children']:
         if child['tag'] == 'CalibrationCoefficients':
            calibration_coefficients = True  # Set to True if any calibration coefficients exist

            # Find the Soc value within CalibrationCoefficients
            soc = next((coeff['text'] for coeff in child['children'] if coeff['tag'] == 'Soc'), None)
            offset = next((coeff['text'] for coeff in child['children'] if coeff['tag'] == 'offset'), None)
            A = next((coeff['text'] for coeff in child['children'] if coeff['tag'] == 'A'), None)
            B = next((coeff['text'] for coeff in child['children'] if coeff['tag'] == 'B'), None)
            C = next((coeff['text'] for coeff in child['children'] if coeff['tag'] == 'C'), None)
            D0 = next((coeff['text'] for coeff in child['children'] if coeff['tag'] == 'D0'), None)
            D1 = next((coeff['text'] for coeff in child['children'] if coeff['tag'] == 'D1'), None)
            D2 = next((coeff['text'] for coeff in child['children'] if coeff['tag'] == 'D2'), None)
            E = next((coeff['text'] for coeff in child['children'] if coeff['tag'] == 'E'), None)
            Tau20 = next((coeff['text'] for coeff in child['children'] if coeff['tag'] == 'Tau20'), None)
            H1 = next((coeff['text'] for coeff in child['children'] if coeff['tag'] == 'H1'), None)
            H2 = next((coeff['text'] for coeff in child['children'] if coeff['tag'] == 'H2'), None)
            H3 = next((coeff['text'] for coeff in child['children'] if coeff['tag'] == 'H3'), None)

            if soc:
               soc_values.append(soc)  # Add Soc value to list
            if offset:
               offset_values.append(offset)   
            if A:
               A_values.append(A)   
            if B:
               B_values.append(B)   
            if C:
               C_values.append(C)   
            if D0:
               D0_values.append(D0)   
            if D1:
               D1_values.append(D1)   
            if D2:
               D2_values.append(D2)   
            if E:
               E_values.append(E)   
            if Tau20:
               Tau20_values.append(Tau20)   
            if H1:
               H1_values.append(H1)   
            if H2:
               H2_values.append(H2)   
            if H3:
               H3_values.append(H3)   

      # Create a dictionary to hold this sensor's data
      sensor_info = {
          'sensor_type': sensor_type,
          'sensor_id': sensor_id,
          'sensor_index': sensor_index,
          'calibration_date': calibration_date,
          'scale_factor': ScaleFactor,
          'dark_voltage': DarkVoltage,
          'Vblank': Vblank,
          'calibration_coefficients': calibration_coefficients,
          'Soc': soc_values,
          'offset': offset_values,
          'A': A_values,
          'B': B_values,
          'C': C_values,
          'D0': D0_values,
          'D1': D1_values,
          'D2': D2_values,
          'E': E_values,
          'Tau20': Tau20_values,
          'H1': H1_values,
          'H2': H2_values,
          'H3': H3_values
      }
      
      # Append the sensor's data to the sensor_data list
      sensor_cals.append(sensor_info)

   return sensor_cals

def create_level2(df_calibrated):

   df_level2 = pd.DataFrame()
   
   max_depth = float(df_calibrated['Depth'].max())
   min_depth = 0.5
   # Step 1: Define bin size (0.25 meters)
   bin_size = 0.25
   half_bin = bin_size / 2

   # Step 2: Calculate bin centers from 0.25 up to the maximum depth
   bin_centers = np.arange(min_depth, max_depth, bin_size)

   # Step 3: Calculate bin edges by creating intervals around each center
   bin_edges = np.concatenate(([bin_centers[0] - half_bin], bin_centers + half_bin))

   # Step 4: Create labels for each bin, centered on 0.25, 0.5, 0.75, etc.
   bin_labels = [f"{center - half_bin:.2f}-{center + half_bin:.2f}m" for center in bin_centers]

   # Step 5: Assign each depth value to a bin based on the centered bins
   df_calibrated['DepthBin'] = pd.cut(df_calibrated['Depth'], bins=bin_edges, labels=bin_centers, right=False)

   # Step 6: Group by 'DepthBin' and calculate the mean for each bin
   df_level2 = df_calibrated.groupby('DepthBin').mean().reset_index()
   
   # Get the date and time information from the original dataset
   DATETIME = df_calibrated['DateTime'][0]

   df_level2['DateTime'] = DATETIME

   return df_level2


def main(input_filename, xmlcon_filename, level0_filename, level1_filename):

   uM_conv = 44.661

   df_eng = read_eng(input_filename, level0_filename)

   cals = read_xmlcon(xmlcon_filename)

   # convert the temperature, conductivity, pressure into 
   # temperature, salinity, depth
   # Calculate salinity (PSU) from conductivity, temperature, and pressure
   df_eng['Salinity'] = gsw.SP_from_C(df_eng['Conductivity']*10., df_eng['Temperature'], df_eng['Pressure'])
   
   # calculate depth (m) from pressure (and latitude)
   latitude = 50.25
   df_eng['Depth'] = -gsw.z_from_p(df_eng['Pressure'], latitude)
   
   # Absolute Salinity (SA)
   longitude = -4.0
   SA = gsw.SA_from_SP(np.array(df_eng['Salinity']), np.array(df_eng['Pressure']), longitude, latitude)

   # Step Conservative Temperature (CT)
   CT = gsw.CT_from_t(SA, np.array(df_eng['Temperature']), np.array(df_eng['Pressure']))

   # Step 4: Density (rho)
   rho = gsw.rho(SA, CT, np.array(df_eng['Pressure']))/1000.

   # Apply the calibrations to the raw voltages for the sensors other than T, C, P
   # Loop over the calibration information - the order they appear in the 
   # XMLCON file is the order they are in the data
   for cal in cals:
      #print(f"Sensor Type: {cal['sensor_type']}")
      #print(f"Sensor Index: {cal['sensor_index']}")
      columnID = {cal['sensor_index']}
      columnID = int(next(iter(columnID))) + 1
      #print(df_eng.iloc[:,columnID])
      if cal['sensor_type'] == "OxygenSensor":
         df_eng['Oxygen_Cal'] = df_eng.iloc[:,columnID]
         # Calculate oxygen solubility
         Oxsat_value = oxsat(df_eng['Temperature'], df_eng['Salinity'])
         #print(cal)
         H1 = float(cal['H1'][0])
         H2 = float(cal['H2'][0])
         H3 = float(cal['H3'][0])

         # Dynamic response correction
         response_correction = 1 + H1 * df_eng['Temperature'] + H2 * df_eng['Temperature']**2 + H3 * df_eng['Temperature']**3
         #print(response_correction)

         # Solubility correction
         E = float(cal['E'][0])
         solubility_correction = 1 + E * (df_eng['Temperature'] - 20)
         #print(solubility_correction)

         # Apply the calibration equation
         Soc = float(cal['Soc'][1])
         offset = float(cal['offset'][1])
         A = float(cal['A'][0])
         B = float(cal['B'][0])
         C = float(cal['C'][0])
         D0 = float(cal['D0'][0])
         D1 = float(cal['D1'][0])
         D2 = float(cal['D2'][0])
         
         # convert from ml/l to uM/kg
         
         
         df_eng['Oxygen_Cal'] = (
            Soc * (df_eng.iloc[:,columnID] + offset) *
            (1.0 + A * df_eng['Temperature'] + B * df_eng['Temperature']**2 + C * df_eng['Temperature']**3) *
            Oxsat_value *
            np.exp((E * df_eng['Pressure']) / (df_eng['Temperature'] + 273.15)) * 
            (uM_conv/rho)
         )
         
      if cal['sensor_type'] == "FluoroWetlabCDOM_Sensor":
         Vblank = float(next(iter({cal['Vblank']})))
         scale_factor = float(next(iter({cal['scale_factor']})))
         df_eng['FluoroWetlabCDOM_Cal'] = scale_factor*(df_eng.iloc[:,columnID] - Vblank)
      if cal['sensor_type'] == "FluoroWetlabECO_AFL_FL_Sensor":
         Vblank = float(next(iter({cal['Vblank']})))
         scale_factor = float(next(iter({cal['scale_factor']})))
         df_eng['FluoroWetlabECO_AFL_FL_Cal'] = scale_factor*(df_eng.iloc[:,columnID] - Vblank)
      if cal['sensor_type'] == "TurbidityMeter":
         dark_voltage = float(next(iter({cal['dark_voltage']})))
         scale_factor = float(next(iter({cal['scale_factor']})))
         df_eng['Turbidity_Cal'] = scale_factor*(df_eng.iloc[:,columnID] - dark_voltage)
         
   return df_eng

if __name__=='__main__':
   parser = argparse.ArgumentParser(
   description=__doc__,
   formatter_class=argparse.RawDescriptionHelpFormatter
   )

   parser.add_argument("-i", "--ifile", type=str, required=True, help="--ifile <Input Filename>")
   parser.add_argument("-L0", "--L0", type=str, required=True, help="--L0 <Level0 Output Filename>")
   parser.add_argument("-L1", "--L1", type=str, required=True, help="--L1 <Level1 Output Filename>")
   parser.add_argument("-L2", "--L2", type=str, required=True, help="--L2 <Level2 Output Filename>")
   parser.add_argument("-xmlcon", "--xmlcon", type=str, required=True, help="--xmlcon <XMLCON Filename>")

   args = parser.parse_args()

   if args.ifile:
      INFILE = args.ifile
      print("Input file:", INFILE)
   else:
      print("Input engineering file must be specified")
      sys.exit(0)
      
   if args.xmlcon:
      XMLCON = args.xmlcon
      print(" XMLCON file:", XMLCON)
   else:
      print("XMLCON file must be specified")
      sys.exit(0)

   if args.L0:
      L0_FILE = args.L0
      print("   Level0 Output file:", L0_FILE)
   else:
      print("Level0 Output filename must be specified")
      sys.exit(0)

   if args.L1:
      L1_FILE = args.L1
      print("    Level1 Output file:", L1_FILE)
   else:
      print("Level1 Output filename must be specified")
      sys.exit(0)

   if args.L2:
      L2_FILE = args.L2
      print("     Level2 Output file:", L2_FILE)
   else:
      print("Level2 Output filename must be specified")
      sys.exit(0)

   
   df_calibrated = main(INFILE, XMLCON, L0_FILE, L1_FILE)
   # Replace 'T' with a space only if 'T' exists in the DateTime column
   df_calibrated['DateTime'] = df_calibrated['DateTime'].astype(str)
   df_calibrated.loc[df_calibrated['DateTime'].str.contains('T', na=False), 'DateTime'] = (
       df_calibrated['DateTime'].str.replace('T', ' ')
   )

   # Define column headers
   columns = pd.MultiIndex.from_tuples([
    ("DateTime", "ISO8601"),
    ("Temperature", "degC"),
    ("Conductivity","S/m"),
    ("Pressure", "dbar"),
    ("V1", "Volts"),
    ("V2", "Volts"),
    ("V3", "Volts"),
    ("V4", "Volts"),
    ("Salinity", "PSU"),
    ("Depth", "m"),
    ("Oxygen", "uM/kg"),
    ("CDOM", "ppb"),
    ("Chlorophyll-a", "mg/m3"),
    ("Turbidity", "NTU")
    ])

   # Level2 file - aggregation and removal of data shallower than 1 m  
   df_level2 = create_level2(df_calibrated)
   
   df_calibrated = df_calibrated.drop('DepthBin', axis=1)

   # Assign the multi-level header to the DataFrame
   df_calibrated.columns = columns

   df_calibrated[["Temperature","Conductivity","Pressure","V1","V2","V3","V4",
                  "Salinity","Depth","Oxygen","CDOM","Chlorophyll-a","Turbidity"]] = df_calibrated[["Temperature","Conductivity","Pressure","V1","V2","V3","V4","Salinity","Depth","Oxygen","CDOM","Chlorophyll-a","Turbidity"]].applymap(lambda x: f"{x:.3f}")

   # Write Level1 output to csv file
   df_calibrated.to_csv(L1_FILE, sep=",", index=False)
   
   # Prepare L2 dataframe for output
   df_level2['Depth'] = df_level2['DepthBin']
   # Drop the voltage channels
   df_level2 = df_level2.drop(['V1','V2','V3','V4'], axis=1)
   # Re-order the columns
   neworder = ["DateTime","Temperature","Conductivity","Pressure","Salinity","Depth","Oxygen_Cal","FluoroWetlabCDOM_Cal","FluoroWetlabECO_AFL_FL_Cal","Turbidity_Cal"]
   df_level2 =df_level2.reindex(columns=neworder)

   columns = pd.MultiIndex.from_tuples([
    ("DateTime", "ISO8601"),
    ("Temperature", "degC"),
    ("Conductivity","S/m"),
    ("Pressure", "dbar"),
    ("Salinity", "PSU"),
    ("Depth", "m"),
    ("Oxygen", "uM/kg"),
    ("CDOM", "ppb"),
    ("Chlorophyll-a", "mg/m3"),
    ("Turbidity", "NTU")
    ])

   df_level2.columns = columns

   df_level2[["Temperature","Conductivity","Pressure",
                  "Salinity","Depth","Oxygen","CDOM","Chlorophyll-a","Turbidity"]] = df_level2[["Temperature","Conductivity","Pressure","Salinity","Depth","Oxygen","CDOM","Chlorophyll-a","Turbidity"]].applymap(lambda x: f"{x:.3f}")
   
   # Write Level2 output to csv file
   df_level2.to_csv(L2_FILE, sep=",", index=False)

   

   sys.exit(0)