{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 452,
   "metadata": {},
   "outputs": [],
   "source": [
    "import operator\n",
    "import math\n",
    "import random\n",
    "\n",
    "from scipy.integrate import odeint\n",
    "\n",
    "import numpy as np\n",
    "\n",
    "from deap import algorithms\n",
    "from deap import base\n",
    "from deap import creator\n",
    "from deap import tools\n",
    "from deap import gp\n",
    "\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 458,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "87\n"
     ]
    }
   ],
   "source": [
    "def readNPZData(filename):\n",
    "    # Use pandas to read in the csv file\n",
    "    dateparse = lambda x: pd.datetime.strptime(str(x), '%m/%d/%Y')\n",
    "    data_df = pd.read_csv(filename, parse_dates=['Date'], date_parser=dateparse)\n",
    "    return data_df\n",
    "\n",
    "data = readNPZData(\"NPZData.csv\")\n",
    "NH4 = data['NH4']\n",
    "NO3 = data['NO3']\n",
    "P = data['Chl.Ave']\n",
    "C = data['Copepods']\n",
    "Z = data['Total.Zoo']\n",
    "date = data['Date']\n",
    "\n",
    "pd.set_option('chained_assignment', None)\n",
    "\n",
    "#perform interpolation for those data items not collected for model dates\n",
    "for i in range(0,len(NH4)):\n",
    "    if(np.isnan(NH4[i])):\n",
    "        NH4[i] = (NH4[i+1] + NH4[i-1])/2.0\n",
    "\n",
    "for i in range(0,len(NO3)):\n",
    "    if(np.isnan(NO3[i])):\n",
    "        NO3[i] = (NO3[i+1] + NO3[i-1])/2.0\n",
    "        \n",
    "for i in range(0,len(P)):\n",
    "    if(np.isnan(P[i])):\n",
    "        if(np.isnan(P[i+1])):\n",
    "            P[i] = (P[i+2] + P[i-1])/2.0\n",
    "        else:\n",
    "            P[i] = (P[i+1] + P[i-1])/2.0\n",
    "\n",
    "for i in range(0,len(C)):\n",
    "    if(np.isnan(C[i])):\n",
    "        if(np.isnan(C[i+1])):\n",
    "            C[i] = (C[i+2] + C[i-1])/2.0\n",
    "        else:\n",
    "            C[i] = (C[i+1] + C[i-1])/2.0 \n",
    "            \n",
    "for i in range(0,len(Z)):\n",
    "    if(np.isnan(Z[i])):\n",
    "        if(np.isnan(Z[i+1])):\n",
    "            Z[i] = (Z[i+2] + Z[i-1])/2.0\n",
    "        else:\n",
    "            Z[i] = (Z[i+1] + Z[i-1])/2.0\n",
    "\n",
    "            \n",
    "# Convert all measurements to micrgrams carbon per liter\n",
    "# For nitrate measures as micromolar, multiply by 12*106/16\n",
    "# For chlorophyll a, multiply by 40 (gC per g of Chl)\n",
    "# For zooplankton, multiply by 6*1e-3\n",
    "# Create empty vectors for the scaled values _S\n",
    "NO3_S = []\n",
    "NH4_S = []\n",
    "P_S = []\n",
    "Z_S = []\n",
    "date = []\n",
    "for t in range(0,len(NO3)):\n",
    "    S = NO3[t]*12*106/16\n",
    "    NO3_S.append(S)\n",
    "    S = NH4[t]*12*106/16\n",
    "    NH4_S.append(S)\n",
    "    P_S.append(P[t]*40)\n",
    "    Z_S.append(Z[t]*6*0.001)\n",
    "    date.append(t)\n",
    "\n",
    "print (len(Z_S))\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 459,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "-2.8499999999999996\n",
      "19.97\n"
     ]
    }
   ],
   "source": [
    "def readEnvironmentalData(filename):\n",
    "    # Use pandas to read in the csv file\n",
    "    dateparse = lambda x: pd.datetime.strptime(str(x), '%m/%d/%Y')\n",
    "    data_df = pd.read_csv(filename, parse_dates=['Date'], date_parser=dateparse)\n",
    "    return data_df\n",
    "\n",
    "environdata = readNPZData(\"SST_wind_sun_rain.csv\")\n",
    "Temperature = environdata['SST']\n",
    "Daylight = environdata['Daylight']\n",
    "Precip = environdata['Precip']\n",
    "Wind = environdata['W.max']\n",
    "Sun = environdata['Solar.Irrad']\n",
    "date = environdata['Date']\n",
    "day = environdata['Day']\n",
    "\n",
    "#mean.daylight = np.mean(Daylight)\n",
    "print(min(Daylight)-12)\n",
    "print(Sun[0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 470,
   "metadata": {},
   "outputs": [],
   "source": [
    "#The npz model to be run via an ODE solver - Pass some fixed parameters in (as opposed to explicit variable assignment)\n",
    "def npz(t, initial):\n",
    "    nN = initial[0] # Nutrients biomass           \n",
    "    nP = initial[1] # phytoplankton biomass\n",
    "    nZ = initial[2] # Zooplankton biomass\n",
    "    γ = initial[3] # Zooplankton efficiency\n",
    "    Λ = initial[4] # Ivlev grazing constant\n",
    "    T = initial[5] # turbulence rate mixing in new nutrients and taking away phytoplankton\n",
    "    v = initial[6] # rate of nutrient uptake\n",
    "    mP = initial[7] # rate of Phytoplankton mortality\n",
    "    mZ = initial[8] # rate of Zooplankton grazing\n",
    "    Ni = initial[9] # Initial Nutrients\n",
    "    \n",
    "    #make sure the calculation doesn't extend past end of data record\n",
    "    if(t>612):\n",
    "        t=612\n",
    "    \n",
    "    #temperature Celsius - as acquired from environmental data source\n",
    "    Tc = (Temperature[math.floor(t)])\n",
    "\n",
    "    #hours of daylight per day (dl)\n",
    "    dl = (Daylight[math.floor(t)])\n",
    "    \n",
    "    k  = 1.0    #1/2 saturation constant for Nutrient update\n",
    "    #adjust half-saturation by temperature\n",
    "    k  = k*math.exp(0.063*Tc)\n",
    "\n",
    "    Rm = 1.5    #maximum zooplankton grazing rate\n",
    "    \n",
    "    q = 2**((Temperature[math.floor(t)+1]-Tc)/10) #Q10 biological metabolism equation\n",
    "    \n",
    "    #adjust grazing by temperature change\n",
    "    Rm = Rm * q\n",
    "    \n",
    "    #adjust grazing by daylight\n",
    "    Rm = Rm * (dl/24)\n",
    "    \n",
    "    #N = (Rain[math.floor(t)] * .001) * 4341 * 5 #Nutrient contribution from runoff water volume \n",
    "          #(rain per mm^2) * catchment km^2 * nutrients mg/l\n",
    "    N = 0\n",
    "    \n",
    "    #zG calculation can break when Phytoplankton is negative - not reasonable to have negative anyway\n",
    "    if(nP < 0):\n",
    "        nP = 0\n",
    "    \n",
    "    #now apply Miller Chapter 4 model\n",
    "    zG = Rm*(1-math.exp(-Λ*nP))\n",
    "    \n",
    "    df1 = ((-v*nN*nP)/(k+nN) + mP*nP + mZ*nZ + (1-γ)/2*nZ*zG) - T*nN + T*Ni + N # time rate of change of N\n",
    "    df2 = ((v*nP*nN)/(k+nN) - mP*nP - nZ*zG - T*nP)         # time rate of change of P\n",
    "    df3 = (γ*nZ*zG - mZ*nZ)                                 # time rate of change of Z\n",
    "    f = [df1,df2,df3,0,0,0,0,0,0,0]                         # vector of the time rates of change\n",
    "    \n",
    "    return f"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 461,
   "metadata": {},
   "outputs": [],
   "source": [
    "def getTotalVariance(pop, no3, nh4, p, z):\n",
    "    nw = []\n",
    "    pw = []\n",
    "    zw = []\n",
    "    d = []\n",
    "    total_variance = 0\n",
    "    \n",
    "    j=0\n",
    "    for i in range(0,len(pop)-7):\n",
    "        if(i%7==0):\n",
    "            model_total = pop[i][0]+pop[i][1]+pop[i][2]\n",
    "            data_total = no3[j]+nh4[j]+p[j]+z[j]\n",
    "\n",
    "            nw.append((pop[i][0]/model_total)-(no3[j]+nh4[j])/data_total)\n",
    "            total_variance = total_variance + abs((pop[i][0]/model_total)-(no3[j]+nh4[j])/data_total)\n",
    "            pw.append((pop[i][1]/model_total)-p[j]/data_total)\n",
    "            total_variance = total_variance + abs((pop[i][1]/model_total)-p[j]/data_total)\n",
    "            zw.append((pop[i][2]/model_total)-z[j]/data_total)\n",
    "            total_variance = total_variance + abs((pop[i][2]/model_total)-z[j]/data_total)\n",
    "            d.append(date[i])\n",
    "            j=j+1\n",
    "            \n",
    "            #punish models that give negative Phytoplankton\n",
    "            if(pop[i][1] < 0):\n",
    "                total_variance = total_variance + 100\n",
    "\n",
    "            #punish models that give runaway Phytoplankton blooms\n",
    "            if(pop[i][1] > 1000000):\n",
    "                total_variance = total_variance + 100                \n",
    "                \n",
    "    return total_variance"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 462,
   "metadata": {},
   "outputs": [],
   "source": [
    "#Set up the goal of model parameter evolution\n",
    "creator.create(\"FitnessMax\", base.Fitness, weights=(1.0,))\n",
    "creator.create(\"Individual\", list, fitness=creator.FitnessMax)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 463,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[0.2681021989675888, 0.3543458179913236, 0.2232813466349094, 0.24526716989681863, 0.6562477031687555]\n"
     ]
    }
   ],
   "source": [
    "#Use the deap toolbox to create the population of parameter sets being evolved (from a genome)\n",
    "toolbox = base.Toolbox()\n",
    "toolbox.register(\"attr_float\", random.random)\n",
    "toolbox.register(\"individual\", tools.initRepeat, creator.Individual, toolbox.attr_float, n=5)\n",
    "toolbox.register(\"population\", tools.initRepeat, list, toolbox.individual)\n",
    "\n",
    "print(toolbox.individual())\n",
    "\n",
    "def evalNPZ(individual, γ, Λ, T):\n",
    "\n",
    "    # Evaluate the solver with different parameter values\n",
    "    v = toolbox.individual()[0]\n",
    "    mP = toolbox.individual()[1]\n",
    "    Ni = toolbox.individual()[2]\n",
    "    Po = toolbox.individual()[3]\n",
    "    Zo = 2.1 - Ni - Po\n",
    "    mZ = toolbox.individual()[4]\n",
    "    \n",
    "    #set up odeint call\n",
    "    tmin = 0.0         #start time \n",
    "    tout = 1.0         # output step size\n",
    "    tmax = 612.0       # stop time (number of data points in groundtruth)\n",
    "    steps = tmax/tout  # number of output steps\n",
    "    t_span = [tmin, tmax]\n",
    "\n",
    "    time = np.linspace(tmin, tmax, steps) # start time, stop time, report out interval\n",
    "    initial = [Ni, Po, Zo, 0.5, 1.0, 0.02, v, mP, mZ, Ni]  # initial N, P, Z, γ, Λ, T, v, mP, mZ and Ni\n",
    "    pop = odeint(npz, initial, time, tfirst=True)  # scipy.integrate.odeint Runge Kutta 4.5\n",
    "\n",
    "    return 100.0/getTotalVariance(pop, NO3_S, NH4_S, P_S, Z_S),"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 464,
   "metadata": {},
   "outputs": [],
   "source": [
    "#shortcut version of using the genetic programming toolkit (use the long-hand one in the next cell for learning)\n",
    "population_size = 10\n",
    "\n",
    "def selElitistAndTournament(individuals, k_tournament, tournsize):\n",
    "    return tools.selBest(individuals, 3) + tools.selTournament(individuals, k_tournament, tournsize=3)\n",
    "\n",
    "toolbox.register(\"evaluate\", evalNPZ, γ=0.5, Λ=1.0, T=0.02)\n",
    "toolbox.register(\"select\", selElitistAndTournament, k_tournament=population_size-3, tournsize=3)\n",
    "toolbox.register(\"mate\", tools.cxTwoPoint)\n",
    "toolbox.register(\"mutate\", tools.mutGaussian, mu=0, sigma=1, indpb=0.2)\n",
    "\n",
    "def main():\n",
    "    #uncomment random.seed here to track a repetitive computation process - otherwise run with new random each time\n",
    "    #random.seed(64)\n",
    "\n",
    "    population = toolbox.population(n=10)\n",
    "    hof = tools.HallOfFame(3)\n",
    "    \n",
    "    stats_fit = tools.Statistics(lambda ind: ind.fitness.values)\n",
    "    stats_size = tools.Statistics(len)\n",
    "    mstats = tools.MultiStatistics(fitness=stats_fit, size=stats_size)\n",
    "    mstats.register(\"avg\", np.mean)\n",
    "    mstats.register(\"std\", np.std)\n",
    "    mstats.register(\"min\", np.min)\n",
    "    mstats.register(\"max\", np.max)\n",
    "  \n",
    "    population, log = algorithms.eaSimple(population, toolbox, cxpb=0.5, mutpb=0.1, ngen=40, stats=mstats, halloffame=hof, verbose=True)\n",
    "    \n",
    "    print(\"\\n\")\n",
    "    print(\"Best:     \", hof[0])\n",
    "    print(\"2nd Best: \", hof[1])\n",
    "    print(\"3rd Best: \", hof[2])\n",
    "    \n",
    "    return \"1 - successful run\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 471,
   "metadata": {},
   "outputs": [],
   "source": [
    "#long code version of using the genetic programming toolkit\n",
    "population_size = 40\n",
    "\n",
    "def selElitistAndTournament(individuals):\n",
    "    return tools.selBest(individuals, 2) + tools.selTournament(individuals, population_size-2, tournsize=3)\n",
    "\n",
    "toolbox.register(\"evaluate\", evalNPZ, γ=0.5, Λ=1.0, T=0.02)\n",
    "toolbox.register(\"select\", selElitistAndTournament)\n",
    "toolbox.register(\"mate\", tools.cxTwoPoint)\n",
    "toolbox.register(\"mutate\", tools.mutGaussian, mu=0, sigma=1, indpb=0.2)\n",
    "\n",
    "def main():\n",
    "    #random.seed(64)\n",
    "\n",
    "    # create an initial population of individuals\n",
    "    population = toolbox.population(n=population_size)\n",
    "\n",
    "    CXPB = 0.5  #probability with which two individuals are mated\n",
    "    MUTPB = 0.2 #probability for mutating an individual\n",
    "    \n",
    "    print(\"Start of evolution\")\n",
    "    \n",
    "    # Evaluate the entire population\n",
    "    fitnesses = list(map(toolbox.evaluate, population))\n",
    "    for ind, fit in zip(population, fitnesses):\n",
    "        ind.fitness.values = fit\n",
    "    \n",
    "    print(\"  Evaluating %i individuals\" % len(population))\n",
    "\n",
    "    # Extracting all the fitnesses\n",
    "    fits = [ind.fitness.values[0] for ind in population]\n",
    "\n",
    "    # Variable keeping track of the number of generations\n",
    "    g = 0\n",
    "    \n",
    "    print(\"  Min  Max  Avg  StDev\")\n",
    "    \n",
    "    # Begin the evolution\n",
    "    while g < 40:\n",
    "        # A new generation\n",
    "        g = g + 1\n",
    "        \n",
    "        # Select the next generation individuals\n",
    "        offspring = toolbox.select(population)\n",
    "        # Clone the selected individuals\n",
    "        offspring = list(map(toolbox.clone, offspring))\n",
    "    \n",
    "        # Apply crossover and mutation on the offspring\n",
    "        for child1, child2 in zip(offspring[::2], offspring[1::2]):         \n",
    "            # cross two individuals with probability CXPB\n",
    "            if random.random() < CXPB:\n",
    "                outchild = toolbox.mate(child1, child2)\n",
    "                # fitness values of the children\n",
    "                # must be recalculated later\n",
    "                del child1.fitness.values\n",
    "                del child2.fitness.values\n",
    "\n",
    "        for mutant in offspring:\n",
    "            # mutate an individual with probability MUTPB\n",
    "            if random.random() < MUTPB:\n",
    "                toolbox.mutate(mutant)\n",
    "                del mutant.fitness.values\n",
    "    \n",
    "        # Evaluate the individuals with an invalid fitness\n",
    "        invalid_ind = [ind for ind in offspring if not ind.fitness.valid]\n",
    "        fitnesses = map(toolbox.evaluate, invalid_ind)\n",
    "        for ind, fit in zip(invalid_ind, fitnesses):\n",
    "            ind.fitness.values = fit\n",
    "        \n",
    "        #print(\"  Evaluated %i individuals\" % len(invalid_ind))\n",
    "        \n",
    "        best_ind = tools.selBest(population, 1)[0]\n",
    "        #print(\"Best individual is %s, %s\" % (best_ind, best_ind.fitness.values))\n",
    "        \n",
    "        # The population is entirely replaced by the offspring\n",
    "        population[1:] = offspring\n",
    "        population[0] = tools.selBest(population, 1)[0]\n",
    "        \n",
    "        # Gather all the fitnesses in one list and print the stats\n",
    "        fits = [ind.fitness.values[0] for ind in population]\n",
    "        \n",
    "        length = len(population)\n",
    "        mean = sum(fits) / length\n",
    "        sum2 = sum(x*x for x in fits)\n",
    "        std = abs(sum2 / length - mean**2)**0.5\n",
    "        \n",
    "        print(g, \"{0:.2f}\".format(min(fits)), \"{0:.2f}\".format(max(fits)), \"{0:.2f}\".format(mean), \"{0:.2f}\".format(std))\n",
    "        \n",
    "    print(\"-- End of (successful) evolution --\")\n",
    "    \n",
    "    best_ind = tools.selBest(population, 3)\n",
    "    print(\"Best individual is %s, %s\" % (best_ind[0], best_ind[0].fitness.values))\n",
    "    print(\"2nd best ind'l is %s, %s \" % (best_ind[1], best_ind[1].fitness.values))\n",
    "    print(\"3rd best ind'l is %s, %s \" % (best_ind[2], best_ind[2].fitness.values))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Start of evolution\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py:26: DeprecationWarning: object of type <class 'float'> cannot be safely interpreted as an integer.\n"
     ]
    }
   ],
   "source": [
    "main()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}