# Imagine we sample the nutrient content in an experimental tank ten times
# across an afternoon. We get the following results:
nut <- c(56.3, 50.6, 49.5, 57.8, 71.4, 50.0, 61.3, 60.7, 77.7, 70.4)
plot(nut)

# Assume there is no fundamental difference among the samples. Find the
# average. We will assume there is observational error with each sampling.
# Since these nutrient concentrations are continuous numbers, we will assume
# that the observational error is simply normal or Gaussian.

# 1) Simulate the data.
# First, draw the pdf with a guess for the mean and stdev.
curve(dnorm(x, 50.0, 6.0), 30, 70)
simData <- rnorm(10, 50.0, 6.0)
plot(simData)

# We could guess and check to find an acceptable mean and standard deviation.
# Instead, we'll use maximum likelihood to find the best estimates.
# In the simulation we use the probability density function to tell us the
# probability. We can work backwards, using the same equation to find the
# likelihood of the parameters, given the data. For example, take the
# probability density function (pdf) of the normal distribution from above.
# If we say m = 5.3 and s = 1.2, the pdf will tell us the probability density
# of getting 6.2 were we to randomly draw from the distribution.
dnorm(6.2, 5.3, 1.2)
curve(dnorm(x, 5.3, 1.2), 1, 9)

# We can turn that around and ask, "What is the likelihood of the mean being
# 5.3 if a draw from the distribution resulted in 6.2?" For right now, let us
# assume we know the standard deviation is 1.2. The likelihood is actually
# proportional to the probability. Since we will be concerned with relative
# likelihood, we will ignore the proportionality constant and assume they are
# equal.
dnorm(6.2, 5.3, 1.2)

# The number is actually the same, even though the philosophy is the reverse.
# This time, plot the likelihood across a range of m, because the data is
# fixed this time at 6.2.
curve(dnorm(6.2, x, 1.2), 1, 10)

# Where is the maximum likelihood? Given a single value of 6.2, it is most
# likely that the mean is 6.2.
# Your plot should peak at 6.2.

# In order to easily manipulate likelihood values, we transform the likelihood
# by taking the -log(). Negative log-likelihood values have the nice property
# that they can be summed across multiple events. The other twist is that you
# minimize the negative log-likelihood in order to find the maximum
# likelihood. This negative log-likelihood is called the support function
# because it is used to find support for the parameters, given the data.
sf <- function(z, m, s) -dnorm(z, m, s, log = TRUE)
curve(sf(6.2, x, 1.2), 0, 10)
sf(nut, 60, 3)
sum(sf(nut, 60, 3))

# Create a single function for the negative log-likelihood of the data set.
ll <- function(m, s) -sum(dnorm(nut, m, s, log = TRUE))

library(bbmle)
source("mleFunctions.R")

# Fit the constant-mean model by minimizing the negative log-likelihood.
guess <- list(m = 60.0, s = 3.0)
fit <- mle2(ll, start = guess, method = "Nelder-Mead")
fit
-logLik(fit)
mean(nut)
comp <- compare(fit, name = "constant")

# Redo the analysis and include the time each data point was collected.
time <- c(2.3, 2.6, 3, 3.1, 3.3, 3.4, 3.4, 4.1, 4.6, 4.8)
ponds <- data.frame(time = time, nutrients = nut)
plot(ponds)

# Negative log-likelihood for a linear (in time) mean: m1 + m2 * time.
ll <- function(m1, m2, s) {
  with(ponds, -sum(dnorm(nutrients, m1 + m2 * time, s, log = TRUE)))
}
guess <- list(m1 = 60.0, m2 = 2.3, s = 3.0)
fit <- mle2(ll, start = guess, method = "Nelder-Mead")
fit
-logLik(fit)

# Compare the linear model against the constant model from before.
comp <- compare(fit, name = "linear", prev = comp)
comp
compare.likRatio(comp, 1, 2)