# Imagine we sample the nutrient content in an experimental tank ten times
# across an afternoon. We get the following results:
nut <- c(56.3, 50.6, 49.5, 57.8, 71.4, 50.0, 61.3, 60.7, 77.7, 70.4)
plot(nut)

# Assume there is no fundamental difference among the samples. Find the
# average. We will assume there is observational error with each sampling.
# Since these nutrient concentrations are continuous numbers, we will assume
# that the observational error is simply normal or Gaussian.

# 1) Simulate the data.
# First, draw the pdf with a guess for the mean and stdev.
curve(dnorm(x, 50.0, 6.0), 30, 70)
simData <- rnorm(10, 50.0, 6.0)
plot(simData)

# We could guess and check to find an acceptable mean and standard deviation.
# Instead, we'll use maximum likelihood to find the best estimates.
# In the simulation we use the probability density function to tell us the
# probability. We can work backwards, using the same equation to find the
# likelihood of the parameters, given the data. For example, take the
# probability density function (pdf) of the normal distribution from above.
# If we say m = 5.3 and s = 1.2, the pdf will tell us the probability density
# of getting 6.2 were we to randomly draw from the distribution.
dnorm(6.2, 5.3, 1.2)
curve(dnorm(x, 5.3, 1.2), 1, 9)

# We can turn that around and ask, "What is the likelihood of the mean being
# 5.3 if a draw from the distribution resulted in 6.2?" For right now, let us
# assume we know the standard deviation is 1.2. The likelihood is actually
# proportional to the probability. Since we will be concerned with relative
# likelihood, we will ignore the proportionality constant and assume they are
# equal.
dnorm(6.2, 5.3, 1.2)

# The number is actually the same, even though the philosophy is the reverse.
# This time, plot the likelihood across a range of m, because the data is
# fixed this time at 6.2.
curve(dnorm(6.2, x, 1.2), 1, 10)

# Where is the maximum likelihood? Given a single value of 6.2, it is most
# likely that the mean is 6.2.
# Your plot should peak at 6.2.

# In order to easily manipulate likelihood values, we transform the likelihood
# by taking the -log(). Negative log-likelihood values have the nice property
# that they can be summed across multiple events. The other twist is that you
# minimize the negative log-likelihood in order to find the maximum
# likelihood. This negative log-likelihood is called the support function
# because it is used to find support for the parameters, given the data.
sf <- function(z, m, s) -dnorm(z, m, s, log = TRUE)
curve(sf(6.2, x, 1.2), 0, 10)
sf(nut, 60, 3)
sum(sf(nut, 60, 3))

# Create a single function for the negative log-likelihood of the data set.
ll <- function(m, s) -sum(dnorm(nut, m, s, log = TRUE))

library(bbmle)
source("mleFunctions.R")

# Fit the constant-mean model by minimizing the negative log-likelihood.
guess <- list(m = 60.0, s = 3.0)
fit <- mle2(ll, start = guess, method = "Nelder-Mead")
fit
-logLik(fit)
mean(nut)
comp <- compare(fit, name = "constant")

# Redo the analysis and include the time each data point was collected.
time <- c(2.3, 2.6, 3, 3.1, 3.3, 3.4, 3.4, 4.1, 4.6, 4.8)
ponds <- data.frame(time = time, nutrients = nut)
plot(ponds)

# Negative log-likelihood for a linear (in time) mean: m1 + m2 * time.
ll <- function(m1, m2, s) {
  with(ponds, -sum(dnorm(nutrients, m1 + m2 * time, s, log = TRUE)))
}
guess <- list(m1 = 60.0, m2 = 2.3, s = 3.0)
fit <- mle2(ll, start = guess, method = "Nelder-Mead")
fit
-logLik(fit)

# Compare the linear model against the constant model from before.
comp <- compare(fit, name = "linear", prev = comp)
comp
compare.likRatio(comp, 1, 2)