#HOMOGENEITY OF MULTIVARIATE DISPERSIONS MODIFIED FOR COMPOSITIONAL FIDELITY #ANALYSES (LIVING VERSUS DEATH ASSEMBLAGES) FOR EQUAL NUMBER OF LIVING AND DEATH #ASSEMBLAGES


#The function fidpartitioning has two arguments:
#distance -  is a distance structure returned by dist or vegdist
#livedead  - is a data frame with species in rows and samples in columns, starting #with living assemblage samples,
#followed by death assemblage samples
#The number of living and death assemblages must be equal (balanced design)
#vegan and ade4 packages are required
#the function is analogous to betadisper() in the vegan package but it returns #distances between individual assemblages 
#from one group (death assemblages) and centroid of assemblages from the second #group (living assemblages)
#the function thus returns (1) distances between death assemblages and centroid of #living assemblages,
#(2) distances between living assemblages and centroid of living assemblages, (3) #95% bootstrapped confidence intervals for 
#means, and (4) p value that evaluates whether death assemblages are significantly #less or more dispersed relative
#to the centroid of living assemblages than are living assemblages
#the number of runs for bootstrapping is 100 and for permutation test 1000
#References for libraries: ade4 package: 
#S. Dray and A.B. Dufour. 2007. The ade4 package: implementing the duality diagram #for ecologists. 
#Journal of Statistical Software 22(4):1-20.
#Thioulouse, J., D. Chessel, S. Dolédec, and J. M. Olivier. 1996. ADE-4: a #multivariate analysis and 
#graphical display software. Statistics and Computing 7:75-83.
#vegan package: Jari Oksanen, Roeland Kindt, Pierre Legendre, Bob O'Hara, Gavin L. #Simpson, M. Henry H. Stevens 
#and Helene Wagner (2008). vegan:  Community Ecology Package. R package version #1.13-1. vegan.r-forge.r-project.org/
#REFERENCE FOR FUNCTION: Tomasovych, A., and Kidwell S.M. 2010. Accounting for the #effects of biological variability and temporal autocorrelation in 
#assessing the preservation of species abundance. Paleobiology 

#LIBRARIES
require(vegan)
require(ade4)
##################################################################################################################################
#ARBITRARY RANDOMIZED INPUT TABLE WITH 8 LIVING ASSEMBLAGES AND 8 DEATH ASSEMBLAGES
livedead<-array(0, dim=c(50,16))
method="bray"
for (i in 1:ncol(livedead)) {
livedead[,i]<-runif(50,0,1)
}
livedead<-decostand(livedead, method="total", MARGIN=2)
###################################################################################################################################
#THIS IS THE FUNCTION

fidpartitioning<-function(livedead, method=method) {
livedead<-livedead; 
grouping<-c(rep(1,ncol(livedead)/2), rep(2,ncol(livedead)/2));  method=method;

if (method=="horn") {distance<-vegdist(t(livedead),method="horn")}
if (method=="bray") {distance<-vegdist(t(livedead),method="bray")}
if (method=="hellinger") {distance<-vegdist(t(sqrt(livedead)),method="bray")}
if (method=="sorenson") {distance<-vegdist(t(livedead),method="bray", binary=T)}
if (method=="jaccard") {distance<-vegdist(t(livedead),method="jaccard", binary=T)}
if (method=="gower") {distance<-vegdist(decostand(t(livedead), "log"), "altGower")}
if (method=="euclidean") {distance<-vegdist(t(livedead),method="euclidean")}

#PRINCIPAL COORDINATE ANALYSIS USING ADE4 PACKAGE
tol = -1;
distmat <- as.matrix(distance);n <- ncol(distmat)
row.w <- rep(1, n)
row.w <- row.w/sum(row.w)
delta <- -0.5 * bicenter.wt(distmat * distmat, row.wt = row.w, col.wt = row.w)
wsqrt <- sqrt(row.w)
delta <- delta * wsqrt
delta <- t(t(delta) * wsqrt)
eig <- eigen(delta, symmetric = TRUE)
lambda <- eig$values
w0 <- lambda[n]/lambda[1]
r <- sum(lambda > (lambda[1] * tol))
nf<-r
res <- list()
res$eig <- lambda[1:r]                  #list of eigenvalues
res$rank <- r
res$cw <- rep(1, r)
w <- t(t(eig$vectors[, 1:r]) * sqrt(abs(lambda[1:r])))/wsqrt            
w <- data.frame(w)
res$tab <- w
res$li <- data.frame(w[, 1:nf])         #eigenvectors - principal coordinates 

#ESTIMATE THE NUMBER OF POSITIVE AND NEGATIVE EIGENVALUES
positive<-sum(res$eig > 0); negative<-sum(res$eig < 0);

#CHECKING WHETHER DISTANCES IN PCO SPACE ARE EQUAL TO MATRIX DISTANCES
#d1<-designdist(res$li[,1:positive],method="(A+B-2*J)", terms=c("quadratic"))
#d2<-designdist(res$li[,(positive+1):(positive+negative)],method="(A+B-2*J)", terms=c("quadratic"))
#originaldistance<-sqrt(d1-d2)  #restoring distance from the complete PCO space

#ESTIMATE DISTANCES BETWEEN DEATH ASSEMBLAGES AND CENTROID OF LIVING ASSEMBLAGES (TOTAL LIVE-DEAD VARIATION)
#COMPUTE CENTROID LOCATION OF LIVING ASSEMBLAGES
#FOR POSITIVE EIGENVALUES
livecentroid1<-rep(NA, length(NA))
for (i in 1:positive) 
                {livecentroid1[i]<-mean(res$li[1:(ncol(livedead)/2),i])
                }
#FOR NEGATIVE EIGENVALUES
if (negative > 0) 
                {livecentroid2<-rep(NA, length(NA))
                        for (i in (positive+1):(positive+negative)) 
                        {livecentroid2[i]<-mean(res$li[1:(ncol(livedead)/2),i])
                        }
                }

#DISTANCES BETWEEN DEATH ASSEMBLAGES AND CENTROID OF LIVING ASSEMBLAGES FOR POSITIVE EIGENVALUES
if (length(livecentroid1) == 1) 
                {n1<-append(livecentroid1, res$li[((ncol(livedead)/2)+1):ncol(livedead),1:positive])
                }
if (length(livecentroid1) > 1) 
                {n1<-rbind(livecentroid1, res$li[((ncol(livedead)/2)+1):ncol(livedead),1:positive])
                }
nn1<-designdist(n1,method="(A+B-2*J)", terms=c("quadratic"))    #distances between centroid and all assemblages (living and death) 
positiveLDdistances<-as.vector(nn1[1:(ncol(livedead)/2)])       #extract distances between centroid of LA and death assemblages only

#DISTANCES BETWEEN DEATH ASSEMBLAGES AND CENTROID OF LIVING ASSEMBLAGES FOR NEGATIVE EIGENVALUES
if (negative == 0) 
                {LCdead<-sqrt(positiveLDdistances)
                }
if (negative > 1) 
                {n2<-rbind(livecentroid2[(positive+1):(positive+negative)], res$li[((ncol(livedead)/2)+1):ncol(livedead),(positive+1):(positive+negative)])
                nn2<-designdist(n2,method="(A+B-2*J)", terms=c("quadratic"))    #distances between centroid and all assemblages (living and death) 
                negativeLDdistances<-as.vector(nn2[1:(ncol(livedead)/2)])       #extract distances between centroid of LA and death assemblages only
                LCdead<-sqrt(abs(positiveLDdistances-negativeLDdistances))
                }
if (negative == 1)      
                {n2<-append(livecentroid2[(positive+1):(positive+negative)], res$li[((ncol(livedead)/2)+1):ncol(livedead),(positive+1):(positive+negative)])
                nn2<-designdist(n2,method="(A+B-2*J)", terms=c("quadratic"))
                negativeLDdistances<-as.vector(nn2[1:(ncol(livedead)/2)])
                LCdead<-sqrt(abs(positiveLDdistances-negativeLDdistances))
                }

#DISTANCES BETWEEN LIVING ASSEMBLAGES AND CENTROID OF LIVING ASSEMBLAGES FOR POSITIVE EIGENVALUES
if (length(livecentroid1) == 1) 
                {n1<-append(livecentroid1, res$li[1:(ncol(livedead)/2),1:positive])}
if (length(livecentroid1) > 1) 
                {n1<-rbind(livecentroid1, res$li[1:(ncol(livedead)/2),1:positive])}
nn1<-designdist(n1,method="(A+B-2*J)", terms=c("quadratic"))    #all distances between centroid and among samples for positive eigenvalues
positiveLLdistances<-as.vector(nn1[1:(ncol(livedead)/2)])       

#DISTANCES BETWEEN LIVING ASSEMBLAGES AND CENTROID OF LIVING ASSEMBLAGES FOR NEGATIVE EIGENVALUES 
if (negative == 0) 
                {LClive<-sqrt(positiveLLdistances)
                }
if (negative > 1) 
                {n2<-rbind(livecentroid2[(positive+1):(positive+negative)], res$li[1:(ncol(livedead)/2),(positive+1):(positive+negative)])
                nn2<-designdist(n2,method="(A+B-2*J)", terms=c("quadratic"))
                negativeLLdistances<-as.vector(nn2[1:(ncol(livedead)/2)])       
                LClive<-sqrt(abs(positiveLLdistances-negativeLLdistances))
                }
if (negative == 1) 
                {n2<-append(livecentroid2[(positive+1):(positive+negative)], res$li[1:(ncol(livedead)/2),(positive+1):(positive+negative)])
                nn2<-designdist(n2,method="(A+B-2*J)", terms=c("quadratic"))
                negativeLLdistances<-as.vector(nn2[1:(ncol(livedead)/2)])
                LClive<-sqrt(abs(positiveLLdistances-negativeLLdistances))}

#95% BOOTSTRAPPED CONFIDENCE INTERVALS FOR DISTANCES OF LIVING ASSEMBLAGES TO CENTROID OF LIVING ASSEMBLAGES-LClive
#AND FOR DISTANCES OF DEATH ASSEMBLAGES TO CENTROID OF LIVING ASSEMBLAGES - LCdead

n<-ncol(livedead)/2                     
LCdead.resample<-rep(NA,n);LClive.resample<-rep(NA,n);          
num.rep<-100                    
LCdeadmean.resample<-rep(NA,num.rep); LClivemean.resample<-rep(NA,num.rep);
for (i in 1:num.rep) {
        for (j in 1:n) {
                LCdead.resample[j]<-sample(LCdead,1,replace=T)
                LClive.resample[j]<-sample(LClive,1,replace=T)
                }
        LCdeadmean.resample[i]<-mean(LCdead.resample)
        LClivemean.resample[i]<-mean(LClive.resample)
        }

LCdeadci<-quantile(LCdeadmean.resample, c(0.025,0.975),na.rm=T)
LCliveci<-quantile(LClivemean.resample, c(0.025,0.975),na.rm=T)
                                                        
#TOTAL LIVE-DEAD VARIATION IN TERMS OF SUM OF SQUARED DISTANCES
SST = sum(LCdead^2)

#PREMORTEM VARIATION IN TERMS OF SUM OF SQUARED DISTANCES
SSL = sum(LClive^2)

#PERMUTEST USING VEGAN PACKAGE
control = permControl(nperm = 1000)
xdistances<-append(LCdead,LClive)
xgroup<-grouping
nobs <- length(xdistances)
mod <- lm(xdistances ~ xgroup)
mod.Q <- mod$qr
p <- mod.Q$rank
resids <- qr.resid(mod.Q, xdistances)
res <- numeric(length = control$nperm + 1)
res[1] <- summary(mod)$fstatistic[1]
  
for (i in seq(along = res[-1])) {
        perm <- permuted.index2(nobs, control = control)
        perm.resid <- resids[perm]
        f <- qr.fitted(mod.Q, perm.resid)
        mss <- sum((f - mean(f))^2)
        r <- qr.resid(mod.Q, perm.resid)
        rss <- sum(r^2)
        rdf <- nobs - p
        resvar <- rss/rdf
        res[i + 1] <- (mss/(p - 1))/resvar
        }
pvalue <- sum(res >= res[1])/length(res)

out <- list("Distances between DAs and centroid of LA" = LCdead, "Distances between LAs and centroid of LA" = LClive, 
"Magnitude of total live-dead variation (mean centroid of LA-DA distance)" = mean(LCdead), 
"Magnitude of premortem variation (mean centroid of LA-LA distance)" = mean(LClive), 
"Magnitude of postmortem variation (can be negative)" = mean(LCdead)-mean(LClive), 
"95% CI for mean centroid of LA-DA distance" = LCdeadci, "95% CI for mean centroid of LA-LA distance" = LCliveci, "F value"=res[1],
"P value for homogeneity of dispersions" = pvalue, "Premortem variation (sum of squared dissimilarities-SSL)"=SSL, 
"Total live-dead variation (sum of squared dissimilarities-SST)"=SST)
class(out) <- "fidelity"
out
}