(Dis)similarity of two populations

Measures of niche overlap are used to assess the similarity or dissimilarity of two populations. The overlap coefficient is a similarity measure related to the Jaccard index that computes the overlap between two sets. Here, we consider two different implementations of the overlap coefficient: Matusita's measure and Morisita's measure.

################################################################################
# Calculates the (dis)similarity between 2 univariate normal distributions
#
# sample1 : sample data 1
# sample2 : sample data 2
#
# OUTPUT - Similarity Measures
# prop      : Overlapping coefficient (Proportional similarity measure)
# rho       : Overlapping coefficient (Matusita's Measure)
# lambda    : Overlapping coefficient (Morisita's Measure)
#
# OUTPUT - Dissimilarity Measures
# hellinger : Hellinger distance
################################################################################
# Author  : Thomas Debray
# Version : 19 dec 2010
################################################################################
 
overlap <- function(sample1,sample2)
{
    prop      = NA
    lambda    = NA
    rho       = NA
    hellinger = NA
 
    # Estimate parameters
    s1.mean = mean(sample1)
    s1.std  = sd(sample1)
    s2.mean = mean(sample2)
    s2.std  = sd(sample2)
 
    # Prop
    fmin = function(x) min=min(x)
    integrand <- function(x) {
        dens = cbind(dnorm(x,mean=s1.mean,sd=s1.std),dnorm(x,mean=s2.mean,sd=s2.std))
        apply(dens,1,fmin)
    }
 
    tryCatch({
        prop = integrate(integrand,lower=-Inf,upper=Inf)$value
    }, error = function(ex) {
        prop = NA
    })
 
    # Rho
    integrand <- function(x) {
        sqrt(dnorm(x,mean=s1.mean,sd=s1.std)*dnorm(x,mean=s2.mean,sd=s2.std))
    }
 
    tryCatch({
        rho = integrate(integrand,lower=-Inf,upper=Inf)$value
    }, error = function(ex) {
        rho = NA
    })
 
    # Lambda
    f1f2 <- function(x) { 2*dnorm(x,mean=s1.mean,sd=s1.std)*dnorm(x,mean=s2.mean,sd=s2.std) }
    f1sq <- function(x) { dnorm(x,mean=s1.mean,sd=s1.std)**2 }
    f2sq <- function(x) { dnorm(x,mean=s2.mean,sd=s2.std)**2 }
 
    tryCatch({
        lambda = integrate(f1f2,lower=-Inf,upper=Inf)$value/
        (integrate(f1sq,lower=-Inf,upper=Inf)$value+
        integrate(f2sq,lower=-Inf,upper=Inf)$value)
    }, error = function(ex) {
        lambda = NA
    })
 
    # Hellinger
    f1f2hellinger <- function(x) { 0.5 * (sqrt(dnorm(x,mean=s1.mean,sd=s1.std))-
        sqrt(dnorm(x,mean=s2.mean,sd=s2.std)))**2 }
 
    tryCatch({
        hellinger = integrate(f1f2hellinger,lower=-Inf,upper=Inf)$value
    }, error = function(ex) {
        hellinger = NA
    })
 
    out <- list(prop=prop,rho=rho,lambda=lambda,hellinger=hellinger)
}

An example is given below.

a=rnorm(1000,5,10)
b=rnorm(45,9,8)
ovl = overlap(a,b)
plot(density(a),main="Niche Overlap",xlim=c(-30,40),ylim=c(0,0.05))
lines(density(b),col="green")
polygon(density(a)$x, density(a)$y, col = rgb(0,1,0, .5))
polygon(density(b)$x, density(b)$y, col = rgb(0.50,0,0.50, .5))
text(-30,0.05,paste("Proportional similarity measure:",round(ovl$prop*100,2),"%"),col="black",pos=4)
text(-30,0.048,paste("Matusita's Measure:",round(ovl$rho,2)),col="black",pos=4)
text(-30,0.046,paste("Morisita's Measure:",round(ovl$lambda,2)),col="black",pos=4)
text(-30,0.044,paste("Hellinger Distance:",round(ovl$hellinger,2)),col="black",pos=4)
Niche Overlap

 

[ Back ]