hwex.stats <- function(allele1, allele2, female){
  
  # Title: HWE X-linked statistical tests
  
  # Author: Dan Folie
  
    # a1 and a2 are vectors of alleles from female subjects and are of the same
    # length.  a3 is the vector of alleles from the male subjects.

  # Separate allele1 and allele2 into males and females:
  a1 <- allele1[female]
  a2 <- allele2[female]
  a3 <- allele1[!female]

  # Eliminate any NA values:
  na <- is.na(a1) | is.na(a2)
  a1 <- a1[!na]
  a2 <- a2[!na]
  a3 <- a3[!is.na(a3)]
  
  # Test to see if we have no females:
  if(length(a1) == 0)
    stop("No females present")

  # Compute ob.f - the number of observed females of each genotype:
  n.f <- length(a1)
  t1 <- ifelse(a1 < a2, a1, a2)
  t2 <- ifelse(a1 > a2, a1, a2)
  avec <- factor(c(t1,t2,a3))     # Male vector a3 included to ensure that every
  t1 <- avec[1:n.f]               # distinct allele code is in the "levels" 
  t2 <- avec[(n.f+1):(2*n.f)]     # attribute of avec.
  tbl <- table(t1,t2)
  n.allele <- nrow(tbl)
  ob.f <- tbl[row(tbl) <= col(tbl)]

  # Compute ob.m - the number of observed males of each genotype:

  n.m <- length(a3)
  if(n.m > 0)
    { t3 <- avec[(2*n.f + 1):(2*n.f + n.m)]
      ob.m <- as.vector(table(t3))
    }
  else
    { ob.m <- NULL }

  # compute p, where p[i] is the probablility of the ith allele code appearing:

  allele.counts <- as.vector(table(avec))
  p <- allele.counts/sum(allele.counts)

  # compute ex.f - the expected number of females of each genotype:

  ex.f <- 2*p %o% p
  diag(ex.f) <- p^2
  ex.f <- ex.f[row(ex.f) <= col(ex.f)]
  ex.f <- n.f*ex.f

  # compute ex.m - the expected number of males of each genotype:

  if(n.m > 0)
    { ex.m <- n.m*p }
  else
    { ex.m <- NULL }

  # compute df, stat.chi, and pval.chi:
  
  # df is the degrees of freedom for Pearson's chi-squared test. 
  df <- n.allele*(n.allele + 1)/2 - 1

  # stat.chi is the chi-square goodness of fit statistic
  stat.chi <- sum((ob.f-ex.f)^2/ex.f) +
              if(n.m > 0) sum((ob.m-ex.m)^2/ex.m)
              else 0

  # pval.chi is the asymptotic p-value of the goodness of fit statistic
  pval.chi <- 1-pchisq(stat.chi,df)

  # compute the likelihood statistic of the observed genotypes:
  # h is the number of heterozygotes
  h <- n.f - sum(diag(tbl))
  ln.pobs <- h*log(2) - sum(lgamma(ob.f[ob.f > 0] + 1))

  # compute the rare-homozygotes statistics stat.rare and pval.rare:

  # stat.rare is the chi-square statistic for rare homozygotes
  u.score <- matrix(rep(-1,n.allele^2), nrow = n.allele)
  diag(u.score) <- (1-p)/p
  u.score <- u.score[row(u.score) <= col(u.score)]
  stat.rare <- sum(ob.f*u.score)^2/(n.f*(n.allele-1))

  # pval.rare is the asymptotic p-value for the chi-square statistic for rare
  # homozygotes

  pval.rare <- 1 - pchisq(stat.rare,1)

  return(list(stat.chi=stat.chi,
              pval.chi=pval.chi,
              ln.pobs=ln.pobs,
              df=df,
              ob.f=ob.f,
              ob.m=ob.m,
              ex.f=ex.f,
              ex.m=ex.m,
              u.score=u.score,
              stat.rare=stat.rare,
              pval.rare=pval.rare,
              n.f=n.f,
              n.m=n.m,
              n.allele=n.allele))
}
  
