#include <stdlib.h>
#include <S.h>
#include <math.h>

/* #$Author: sinnwell $ */

/* # Update 2/24/11 by JPS: change all long to int for R on 64-bit linux */

static int quicksort_permute(int n, double array_sort[], 
                             int array_permute[]);
static int hwe_stats_sim(int *a1,     	  int *a2, 		int *n, 
		         int *n_allele,  double *ex, 		double *u_score,
		         double *return_list,                   int **tbl);
static int ranAS183_seed(int iseed1, int iseed2, int iseed3);
static double ranAS183();

/* HWE_SIM

				   FUNCTION ARGUMENTS
This function hwe_sim takes as its arguments the following:

 a1_a2:	        Int pointer to an array of allele codes denoting the 
		concatenation of allele arrays a1 and a2.
 nsim:		Int pointer to an integer denoting the number of simulations to 
		run.
 obs_n:	        Int pointer to an integer denoting length(a1) (same as 
		length(a2)). 
 obs_n_allele:  Int pointer to an integer denoting the number of allele codes.
 obs_ex:	Double pointer to an array of the expected frequency of allele 
		pairs for each distinct pair of allele codes with order
		unimportant - i.e. allele code pair (2,3) is the same as (3,2).
 obs_u_score:   Double pointer to an array of the weights to be given to each 
		allele pair.
 iseed1:        A int pointer to the first random seed for the Wichman and Hill
                Random Number Generator.
 iseed2:        A int pointer to the second random seed for the Wichman and 
                Hill Random Number Generator
 iseed3:        A int pointer to the third random seed for the Wichman and Hill
                Random Number Generator.
 rc:		A int pointer that relays the function return code back to S.
		For a description of return codes, see RETURN CODES below.
 return_list:	An array of the three statistics returned by this function - 
		pval_chi, pval_like, and pval_rare in this order (notice that it
		is alphabetical order).

Note: If the allele concatenation al_a2 contains k distinct allele codes,
these codes must be mapped to the first k positive integers 1,2,...,k.  
These integers are used in the memory addressing of hwe_stats_sim(), a C
function called by hwe_sim(), and not using consecutive positive integers
that start at 1 will most likely cause a Bad Address Signal error in
hwe_stats_sim(), which will terminate the user's S session.

				   SIDE EFFECTS
The return_list argument will be modified to contain the pval_chi, pval_like, and
pval_rare statistics.

				   RETURN CODES
The following return codes may be returned:
	0:  Successful completion
       -1:  nsim points to a simulation number less than or eqaul to zero
	    - function failed
       -2:  obs_n points to a degenerate array length (less than or equal to
	    zero) - function failed
       -3:  obs_n_allele points to a degenerate array length (less than or equal to
	    zero) - function failed
       -4:  System memory low -  funtion failed
*/

void hwe_sim(int *a1_a2,         int *nsim,           int *obs_n, 
 	     int *obs_n_allele,  double *obs_ex,       double *obs_u_score,
	     int *iseed1,	  int *iseed2,		int *iseed3, 
	     int *rc,		  double *return_list)
{
  double rej_chi = 0.0, rej_like = 0.0, rej_rare = 0.0;
  double obs_ln_pobs, obs_stat_chi, obs_stat_rare;
  int a1_a2_length;				
  int i, j, k;
  int *b1, *b2;			/* Holds permuted alleles	       */
  int *array_permute;			/* Holds integers that we will permute */
  double *array_sort;	      		/* Holds random numbers that we sort   */
  double return_list_sim[3];		/* Holds stats from hwe_stats_sim      */
  int **tbl;				/* Scratch space for hwe_stats_sim     */
  int *b1_obs, *b2_obs;                /* Boundary pointers for a1_a2      */


  if(*nsim <= 0)			/* Check for non-sensical parameters   */ 
    {   *rc = -1;
	return; }
  if(*obs_n <= 0)
    { 	*rc = -2;
    	return; }
  if(*obs_n_allele <= 0)
    {	*rc = -3;
	return; }

  a1_a2_length = 2 * (*obs_n);
					/* Allocate Memory */

  array_sort = (double* ) Salloc(a1_a2_length, double);
  array_permute = (int *) Salloc(a1_a2_length, int);
  b1 = (int *) Salloc(*obs_n, int);
  b2 = (int *) Salloc(*obs_n, int);

  tbl = (int **) Salloc(*obs_n_allele, int *);
  if(!tbl)
    {	*rc = -4;
	return; }

  for(i = 0; i < *obs_n_allele; i++)
    { tbl[i] = (int *) Salloc(*obs_n_allele, int);
      if(!tbl[i]) 
    {	*rc = -4;		
        return; }
    }
  


					/* Check for memory allocation errors  */

  if(!array_sort || !array_permute  || !b1 || !b2)
    {	*rc = -4;
	return; }
  
  b1_obs = a1_a2;
  b2_obs = a1_a2 + (*obs_n);		/* Split a1_a2 into a1 and a2 so that */
					/* the observed statistics may be     */
					/* gathered.			      */ 

  hwe_stats_sim(b1_obs, b2_obs, obs_n, obs_n_allele, 
		obs_ex, obs_u_score, return_list_sim, tbl);



  obs_ln_pobs = return_list_sim[0];    /* Gather observed statistics          */
  obs_stat_chi = return_list_sim[1];
  obs_stat_rare = return_list_sim[2];


  ranAS183_seed(*iseed1, *iseed2, *iseed3);

  for(i = 0; i < *nsim; i++)
    { 
      for(j = 0; j < a1_a2_length; j++)	/* Populate our random number array    */
	array_sort[j] = ranAS183();


	
					/* Permute 1,2,...,*n_allele based on  */
					/* the ordinal magnitude of the	       */
					/* random elements of array_sort       */
      quicksort_permute(a1_a2_length, array_sort, array_permute);

					/* Rearrange a1_a2 based on array_sort */
					/* and array_permute		       */

      for(k = 0; k < *obs_n; k++)      
	{
	  b1[k] = a1_a2[(array_permute[(2*k)]) - 1];
	  b2[k] = a1_a2[(array_permute[(2*k+1)]) - 1];
	   }

      hwe_stats_sim(b1, b2, obs_n, obs_n_allele, obs_ex, obs_u_score,
		    return_list_sim, tbl);

				        /* Chi-square test		       */
					/* Likelihood Test		       */
					/* Rare homozygotes test	       */

      rej_chi  = rej_chi + ((return_list_sim[1] >= obs_stat_chi) ? 1 : 0);
      rej_like = rej_like + ((return_list_sim[0] <= obs_ln_pobs) ? 1 : 0);
      rej_rare = rej_rare + ((return_list_sim[2] >= obs_stat_rare) ? 1 : 0);
    }

  return_list[0] = rej_chi / *nsim;	/* return values for pval_chi,	       */
  return_list[1] = rej_like / *nsim;	/* pval_like, and pval_rare	       */
  return_list[2] = rej_rare / *nsim;

  *rc = 0;
  return;
}

static void qs(int left, int right, double array_sort[], int array_permute[]);

/* QUICKSORT_PERMUTE and QUICKSORT_ORDINAL 
  
				   FUNCTION ARGUMENTS 
These functions, quicksort_permute and quicksort_ordinal, take the following 
arguments:

n: 	       A int integer giving the length of the array to be sorted
array_sort:    A double array of length n to be sorted
array_permute: A int array of length n that will contain the 
	       sequence 1,2,...n permuted to the order corresponding
	       to the sorting of array_sort (see user's manual below).
	       This order will vary slightly depending on whether 
	       quicksort_permute or quicksort_ordinal is used.
temp           A int array of length *n that is used for scratch space (used in
               quicksort_ordinal() only).

				   SIDE EFFECTS
The array array_sort will be returned in sorted order from lowest to highest. 
And array_permute will be returned containg a permutation of 1,2,...,n.  The 
particular permutation varies depending upon which method is called (see user's
manual).

				   RETURN CODES
The following return codes may be returned:
	0:     Successful completion
       -1:     Array length degenerate (lesss than or equal to zero)

				   USER MANUAL
These functions, quicksort_permute and quicksort_ordinal, both use
the main qs(...) engine to perform a quicksort on the elements of 
array_sort (shorthand for "array to be sorted") and return a permutation
of the elements of array_permute ("array to be permuted").  The primary 
difference should be made apparent by the following:

let a[] = {0.8, 0.4, 0.5, 0.2, 0.7, 0.1, 0.6, 0.3}
    b[] = {1,   2,   3,   4,   5,   6,   7,   8  }  

then 

quicksort_permute(8, a, b,) returns

   a[] = {0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8}
   b[] = {6,   4,   8,   2,   3,   7,   5,   1  }

and quicksort_ordinal(8, a, b) returns 

   a[] = {0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8}
   b[] = {8,   4,   5,   2,   7,   1,   6,   3  }

It should be noted that the array of 1,2,...,n - in this case b[] - does 
not need to contain the integers at the time of the call to either 
of the above functions.  It must simply be a int array of length n.  

*/


static int quicksort_ordinal(int n, double array_sort[], int array_permute[],
		             int temp[])
{
  int i; 
  
  if(n <= 0)
    return -1;			      /* non-sensical array length	       */
  
  for(i = 0; i < n; i++)	      /* Initialize array_permute to 1,2,...,n */
    array_permute[i] = i + 1;
  qs(0, n - 1, array_sort, array_permute);
	
  for(i = 0; i < n; i++)
	temp[array_permute[i] - 1] = i + 1;
  for(i = 0; i < n; i++)
    	array_permute[i] = temp[i];

  return 0;
}

static int quicksort_permute(int n, double array_sort[], int array_permute[])
{
  int i; 
  
  if(n <= 0)
    return -1;			      /* non-sensical array length	       */
  for(i = 0; i < n; i++)	      /* Initialize array_permute to 1,2,...,n */
    array_permute[i] = i + 1;
  qs(0, n - 1, array_sort, array_permute);
  return 0;
}


static void qs(int left, int right, double array_sort[], int array_permute[])
{
  int i,j, temp_permute;
  double mid, temp_sort;

  i = left; j = right;
  mid = array_sort[(left + right) / 2];

  do {
    while(array_sort[i] < mid && i < right) i++;
    while(mid < array_sort[j] && left < j) j--;
    
    if(i<=j) {
      temp_sort = array_sort[i];		/* swap elements	       */
      array_sort[i] = array_sort[j];
      array_sort[j] = temp_sort;
					     
      temp_permute = array_permute[i];		/* comes along for the ride... */
      array_permute[i] = array_permute[j];
      array_permute[j] = temp_permute;
      
      i++; j--;
    }
  } while(i <= j);

  if(left < j) 
    qs(left, j, array_sort, array_permute);
  if(i < right) 
    qs(i, right, array_sort, array_permute);

  return; 
}


static double alngam(double xvalue,int *ifault);  /* log gamma(x) function     */

/* HWE_STATS_SIM

				   ARGUMENTS
This function hwe_stats_sim takes the following arguments:

        a1,a2:	  Int pointers to arrays of allele codes denoting alleles
	n:	  Int pointer to an integer denoting the length of a1 and a2
	n_allele: Int pointer to an integer denoting the number of unique allele 
		  codes
	ex:       Double pointer to an array of the expected frequency of allele 
		  pairs for each distinct pair of allele codes with order 
		  unimportant-i.e. allele code pair (2,3) is the same as (3,2)
        u_score:  Double pointer to an array of the weights to be given to each 
		  allele pair
	return_list: Double pointer to an array of length three that will 
		     contain the following statistics in alphabetical order - 
		     ln_pobs, stat_chi, and stat_rare.
        tbl:      An (*n_allele)-by-(*n_allele) sized table of int that is used
		  to accumulate the number of code pair (i,j) present in a1 and
		  a2 in table position tbl[i][j].

				   SIDE EFFECTS               
The argument "return_list" is the only argument that is modified by 
hwe_stats_sim.  All other arguments remain unchanged.

				   RETURN CODES
The following return codes may be returned:

	0:  Successful completion
       -1:  n points to a degenerate array length - i.e. less than or equal to 
	    zero - function failed
       -2:  n_allele points to a degenerate array length - i.e. less than or 
	    equal to zero - function failed
       -3:  The number of occurences of a one or more allele code pairs was too 
	    large to be passed to alngam, which computes the logarithm of the 
	    gamma function of its argument, xvalue (see function declaration at
	    the very top of this file).  This code signifies the that argument 
	    xvalue was larger than 1.0e305.  Of course, this implies that you 
	    have more allele code pairs than there are subatomic particles in the 
	    known universe (current estimates are 1.0e79) - function failed.  
       -4:  The system is low on memory - function failed.
*/

static int hwe_stats_sim(int *a1,     	  int *a2, 		int *n, 
		         int *n_allele,  double *ex, 		double *u_score,
		         double *return_list,                   int **tbl)
{
  double ln_pobs = 0.0, stat_chi = 0.0, stat_rare = 0.0;
  double temp_ex = 0.0, temp_ob  = 0.0, temp_u_score = 0.0;
  double temp_rare_sum = 0.0, temp_gamma_sum =0.0, tbl_i_j = 0.0;
  double table_sum = 0.0, table_diag_sum = 0.0, h = 0.0;
  int alngam_return_code = 0;
  int i,j,k,row,col;

 
  if(*n <= 0)				/* Degenerate code or allele lengths   */
    return -1;
  if(*n_allele <= 0)
    return -2;

  for(i = 0; i < *n_allele; i++)	/* Zero out tbl since tbl will be used */
    for(j = 0; j < *n_allele; j++)	/* as an accumulator		       */
      tbl[i][j] = 0;

 					/* k's range is 0,1,...,n-1	       */
				        /* i.e. length of a1 and a2	       */
					/* We will store the number of pairs   */
					/* of the form (code_i, code_j) in     */
  for(k = 0; k < *n; k++)		/* table position tbl[i][j]	       */
    { row = a1[k] - 1;	col = a2[k] - 1;	
      tbl[row][col] = tbl[row][col] + 1;/* allele codes are 1 based, not 0     */
      }

				        /* Since (code_i, code_j) for fixed    */
					/* i and j is considered the same as   */
					/* (code_j, code_i), we will store     */
					/* the total number of (i,j) + (j,i)   */
					/* codes in tbl[i][j] where tbl is     */
  for(i = 0; i < *n_allele; i++)        /* upper triangular i.e. i <= j.       */		
    for(j = 0; j < i; j++)		
      tbl[j][i] = tbl[j][i] + tbl[i][j];

  /********************************compute stat_chi*****************************/  

  k = 0;				/* k's range is sum(1,2,...n_allele)   */
  for(j = 0; j < *n_allele; j++)
    for(i = 0; i <=j; i++)
      { 
	tbl_i_j = tbl[i][j];		

 
	/**************************computing stat_chi********************/
	temp_ex = ex[k];	temp_ob = tbl_i_j;
        stat_chi = stat_chi + ((temp_ob - temp_ex)*(temp_ob - temp_ex))/temp_ex;


	/**************************used for ln_pobs **********************/
	table_sum = table_sum + tbl_i_j;
	if(i == j)
	  table_diag_sum = table_diag_sum + tbl_i_j;


	/**************************used for stat_rare********************/
	temp_ob = tbl_i_j;
        temp_gamma_sum = temp_gamma_sum + 
	  (temp_ob > 0 ? alngam((temp_ob + 1) , &alngam_return_code) : 0 ); 
        if(alngam_return_code == 2)
	  return -3;			/* Argument for alngam too large -     */
					/* function failed		       */

	temp_rare_sum += u_score[k]*tbl_i_j;
        k++;
      }
 
  /********************************compute ln_pobs******************************/

  h = table_sum - table_diag_sum;    
  ln_pobs = h*log(2) - temp_gamma_sum;

  /********************************compute stat_rare****************************/

  temp_rare_sum *= temp_rare_sum;		
  stat_rare  = temp_rare_sum / (*n * (*n_allele - 1));

  return_list[0] = ln_pobs;		/* Return computed statistics via      */
  return_list[1] = stat_chi;		/* return_list			       */
  return_list[2] = stat_rare;
  
  return 0;

}

/*  SCCS     @(#)ranAS163.c	1.1    06/18/97     */
/*
     Algorithm AS 183 Appl. Statist. (1982) vol.31, no.2

     Returns a pseudo-random number rectangularly distributed
     between 0 and 1.   The cycle length is 6.95E+12 (See page 123
     of Applied Statistics (1984) vol.33), not as claimed in the
     original article.

     ix, iy and iz should be set to integer values between 1 and
     30000 before the first entry. To do this, 
     first call ranAS183_seed(iseed1,iseed2,iseed3), where iseed#
     are 3 int int seeds between 1 and 30000. The 3  seeds are
     saved, but ix,iy,iz can change.

    Translated from fortran to C.
*/

static int ix, iy, iz;

static int ranAS183_seed(int iseed1, int iseed2, int iseed3)
{
  int error;

  error=1;
  if( (iseed1 >=1 && iseed1 <=30000) && (iseed2 >=1 && iseed2 <=30000) && 
      (iseed3 >=1 && iseed3 <=30000)) error=0;
  if(error) return (error);
  ix = iseed1;
  iy = iseed2;
  iz = iseed3;
  return (error);
}

static double ranAS183()
{
   double u;

   ix = (171*ix) % 30269;
   iy = (172*iy) % 30307;
   iz = (170*iz) % 30323;
   u  = (double)ix/30269.0 + (double)iy/30307.0 + (double)iz/30323.0;
   return ( u - (int) u );
}

/*** This function computes the logarithm of the gamma function.
**** Algorithm AS 245 gives an accuracy of about
**** 10-12 significant decimal digits except for small regions around X = 1 and
**** X = 2, where the function goes to zero.
****
****     ALGORITHM AS245  APPL. STATIST. (1989) VOL. 38, NO. 2
****
***/  

#include <math.h>

static double alngam(double xvalue,int *ifault) {

      double x, x1, x2, y, lngam;
/*
**     Coefficients of rational functions
*/
     double R1[9] = {
          -2.66685511495, -24.4387534237,
          -21.9698958928,  11.1667541262,
     	   3.13060547623, 0.607771387771,
     	   11.9400905721,  31.4690115749,
     	   15.2346874070 };

     double R2[9] = {
           -78.3359299449, -142.046296688,
            137.519416416,  78.6994924154,
            4.16438922228,  47.0668766060,
            313.399215894,  263.505074721,
            43.3400022514 };

     double R3[9] = {
           -212159.572323,  230661.510616,
            27464.7644705, -40262.1119975,
           -2296.60729780, -116328.495004,
           -146025.937511, -24235.7409629,
           -570.691009324 };

     double R4[5] = {
           0.279195317918525,  0.4917317610505968,
           0.0692910599291889, 3.350343815022304,
           6.012459259764103 };

/*
**    Fixed constants
*/

#define ALR2PI 0.918938533204673
#define FOUR   4.0
#define HALF   0.5
#define ONE    1.0
#define ONEP5  1.5
#define TWELVE 12.0
#define ZERO   0.0

/*
**     Machine-dependant constants.
**     A table of values is given at the top of page 399 of the paper.
**     These values are for the IEEE double-precision format for which
**     B = 2, t = 53 and U = 1023 in the notation of the paper.
*/
#define XLGE  5.10e6 
#define XLGST 1.0e305

      x = xvalue;
      lngam = ZERO;
/*
**    Test for valid function argument
*/
      *ifault = 2;
      if (x >= XLGST) return lngam;
      *ifault = 1;
      if (x <= ZERO) return lngam;
      *ifault = 0;
/*
*     Calculation for 0 < X < 0.5 and 0.5 <= X < 1.5 combined
*/
      if (x < ONEP5) { 
	if (x < HALF) { 
	  lngam = -log(x);
	  y = x + ONE;
/*
*     Test whether X < machine epsilon
*/
	  if (y == ONE) return lngam;
        }
	else {
	  lngam = ZERO;
	  y = x;
	  x = (x - HALF) - HALF;
	}
	lngam = lngam + x * ((((R1[4]*y + R1[3])*y + R1[2])*y
                     + R1[1])*y + R1[0]) / ((((y + R1[8])*y + R1[7])*y
                     + R1[6])*y + R1[5]);
	return lngam;
      }
/*
*     Calculation for 1.5 <= X < 4.0
*/
      if (x < FOUR) {
	y = (x - ONE) - ONE;
	lngam = y * ((((R2[4]*x + R2[3])*x + R2[2])*x + R2[1])*x
                   + R2[0]) / ((((x + R2[8])*x + R2[7])*x + R2[6])*x
                   + R2[5]);
	return lngam;
      }
/*
*     Calculation for 4.0 <= X < 12.0
*/
      if (x < TWELVE) {
	lngam = ((((R3[4]*x + R3[3])*x + R3[2])*x + R3[1])*x + R3[0]) /
                 ((((x + R3[8])*x + R3[7])*x + R3[6])*x + R3[5]);
	return lngam;
      }
/*
*     Calculation for X >= 12.0
*/
      y = log(x);
      lngam = x * (y - ONE) - HALF * y + ALR2PI;
      if (x > XLGE) return lngam;
      x1 = ONE / x;
      x2 = x1 * x1;
      lngam = lngam + x1 * ((R4[2]*x2 + R4[1])*x2 + R4[0]) /
                   ((x2 + R4[4])*x2 + R4[3]);
      return lngam;
    }
