/*  SCCS     @(#)gassoc.c	1.23    06/04/01     */
/*
** COPYRIGHT (c) 1995 Mayo Clinic.
**
** File: gassoc.c
** Authors: Jon Kosanke/Charley Rowland
**     
** History:
** ----------------------------------------------------------------------------
** 12/04/00  David Tines       Added gen as a valid command line argument.   
**                             Added compute_gen_or_tdt function. 
**
*/

#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "cline.h"      /***Function to parse command line arguments***/
#include "geno.h"       /***Definition of genotype structure***/
#include "dlistheader.h"


#define VERSION 1.06



/*
** main: driver for gassoc.
**
** With no command line arguments the options are collected interactively
** through int_input() (which also supplies the open data file, and
** optionally a file to which stdout is redirected).  Otherwise the options
** are read from the command line with get_flag(), the data file (-inf) and
** the parameter file (-par) are opened, and readpar() loads the locus
** description and the list of marker names.
**
** For every marker position from mstart to mstop, except the disease locus
** position dloc, the program:
**   - reads the genotypes with l2g(),
**   - builds the sorted list of parental alleles and the unique alleles,
**   - takes the most frequent parental allele as the baseline,
**   - recodes every allele as its index in the unique-allele table,
**   - calls compute_gen_or_tdt() (a "GRR" pass first when -grrmarker/-grr
**     select this marker, then always a "TDT" pass),
**   - computes the score statistics with score(), optionally simulates
**     their distribution sim times, and prints the result table.
**
** When GRR options are given only the marker named by -grrmarker is
** analysed and the program stops after it.
**
** Exit status: 0 on success (including -h), 1 on any error.
*/
int main( int argc, char *argv[]) {

  /****************************DECLARATIONS***********************************/
  long   **l2g(int dloc, int nliab, int mloc,FILE *prefile,long *n_used,
               char *markername);
  double gammad(double x,double p,int *ifault);
  double alnorm(double x,int upper);
  long   *lvector(long n);
  double *dvector(long n);
  double **dmatrix(long nr, long nc);
  long   **lmatrix(long nr, long nc);   
  void   qksort(long *a,int first,int last);
  long   err_check(FILE *in,long *n_cases,long *n_pmiss,long *n_inc,
                   long *n_blank);
  int    ranAS183_seed(int iseed1, int iseed2, int iseed3);

  double log_likely(double *beta,long **x,long **x1,
                    long **x2,long **x3,long **x4,
                    long n_cases,long n_alleles);

  int max_like(long n_cases,long n_alleles,long maxiter,int iter,double *beta,
               long **x,long **x1,long **x2,long **x3,long **x4,
               double **v,double *logptr);

  void  score(double *score_tdt,double *score_dom,double *score_rec,
              double *pscore_tdt,double *pscore_dom,double *pscore_rec,
              int *rank_tdt,int *rank_dom,int *rank_rec,              
              long n_cases,long n_alleles,long **x,long **x1,long **x2,
              long **x3,long **x4);  

  void simsetup(double *betasim, long n_alleles,struct geno *indata,
              long **x1,long **x2,long **x3,long **x4,long n_cases, 
              double **psim,double **v_tdt , double *mtot_tdt,
              double **v_dom,double *mtot_dom,double **v_rec,double *mtot_rec);
            
  void simulate(double **psim,long n_alleles,long n_cases,
                long **x1,long **x2,long **x3,long **x4,
                double *mtot_tdt,double **v_tdt,double *simscore_tdt,
                double *mtot_dom,double **v_dom,double *simscore_dom,
                double *mtot_rec,double **v_rec,double *simscore_rec);

  long getindex(long j,long baseline);
  double *int_input(FILE **in,int *dloc,int *nliab,int *mstart,int *mstop,
                    int *iter,long *maxiter,long *sim,int *seeda,int *seedb,
                    int *seedc,int *simout,char *outfilename,int *bsim,
                    int *ofile,char *outresults,char *grrmarker, 
                    long *grrallele, DList *word_list);

  void readpar(FILE *par,int *dloc,int *nliab,int *mstart,int *mstop,
               DList *word_list);
/* New functions below needed for GASSOC version 1.06 */
  void tdtcode(long **x, long i, int baseline, int c1code, int c2code);

  void gencode(int allele, int a1, int a2, int *x);

  void compute_gen_or_tdt(char *word, struct geno *indata, int n_cases, 
                          int n_alleles, long ***x, long ***x1, long ***x2, 
                          long ***x3, long ***x4, long alleleofinterest, 
                          double *beta, long *unique_alleles, long grrallele,
                          long maxiter, int iter);
/* New functions above needed for GASSOC version 1.06 */

  DList word_list;            /* marker names, filled by readpar()/int_input() */
  DListElmt *element = NULL;  /* list entry for the marker currently considered */

  FILE *in = NULL,*out_score = NULL,*par;
  char infilename[25]="in.pre";
  char parfile[25]="in.par";
  char score_fname[25],outresults[25];
  char simdef[] = "sim.out";
  /* String-valued flags: compute_type is handed to compute_gen_or_tdt(),
     the others are local "TRUE"/"FALSE" switches. */
  char *compute_type,*markername,*grrmarkerfound,*grrallelefound,
       *optionalgrrparams; /* New params for GASSOC version 1.06 */
  char grrmarker[25]="NONE"; /* New params for GASSOC version 1.06 */

  struct geno *indata;

  long   **x,**x1,**x2,**x3,**x4,**datamatx,
         *all_alleles,*unique_alleles,*acounts,
         grrallele,genallelecode,
         n_cases,n_alleles,n_unique,nbetas,
         i,j,
         n_rejtdt,n_rejdom,n_rejrec,
         sim,maxiter,simiter,marker,max,baseline;

  double **vsim_tdt,**vsim_dom,**vsim_rec,**psim,
         *beta,*betasim,*mtot_tdt,*mtot_dom,*mtot_rec,
         score_tdt,score_dom,score_rec,pscore_tdt,pscore_dom,pscore_rec,
         simscore_tdt,simscore_dom,simscore_rec,sim_ptdt,sim_pdom,sim_prec;

  int  rank_tdt,rank_dom,rank_rec,iter,simout,bsim,
       seeda,seedb,seedc,dloc,nliab,mstart,mstop,ofile;
  /*************************END DECLARATIONS**********************************/

  /****Default settings for input options*****/
  dloc   = 1;                 /**position of disease locus relative to marker loci**/
  nliab  = 1;                 /**number of liability classes**/
  mstart = 2;                 /**position of first marker locus to analyze**/
  mstop  = 2;                 /**position of last marker to analyze**/
  iter   = 0;                 /**print iteration history**/
  maxiter=25;                 /**maximum number of iterations**/
  sim    = 0;                 /**simulate distribution of score statistic (sim# of times)**/
  seeda  =-1;                 /**seed1 for random number generator**/
  seedb  =-1;                 /**seed2 for random number generator**/
  seedc  =-1;                 /**seed3 for random number generator**/
  simout = 0;                 /**print distribution of score statistic to file <score_fname>**/
  bsim   = 0;                 /**specify betas other than null hypothesis (B=0)**/  
  /* New code below for GASSOC version 1.06 */
  grrallele = 0;

  /* main never allocates beta; it is only forwarded by value to
     compute_gen_or_tdt().  Give it a defined value instead of passing an
     indeterminate pointer.
     NOTE(review): presumably compute_gen_or_tdt() allocates its own
     coefficient vector -- confirm against its definition. */
  beta = NULL;

  compute_type = "TDT";
  markername = "NONAME";
  grrmarkerfound = "FALSE"; 
  grrallelefound = "FALSE";
  optionalgrrparams = "FALSE";

  dlist_init(&word_list, free);    
  /* New code above for GASSOC version 1.06 */
  /***IF NO COMMAND LINE ARGUMENTS ==> INTERACTIVE INPUT***/
  if (argc==1) {
    betasim=int_input(&in,&dloc,&nliab,&mstart,&mstop,&iter,&maxiter,&sim,
                      &seeda,&seedb,&seedc,&simout,score_fname,&bsim,&ofile,
                      outresults,grrmarker,&grrallele,&word_list);

    if (ofile==1) {
      /* A failed redirect leaves stdout unusable, so report on stderr. */
      if (freopen(outresults,"w",stdout) == NULL) {
        fprintf(stderr,"Error: unable to open output file %s\n",outresults);
        exit(1);
      }
    }
  }
  /***OTHERWISE READ ANY COMMAND LINE ARGUMENTS***/
  else {

    if (get_flag(argc,argv,"h",0,&i) > 0) {
      printf("\nUsage: \t%s\n",argv[0]);
      printf("\n  [-ith]               : print iteration history(default=No)");
      printf("\n  [-maxiter#]          : # = maximum iterations for convergence(default=25)");
      printf("\n  [-inf<infile>]       : infile is input data file name (default=in.pre)");
      printf("\n  [-par<parfile>]      : input parameter file (default=in.par)");
      printf("\n  [-sim#]              : # = iterations for simulation loop (default=0)");
      printf("\n  [-seeda#]            : # = seed1 for random no. generator (required)");
      printf("\n  [-seedb#]            : # = seed2 for random no. generator (required)");
      printf("\n  [-seedc#]            : # = seed3 for random no. generator (required)");
      /* New code below for GASSOC version 1.06 */
      printf("\n  [-grrmarker<marker>] : label for desired marker that matches the marker in the parameter file");
      printf("\n  [-grr<allele>]       : \"high risk\" allele of interest for GRR analysis");
      /* New code above for GASSOC version 1.06 */
      printf("\n");
      exit(0);                /* help was asked for explicitly: not an error */
    } 

    get_flag(argc,argv,"inf",4,infilename);
    in=fopen(infilename,"r");
    if (in == NULL) {
      printf("Input data file not found\n");
      exit(1);
    }

    get_flag(argc,argv,"par",4,parfile);
    par=fopen(parfile,"r");
    if (par == NULL) {
      printf("Input parameter file not found\n");
      exit(1);
    }
    readpar(par,&dloc,&nliab,&mstart,&mstop,&word_list); 

    get_flag(argc,argv,"ith",0,&iter);      
    get_flag(argc,argv,"maxiter",5,&maxiter);
    get_flag(argc,argv,"sim",5,&sim);
    get_flag(argc,argv,"seeda",1,&seeda);
    get_flag(argc,argv,"seedb",1,&seedb);
    get_flag(argc,argv,"seedc",1,&seedc);
     /* New code below for GASSOC version 1.06 */
    get_flag(argc,argv,"grrmarker",4,grrmarker);
    /* NOTE(review): type code 1 is used above with int targets (the seeds)
       and code 5 with long targets (maxiter, sim), but grrallele is a long.
       Confirm against get_flag() in cline.h that code 1 is right here. */
    get_flag(argc,argv,"grr",1,&grrallele);
     /* New code above for GASSOC version 1.06 */

    simout=0;
    bsim=0;

  }
  /***************************SET UP DATA*************************************/

  printf("*************************************************\n");
  printf("*    gassoc Version %4.2f\n",VERSION);
  printf("*************************************************\n\n");

  if (sim>0) {
    if (ranAS183_seed(seeda,seedb,seedc) == 1) {
      printf("Error: seed(s) for random number generator incorrect or not found\n");
      printf("3 integer seeds between 1 and 30000 are required for simulation\n");
      printf("For help: gassoc -h\n");
      exit(1);
    }
  }

  /* New code below for GASSOC version 1.06 */
  if ((grrallele > 0) || (strcmp("NONE",grrmarker) != 0)){
     optionalgrrparams = "TRUE";

     /***CHECK THAT THE REQUESTED MARKER NAME IS IN THE MARKER LIST***/
     /* The NULL test keeps an empty list from being dereferenced. */
     element=dlist_head(&word_list);
     while (element != NULL) {
        if (strcmp(element->key_word,grrmarker)==0)
           grrmarkerfound = "TRUE";
        if (dlist_is_tail(element))
           break;
        element=dlist_next(element);
     }

     if (strcmp("FALSE",grrmarkerfound)==0){
        if (strcmp("NONE",grrmarker)==0)
           printf("ERROR: -grrmarker flag isn't defined on the command line\n");
        else   
           printf("ERROR: Marker %s isn't found in -par file %s\n",grrmarker,parfile);
        exit(1);
     }

     /***POSITION THE LIST AT THE ENTRY THAT GOES WITH LOCUS mstart***/
     element=dlist_head(&word_list);
     if (mstart < dloc) {
        for (i=1; i<mstart && element != NULL; i++)
           element=dlist_next(element);
     }
     else {
        for (i=(dloc+1); i<mstart && element != NULL; i++)
           element=dlist_next(element);
     }
  }

  /***READ DATA AND LOOP THROUGH MARKER ALLELES***/
  for (marker=mstart; marker<=mstop; marker++) {
    if (strcmp("TRUE",optionalgrrparams)==0){
       /* Ran past the end of the name list: nothing left to match. */
       if (element == NULL)
          break;
       /* Only the marker named by -grrmarker is analysed. */
       if (strcmp(element->key_word,grrmarker)!=0){ 
          element = dlist_next(element);
          continue;
        }
    }

    if (marker!=dloc) {
      if (strcmp("TRUE",optionalgrrparams)==0){
         if ((grrallele > 0) && (strcmp(element->key_word,grrmarker)==0)){
            markername = grrmarker;
          }
      }
    /* New code above for GASSOC version 1.06 */

      datamatx=l2g(dloc,nliab,marker,in,&n_cases,markername);
      /***RESET POINTER TO BEGINNING OF DATA FILE***/
      rewind(in);

      /* Everything below indexes element 0 of arrays sized by n_cases. */
      if (n_cases <= 0) {
        printf("Error: no cases available for marker locus %ld\n",marker);
        exit(1);
      }

      /***ALLOCATE MEMORY FOR GENO STRUCTURE AND VECTOR OF ALL ALLELES***/
      indata=(struct geno *)malloc((n_cases)*sizeof(struct geno));
      if (indata == NULL) {
        printf("Error: unable to allocate memory for genotype data\n");
        exit(1);
      }
      all_alleles=lvector((n_cases)*4);  


      /***READ CASES AND ASSIGN ALLELES TO CHILD, MOTHER, FATHER***/
      /***KEEP ARRAY OF ALL (PARENTAL) ALLELES***/
      for (i=0; i<(n_cases); i++) {
        indata[i].c1=datamatx[i][5];
        indata[i].c2=datamatx[i][6];
        indata[i].m1=datamatx[i][3];
        indata[i].m2=datamatx[i][4];
        indata[i].f1=datamatx[i][1];
        indata[i].f2=datamatx[i][2];
        all_alleles[4*i]=datamatx[i][1];
        all_alleles[4*i+1]=datamatx[i][2]; 
        all_alleles[4*i+2]=datamatx[i][3];
        all_alleles[4*i+3]=datamatx[i][4];
      }
      /***SORT ALL_ALLELES ARRAY BY ALLELE TYPE***/  
      qksort(all_alleles,0,4*n_cases-1); 

      /***COUNT THE NUMBER OF UNIQUE ALLELES***/
      n_unique=1;
      for (i=1; i<4*n_cases; i++) {
        if (all_alleles[i]!=all_alleles[i-1]) n_unique++;
      }
      /***CREATE AN ARRAY OF ONLY THE UNIQUE ALLELES***/
      /***AND COUNT # OF EACH TYPE OF ALLELE        ***/
      acounts=lvector(n_unique);
      for (i=0; i<n_unique; i++) {
        if (i==0)
          acounts[i]=1;       /* all_alleles[0] is counted up front */
        else
          acounts[i]=0;
      }
      unique_alleles=lvector(n_unique);
      unique_alleles[0]=all_alleles[0];
      n_unique=1;
      for (i=1; i<4*n_cases; i++) {
        if (all_alleles[i]!=all_alleles[i-1]) {
          acounts[n_unique]++;
          unique_alleles[n_unique]=all_alleles[i];
          n_unique++;   
        }
        else 
          acounts[n_unique-1]++;
      }

      /***DETERMINE BASELINE ALLELE (MOST FREQUENT ALLELE)***/
      max=0;
      baseline=0;
      for (i=0; i<n_unique; i++) {
        if (acounts[i] > max) {
          max=acounts[i];
          baseline=i;
        }
      }

      /* New code below for GASSOC version 1.06 */     
      /***LOOK UP THE CODE OF THE REQUESTED "HIGH RISK" ALLELE***/
      if (grrallele > 0) {
        for (j=0; j<n_unique; j++) {
          if (grrallele==unique_alleles[j]){
             genallelecode=j;
             grrallelefound = "TRUE";
           }
        }
      }

      if (strcmp("TRUE",optionalgrrparams)==0){
         if ((grrallele > 0) && (strcmp(element->key_word,grrmarker)==0))
            compute_type="GRR";

         if (strcmp("FALSE",grrallelefound)==0){
            if (grrallele==0)
               printf("ERROR: -grr flag isn't defined on the command line\n");
            else   
               printf("ERROR: Allele %ld isn't found in -par file %s\n",grrallele,parfile);
          }

         if ((strcmp("FALSE",grrallelefound)==0) || (strcmp("FALSE",grrmarkerfound)==0) )
            exit(1);
      }
      /* New code above for GASSOC version 1.06 */

      /**ASSIGN UNIQUE ALLELE CODE TO EACH ALLELE**/ 
      for (i=0; i<n_cases; i++) {
        for (j=0; j<n_unique; j++) {
          if (indata[i].c1==unique_alleles[j]) indata[i].c1code=j; 
          if (indata[i].c2==unique_alleles[j]) indata[i].c2code=j;
          if (indata[i].m1==unique_alleles[j]) indata[i].m1code=j;
          if (indata[i].m2==unique_alleles[j]) indata[i].m2code=j;
          if (indata[i].f1==unique_alleles[j]) indata[i].f1code=j;
          if (indata[i].f2==unique_alleles[j]) indata[i].f2code=j;
        }
      }

      /* New code below for GASSOC version 1.06 */
      /* A GRR request runs an extra pass keyed on the requested allele,
         then falls through to the ordinary TDT pass below. */
      if (strcmp(compute_type,"GRR") == 0){
         n_alleles=2;    

         compute_gen_or_tdt(compute_type, indata, n_cases, n_alleles,
                            &x, &x1, &x2, &x3, &x4,genallelecode, beta, 
                            unique_alleles,grrallele,maxiter,iter);
         compute_type="TDT";
       } 

      n_alleles=n_unique-1;

      compute_gen_or_tdt(compute_type, indata, n_cases, n_alleles,
                         &x, &x1, &x2, &x3, &x4, baseline, beta, 
                         unique_alleles,grrallele,maxiter,iter);


      /* New code above for GASSOC version 1.06 */

      if (strcmp(compute_type,"TDT") == 0){
      /***COMPUTE SCORE STATISTICS***/
      score(&score_tdt,&score_dom,&score_rec,&pscore_tdt,&pscore_dom,&pscore_rec,
            &rank_tdt,&rank_dom,&rank_rec,n_cases,n_alleles,x,x1,x2,x3,x4);
      

      /***SIMULATION OPTION***/
      if (sim > 0) {

        /***ALLOCATE NEEDED VECTORS AND MATRICES TO BE USED FOR SIMULATION***/ 
        nbetas=(n_alleles+((n_alleles+1)*n_alleles)/2);
        if (bsim<=0) {
          betasim=dvector(nbetas);
          for (i=0;i<nbetas;i++) {
            betasim[i]=0;
          }
        }
        else {
          if (bsim!=nbetas) {
            printf("Error: Number of alleles specified with bsim does not match data");
            exit(1);
          }
        }

        psim=dmatrix(n_cases,4);

        mtot_tdt=dvector(n_alleles);
        mtot_dom=dvector(n_alleles);
        mtot_rec=dvector(n_alleles);

        vsim_tdt=dmatrix(n_alleles,n_alleles);
        vsim_dom=dmatrix(n_alleles,n_alleles);
        vsim_rec=dmatrix(n_alleles,n_alleles);
   
        simsetup(betasim,n_alleles,indata,x1,x2,x3,x4,n_cases,psim,
                 vsim_tdt,mtot_tdt,vsim_dom,mtot_dom,vsim_rec,mtot_rec);

        if (simout) {
          if (score_fname[0]) 
            out_score=fopen(score_fname,"w");
          else
            out_score=fopen(simdef,"w");
          /* fprintf() on a NULL stream below would crash. */
          if (out_score == NULL) {
            printf("Error: unable to open simulation output file\n");
            exit(1);
          }
        }

        /* Empirical p-value = share of simulated scores >= observed score */
        n_rejtdt=n_rejdom=n_rejrec=0;
        for (simiter=1; simiter<=sim; simiter++) {

          simulate(psim,n_alleles,n_cases,
                   x1,x2,x3,x4,
                   mtot_tdt,vsim_tdt,&simscore_tdt,
                   mtot_dom,vsim_dom,&simscore_dom,
                   mtot_rec,vsim_rec,&simscore_rec);

          if (simscore_tdt >= score_tdt) 
            n_rejtdt++;
          if (simscore_dom >= score_dom)
            n_rejdom++;
          if (simscore_rec >= score_rec)
            n_rejrec++;
          if (simout)
            fprintf(out_score,"%15.4f\t%15.4f\t%15.4f\n",simscore_tdt,simscore_dom,
                    simscore_rec);
        }

        if (simout)
          fclose(out_score);
        sim_ptdt=(double)n_rejtdt/(double)sim;
        sim_pdom=(double)n_rejdom/(double)sim;
        sim_prec=(double)n_rejrec/(double)sim;
      }

      /****DISPLAY FINAL OUTPUT****/

      
      printf("\nScore Statistics:\n");
      /* sim<=0 rather than sim==0: with a negative sim no simulation ran,
         so the simulated p-values must not be printed. */
      if (sim<=0) {
        /***PRINT SCORE STATISTIC AND PVALUE***/
          printf("\n                 Score      df   P-value\n");
          if (rank_tdt > 0)
            printf("  GTDT: %15.4f      %d   %11.9f\n",score_tdt,rank_tdt,pscore_tdt);
          else
            printf("  GTDT: ***Unable to compute***\n");
          if (rank_dom > 0)
            printf("  GDOM: %15.4f      %d   %11.9f\n",score_dom,rank_dom,pscore_dom);
          else
            printf("  GDOM: ***Unable to compute***\n");
          if (rank_rec > 0)
            printf("  GREC: %15.4f      %d   %11.9f\n",score_rec,rank_rec,pscore_rec);
          else
            printf("  GREC: ***Unable to compute***\n");
      }

      else {
        /***PRINT SCORE STATISTIC AND PVALUE***/
        printf("\n                 Score      df   P-value          Sim P-value(Simulations=%ld)\n",sim);
        if (rank_tdt > 0)
          printf("  GTDT: %15.4f      %d   %11.9f      %11.9f\n",score_tdt,rank_tdt,
                 pscore_tdt,sim_ptdt);
        else
          printf("  GTDT: ***Unable to compute***\n");
        if (rank_dom > 0)
          printf("  GDOM: %15.4f      %d   %11.9f      %11.9f\n",score_dom,rank_dom,
                 pscore_dom,sim_pdom);
        else
          printf("  GDOM: ***Unable to compute***\n");
        if (rank_rec > 0)
          printf("  GREC: %15.4f      %d   %11.9f      %11.9f\n",score_rec,rank_rec,
                 pscore_rec,sim_prec);  
        else
          printf("  GREC: ***Unable to compute***\n");
        printf("\nNote: Seeds used for random# generation were %d, %d, %d\n",seeda,seedb,seedc);
      }
    }
      /* indata was malloc'ed above and is not used after this point.  The
         lvector/dvector/dmatrix buffers are left alone because their
         matching release routines are not visible here. */
      free(indata);
   }
   if (strcmp("TRUE",optionalgrrparams)==0){
      /* The requested marker has been handled, so stop here.  If it sat at
         the disease locus position nothing was analysed: keep the failure
         status for that case. */
      if (strcmp(element->key_word,grrmarker)==0)
          exit(marker!=dloc ? 0 : 1);
   }
   markername = "NONAME";
  }

  /* With GRR options every loop pass either skipped a non-matching marker
     or exited above, so arriving here means the requested marker was not
     among the loci analysed. */
  if (strcmp("TRUE",optionalgrrparams)==0) {
    printf("ERROR: Marker %s is not among the marker loci analyzed (%d to %d)\n",
           grrmarker,mstart,mstop);
    exit(1);
  }

  return 0;
}
  
























