/*  SCCS     @(#)l2g.c	1.7    05/10/01     */
/*************************************************************************
*  Function:     l2g()  
*
*  Purpose:      Read data from .pre file for a specified marker 
*
*  Arguments:    dloc:        relative column position of disease locus
*                nliab:       number of liability classes
*                mloc:        relative position of marker locus
*                *prefile:    pointer to the data file
*                *n_used:     (output) number of valid cases for analysis
*                *markername: marker name printed in the summary banner;
*                             "NONAME" suppresses the name
*                            
*  Returns:      pointer to matrix of data (one row per valid case)
*             
*************************************************************************/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "geno.h"

/* Added parameter markername it's needed for GASSOC version 1.06 */
long **l2g(int dloc, int nliab, int mloc,FILE *prefile,long *n_used,
           char *markername) {

#define MAXLINE 700         /***Max Line length in .pre data file  ***/
#define N_REQARGS 5         /***# of required args before loci cols***/

/****************************DECLARATIONS****************************/
long locate(long *xx,long n, long x);
long *lvector(long n);
long **lmatrix(long nr, long nc);   
void sort_lmatx(long **a,int first,int last,int sortcol,int ncol);

struct pedigree {
  int first;
  int last;
};
struct pedigree *pedlist;
char line[MAXLINE];
char *token;
int  dcol,m1_col,m2_col,maxarg,missing,first,pedcnt,pedsize,p,p_miss;
long nlines,ped,i,j,k,**workmatx,**datamatx,n_affect,n_miss,n_bad,curped,
     *person;
/********************************************************************/

/***COUNT # OF LINES IN INPUT FILE****/
/***Note: LINES LESS THAN 20 CHARACTERS WILL BE IGNORED***/
nlines=0;
while (fgets(line,MAXLINE,prefile)) { 
 if (strlen(line) > 14)
   nlines++;
} 
rewind(prefile);

/******ALLOCATE ARRAYS AND DATA MATRIX******/
workmatx=lmatrix(nlines,7);
datamatx=lmatrix(nlines,7);

/*****DETERMINE COLUMNS FOR DISEASE STATUS AND MARKER ALLELES******/
dcol=(2*dloc)-1;

if (mloc < dloc) {  
  m1_col=(2*mloc)-1;
  maxarg=dcol;
}

else {
  if (mloc > dloc) {
    if (nliab == 1)
      m1_col=(mloc-1)*2;
    else
      m1_col=(2*mloc)-1;
    maxarg=m1_col+1;
  }
  else {
    printf("mloc=dloc: execution terminated\n");
    exit(1);
  }
}
m2_col=m1_col+1;


/******READ INPUT FILE AND ASSIGN VALUES TO WORK MATRIX********/
i=0;
while (fgets(line,MAXLINE,prefile)) {
  if (strlen(line) > 14) {
    for (j=1;j<=(maxarg+N_REQARGS);j++) {
      if (j==1)
        token=strtok(line," \t");
      else
        token=strtok(NULL," \t");
      if (j<=4)
        workmatx[i][j-1]=atol(token); 
      if (j==(dcol+N_REQARGS))   workmatx[i][4]=atol(token);
      if (j==(m1_col+N_REQARGS)) workmatx[i][5]=atol(token);
      if (j==(m2_col+N_REQARGS)) workmatx[i][6]=atol(token);
    }
    i++;
  }
}

/*******SORT WORK MATRIX BY PEDIGREE*********/
sort_lmatx(workmatx,0,(nlines-1),0,7);

/*******COUNT PEDIGREES**********/
pedcnt=1;
curped=workmatx[0][0];
for (i=1;i<nlines;i++) { 
  if (workmatx[i][0]!=curped) {
    pedcnt++;
    curped=workmatx[i][0];
  }     
}

/******MARK FIRST AND LAST INDEX OF EACH PEDIGREE AND SORT BY PERSON******/
pedlist=(struct pedigree *)malloc((pedcnt)*sizeof(struct pedigree));
pedcnt=0;
pedlist[0].first=0;
curped=workmatx[0][0];
for (i=1;i<nlines;i++) {
  if (workmatx[i][0]!= curped) {
    pedlist[pedcnt].last=i-1;
    pedcnt++;
    pedlist[pedcnt].first=i;
    curped=workmatx[i][0];
  }
  else {
    if (i==(nlines-1)) {
      pedlist[pedcnt].last=i;
      pedcnt++;
    }
  }
}

for (i=0;i<pedcnt;i++)
  sort_lmatx(workmatx,pedlist[i].first,pedlist[i].last,1,7);
   
p_miss  =0;
n_affect=0;
n_miss  =0;
n_bad   =0;
*n_used =0;

for (p=0; p<pedcnt; p++) {

  pedsize=(pedlist[p].last-pedlist[p].first)+1;
  person=lvector(pedsize);
  for (i=0;i<pedsize;i++) {
    person[i]=workmatx[i+pedlist[p].first][1];
  }
  for (i=pedlist[p].first; i<=pedlist[p].last; i++) {

    /***KEEP ONLY DISEASE CASES***/
    if (workmatx[i][4] == 2) {

      n_affect++;

      if ((workmatx[i][2]==0) || (workmatx[i][3]==0)) {
        p_miss++;
        continue;
      }

      if ((workmatx[i][5]==0) || (workmatx[i][6]==0)) {
        n_miss++;
        continue;
      }

      /***SEARCH FOR FATHER ALLELES***/
      j=pedlist[p].first+locate(person,pedsize,workmatx[i][2]);
      if (j < 0) {
        p_miss++;
        continue;
      }

      if ((workmatx[j][5]==0) || (workmatx[j][6]==0)) {
        p_miss++;
        continue;
      }

      /***SEARCH FOR MOTHER ALLELES***/
      k=pedlist[p].first+locate(person,pedsize,workmatx[i][3]);
      if (k < 0) {
        p_miss++;
        continue;
      }

      if ((workmatx[k][5]==0) || (workmatx[k][6]==0)) {
        p_miss++;
        continue;
      }

      if ( ! (( ((workmatx[i][5]==workmatx[j][5]) ||
                (workmatx[i][5]==workmatx[j][6])) && 
               ((workmatx[i][6]==workmatx[k][5]) ||
                (workmatx[i][6]==workmatx[k][6]))  )  || 
             ( ((workmatx[i][5]==workmatx[k][5]) || 
                (workmatx[i][5]==workmatx[k][6])) && 
               ((workmatx[i][6]==workmatx[j][5]) ||
                (workmatx[i][6]==workmatx[j][6]))  )   )  ) { 
        printf("Pedigree %d, Person %d: genotype inconsistent with parents\n ",workmatx[i][0],workmatx[i][1]);
        n_bad++;
        continue;
      }

      /***VALID CASE: INCREASE COUNTER AND STORE IN DATA MATRIX***/
      datamatx[(*n_used)][0]=workmatx[i][1];
      datamatx[(*n_used)][1]=workmatx[j][5];
      datamatx[(*n_used)][2]=workmatx[j][6];
      datamatx[(*n_used)][3]=workmatx[k][5];
      datamatx[(*n_used)][4]=workmatx[k][6];
      datamatx[(*n_used)][5]=workmatx[i][5];
      datamatx[(*n_used)][6]=workmatx[i][6];


      (*n_used)++;
    }  /***END IF***/
  }    /***END FOR***/
  free(person);
}

  printf("\n\n==================================================================\n");
  /* New code below needed for GASSOC version 1.06 */
  if (strcmp(markername,"NONAME") == 0)
     printf("\nANALYSIS FOR Marker, Locus = %d\n",mloc);
  else
     printf("\nANALYSIS FOR Marker - %s, Locus =  %d\n",markername,mloc);
  /* New code above needed for GASSOC version 1.06 */
  printf("=======================\n");
  printf("\nSummary Info:\n");
  printf("\n   # of valid lines in input file: %d\n",nlines);
  printf("\n   # of affected cases: %d\n",n_affect);
  printf("   # of affected cases used in analysis:  %d\n",*n_used);
  printf("   # of affected cases not used: %d\n",n_affect-*n_used);
  printf("      # not used due to missing parent or missing parent alleles: %d\n",p_miss);
  printf("      # not used due to case missing alleles: %d\n",n_miss);
  printf("      # not used due to inconsistent parent/case alleles: %d\n",n_bad); 

  return(datamatx);
  

}


void sort_lmatx(long **a,int first,int last,int sortcol,int ncol) {

 void splitmat(long **a,int first,int last,int sortcol,int ncol,int *split_pt);

 int split_pt;
 if (first<last) {
   splitmat(a,first,last,sortcol,ncol,&split_pt);
   sort_lmatx(a,first,split_pt-1,sortcol,ncol);
   sort_lmatx(a,split_pt+1,last,sortcol,ncol);
 }
}

void splitmat(long **a,int first,int last,int sortcol,int ncol,int *split_pt)

{

  void swaprow(long *row1,long *row2,int ncol);

  int right,left;
  long v;

  v=a[first][sortcol];
  right=first+1;
  left=last;

  do {
    while ((right<=left) && (a[right][sortcol]<=v)) {
      right=right+1;
    }
    while ((right<=left) && (a[left][sortcol]>v)) {
      left=left-1;
    }
    if (right<=left) {
      swaprow(a[right],a[left],ncol);
      right=right+1;
      left=left-1;
    }
  }  while (right <= left);
  swaprow(a[first],a[left],ncol);
  *split_pt=left;
}


void swaprow(long *row1,long *row2,int ncol) {
  long *lvector(long n);
  long *temp;
  int i;

  temp=lvector(ncol);
  for (i=0;i<ncol;i++) {
    temp[i]=row1[i];
    row1[i]=row2[i];
    row2[i]=temp[i];
  }
}






	      




	 








