/************************************************************************
*  It scans the PDB file line by line, looking for meaningful fields
*
*  Global variables (BUFFER,...) are local to this file.
*  BUFFER contains only ATOM record lines.
*
*  Meaningful fields are:
*
*  > TER or MODEL = a control on the protein is done
*
*  > ATOM         = if first chain flag is met atom coordinates are stored 
*                   if a new residue is met residue is loaded
*
*  > HETATM       = a flag is put to 1
*
*  
------------------
Attention: recent modification. As of September 2001 this file contains the program's main().
***************************************************************************/

#include "pdb.h"



// Capacity of the ATOM-line buffer declared in load_chain():
// NROWMAX rows of NCOLMAX characters each. NCOLMAX is also the size of the
// single-line read buffer passed to fgets().
#define NROWMAX 10000
#define NCOLMAX 85
#define LMAX    2000  // Max length of chain

// Record-name prefixes compared (with strncmp) against the start of each line.
#define ATOM   "ATOM  "
#define HETATM "HETATM"
#define TER    "TER   "
#define ENDMDL "ENDMDL"
#define END    "END   "

// Pieces of the default input path. When no -file option is given,
// chain_assignment() builds PDB_DIR + PDB_IN + <first 4 chars of the code> + PDB_SUF.
#define PDB_DIR "/pipe1/databases/pdb/uncompressed_files/"
#define PDB_IN  "pdb"
#define PDB_SUF ".ent"


// Command-line settings filled in by getoptions().
// For the string options an empty string means "option not given".
char Pdb_code[6];      // -code
char Pdb_file[100];    // -file (or the default path built by chain_assignment())
char Xyz_file[100];    // -xyz
char Fsa_file[100];    // -fsa
char Seq_file[100];    // -seq
char Amino_file[100];  // -amino
char All_file[100];    // -all

// -log; when equal to 1, eval_row_pointers() prints the first/last row of each residue.
int  Log = 0;


/***************************************************
             MAIN
****************************************/

/*
 * Program entry point.
 *
 * Parses the command line, derives the chain id and the input path from the
 * PDB code, loads the selected chain, generates C-beta atoms, and then writes
 * every output file whose name was supplied on the command line.
 *
 * Returns 0 when the end of the function is reached; the helpers it calls
 * terminate the process themselves on the errors they detect.
 */
int main(int argc, char * argv[])
{
  void getoptions(int argc, char *argv[]);
  void load_chain(struct chain * chain);
  void chain_assignment(struct pdb *pdb);

  struct pdb PDB;

  getoptions(argc, argv);
  chain_assignment(&PDB);
  
  printf("I'm going to read PDB file <%s>...\n", Pdb_file);
  load_chain(&PDB.chain);

  printf("I'm going to create C-beta atoms\n");
  generate_beta(&PDB.chain);

  // Each output is optional: an empty file name means "not requested".
  if(Xyz_file[0]!='\0'){
    printf("Results xyz in <%s>\n", Xyz_file);
    write_basic( Xyz_file, &PDB.chain);  
  }
  if(Fsa_file[0]!='\0'){
    printf("Fasta sequence in <%s>\n", Fsa_file);
    write_fsa( Fsa_file, &PDB.chain);  
  }
  if(Seq_file[0]!='\0'){
    // The original message said "Fasta sequence" here (copied from the
    // -fsa branch), which mislabelled the -seq output.
    printf("Sequence in <%s>\n", Seq_file);
    write_seq( Seq_file, &PDB.chain);  
  }
  if(Amino_file[0]!='\0'){
    printf("Amino acids in <%s>\n", Amino_file);
    write_amino( Amino_file, &PDB.chain);  
  }
  if(All_file[0]!='\0'){
    printf("All atoms acids in <%s>\n", All_file);
    write_all_atoms( All_file, &PDB.chain);  
  }

  return 0;
}

// -- get options -- //

/*
 * Parses the command line into the file-level option variables
 * (Pdb_code, Pdb_file, Xyz_file, Fsa_file, Seq_file, Amino_file, All_file, Log).
 *
 * Every option takes exactly one value. The function prints a diagnostic
 * followed by the usage text and terminates the process when
 *   - an argument is not a known option,
 *   - an option is the last argument and therefore has no value,
 *   - a string value does not fit into its destination buffer,
 *   - the value of -log is not an integer,
 *   - no -code was given.
 * On success the resulting settings are echoed to stdout.
 *
 * NOTE(review): the error paths keep the original exit status 0 so that
 * existing callers see no change; a non-zero status would be more conventional.
 */
void getoptions(int argc, char *argv[])
{
  static const char Usage[] =
    "\nOptions:\n"
    "  -code  =  PDB code [mandatory]\n"
    "  -file  =  PDB file [not necessary]\n"
    "  -xyz   =  Output xyz file\n"
    "  -fsa   =  Output fasta\n"
    "  -seq   =  Output sequence\n"
    "  -amino =  Output amino file\n"
    "  -all   =  Output all atoms file\n"
    "  -log   =  Int [1,0=default]\n";

  // String-valued options and the fixed-size global each one fills.
  const struct {
    const char *flag;
    char       *dest;
    size_t      size;
  } string_opts[] = {
    { "-code",  Pdb_code,   sizeof Pdb_code   },
    { "-file",  Pdb_file,   sizeof Pdb_file   },
    { "-xyz",   Xyz_file,   sizeof Xyz_file   },
    { "-fsa",   Fsa_file,   sizeof Fsa_file   },
    { "-seq",   Seq_file,   sizeof Seq_file   },
    { "-amino", Amino_file, sizeof Amino_file },
    { "-all",   All_file,   sizeof All_file   },
  };
  const int n_string_opts = (int)(sizeof string_opts / sizeof string_opts[0]);

  for(int k = 0; k < n_string_opts; k++)
    string_opts[k].dest[0] = '\0';

  for(int j = 1; j < argc; j++){
    const char *flag = argv[j];

    int k = 0;
    while( k < n_string_opts && strcmp(string_opts[k].flag, flag) != 0 ) k++;
    const int is_string_opt = (k < n_string_opts);

    if( !is_string_opt && strcmp("-log", flag) != 0 ){
      printf( "Argv[%d] not recognized: <%s>\n", j, argv[j]);
      printf("%s\n", Usage);
      exit(0);
    }

    // argv[argc] is NULL: never hand it to sscanf/strlen.
    if( j + 1 >= argc ){
      printf( "Option <%s> requires a value\n", flag);
      printf("%s\n", Usage);
      exit(0);
    }
    const char *value = argv[++j];

    if( is_string_opt ){
      // "%s" copies at most strlen(value) characters plus the terminator,
      // so checking the length first makes the copy safe.
      if( strlen(value) >= string_opts[k].size ){
        printf( "Value of <%s> is too long (max %d characters): <%s>\n",
                flag, (int)(string_opts[k].size - 1), value);
        printf("%s\n", Usage);
        exit(0);
      }
      sscanf( value, "%s", string_opts[k].dest);
    }
    else if( sscanf( value, "%d", &Log) != 1 ){
      printf( "Option <-log> expects an integer: <%s>\n", value);
      printf("%s\n", Usage);
      exit(0);
    }
  }

  if( Pdb_code[0]==0 ){
    printf("Pdb code mandatory!\n");
    printf("%s\n", Usage);
    exit(0);
  }

  printf("code  = %s\n", Pdb_code);
  printf("pdb   = %s\n", Pdb_file);
  printf("xyz   = %s\n", Xyz_file);
  printf("fsa   = %s\n", Fsa_file);
  printf("seq   = %s\n", Seq_file);
  printf("amino = %s\n", Amino_file);
  printf("all   = %s\n", All_file);
}

//-- --//
// Modified!!!

/*
 * Derives the chain settings from the global Pdb_code.
 *
 *  - pdb->chain.code5 receives a copy of the whole code.
 *  - pdb->chain.id is the fifth character of the code, or ' ' when the code
 *    has four characters or fewer.
 *  - When no input file was given (Pdb_file is empty), Pdb_file is set to
 *    PDB_DIR + PDB_IN + <first four characters of the code> + PDB_SUF.
 */
void chain_assignment(struct pdb *pdb)
{
  char code[5];

  // Pdb_code holds at most 5 characters plus the terminator.
  // NOTE(review): assumes chain.code5 has room for 6 chars - confirm in pdb.h.
  strcpy(pdb->chain.code5, Pdb_code);

  strncpy(code, Pdb_code, 4);
  code[4]='\0';

  // Only look at the fifth character when the code really is that long;
  // bytes after the terminator may be left over from an earlier value.
  if( strlen(Pdb_code) > 4 )
    pdb->chain.id = Pdb_code[4];
  else
    pdb->chain.id = ' ';

  if(Pdb_file[0]=='\0')
    snprintf(Pdb_file, sizeof Pdb_file, "%s%s%s%s", PDB_DIR, PDB_IN, code, PDB_SUF);

}
/*-- ------------------
  --  load chain   --
---------------------- */
/*
 * Loads the chain selected by chain->id from the file named in Pdb_file.
 *
 * Steps:
 *   1. read_buffer() copies the ATOM lines of the chain into BUFFER;
 *   2. eval_row_pointers() finds the first and last row of every residue and
 *      returns the number of residues, stored in chain->l;
 *   3. chain->seq (chain->l + 1 chars) and chain->res (chain->l residues) are
 *      allocated with new[] and filled by load_residue().
 *
 * The process exits if no line was read. chain->seq and chain->res are left
 * allocated for the caller.
 */
void load_chain(struct chain * chain)
{
  int   read_buffer( char BUFFER[][NCOLMAX], char chain ); 
  int   eval_row_pointers( char BUFFER[][NCOLMAX], int r1[], int r2[], int n);
  void  load_residue( char BUFFER[][NCOLMAX], struct residue * res, int i_min, int i_max);

  // NOTE(review): NROWMAX * NCOLMAX bytes of automatic storage.
  char BUFFER[NROWMAX][NCOLMAX];
  int nrow;

  printf( "\n\nPdb <%s>\n", Pdb_file);
 
  // Read buffer
  nrow = read_buffer(BUFFER, chain->id);
  if(nrow<=0){
    printf("No lines read!\n");
    exit(0);
  }
  printf("nrow: %d\n", nrow);

  // eval_row_pointers() has no capacity argument and can write one entry per
  // row in the worst case (every row a different residue), so the index
  // arrays are sized from nrow rather than from a fixed chain-length limit.
  int *row1 = new int[nrow];
  int *row2 = new int[nrow];

  printf("\nI'm going to evaluate pointers\n");
  chain->l =  eval_row_pointers( BUFFER, row1, row2, nrow);

  chain->seq    = new char[ chain->l + 1 ];
  chain->seq[0] = '\0';
 
  chain->res = new residue[ chain->l ];
  
  // Loading atomic coordinates 
  for(int i=0; i<chain->l ; i++){
    chain->res[i].flag   = 0;
    load_residue(BUFFER, &chain->res[i], row1[i], row2[i]);
    chain->seq[i] = chain->res[i].ctype;
  }
  chain->seq[chain->l] = '\0';

  delete [] row1;
  delete [] row2;
 

  printf( "###\nLength = %d \n", chain->l);
  printf( "\nSequence : <%s>\n\n", chain->seq);
  printf( "END\n");
}


/*--------------------------------------------------------
    READ ATOMIC COORDINATES - STORE IN BUFFER

    Scans the file named in Pdb_file and copies into BUFFER every ATOM
    line whose chain column (offset 21) equals `chain`, skipping lines
    whose residue name (offset 17) is "ACE".

    The scan stops at
      - a TER line for the selected chain (a TER line too short to have
        a chain column is attributed to the chain of the preceding
        ATOM/HETATM line),
      - an END / ENDMDL line once at least one row has been stored, or
        when its chain column equals `chain`,
      - end of file.

    Returns the number of rows stored; 0 when nothing matched or when
    fopen_read() returned NULL.
    Exits with status -1 when BUFFER is about to overflow.
  ---------------------------------------------------------*/

int read_buffer( char BUFFER[NROWMAX][NCOLMAX], char chain)
{
  enum {
    CHAIN_COL    = 21,  // offset of the chain identifier
    MIN_ATOM_LEN = 31   // shortest line that reaches the coordinate field (offset 30)
  };

  char buffer[NCOLMAX];
  char last_coord_chain = ' ';  // chain column of the latest ATOM/HETATM line
  int  nrow = 0;

  FILE *fp = fopen_read (Pdb_file);
  if( fp == NULL ) return 0;

  while( fgets(buffer, NCOLMAX, fp) ){
    // A line longer than the buffer would otherwise have its tail parsed
    // as if it were a new record: drop the remainder.
    if( strchr(buffer, '\n') == NULL ){
      int c;
      while( (c = fgetc(fp)) != EOF && c != '\n' ) { }
    }

    // Length without the line terminator; columns at or beyond it are
    // absent and must not be read (they hold data from earlier lines).
    const size_t len = strcspn(buffer, "\r\n");
    const int has_chain_col = (len > CHAIN_COL);

    const int is_atom   = (strncmp(buffer, ATOM,   6) == 0);
    const int is_hetatm = (strncmp(buffer, HETATM, 6) == 0);

    if( is_atom || is_hetatm ){
      if( has_chain_col ) last_coord_chain = buffer[CHAIN_COL];

      if( is_atom && len >= MIN_ATOM_LEN && buffer[CHAIN_COL] == chain
          && strncmp(buffer+17, "ACE", 3) != 0 ){
        strcpy(BUFFER[nrow], buffer);  //buffer -> BUFFER
        nrow++;
      }
    }
    else if( strncmp(buffer, END, 3) == 0 ){
      // Matches both END and ENDMDL. These records carry no chain id, so
      // they close the scan as soon as the selected chain produced rows;
      // otherwise a multi-model file would append the same chain again.
      const char end_chain = has_chain_col ? buffer[CHAIN_COL] : ' ';
      if( nrow > 0 || end_chain == chain ) break;
    }
    else if( strncmp(buffer, TER, 3) == 0 ){
      const char ter_chain = has_chain_col ? buffer[CHAIN_COL] : last_coord_chain;
      if( ter_chain == chain ) break;
    }

    if(nrow >= (NROWMAX-5) ){
      printf("ERRORE BUFFER piccolo: %d %d\n", nrow, NROWMAX);
      exit(-1);
    }
  }
  fclose(fp);

  return nrow;
}
/*-------------------------------------------------
DETERMINE FIRST AND LAST ROW OF EACH RESIDUE AND THE CHAIN LENGTH

A new residue starts at row i whenever the 5 characters at offset 22
differ between row i-1 and row i.

  BUFFER : rows to examine
  row1   : out, row1[k] = first row of residue k
  row2   : out, row2[k] = last  row of residue k
  nrow   : number of valid rows in BUFFER

Returns the number of residues found; 0 when nrow <= 0, in which case
row1/row2 are not written.
The caller must provide room for up to nrow entries in row1 and row2.
When the global Log equals 1 the first and last row of every residue
are printed.
--------------------------------------------------*/

int eval_row_pointers( char BUFFER[][NCOLMAX], int row1[], int row2[], int nrow )
{
  // Without this guard an empty buffer would be reported as one residue
  // ending at row -1.
  if( nrow <= 0 ) return 0;

  int l = 0;

  row1[0] = 0;
  for(int i = 1  ;  i < nrow;  i++){
    if( strncmp( BUFFER[i-1] +22, BUFFER[i] +22, 5) != 0){ 
      row2[l] = i-1;   // close the current residue
      l++;
      row1[l] = i;     // open the next one
    }
  }
  row2[l] = nrow - 1;
  l++;
  
  if(Log == 1){
    printf("\n---First rows:---\n\n");
    for( int i=0; i < l; i++){   
      printf( "%4d <%s>", row1[i]+1, BUFFER[row1[i]] );
      printf( "%4d <%s>", row2[i]+1, BUFFER[row2[i]]  );
    }
  }

  return l;
}


/*--------------------------------------------------
  LOAD ONE RESIDUE FROM ITS ATOM ROWS

  Fills *res from rows i_min..i_max (inclusive) of BUFFER:
    - stype / ctype : 3-letter name taken from the first row (offset 17)
                      and its 1-letter conversion;
    - num           : 4 characters taken from the first row (offset 22);
    - N, CA, C, O, CB : flags are cleared, then read_atom() is called for
                      each row whose atom name (offset 12) matches;
    - atm / tatm / natm : coordinates and name of every row.

  The process exits when the number of rows reaches NATM_PRES.
----------------------------------------------------*/

void load_residue( char BUFFER[][NCOLMAX], residue * res, int i_min, int i_max)
{
  void  read_atom( char * buffer, atom *atom);

  char *first_row = BUFFER[i_min];

  // Residue name and its one-letter code
  strncpy(res->stype, first_row+17, 3);
  res->stype[3]='\0';
  res->ctype = conv_3lett_1lett(res->stype);

  // Residue number as written in the file
  strncpy(res->num, first_row+22, 4);
  res->num[4]='\0';

  res->flag = 1;

  // Atom names handled individually and the slot each one fills
  struct {
    const char *label;
    atom       *slot;
  } named[] = {
    { " N  ", &res->N  },
    { " CA ", &res->CA },
    { " C  ", &res->C  },
    { " O  ", &res->O  },
    { " CB ", &res->CB },
  };
  const int n_named = (int)(sizeof named / sizeof named[0]);

  for (int k = 0; k < n_named; k++)
    named[k].slot->flag = 0;

  // First pass: named atoms only
  for (int row = i_min; row <= i_max; row++){
    for (int k = 0; k < n_named; k++){
      if ( strncmp(BUFFER[row]+12, named[k].label, 4) == 0 ){
        read_atom(BUFFER[row], named[k].slot);
        break;
      }
    }
  }

  // Second pass: every atom of the residue
  int count = 0;
  for (int row = i_min; row <= i_max; row++){
    char *line = BUFFER[row];

    sscanf(line+30, "%lf %lf %lf",
           &res->atm[count][0], &res->atm[count][1], &res->atm[count][2]);
    strncpy(res->tatm[count], line+12, 4);
    res->tatm[count][4] = '\0';

    count++;
    if (count >= NATM_PRES){
      printf("Increase Number of AToMs Per RESidue\n%s\n", BUFFER[i_min]);
      exit(0);
    }
  }
  res->natm = count;
}

/*--------------------------------------------------------------------
  Reads the three coordinates found at offset 30 of `buffer` into a->V.
  The atom flag is set to 1 only when all three values were read;
  otherwise an error message and the offending line are printed and the
  flag is left untouched.
-----------------------------------------------------------------*/

void read_atom(char * buffer, atom * a)
{
  const int n_read = sscanf(buffer+30, "%lf %lf %lf", &a->V[0], &a->V[1], &a->V[2]);

  if (n_read == 3) {
    a->flag = 1;
    return;
  }

  printf("\n ERRORE lettura coordinate atomiche \n");
  printf("%s\n", buffer);
}

/*---------------------------*/

/*
 * Scans `file` for HETATM lines whose residue name (offset 17) is "HOH"
 * and parses the three coordinates at offset 30.
 *
 * The parsed values are not stored anywhere and `pdb` is not used: the
 * function reads as an unfinished stub.
 * NOTE(review): the original skipped every HETATM that is NOT "HOH" and
 * parsed only "HOH" lines; that selection is kept here, but it may be the
 * opposite of what was intended - confirm before storing any result.
 *
 * Returns without doing anything when fopen_read() yields NULL.
 */
void read_hetatm(char *file, struct pdb *pdb)
{
  char buffer[NCOLMAX];

  (void)pdb;

  FILE *fp = fopen_read(file);
  if( fp == NULL ) return;

  // Stop at end of file; the original loop never tested the fgets() result
  // and therefore never terminated.
  while( fgets(buffer, NCOLMAX, fp) ){
    if( strncmp(buffer, HETATM, 6) != 0 ) continue;

    // Lines too short to reach the coordinate field cannot be parsed.
    if( strlen(buffer) <= 30 ) continue;

    if( strncmp(buffer+17, "HOH", 3) != 0 ) continue;

    // sscanf needs one destination per conversion.
    double x = 0.0, y = 0.0, z = 0.0;
    sscanf(buffer+30, "%lf %lf %lf", &x, &y, &z);
    (void)x; (void)y; (void)z;
  }
  fclose(fp);
}

/*VVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVV*/
