      program gener_net

C***********************************************************************
C
C     Analysis of Sequences and Aminoacid Probabilities (ASAP)
C
C     Program GENER-NET to generate database on the NET
C     Get ready to update files in INTERNET
C     Marc A. Marti-Renom, last revision: April'99
C
C     Standard input (unit 5), in this order:
C       record 1   : process tag, prefix of every output file name
C       record 2   : database name (only echoed in the reports)
C       record 3.. : sequences in FASTA format ('>' starts a label)
C     Other inputs : HTML header/footer fragments (units 7 and 8)
C                    and the list of 4-letter words (unit 4)
C     Outputs      : HTML report on unit 6, composition tables on
C                    units 11-14, high/low word lists on units 21-22
C
C     The '(Q,A)' edit descriptor (Q returns the number of characters
C     left in the record) is a vendor extension kept from the
C     original code.
C
C***********************************************************************

C********************* Variable definition *****************************

      implicit real*8 (a-h,o-z)

      character fastaseq*2500,label*70,seq*200,aa*1,process*10
      character file*300,line*1000,header*80,footer*80
      character output1*80,output2*80,output3*80,output4*80
      character o_htxt*80,o_ltxt*80,link_htxt*80,link_ltxt*80
      character o_1aa*80,o_2aa*80,o_3aa*80,o_4aa*80
      character link_1aa*80,link_2aa*80,link_3aa*80,link_4aa*80
      character alfile4*38
      character aleatseq*5,testseq*5
      character case*27,color*7
      character code20*20,code22*22

      integer naa,error,lfastaseq,lfile,lline,lprocess
      integer total_al,indev,llink

C     NOTE: MIN and MAX are named constants in this program unit, so
C     the intrinsic functions of the same name cannot be used here.
      parameter (min=4,maxprint=8,maxaa=23,maxseq=8001,max=160001)

      dimension label(maxseq),fastaseq(maxseq),lfastaseq(maxseq)
      dimension naa(maxaa),aa(maxaa),per_aa(maxaa)
      dimension aleatseq(max),score(min,max),calc_prob(min,max)
      dimension sdev(min),sdevdev(min),avedev(min),sd_dev(min)
      dimension indev(min,maxprint)

C********************* Initializing ************************************

C---- Residue codes counted for the composition (22 letters) and the
C---- 20-letter alphabet used to index the words (A=0 ... Y=19)
      code22 = 'ABCDEFGHIKLMNPQRSTVWYZ'
      code20 = 'ACDEFGHIKLMNPQRSTVWY'

      do i=1,maxseq
         label(i) = ' '
         fastaseq(i) = ' '
         lfastaseq(i) = 0
      enddo
      do i=1,maxaa
         aa(i) = ' '
         naa(i) = 0
         per_aa(i) = 0.0
      enddo
      do i=1,22
         aa(i) = code22(i:i)
      enddo
      do i=1,min
         sdev(i) = 0.0
         sdevdev(i) = 0.0
         avedev(i) = 0.0
         sd_dev(i) = 0.0
         do j=1,maxprint
            indev(i,j) = 0
         enddo
      enddo
C---- The observed counts must start at zero and the expected
C---- probabilities at one (they are built as a product)
      do n=1,max
         do i=1,min
            score(i,n) = 0.0
            calc_prob(i,n) = 1.0
         enddo
      enddo

      total_res = 0.0
      total_per = 0.0
      total_seq = 0.0
      error = 0
      nbad = 0
      nseq = 0
      jj = 0
      k = 0

C************************** I/O files ***********************************

      read(5,'(Q,A)',END=1000,ERR=1000)lprocess,process
C---- Q may report more characters than PROCESS can hold
      if (lprocess .gt. len(process)) lprocess = len(process)
      output1 = '../GenerNet/tmp/'//process(1:lprocess)//'1aa_comp.txt'
      output2 = '../GenerNet/tmp/'//process(1:lprocess)//'2aa_comp.txt'
      output3 = '../GenerNet/tmp/'//process(1:lprocess)//'3aa_comp.txt'
      output4 = '../GenerNet/tmp/'//process(1:lprocess)//'4aa_comp.txt'
      o_htxt  = '../GenerNet/tmp/'//process(1:lprocess)//'s99_hseq.txt'
      o_ltxt  = '../GenerNet/tmp/'//process(1:lprocess)//'s99_lseq.txt'
      header  = '../GenerNet/xtras/header.txt'
      footer  = '../GenerNet/xtras/footer.txt'
      alfile4 = '../GenerNet/xtras/aleat4.seq'

      open(unit=4,file=alfile4,status='old',err=1000)

      open(unit=7,file=header,status='old',err=1000)
      open(unit=8,file=footer,status='old',err=1000)

      open(unit=11,file=output1,status='unknown',err=1000)
      open(unit=12,file=output2,status='unknown',err=1000)
      open(unit=13,file=output3,status='unknown',err=1000)
      open(unit=14,file=output4,status='unknown',err=1000)
      open(unit=21,file=o_htxt,status='unknown',err=1000)
      open(unit=22,file=o_ltxt,status='unknown',err=1000)

C---- Readjust to txt output links (file names without directory)
      o_htxt = process(1:lprocess)//'s99_hseq.txt'
      o_ltxt = process(1:lprocess)//'s99_lseq.txt'
      o_1aa  = process(1:lprocess)//'1aa_comp.txt'
      o_2aa  = process(1:lprocess)//'2aa_comp.txt'
      o_3aa  = process(1:lprocess)//'3aa_comp.txt'
      o_4aa  = process(1:lprocess)//'4aa_comp.txt'
C---- LLINK = number of characters before the first blank.  The six
C---- names have the same length, so one value serves all of them.
C---- Scanning backwards leaves the position of the FIRST blank.
      llink = len(o_htxt)
      do j=len(o_htxt),1,-1
         if (o_htxt(j:j) .eq. ' ') llink = j - 1
      enddo
      if (llink .lt. 1) llink = 1
      link_htxt = o_htxt(1:llink)
      link_ltxt = o_ltxt(1:llink)
      link_1aa  = o_1aa(1:llink)
      link_2aa  = o_2aa(1:llink)
      link_3aa  = o_3aa(1:llink)
      link_4aa  = o_4aa(1:llink)

C************************** Read FASTA files ****************************

C---- Read inputs (file name, and FASTA sequences)
      read(5,'(Q,A)',END=1000,ERR=1000)lfile,file
      if (lfile .gt. len(file)) lfile = len(file)

C---- One record per pass; a record is used up to its first blank.
C---- Records found before the first '>' label are ignored.
190   continue
      read(5,'(A)',END=200,ERR=1000)seq
      if (seq(1:1) .eq. '>') then
         k = k + 1
         if (k .gt. maxseq) then
            nbad = k
            goto 1100
         endif
         jj = 0
         label(k) = seq
      else if (k .ge. 1) then
         do j=1,len(seq)
            if (seq(j:j) .eq. ' ') goto 190
            jj = jj + 1
            if (jj .gt. len(fastaseq(k))) then
               nbad = k
               goto 1100
            endif
            fastaseq(k)(jj:jj) = seq(j:j)
            lfastaseq(k) = jj
         enddo
      endif
      goto 190
200   continue
      nseq = k
      total_seq = nseq

C---- Start calculating one residue composition
C---- ('/' characters are not residues and are left out of the total)
      do k=1,nseq
         error = 0
         do i=1,lfastaseq(k)
            if (fastaseq(k)(i:i) .eq. '/') then
               error = error + 1
            else
               m = index(code22,fastaseq(k)(i:i))
               if (m .gt. 0) naa(m) = naa(m) + 1
            endif
         enddo
         total_res = total_res + lfastaseq(k) - error
      enddo
C---- Nothing to analyse: stop before dividing by zero
      if (total_res .le. 0.0) then
         nbad = 0
         goto 1100
      endif
      do i=1,22
         per_aa(i) = naa(i)/total_res
         total_per = total_per + per_aa(i)
      enddo

C*********** Calculate TWO, THREE and FOUR aa distribution ************

C---- Read aleatory sequences (20**4 records of four letters)
      do n=1,20**4
         read(4,'(A4)',END=1000,ERR=1000)aleatseq(n)
      enddo

C---- Start calculating 2, 3 and 4 AA composition
C---- A sequence of length L holds L-n+1 words of n residues; this
C---- agrees with the total_res-(n-1)*total_seq words assumed below.
C---- Each word is read as a base-20 number (+1) to index SCORE.
C---- Words holding a residue outside the 20-letter alphabet are
C---- skipped, since they have no index.
      do k=1,nseq
         do n=2,4
            do i=1,lfastaseq(k)-n+1
               testseq = fastaseq(k)(i:(n-1)+i)
               number = 0
               do j=1,n
                  num_aa = index(code20,testseq(j:j)) - 1
                  if (num_aa .lt. 0) goto 215
                  number = number + (num_aa*(20**(n-j)))
               enddo
               number = number + 1
               score(n,number) = score(n,number) + 1
215            continue
            enddo
         enddo
      enddo

C---- Calculate expected number in database
C---- The last i letters of the n-th aleatory word are taken as the
C---- n-th word of i residues; its probability is the product of the
C---- single residue probabilities.
C---- NOTE(review): this assumes aleat4.seq is sorted in the same
C---- base-20 order used for SCORE - confirm against the file.
      do i=2,4
         total_al = 20**i
         do n=1,total_al
            do j=5-i,4
               m = index(code22,aleatseq(n)(j:j))
               if (m .gt. 0) then
                  calc_prob(i,n) = calc_prob(i,n) * per_aa(m)
               endif
            enddo
         enddo
      enddo

C---- Calculate Statistics
C---- dev = log10(observed) - log10(expected), over the words that
C---- were observed at least once (NZERO counts the others)
      do i=2,4
         nzero = 0
         sdev(i) = 0.0
         sdevdev(i) = 0.0
         avedev(i) = 0.0
         sd_dev(i) = 0.0
         total_al = 20**i
         tot_win = total_res-((i-1)*total_seq)
         do n=1,total_al
            calc_num = calc_prob(i,n)*tot_win
            if (score(i,n) .eq. 0.0 ) then
               nzero = nzero + 1
            else
               dev = log10(score(i,n)) - log10(calc_num)
               sdev(i) = sdev(i) + dev
               sdevdev(i) = sdevdev(i) + (dev*dev)
            endif
         enddo
C------- Left at zero when no word of this size was observed
         if (total_al-nzero .gt. 0) then
            avedev(i) = sdev(i)/(total_al-nzero)
            sd_dev(i) = sqrt(sdevdev(i)/(total_al-nzero))
         endif
      enddo

C---- Header Text output HIGH and LOW represented sequences
      write(21,'(2A)')'  SeqNu   Seq    Exp Prob    Exp Num',
     &' Real Prob   Real Num   Z score  '
      write(21,'(2A)')'-------   ---- ------------ ----------',
     &' ------------ -------- ------------'
      write(22,'(2A)')'  SeqNu   Seq    Exp Prob    Exp Num',
     &' Real Prob   Real Num   Z score  '
      write(22,'(2A)')'-------   ---- ------------ ----------',
     &' ------------ -------- ------------'

C---- Classify every word by its Z score.  INDEV(i,1..3) count the
C---- low represented words (|Z| > 2.33, 1.65, 1.28), INDEV(i,4) the
C---- normal ones and INDEV(i,5..7) the high represented words
C---- (|Z| > 1.28, 1.65, 2.33).
      do i=2,4
         total_al = 20**i
         tot_win = total_res-((i-1)*total_seq)
         do n=1,total_al
            calc_num = calc_prob(i,n)*tot_win
C---------- An unobserved word counts as half an observation, and
C---------- shows no deviation when less than one was expected
            if (score(i,n) .lt. 1.0) then
               if (calc_num .lt. 1.0) then
                  dev = 0.0
               else
                  dev = log10(0.5) - log10(calc_num)
               endif
            else
               dev = log10(score(i,n)) - log10(calc_num)
            endif
            diff = score(i,n) - calc_num
            z_score = dev - avedev(i)
            if (sd_dev(i) .gt. 0.0) then
               z_score = z_score/sd_dev(i)
            else
               z_score = 0.0
            endif
            abs_z_score  = abs(z_score)
            if (tot_win .gt. 0.0) then
               obs_prob = score(i,n)/tot_win
            else
               obs_prob = 0.0
            endif
            if (diff .lt. 0 .and. abs_z_score .gt. 2.33) then
               indev(i,1) = indev(i,1) + 1
C---- Text output LOW represented sequences
               write(22,110)n,aleatseq(n)(5-i:4),calc_prob(i,n),
     &           calc_num,obs_prob,score(i,n),z_score
            else if (diff .lt. 0 .and. abs_z_score .gt. 1.65) then
               indev(i,2) = indev(i,2) + 1
            else if (diff .lt. 0 .and. abs_z_score .gt. 1.28) then
               indev(i,3) = indev(i,3) + 1
            else if (diff .gt. 0 .and. abs_z_score .gt. 2.33) then
               indev(i,7) = indev(i,7) + 1
C---- Text output HIGH represented sequences
               write(21,110)n,aleatseq(n)(5-i:4),calc_prob(i,n),
     &           calc_num,obs_prob,score(i,n),z_score
            else if (diff .gt. 0 .and. abs_z_score .gt. 1.65) then
               indev(i,6) = indev(i,6) + 1
            else if (diff .gt. 0 .and. abs_z_score .gt. 1.28) then
               indev(i,5) = indev(i,5) + 1
            else
               indev(i,4) = indev(i,4) + 1
            endif
         enddo
      enddo

C********************** Write HTML output ******************************

C---- HTML Header (copied record by record from unit 7)
      do while (.true.)
         read(7,'(Q,A)',END=230,ERR=1000)lline,line
         if (lline .gt. len(line)) lline = len(line)
         write(6,'(A)')line(1:lline)
      enddo
230   continue

C---- General data from the database
      write(6,*)'<center>'
      write(6,*)'<br><font size=+1>Calculated statistics for your ',
     &'database<font color="#663366"> ',file(1:lfile),
     &'</b></font></font><br>'

C---- General results from the database
      write(6,'(A)')'<hr>'
      write(6,'(A)')'<center><font color="#003300"><font size=+1>'
      write(6,'(A)')'<br>General data for your database</b></font>'
      write(6,'(A)')'</font><br><p>'
      write(6,'(A,F6.4,A)')'Total Probability: <font color="#663366">'
     & ,total_per,'<br></font>'
      write(6,'(A,F10.1,A)')'Total Sequences: <font color="#663366">'
     & ,total_seq,'<br></font>'
      write(6,'(A,F10.1,A)')'Total Residues: <font color="#663366">'
     & ,total_res,'<br></font>'

C---- Table of probabilities of ONE residue (four cells per row)
      write(6,'(A)')'<hr>'
      write(6,*)'<br><font color="#003300"><font size=+1>Residue ',
     &'probablity (over 1) for your database',
     &'</b></font></font><br></center><p>'
      write(6,'(A)')'<table COLS=4 WIDTH=100% NOSAVE>'
      ii = 0
      do i=1,22
         ii = ii + 1
         if (ii .eq. 1 ) then
            write(6,'(A)')'<tr><td>'
         else
            write(6,'(A)')'<td>'
         endif
         write(6,'(A,A3,A,F6.4,A)')'<center><tt>',aa(i),
     &        ':<font color="#663366">',per_aa(i),'</font>'
         if (ii .lt. 4) then
            write(6,'(A)')'</td>'
         else
            write(6,'(A)')'</td></tr>'
            ii = 0
         endif
      enddo
      write(6,'(A)')'</table>'

C---- Print Statistics
      write(6,'(A)')'<hr>'
      write(6,*)'<br><font color="#003300"><font size=+1>',
     &'Statistics for your database',
     &'</b></font></font><br></center><p>'
      do i=2,4
         total_al = 20**i
         write(6,'(2A,I1,A)')'<br><font color="#003300"><font ',
     &   'size=+1>Statistics for ',i,' residues in your ',
     &   'database:</b></font></font>'
         write(6,'(2A,F8.5,A)')'<br> Average deviation: ',
     &   '<font color="#663366">',avedev(i),'</font>'
         write(6,'(2A,F8.5,A)')'<br> Standard deviation: ',
     &   '<font color="#663366">',sd_dev(i),'</font>'
         write(6,'(A)')'<br></center><p>'
         do j=1,7
            if(j .eq. 1) then
               color = '#FF0000'
               case = 'Low represented  (0.01 sig)'
            endif
            if(j .eq. 2) then
               color = '#FF6666'
               case = 'Low represented  (0.05 sig)'
            endif
            if(j .eq. 3) then
               color = '#FFCCCC'
               case = 'Low represented  (0.10 sig)'
            endif
            if(j .eq. 4) then
               color = '#CCCCCC'
               case = 'Normal represented         '
            endif
            if(j .eq. 5) then
               color = '#99FFCC'
               case = 'High represented (0.10 sig)'
            endif
            if(j .eq. 6) then
               color = '#66FF99'
               case = 'High represented (0.05 sig)'
            endif
            if(j .eq. 7) then
               color = '#33FF33'
               case = 'High represented (0.01 sig)'
            endif
C---------- Bar width (500 = every word) and percentage of the words
C---------- in this class; real arithmetic keeps the decimals that
C---------- the F6.1 descriptor prints
            col = indev(i,j)*500.0d0/total_al
            per = indev(i,j)*100.0d0/total_al
            write(6,'(A)')'<table BORDER=0 NOSAVE >'
            write(6,'(A)')'<tr NOSAVE><td WIDTH="200" NOSAVE>'
            write(6,'(3A)')'<div align=right><font size=-1>',
     &      case,'</font></div></td>'
            write(6,'(A,F4.0,3A)')'<td WIDTH="',col,
     &           '" BGCOLOR="',color,
     &           '" NOSAVE><font size=-1>&nbsp;</font></td>'
            write(6,'(A,F6.1,A,I6,A)')'<td><font size=-1>',per,
     &           '% (',indev(i,j),' seq)</font></td></tr></table>'
         enddo
      enddo

C---- Links to the text files.  Six items per record and the name
C---- cut at LLINK keep every href on one line without padding.
      write(6,'(A)')'<hr>'
      write(6,*)'<br><font color="#003300"><font size=+1>',
     &'Text output files to download before <i>ONE</i> day:',
     &'</b></font></font><br></center><p>'
      write(6,'(6A)')'<font size=-1>',
     &           '<a href="http://guitar.rockefeller.edu/',
     &           '~marcius/GenerNet/tmp/',link_htxt(1:llink),'">',
     &           'High represented sequences</a></font><br>'
      write(6,'(6A)')'<font size=-1>',
     &           '<a href="http://guitar.rockefeller.edu/',
     &           '~marcius/GenerNet/tmp/',link_ltxt(1:llink),'">',
     &           'Low represented sequences</a></font><br>'
      write(6,'(6A)')'<font size=-1>',
     &           '<a href="http://guitar.rockefeller.edu/',
     &           '~marcius/GenerNet/tmp/',link_1aa(1:llink),'">',
     &           'Download table file for 1 AA !! </a></font><br>'
      write(6,'(6A)')'<font size=-1>',
     &           '<a href="http://guitar.rockefeller.edu/',
     &           '~marcius/GenerNet/tmp/',link_2aa(1:llink),'">',
     &           'Download table file for 2 AA !! </a></font><br>'
      write(6,'(6A)')'<font size=-1>',
     &           '<a href="http://guitar.rockefeller.edu/',
     &           '~marcius/GenerNet/tmp/',link_3aa(1:llink),'">',
     &           'Download table file for 3 AA !! </a></font><br>'
      write(6,'(6A)')'<font size=-1>',
     &           '<a href="http://guitar.rockefeller.edu/',
     &           '~marcius/GenerNet/tmp/',link_4aa(1:llink),'">',
     &           'Download table file for 4 AA !! </a></font><p>'

C---- date and run time
      write(6,*)'<hr WIDTH="100%"><br>'
      call dattim(6)
      call porcpu(cputime)
      write(6,*)'<p>CPU-TIME nim.',cputime/60.d0
      write(6,'(A)')'<p>END SEARCH<br><hr WIDTH="100%"></body></html>'

C---- HTML Footer (copied record by record from unit 8)
      do while (.true.)
         read(8,'(Q,A)',END=240,ERR=1000)lline,line
         if (lline .gt. len(line)) lline = len(line)
         write(6,'(A)')line(1:lline)
      enddo
240   continue

C********************** Write output files *****************************

C---- Single AA results
      write(11,'(A)')'* Your Database aminoacid composition.'
      write(11,'(2A)')'* ',file(1:lfile)
      write(11,'(A)')'* ----Do not edit this file---'
      write(11,'(A)')'*'
      do i=1,22
         write(11,'(A,A1,A,F6.4)')'pp_',aa(i),' = ',per_aa(i)
      enddo
      write(11,'(A,F10.1)')'to_seq = ',total_seq
      write(11,'(A,F10.1)')'tot_aa = ',total_res

C---- Two, Three and Four AA results (units 12, 13 and 14): average
C---- and deviation first, then expected and observed number per word
      do i=2,4
         write(10+i,'(A)')'* Your Database aminoacid composition.'
         write(10+i,'(2A)')'* ',file(1:lfile)
         write(10+i,'(A)')'* ----Do not edit this file---'
         write(10+i,'(A)')'*'
         write(10+i,'(2F10.6)')avedev(i),sd_dev(i)
         total_al = 20**i
         tot_win = total_res-((i-1)*total_seq)
         do n=1,total_al
            if (score(i,n) .lt. 1.0) score(i,n) = 0.0
            calc_num = calc_prob(i,n)*tot_win
            write(10+i,'(1X,F9.2,1X,F8.1)')calc_num,score(i,n)
         enddo
      enddo

C********************** Formats & Errors *******************************
      goto 999

110   FORMAT(I7,3X,A,3X,F10.8,2X,F9.2,3X,F10.8,3X,F7.1,2X,F8.5)

C---- A file could not be opened or read
1000  write (6,*) ' '
      write (6,'(A)') ' # Error opening or reading a file.'
      write (6,*) '# Check the reference file.'
      write (6,*) '# End of the program.'
      goto 999

C---- Unusable FASTA input: too many sequences, a sequence that is
C---- too long (NBAD is its number) or no residue at all (NBAD = 0)
1100  write (6,*) ' '
      write (6,'(A)') ' # Error in fastaseq file.'
      write (6,'(A,I8)') ' # Error in sequence:',nbad
      write (6,*) '# Check the fastaseq file.'
      write (6,*) '# End of the program.'

C********************** End of the program *****************************
999   end

C********************** Subroutines ************************************

C-----------------------------------------------------------------------
C---- DATTIM date and time of the process
      subroutine dattim(io)
C-----------------------------------------------------------------------
C     Write the current date and time as one line on unit IO.
C
C     io : (input) logical unit number that receives the line.
C          It used to be ignored and the line always went to unit 6.
C
C     DATE and TIME are not standard intrinsics; they are called
C     here exactly as in the original code.
C     NOTE(review): presumably they return 'dd-mmm-yy' and
C     'hh:mm:ss' (lengths 9 and 8 below) - confirm for the compiler.
C-----------------------------------------------------------------------

      implicit double precision (a-h,o-z)
      character cdate*9,ctime*8

      call date(cdate)
      call time(ctime)
      write(io,1100)cdate,ctime

      return

1100  FORMAT(1X,'date ',A9,3X,' time ',A8)

      end

C-----------------------------------------------------------------------
C---- PORCPU
      subroutine porcpu(t1)
C-----------------------------------------------------------------------
C     Return the CPU time used so far by the process.
C
C     t1 : (output, double precision) sum of the two values that
C          ETIME stores in TIME(1) and TIME(2).
C          NOTE(review): these are presumably the user and system
C          CPU seconds - confirm in the ETIME documentation of the
C          compiler in use.
C
C     ETIME is a library function returning a default REAL.  It is
C     typed explicitly because the IMPLICIT statement below would
C     otherwise make it DOUBLE PRECISION, which does not match the
C     function actually called.
C-----------------------------------------------------------------------

      implicit double precision (a-h,o-z)
      real etime
      external etime
      real time(2)

C---- The function value is not needed, only the filled array
      t0 = etime(time)
      t1 = time(1) + time(2)

      return
      end

C********************** End of the program *****************************

