      program compnet

C***********************************************************************
C
C     Program COMPARE DISTR on the NET
C     Analysis of Sequence and Aminoacid Probability
C
C     The Rockefeller University
C     Marc A. Marti-Renom, last revision: April'97
C
C***********************************************************************
C
C     Purpose
C       Compares, for every amino-acid word of length L = 2, 3 and 4
C       (20**L words per length), the score found in database A with
C       the score found in database B and writes an HTML report on
C       unit 6.
C
C     Input on unit 5, repeated for database A and then B
C       One line (I1) selecting the source of the scores:
C         1  scores follow on unit 5; for each L, five lines are
C            skipped and 20**L lines are read with (11X,F8.1)
C         2  files ../CompNet/data/yeast/yeast.bs<L>.dir
C         3  files ../CompNet/data/ecoli/ecoli.bs<L>.dir
C       Any other selector is reported as an input error.
C       Reference files are formatted direct-access files with
C       RECL=19; record n is read with format (13X,F7.0).
C
C     Method
C       dev(n)  = log10(scoreA(n)) - log10(scoreB(n))
C       avedev  = mean of dev over all words of one length
C       sd_dev  = sqrt(mean of dev**2)
C       z(n)    = (dev(n) - avedev)/sd_dev
C       A score of exactly zero is replaced by 0.9 before the log.
C       Words are binned on |z| with thresholds 2.33, 1.65 and 1.28
C       and on the sign of scoreA - scoreB.
C
C     NOTE(review): sd_dev is the root mean square of dev, it is not
C     centred on avedev.  Left unchanged; confirm this is intended.
C
C     NOTE(review): the Q edit descriptor, the width-less I edit
C     descriptor and the DATE/TIME/ETIME routines used by this file
C     are vendor extensions; they are kept as in the original.
C
C***********************************************************************

C********************* Variable definition *****************************

      implicit real*8 (a-h,o-z)
      real*8    z_score,abs_z_score
      character line*300,bgcolor*7,blank
      character*100 dataref,dataref2,dataref3,dataref4,title*24
      character header*80,footer*80,case*35
      character name_diff_seq*4,sequence*4

      integer db,database,nseq

C     ndb       number of databases compared (A and B)
C     lmin,lmax shortest and longest word length analysed
C     nbin      number of classes in the distribution histogram
C     maxdiff   capacity of the table of 0.01 significant words
C     maxseq    20**lmax, number of words of the longest length
      parameter (ndb=2,lmin=2,lmax=4,nbin=7)
      parameter (maxdiff=5000,maxseq=160000)

C     Everything indexed by the word length is declared lmin:lmax.
C     The original declared these arrays with extent 3 and indexed
C     them with 2..4, which ran past the end of the storage.
      dimension title(ndb),database(ndb)
      dimension dataref2(ndb),dataref3(ndb),dataref4(ndb)
      dimension sdev(lmin:lmax),sdevdev(lmin:lmax)
      dimension avedev(lmin:lmax),sd_dev(lmin:lmax)
      dimension score(ndb,lmin:lmax,maxseq),indev(lmin:lmax,nbin)
      dimension name_diff_seq(lmin:lmax,maxdiff)
      dimension scoreA_diff_seq(lmin:lmax,maxdiff)
      dimension scoreB_diff_seq(lmin:lmax,maxdiff)
      dimension zscore_diff_seq(lmin:lmax,maxdiff)
      dimension max_diff(lmin:lmax)

C********************* Initializing variables **************************

      do j=1,ndb
         do i=lmin,lmax
            nseq = 20**i
            do n=1,nseq
               score(j,i,n) = 0
            enddo
         enddo
      enddo
      do i=lmin,lmax
         max_diff(i) = 0
         do n=1,nbin
            indev(i,n) = 0
         enddo
      enddo
      k = 0
      header  = '../CompNet/xtras/header.txt'
      footer  = '../CompNet/xtras/footer.txt'

C********************* I/O data files **********************************

      do db=1,ndb
         read(5,'(I1)',ERR=1000,END=1000)database(db)

         if (database(db) .eq. 1) then
C           User supplied scores: each length is preceded by five
C           lines that are read and discarded.
            do i=lmin,lmax
               do j=1,5
                  read(5,'(A)',ERR=1000,END=1000)blank
               enddo
               nseq = 20**i
               do n=1,nseq
                  read(5,'(11X,F8.1)',ERR=1000,END=1000)score(db,i,n)
C                 Zero is replaced so that log10 stays finite.
                  if (score(db,i,n) .eq. 0.0) score(db,i,n) = 0.9
               enddo
            enddo
            title(db) = '     your own           '
         else if (database(db) .eq. 2) then
            dataref2(db) = '../CompNet/data/yeast/yeast.bs2.dir'
            dataref3(db) = '../CompNet/data/yeast/yeast.bs3.dir'
            dataref4(db) = '../CompNet/data/yeast/yeast.bs4.dir'
            title(db) = 'Saccharomyces cerevisiae'
         else if (database(db) .eq. 3) then
            dataref2(db) = '../CompNet/data/ecoli/ecoli.bs2.dir'
            dataref3(db) = '../CompNet/data/ecoli/ecoli.bs3.dir'
            dataref4(db) = '../CompNet/data/ecoli/ecoli.bs4.dir'
            title(db) = 'Escherichia coli'
         else
C           Unknown selector: the file names would be undefined.
            goto 1000
         endif
      enddo

C---- Open data files

      open(unit=7,file=header,status='old',err=1000)
      open(unit=8,file=footer,status='old',err=1000)

C---- Read scores from databases
C     The file for word length i is connected to unit 10+i, read
C     completely and closed again, so label 1100 always reports the
C     unit that failed.
      do db=1,ndb
         if (database(db) .ne. 1) then
            do i=lmin,lmax
               if (i .eq. 2) dataref = dataref2(db)
               if (i .eq. 3) dataref = dataref3(db)
               if (i .eq. 4) dataref = dataref4(db)
               open(unit=10+i,file=dataref,form='formatted',
     &              access='direct',recl=19,status='old',err=1100)
               nseq = 20**i
               do n=1,nseq
                  read(10+i,300,REC=n,ERR=1100)score(db,i,n)
                  if (score(db,i,n) .eq. 0.0) score(db,i,n) = 0.9
               enddo
               close(unit=10+i)
            enddo
         endif
      enddo

C********************** Calculate differences in databases *************

C---- First pass: mean and root mean square of the log10 differences
      do i=lmin,lmax
         sdev(i) = 0.0
         sdevdev(i) = 0.0
         nseq = 20**i
         do n=1,nseq
            dev = log10(score(1,i,n)) - log10(score(2,i,n))
            sdev(i) = sdev(i) + dev
            sdevdev(i) = sdevdev(i) + (dev*dev)
         enddo
         avedev(i) = sdev(i)/dble(nseq)
         sd_dev(i) = sqrt(sdevdev(i)/dble(nseq))
      enddo

C---- Second pass: z-score of every word and classification
C     ibin 1,2,3 : scoreA < scoreB with |z| > 2.33, 1.65, 1.28
C     ibin 7,6,5 : scoreA > scoreB with |z| > 2.33, 1.65, 1.28
C     ibin 4     : everything else
      do i=lmin,lmax
         nseq = 20**i
         k = 0
         do n=1,nseq
            dev = log10(score(1,i,n)) - log10(score(2,i,n))
            diff = score(1,i,n) - score(2,i,n)
C           With no spread at all every word is treated as normal
C           instead of dividing by zero.
            if (sd_dev(i) .gt. 0.0) then
               z_score = (dev - avedev(i))/sd_dev(i)
            else
               z_score = 0.0
            endif
            abs_z_score  = abs(z_score)

            ibin = 4
            if (diff .lt. 0) then
               if (abs_z_score .gt. 2.33) then
                  ibin = 1
               else if (abs_z_score .gt. 1.65) then
                  ibin = 2
               else if (abs_z_score .gt. 1.28) then
                  ibin = 3
               endif
            else if (diff .gt. 0) then
               if (abs_z_score .gt. 2.33) then
                  ibin = 7
               else if (abs_z_score .gt. 1.65) then
                  ibin = 6
               else if (abs_z_score .gt. 1.28) then
                  ibin = 5
               endif
            endif
            indev(i,ibin) = indev(i,ibin) + 1

C           Words in the two extreme classes are kept for the final
C           table.  The table holds at most maxdiff words; further
C           words are still counted in indev but not listed.
            if ((ibin.eq.1 .or. ibin.eq.7) .and. k.lt.maxdiff) then
               k = k + 1
               call seq_num(n,i,sequence)
               name_diff_seq(i,k) = sequence
C              Scores below 1.0 (the 0.9 substitute) are listed as 0.
               if (score(1,i,n) .lt. 1.0 ) then
                  scoreA_diff_seq(i,k) = 0.0
               else
                  scoreA_diff_seq(i,k) = score(1,i,n)
               endif
               if (score(2,i,n) .lt. 1.0 ) then
                  scoreB_diff_seq(i,k) = 0.0
               else
                  scoreB_diff_seq(i,k) = score(2,i,n)
               endif
               zscore_diff_seq(i,k) = z_score
            endif
         enddo
         max_diff(i) = k
      enddo

C********************** Write HTML output file *************************

C---- HTML Header
C     lline receives the record length from the Q descriptor; it is
C     limited to the size of the buffer before it is used as a
C     substring bound.
      do while (.true.)
         read(7,'(Q,A)',END=230,ERR=1000)lline,line
         if (lline .gt. len(line)) lline = len(line)
         if (lline .ge. 1) then
            write(6,'(A)')line(1:lline)
         else
            write(6,'(A)')
         endif
      enddo
230   continue
      close(unit=7)

C---- General data from the database
      write(6,'(A)')'<p>'
      write(6,*)'<br><font size=+1>Calculated statistics of ',
     &'<font color="#663366"><i>',title(1),'&nbsp;vs&nbsp;',title(2),
     &'</i></font>databases</b><br>'
      do i=lmin,lmax
         write(6,'(A)')'<table BORDER COLS=2 WIDTH="100%" NOSAVE >'
         write(6,'(A)')'<tr BGCOLOR="#CCCCCC" NOSAVE>'
         write(6,'(A)')'<td NOSAVE>'
         write(6,'(A,I,A)')'<center><b>Average diff for '
     &   ,i,' residues</b></center>'
         write(6,'(A)')'</td>'
         write(6,'(A)')'<td>'
         write(6,'(A)')'<center><b>Standard error difference'
         write(6,'(A)')'</b></center></td>'
         write(6,'(A)')'</tr>'
         write(6,'(A)')'<tr>'
         write(6,'(A)')'<td>'
         write(6,'(A,F10.5,A)')'<center>',avedev(i),
     &   '</center></td>'
         write(6,'(A)')'<td>'
         write(6,'(A,F10.2,A)')'<center>',sd_dev(i),'</center>'
         write(6,'(A)')'</td>'
         write(6,'(A)')'</tr>'
         write(6,'(A)')'</table>'
      enddo

C---- Distribution of deviations
C     One coloured bar per class; col is the bar width (500 for
C     100 percent) and per the percentage of words in the class.
      do i=lmin,lmax
         nseq = 20**i
         write(6,*)'<br><font size=+1>Calculated distribution for  ',
     &   i,' residues'
         do j=1,nbin
            if (j .eq. 1) then
               bgcolor = '#FF0000'
               case = 'Low represented in DB A (0.01 sig) '
            else if (j .eq. 2) then
               bgcolor = '#FF6666'
               case = 'Low represented in DB A (0.05 sig) '
            else if (j .eq. 3) then
               bgcolor = '#FFCCCC'
               case = 'Low represented in DB A (0.10 sig) '
            else if (j .eq. 4) then
               bgcolor = '#CCCCCC'
               case = 'Normal represented in both         '
            else if (j .eq. 5) then
               bgcolor = '#99FFCC'
               case = 'High represented in DB A (0.10 sig)'
            else if (j .eq. 6) then
               bgcolor = '#66FF99'
               case = 'High represented in DB A (0.05 sig)'
            else
               bgcolor = '#33FF33'
               case = 'High represented in DB A (0.01 sig)'
            endif
            col = indev(i,j)*500/dble(nseq)
            per = indev(i,j)*100/dble(nseq)
            write(6,'(A)')'<table BORDER=0 NOSAVE >'
            write(6,'(A)')'<tr NOSAVE><td WIDTH="200" NOSAVE>'
            write(6,'(3A)')'<div align=right><font size=-1>',
     &      case,'</font></div></td>'
            write(6,'(A,F4.0,3A)')'<td WIDTH="',col,
     &           '" BGCOLOR="',bgcolor,
     &           '" NOSAVE><font size=-1>&nbsp;</font></td>'
            write(6,'(A,F6.1,A,I6,A)')'<td><font size=-1>',per,
     &           '% (',indev(i,j),' seq)</font></td></tr></table>'
         enddo
         write(6,'(A)')'<hr><p>'
      enddo

C---- Write a table with 0.01 significative sequences
C     The two score columns use F9.1 so that every value the input
C     formats can deliver is printed instead of a field of asterisks.
      do i=lmin,lmax
         if (max_diff(i) .gt. 0) then
         write(6,'(A)')'<table BORDER COLS=4 WIDTH="100%" NOSAVE >'
         write(6,'(A)')'<tr BGCOLOR="#CCCCCC" NOSAVE>'
         write(6,'(A,I,A)')'<td NOSAVE><center><b>',i,
     &                     ' Residues sequence</b></center></td>'
         write(6,'(A)')'<td><center><b>Real number in DB A</b></td>'
         write(6,'(A)')'<td><center><b>Real number in DB B</b></td>'
         write(6,'(A)')'<td><center><b>Z-score</b></td>'
         write(6,'(A)')'</tr>'
         write(6,'(A)')'</table>'
         write(6,'(A)')'<b></b>'
         write(6,'(A)')'<table BORDER COLS=4 WIDTH="100%" NOSAVE >'
         do k=1,max_diff(i)
            write(6,'(A)')'<tr>'
            write(6,'(3A)')'<td><center>',name_diff_seq(i,k),'</td>'
            write(6,'(A,F9.1,A)')'<td><center>',scoreA_diff_seq(i,k),
     &                           '</center></td>'
            write(6,'(A,F9.1,A)')'<td><center>',scoreB_diff_seq(i,k),
     &                           '</center></td>'
            write(6,'(A,F6.1,A)')'<td><center>',zscore_diff_seq(i,k),
     &                           '</center></td>'
            write(6,'(A)')'</tr>'
         enddo
         write(6,'(A)')'</table>'
         write(6,'(A)')'<p>'
         endif
      enddo

C---- date and run time
C     cputime is returned by porcpu and printed divided by 60.
      write(6,*)'</font><hr WIDTH="100%"><br>'
      call dattim(6)
      call porcpu(cputime)
      write(6,*)'<p>CPU-TIME nim.',cputime/60.d0
      write(6,'(A)')'<p>END SEARCH<br><hr WIDTH="100%"></body></html>'

C---- HTML Footer
      do while (.true.)
         read(8,'(Q,A)',END=240,ERR=1000)lline,line
         if (lline .gt. len(line)) lline = len(line)
         if (lline .ge. 1) then
            write(6,'(A)')line(1:lline)
         else
            write(6,'(A)')
         endif
      enddo
240   continue
      close(unit=8)

      goto 999

C******************* Errors and formats ******************************

C     Both handlers start a fresh CGI response: the content type
C     line followed by an explicitly written empty line, so the
C     header does not depend on backslash escapes in literals.
1000  write(6,'(A)')'Content-type: text/html'
      write(6,'(A)')
      write(6,'(A)')'WARNNING!!! Error in input file.'
      stop
1100  write(6,'(A)')'Content-type: text/html'
      write(6,'(A)')
      write(6,'(A)')'WARNNING!!! Error in reference file.'
      write(6,'(A)')'Please send a Mail to the webmaster.'
      write(6,'(A,I)')'Error in unit file: ',i+10
      stop

300   FORMAT(13X,F7.0)

C******************* End of the program ******************************
999   continue
      end

C******************* Subroutines *************************************

C---------------------------------------------------------------------
C---- SEQ_NUM
      subroutine seq_num(num,lseq,seq)
C---------------------------------------------------------------------
C     Translate the ordinal number of a word into its letters.
C
C     num   (in)  1-based position of the word when all words of
C                 length lseq are listed with the last letter
C                 varying fastest
C     lseq  (in)  word length; seq holds at most 4 letters
C     seq   (out) the word, left justified and blank padded
C
C     Letters are taken, in this order, from ACDEFGHIKLMNPQRSTVWY.
C     A position for which no letter fits is left blank.
C---------------------------------------------------------------------

      character seq*4
      character*20 alphabet
      integer lseq,ipos,irank,iweight,irest

      alphabet = 'ACDEFGHIKLMNPQRSTVWY'
      seq = '    '

C     irest is the part of num not yet explained by the letters
C     already chosen; iweight is the number of words sharing one
C     letter at position ipos.
      irest = num
      do ipos=1,lseq
         iweight = 20**(lseq-ipos)
C        Smallest rank whose block of words reaches irest; the rank
C        ends at 21 when none of the 20 letters fits.
         irank = 1
         do while (irank .le. 20 .and. irest .gt. iweight*irank)
            irank = irank + 1
         enddo
         if (irank .le. 20) seq(ipos:ipos) = alphabet(irank:irank)
         irest = irest - iweight*(irank-1)
      enddo

      return
      end
C-----------------------------------------------------------------------
C---- DATTIM date and time of the process
      subroutine dattim(io)
C-----------------------------------------------------------------------
C     Write one line holding the current date and time.
C
C     io  (in)  unit number the line is written to.  The original
C               ignored this argument and always wrote to unit 6.
C
C     NOTE(review): DATE and TIME are not standard intrinsics; they
C     are presumably the vendor routines filling a 9 and an 8
C     character string - confirm for the target compiler.
C-----------------------------------------------------------------------

      implicit double precision (a-h,o-z)
      character cdate*9,ctime*8

      call date(cdate)
      call time(ctime)
      write(io,1100)cdate,ctime

      return

1100  FORMAT(1X,'date ',A9,3X,' time ',A8)

      end

C-----------------------------------------------------------------------
C---- PORCPU
      subroutine porcpu(t1)
C-----------------------------------------------------------------------
C     Return the processor time used so far by the program.
C
C     t1  (out) double precision sum of the two values that ETIME
C               stores in its array argument
C
C     ETIME is given an explicit default REAL type.  Under the
C     implicit rule below it would otherwise be referenced as a
C     DOUBLE PRECISION function.  It is not declared EXTERNAL so
C     that compilers providing ETIME as an intrinsic can resolve it;
C     on other compilers it is still taken from the library.
C-----------------------------------------------------------------------

      implicit double precision (a-h,o-z)
      real etime,total
      real tarray(2)

C     The function value is not needed; the call fills tarray.
      total = etime(tarray)
      t1 = tarray(1) + tarray(2)

      return
      end

C********************** End of the program *****************************
