      program seq_det

C***********************************************************************
C
C     Analysis of Sequences and Aminoacid Probabilities (ASAP)
C
C     Program SEQDET-NET to search sequences in a database on the NET
C     Get ready to update files in INTERNET
C     Marc A. Marti-Renom, last revision: April'99
C
C     Input (unit 5):
C       record 1  database code, format I1 (1 = yeast, 2 = E. coli)
C       record 2  query sequence, at most 30 characters
C
C     Files read:
C       *.stat      4 title lines, 22 per-residue values (per_aa),
C                   then num_seq and totalseq
C       *.faa       FASTA file that is searched for the query
C       header.txt  text copied to the output before the report
C       footer.txt  text copied to the output after the report
C
C     Output (unit 6): HTML report with the expected and the real
C     number of matches.  Errors are reported on unit 6 as well and
C     end the program.
C
C***********************************************************************

C********************* Variable definitions ****************************

      implicit real*8 (a-h,o-z)

C     naa       number of one-letter residue codes
C     maxseq    maximum number of database sequences kept in memory
C     maxmatch  maximum number of matches that are reported
C     maxres    maximum number of residues stored per sequence
C     maxqry    maximum query length (must agree with the A30 below)
      parameter (naa=22,maxseq=9000,maxmatch=2000)
      parameter (maxres=3000,maxqry=30)

      character seqdet*80,seq*80,title*24,aacode*(naa)
      character dataref*100,datastat*100,errfile*100
      character header*80,footer*80,line*300
      character label(maxseq)*80,def(maxseq)*80
      character sequence(maxseq)*(maxres)
      character matchseq(maxmatch)*80,matchseqdef(maxmatch)*80
      character nameseqmatch(maxmatch)*80

      integer resp1,totalmatch,num_seq
      integer llabel(maxseq),lsequence(maxseq)
      integer lmatchseq(maxmatch),iniseq(maxmatch),endseq(maxmatch)
      logical trunc

      dimension per_aa(naa)

      header  = '../SeqDetNet/xtras/header.txt'
      footer  = '../SeqDetNet/xtras/footer.txt'

C---- Residue codes, in the order of the per_aa values of the stat file
      aacode  = 'ABCDEFGHIKLMNPQRSTVWYZ'

C********************* I/O data files **********************************

C---- The DEC "Q" edit descriptor returns the number of characters
C---- left in the input record, i.e. the length of the typed query.
      errfile = 'standard input'
      read(5,'(I1)',END=1000,ERR=1000)resp1
      read(5,'(Q,A30)',END=1000,ERR=1000)lseqdet,seqdet

C---- Only maxqry characters were stored; drop trailing blanks so that
C---- the query length is the number of residues really given.
      if (lseqdet .gt. maxqry) lseqdet = maxqry
   10 if (lseqdet .gt. 0) then
         if (seqdet(lseqdet:lseqdet) .eq. ' ') then
            lseqdet = lseqdet - 1
            goto 10
         endif
      endif
      if (lseqdet .lt. 1) then
         write(6,'(A)')' # Empty query sequence.'
         write(6,'(A)')' # End of the program.'
         goto 999
      endif

C******************** Preparate input and output files *****************

      if (resp1 .eq. 1) then
C------- S. cerevisiae data
         datastat = '../SeqDetNet/data/yeast/yeast.stat'
         dataref = '../SeqDetNet/data/yeast/yeast.faa'
         title = 'Saccharomyces cerevisiae'
      else if (resp1 .eq. 2) then
C------- E. coli data
         datastat = '../SeqDetNet/data/ecoli/ecoli.stat'
         dataref = '../SeqDetNet/data/ecoli/ecoli.faa'
         title = 'Escherichia coli'
      else
C------- Without a valid code there is no file name to open
         write(6,'(A,I2)')' # Unknown database code:',resp1
         write(6,'(A)')' # End of the program.'
         goto 999
      endif

C---- Statistics of the selected database: four lines are skipped,
C---- then one value per residue code, num_seq and totalseq.
      errfile = datastat
      open(unit=1,file=datastat,status='old',ERR=1000)
      do i=1,4
         read(1,'(A)',END=1000,ERR=1000)line
      enddo
      do i=1,naa
         read(1,'(7X,F6.4)',END=1000,ERR=1000)per_aa(i)
      enddo
      read(1,'(9X,I8)',END=1000,ERR=1000)num_seq
      read(1,'(9X,F10.1)',END=1000,ERR=1000)totalseq
      close(unit=1)

C************************** Read data files ****************************

      errfile = header
      open(unit=7,file=header,status='old',ERR=1000)
      errfile = footer
      open(unit=8,file=footer,status='old',ERR=1000)
      errfile = dataref
      open(unit=1,file=dataref,status='old',ERR=1000)

C---- Load the FASTA sequences.  A line starting with ">" opens a new
C---- record; every other line adds its non-blank characters to the
C---- current record.
      nrec = 0
      jj = 0
      do while (.true.)
         read(1,'(A80)',END=200,ERR=1000)seq
         if (seq(1:1) .eq. '>') then
            if (nrec .ge. maxseq) then
               write(6,'(A)')' # Too many sequences in the database.'
               write(6,'(A)')' # End of the program.'
               goto 999
            endif
            nrec = nrec + 1
            jj = 0
            lsequence(nrec) = 0
C---------- Defaults used when the header has no delimiter at all
            label(nrec) = seq
            llabel(nrec) = len(seq)
            def(nrec) = ' '
C---------- The label ends at the first blank or "|" from column 5
C---------- on; the definition is the rest of the header line.
            do ii=5,len(seq)
               if (seq(ii:ii).eq.' ' .or. seq(ii:ii).eq.'|') then
                  label(nrec) = seq(1:ii)
                  llabel(nrec) = ii
                  def(nrec) = seq(ii:len(seq))
                  goto 150
               endif
            enddo
  150       continue
         else if (nrec .gt. 0) then
C---------- Residue lines before the first header are ignored.
C---------- Residues beyond maxres do not fit and are dropped.
            do j=1,len(seq)
               if (seq(j:j) .ne. ' ' .and. jj .lt. maxres) then
                  jj = jj + 1
                  sequence(nrec)(jj:jj) = seq(j:j)
               endif
            enddo
            lsequence(nrec) = jj
         endif
      enddo
  200 continue

C---- Slide the query along every sequence.  The last start position
C---- is the one where the query still fits inside the sequence.
      n = 0
      trunc = .false.
      do k=1,nrec
         do i=1,lsequence(k)-lseqdet+1
            if (sequence(k)(i:i+lseqdet-1) .eq.
     &          seqdet(1:lseqdet)) then
               if (n .ge. maxmatch) then
                  trunc = .true.
                  goto 300
               endif
               n = n + 1
               matchseq(n) = label(k)(1:llabel(k))
               lmatchseq(n) = llabel(k)
               matchseqdef(n) = def(k)
               iniseq(n) = i
               endseq(n) = lseqdet-1+i
C------------- Match plus one flanking residue on each side, kept
C------------- inside the sequence at both ends.
               ista = max(1,i-1)
               iend = min(lsequence(k),i+lseqdet)
               nameseqmatch(n) = '-'//sequence(k)(ista:iend)//'-'
            endif
         enddo
      enddo
  300 continue
      totalmatch = n

C********************** Calculations  **********************************

C---- Product of the per-residue values of the query; characters that
C---- are not one of the naa codes leave the product unchanged.
      calc_prob = 1.0d0
      do i=1,lseqdet
         j = index(aacode,seqdet(i:i))
         if (j .gt. 0) calc_prob = calc_prob * per_aa(j)
      enddo
      calc_num = calc_prob*(totalseq-((lseqdet-1)*num_seq))
      calc_prob = calc_prob*100

C********************** Write output file ******************************

C---- HTML Header (lline is limited to the size of the line buffer)
      errfile = header
      do while (.true.)
         read(7,'(Q,A)',END=230,ERR=1000)lline,line
         if (lline .gt. len(line)) lline = len(line)
         write(6,'(A)')line(1:lline)
      enddo
  230 continue

C---- General data from the database
      write(6,'(A)')'<br>&nbsp;'
      write(6,'(A)')'<center>'
      write(6,'(4A)')'<p><b><font size=+1>Output File for ',
     &'<font color="#663366"><i>',title,
     &'</i></font> database </b></center>'
      write(6,'(A)')'<p>'

      write(6,'(A)')'<p>'
      write(6,'(A)')'<hr WIDTH="100%">'
      write(6,*)'<br><font size=+1>Calculated statistics for your ',
     &'sequence: <b><font color="#663366">',seqdet(1:lseqdet),
     &'</font></b></font>'
      write(6,'(A)')'<table BORDER COLS=2 WIDTH="100%" NOSAVE >'
      write(6,'(A)')'<tr BGCOLOR="#CCCCCC" NOSAVE>'
      write(6,'(A)')'<td NOSAVE>'
      write(6,'(2A)')'<center><b>Probability ((pAAi*pAAi+1*...*pAAn)',
     &'*100)</b></center>'
      write(6,'(A)')'</td>'
      write(6,'(A)')'<td>'
      write(6,'(2A)')'<center><b>&nbsp;Expected number in ',
     &'database</b></center>'
      write(6,'(A)')'</td>'
      write(6,'(A)')'</tr>'
      write(6,'(A)')'<tr>'
      write(6,'(A)')'<td>'
C---- F10.6 leaves room for values up to 100 (empty product * 100)
      write(6,'(A,F10.6,A)')'<center>',calc_prob,'</center>'
      write(6,'(A)')'</td>'
      write(6,'(A)')'<td>'
      write(6,'(A,F10.2,A)')'<center>',calc_num,'</center>'
      write(6,'(A)')'</td>'
      write(6,'(A)')'</tr>'
      write(6,'(A)')'</table>'
      write(6,'(A)')'<p><hr WIDTH="100%"><br><font size=+1>'
      write(6,*)'Real matches of your sequence: <font color="#663366">',
     &totalmatch,'</font></font>'
      if (trunc) write(6,'(A,I6,A)')'<br>Only the first ',maxmatch,
     &' matches are listed.'
      write(6,'(A)')'<br><b><font size=+1></font></b>&nbsp;'

C---- Result table
      write(6,'(A)')'<table BORDER COLS=4 WIDTH="100%" NOSAVE >'
      write(6,'(A)')'<tr BGCOLOR="#CCCCCC" NOSAVE>'
      write(6,'(A)')'<td NOSAVE><b>Match number</b></td>'
      write(6,'(A)')'<td><b>Matched sequence</b></td>'
      write(6,'(A)')'<td><b>Zone of matching</b></td>'
      write(6,'(A)')'<td><b>Matched sequence region</b></td>'
      write(6,'(A)')'</tr>'
      write(6,'(A)')'</table>'
      write(6,'(A)')'<b></b>'

C---- One table row per match.  The link uses the label text between
C---- column 5 and the delimiter that closed the label.
      write(6,'(A)')'<table BORDER COLS=4 WIDTH="100%" NOSAVE >'
      do n=1,totalmatch
         write(6,'(A)')'<tr>'
         write(6,'(A,I6,A)')'<td>',n,'</td>'
         write(6,'(8A)')'<td><a href="http://www.ncbi.nlm.nih.gov',
     &   '/htbin-post/Entrez/query?uid=',matchseq(n)(5:lmatchseq(n)-1),
     &   '&form=6&db=p&Dopt=g">',matchseq(n),'</a>  ',
     &   matchseqdef(n),'</td>'
         write(6,'(A,I4,A1,I4,A)')'<td>',iniseq(n),'-',endseq(n),'</td>'
         write(6,'(3A)')'<td>',nameseqmatch(n),'</td>'
         write(6,'(A)')'</tr>'
      enddo
      write(6,'(A)')'</table>'

C---- date and run time
      write(6,*)'</font><hr WIDTH="100%"><br>'
      call dattim(6)
      call porcpu(cputime)
      write(6,*)'<p>CPU-TIME min.',cputime/60.d0
      write(6,'(A)')'<p>END SEARCH<br><hr WIDTH="100%"></body></html>'

C---- HTML Footer
      errfile = footer
      do while (.true.)
         read(8,'(Q,A)',END=240,ERR=1000)lline,line
         if (lline .gt. len(line)) lline = len(line)
         write(6,'(A)')line(1:lline)
      enddo
  240 continue

C********************** Errors *****************************************

      goto 999

C---- I/O error: errfile names the file that was being processed
 1000 write (6,*) ' '
      write (6,'(2A)') ' # Error in file: ',errfile
      write (6,*) '# Check the reference file.'
      write (6,*) '# End of the program.'

C---- Common exit; closing a unit that was never opened has no effect
  999 close(unit=1)
      close(unit=7)
      close(unit=8)
      end

C********************** Subroutines ************************************

C---- MAYMIN: convert a character to upper case
      subroutine maymin(a)
C-----------------------------------------------------------------------
C     Convert the single character A to upper case, in place.
C
C     a   (in/out)  character to convert
C
C     Only the lower-case letters 'a' to 'z' are changed; any other
C     character (digits, punctuation, upper-case letters) is returned
C     untouched.  The ASCII character set is assumed, where a letter
C     and its capital differ by 32.  LGE/LLE compare in the ASCII
C     collating sequence whatever the native one is.

      character*1 a

      if (lge(a,'a') .and. lle(a,'z')) then
         a = char(ichar(a) - 32)
      endif
      return
      end

C-----------------------------------------------------------------------

C---- DATTIM: date and time of the process
      subroutine dattim(io)
C-----------------------------------------------------------------------
C     Write one line with the current date and time.
C
C     io  (in)  unit number the line is written to
C
C     The strings come from the DATE and TIME library subroutines
C     (9 and 8 characters) and are printed exactly as returned.

      integer io
      character cdate*9,ctime*8

      call date(cdate)
      call time(ctime)
C---- Write to the unit asked for by the caller
      write(io,1100)cdate,ctime

      return

1100  FORMAT(1X,'date ',A9,3X,' time ',A8)

      end

C-----------------------------------------------------------------------

C-----------------------------------------------------------------------

C---- PORCPU: process time as reported by ETIME
      subroutine porcpu(t1)
C-----------------------------------------------------------------------
C     Return the time reported by the ETIME library function.
C
C     t1  (out)  double precision; value returned by ETIME, which the
C                library documents as the sum of the two elements it
C                stores in its argument (user and system time)
C
C     ETIME is a default REAL function, so it is typed explicitly
C     here; an implicit double precision type would not match the
C     library routine.

      double precision t1
      real etime,tarray(2)
      external etime

      t1 = etime(tarray)

      return
      end

C********************** End of the program *****************************
