cf ----------------------------------------------------------------------------
cf
cf    RDALN() reads the sequences and/or their alignment from a file.
cf
cf    Input:
cf       ioi         input I/O channel
cf       code(nseq)  codes of proteins to be read in
cf       nseq        number of sequences to be read in the alignment
cf       alnfil      alignment file name
cf       maxseq      maximal number of seqs
cf       maxres      maximal length of a sequence and of an alignment
cf       rmgap       .true. if want to remove positions with gaps in all seqs
cf       rmgap2      .true. if want to remove all gaps (destroying alignment)
cf       form        'PAP', 'PIR', or 'QUANTA' for input file format
cf       what        'sequence structure structureX structureN structureM'
cf       atmdir      list of directories for atom files
cf
cf    Output:
cf       code(nseq), nseq         updated
cf       ialn(maxres,maxseq)      residue index alignment
cf       caln(maxres,maxseq)      residue type alignment
cf       ccaln(maxres,maxseq)     residue type alignment, gaps removed
cf       irestypn(maxres,maxseq)  integer codes for residue types, gaps removed
cf       invaln(maxres,maxseq)    inverted residue indices, gaps removed
cf       naln                     length of alignment
cf       nresn(maxres)            number of residues in the sequences
cf       resol(nseq)              X-ray resolution
cf       rfactr(nseq)             X-ray R-factor
cf       comment(mcomment2)*(80)  ... comments
cf       prottyp(maxseq)*(20)    ... type of the protein entry (sequence,
cf                                   structure, structureN, structureX,
cf                                   structureM)
cf       source(maxseq)*(maxfld) ... source organism
cf       range(2,maxseq)*(5)     ... the first and last residue number
cf       alnchn(2,maxseq)*(1)    ... the first and last res's chain id
cf       pdbcod(maxseq)*(maxfld) ... PDB code
cf       name(maxseq)*(maxfld)   ... protein name
cf       nsegmn(maxseq)          ... number of segments in each seq
cf       segidn(maxseg,maxseq)   ... segment names in each seq
cf       iress1n(maxseg,maxseq)  ... starting res ind for each segment
cf       iress2n(maxseg,maxseq)  ... ending res ind for each segment
cf
cf    The sequences in the alignment are ordered as in the input code() array.
cf    Exact protein codes have to be specified; also, 'all' is
cf    allowed. Several copies of the same protein code can also occur in
cf    code(). If input CODE() included 'all', then the codes of all proteins
cf    and their number are also returned in CODE() and NSEQ.
cf
cf ----------------------------------------------------------------------------

      subroutine rdaln(ioinp,ioinp2,iolog,alnfil,code,nseq,maxres,
     &  maxseq,ialn,naln,caln,rmgap,rmgap2,form,nresn,irestypn,invaln,
     &  ccaln,prottyp,pdbcod,name,source,range,alnchn,resol,
     &  rfactr,comment,ncomment,mcomment2,codein,what,segidn,
     &  maxseg,iress1n,iress2n,nsegmn,atmdir,water,hetatm,
     &  hydrogen,idummy1,idummy2,idummy3,dummy1,dummy5)
        implicit none
#include "reslib.cst"
        integer naln,ioinp,nseq,i,maxseq,maxres,iseq,ifind2word
        integer ialn(maxres,maxseq),maxseg,nsegmn(maxseq),iolog
        integer irestypn(maxres,maxseq),ncomment,mcomment2,ia,lenr2
        integer invaln(maxres,maxseq),nresn(maxseq),ioinp2,j
        integer iress1n(maxseg,maxseq),iress2n(maxseg,maxseq),ichr2int
        real resol(maxseq),rfactr(maxseq)
c ----- these are dummy arrays:
        integer idummy1(maxseq),idummy2(maxseq),idummy3(maxseq)
        character dummy1(maxres)*(*), dummy5(2*maxseq)*(*)
        character pdbcod(maxseq)*(*),name(maxseq)*(*)
        character source(maxseq)*(*),range(2,maxseq)*(*)
        character alnfil*(*), caln(maxres,maxseq)*(*)
        character ccaln(maxres,maxseq)*(*),alnchn(2,maxseq)*(*)
        character form*(*), code(maxseq)*(*),segidn(maxseg,maxseq)*(*)
        character what*(*),comment(mcomment2)*(*),str2chr*(1)
        character prottyp(maxseq)*(*),atmdir*(*),gapchr*(1)
        logical rmgap, rmgap2, codein(maxseq), recover, strgin, iowr
        logical water,hetatm,hydrogen
        external lenr2, ifind2word, strgin, recover, ichr2int
        external str2chr, iowr

        gapchr = str2chr(gapsym)

c ----- read the alignment (residue indices, character codes):

        if (strgin(form,'PIR')) then
          call rdpir(ioinp,ioinp2,iolog,alnfil,maxseq,maxres,ialn,caln,
     &    nseq,naln,code,prottyp,pdbcod,name,source,range,alnchn,resol,
     &    rfactr,comment,ncomment,mcomment2,codein,what,atmdir,water,
     &    hetatm,hydrogen,.false.,dummy1)
          if (recover(.false., 0)) return
          call defseg(caln,alnchn,iress1n,iress2n,maxseg,nseq,nsegmn,
     &                segidn,maxres,ialn,naln)
        end if

        if (strgin(form,'QUANTA')) then
          call rdqnt(ioinp,iolog,alnfil,maxseq,maxres,ialn,caln,nseq,
     &         naln,code,alnchn,range,iress1n,iress2n,maxseg,nsegmn,
     &         segidn,codein,idummy1,idummy2,idummy3,dummy1,dummy5)
          if (recover(.false., 0)) return
          call defaln(.false.,'structureX',nseq,code,prottyp,pdbcod,
     &                name,source,range,alnchn,resol,rfactr)
        end if

        if (strgin(form,'PAP').or.strgin(form,'INSIGHT')) then
          call rdpap(ioinp,iolog,alnfil,maxseq,maxres,ialn,caln,nseq,
     &               naln,code,codein,strgin(form,'INSIGHT'),idummy1)
          if (recover(.false., 0)) return
          call defaln(.true.,'structureX',nseq,code,prottyp,pdbcod,
     &                name,source,range,alnchn,resol,rfactr)
          call defseg(caln,alnchn,iress1n,iress2n,maxseg,nseq,nsegmn,
     &                segidn,maxres,ialn,naln)
        end if

c ----- maybe I should also assign default segment ID's if not assigned
c       in this routine

        do  i = 1, nseq
          if (.not. codein(i)) then
            iseq = ifind2word(code(i), code, nseq)
            if ((iseq.eq.0).or.(iseq.eq.i)) then
             write(iolog,'(2a/a/a,i3,a,a)')
     &       'rdaln___E> protein specified in ALIGN_CODES(i) in the ',
     &       'TOP file', '  was not found in the alignment file;',
     &       '  ALIGN_CODES(',i,'): ', code(i)(1:lenr2(code(i)))
             stop
            else
c ----------- a duplicate sequence entry was required; make a copy:
              do  ia = 1, naln
                ialn(ia,i) = ialn(ia,iseq)
                caln(ia,i) = caln(ia,iseq)
              end do
              prottyp(i) = prottyp(iseq)
              pdbcod(i) = pdbcod(iseq)
              name(i) = name(iseq)
              source(i) = source(iseq)
              resol(i) = resol(iseq)
              rfactr(i) = rfactr(iseq)
              alnchn(1,i) = alnchn(1,iseq)
              alnchn(2,i) = alnchn(2,iseq)
              range(1,i) = range(1,iseq)
              range(2,i) = range(2,iseq)
              codein(i) = .true.
            end if
          end if
        end do

c ----- remove consecutive chain break characters:
        call rmbrks(caln, maxres, nseq, naln)

c ----- remove the positions where there are gaps in all proteins:
        if (rmgap) call rmempty(ialn, caln, maxres, nseq, naln)

c ----- remove all gaps:
        if (rmgap2) call rm2empty(ialn, caln, maxres, nseq, naln)

c ----- get the number of residues in the sequences;
c       get the inverted alignment array;
c       get integer residue types;
c       get condensed residue type array;
        call ninvtyp(ialn,caln,maxres,invaln,irestypn,ccaln,naln,
     &               nseq,nresn)

c ----- correct the residue index of the C-terminus of the last segment
c       (for rdpir/rdpap only, should not change anything for rdqnt):
        do  i = 1, nseq
          iress2n(nsegmn(i),i) = nresn(i)
        end do

c ----- test the sequences:
        do  i = 1, nseq
         do  j = 1, naln
          if(ichr2int(caln(j,i)).gt.20 .and. caln(j,i).ne.gapchr)then
           if (iowr(3))
     &     write(iolog,'(a,a1,1x,2i5)') 
     &     'rdaln____> non-standard residue type,position,sequence: ',
     &     caln(j,i), j, i
          else
           if(ichr2int(caln(j,i)).lt.1)then
            write(iolog,'(a,a1,1x,2i5)')
     &      'rdaln___E> unknown residue type,position,sequence     : ',
     &      caln(j,i), j, i
           end if
          end if
         end do
        end do

        return
      end




cf ----------------------------------------------------------------------------
cf
cf    RDPIR() reads the horizontal PIR alignment.
cf
cf    Input:
cf           nseq           ... number of sequences to be read in
cf           code(nseq)*(*) ... codes of proteins to be read in
cf           io             ... I/O stream
cf           infile*(*)     ... input file name
cf           maxseq         ... maximal number of sequences
cf           maxres         ... maximal length of alignment in residues
cf           what*(*)       ... sequence and/or structure
cf    Output:
cf           naln           ... length of the alignment
cf           ialn(i,j)      ... residue index at position i, sequence j
cf           caln(i,j)*1    ... residue type at position i, sequence j
cf           codein(nseq)   ... true if sequence read in.
cf
cf ----------------------------------------------------------------------------


      subroutine rdpir(ioinp,ioinp2,iolog,infile,maxseq2,maxres,ialn,
     &      caln,nseq,naln,code,prottyp,pdbcod,name,source,range,alnchn,
     &      resol,rfactr,comment,ncomment,mcomment2,codein,what,
     &      atmdir,water,hetatm,hydrogen,openned,ccaln)
        implicit none
c ----- maximal number of residues in the whole PDB file (should be
c       larger than maxres at all times; how much larger only important 
c       when chain begining and/or end obtained from a PDB file
        integer maxres2
        parameter (maxres2 = 10000)
#include "reslib.cst"
#include "lenf.cst"
#include "mdt_aln.cst"
        integer ioinp,iolog,maxseq2,maxres,nseq,naln,ierr,lr
        integer i,j,lenr,naln2,lr1,ires,iseq,ipdbfnd,ifind2word
        integer ialn(maxres,maxseq2),ifind5word,nseq2,ioinp2
        integer ncomment,mcomment2,nbuffr,indexw,nres,lenr2,nres2
        real resol(maxseq2),rfactr(maxseq2)
        character pdbcod(maxseq2)*(*),name(maxseq2)*(*),gapchr*(1)
        character alnchn(2,maxseq2),fname*(lenf)
        character source(maxseq2)*(*),range(2,maxseq2)*(*)
        character caln(maxres,maxseq2)*(*),comment(mcomment2)*(*)
        character card*(maxlin), infile*(*), code(maxseq2)*(*)
        character cod*(maxcod),prottyp(maxseq2)*(*),what*(*)
        character buffr(mbuffr+5)*(100),atmdir*(*),str2chr*(1)
        character range2(2)*5, alnchn2(2)*1, ccaln(maxres)*(1)
        character ccaln2(maxres2)*(1), resnum(maxres2)*(5)
        character chain(maxres2)*(1)
        logical rdall, codein(maxseq2), notaa1, cmpr, recover, dopdb
        logical water,hetatm,hydrogen,openned
        external lenr, lenr2, notaa1, recover, ifind2word
        external ifind5word, str2chr

        if (maxres2 .lt. maxres) 
     &   stop 'rdpir__E> Increase MAXRES2.'

        gapchr = str2chr(gapsym)

c ----- read all the sequences?
        rdall = ifind5word('all', code, nseq).gt.0

        do  iseq = 1, maxseq2
          codein(iseq) = .false.
        end do

        nseq2 = 0
        naln = 0
        ncomment = 0

        if (.not. openned) then
          close(ioinp)
c ------- stop on error and report everything:
          call openf4(ioinp,infile,'OLD','SEQUENTIAL','FORMATTED',3,
     &                .true.,ierr,cmpr,iolog)
        end if

c ----- get to the next sequence:
10      read(ioinp, '(a)', err=910, end=110) card
        if (card(1:2) .eq. 'C;' .or. card(1:2) .eq. 'R;') then
          if (ncomment .ge. mcomment2) then
            write(*,'(a)')'rdpir___W> increase MCOMMENT and recompile'
          else
            ncomment = ncomment + 1
          end if
          comment(ncomment) = card
          go to 10
        end if
        if (card(1:4) .ne. '>P1;') go to 10

        cod = card(index(card, ';')+1:)
        call ljust(cod)

c ----- position of the current code in the list of specified codes
        if (rdall) then
          iseq = nseq2 + 1
        else
          iseq = ifind2word(cod, code, nseq)
        end if

        if (iseq.gt.maxseq2)
     &    write(*, '(a)') 'rdpir___E> increase MAXSEQ'

c ----- is the current sequence not specified in CODE()
        if (iseq .lt. 1) go to 10

c ----- read and parse the special comment line:
        read(ioinp, '(a)', err=910, end=910) card

        do  i = 1, mbuffr
          buffr(i) = ' '
        end do
        call str_sn5(card(1:), buffr, mbuffr+5, nbuffr, ':')
        do  i = 1, nbuffr
          call ljust(buffr(i))
        end do
        if (nbuffr.lt.mbuffr)
     &    write(*,'(2a/a/a,2i5/a)')
     &    'rdpir___W> not enough fields in the second line of the ',
     &    'sequence entry: ', card(1:lenr2(card)),
     &    '  Actual fields, should be: ', nbuffr, mbuffr,
     &    '  (Fields are separated by a colon, :)'
        if (nbuffr.gt.mbuffr)
     &    write(*,'(2a/a/a,2i5/a)')
     &    'rdpir___W> too many fields in the second line of the ',
     &    'sequence entry: ', card(1:lenr2(card)),
     &    '  Actual fields, should be: ', nbuffr, mbuffr,
     &    '  (Fields are separated by a colon, :)'
        prottyp(iseq)  = buffr(1)
        pdbcod(iseq)   = buffr(2)
        range(1,iseq)  = buffr(3)
        alnchn(1,iseq) = buffr(4)
        range(2,iseq)  = buffr(5)
        alnchn(2,iseq) = buffr(6)
        name(iseq)     = buffr(7)
        source(iseq)   = buffr(8)
        call str_r2(buffr(9), resol(iseq), ierr)
        if (ierr .ne. 0) resol(iseq) = -1
        call str_r2(buffr(10), rfactr(iseq), ierr)
        if (ierr .ne. 0) rfactr(iseq) = -1

c ----- is the entry of the selected type(s):
        if (indexw(what,prottyp(iseq)(1:lenr2(prottyp(iseq)))).lt.1
     &      .and. index(what,anyrid).lt.1) go to 10

c ----- do some alignment format checks:
        if (index(prottyp(iseq),'structure') .gt. 0) then
          if (lenr(pdbcod(iseq)) .lt. 1) then
            write(iolog,'(2a/a)')
     &        'rdpir___E> no PDB code in field 2 for structure: ',
     &        cod(1:lenr2(cod)),
     &        card(1:lenr2(card))
            stop
          end if

c          if (lenr(range(1,iseq)) .lt. 1) then
c            write(iolog,'(2a/a)')
c     &        'rdpir___E> no 1st residue in field 3 for structure: ',
c     &        cod(1:lenr2(cod)),
c     &        card(1:lenr2(card))
c            stop
c          end if
c          if (lenr(range(2,iseq)) .lt. 1) then
c            write(iolog,'(2a/a)')
c     &        'rdpir___E> no 2nd residue in field 5 for structure: ',
c     &        cod(1:lenr2(cod)),
c     &        card(1:lenr2(card))
c            stop
c          end if

        end if

        code(iseq) = cod

c ----- current number of sequences read in
        nseq2 = nseq2 + 1

        naln2 = 0
        ires = 0
15      read(ioinp, '(a)', err=910, end=910) card
          lr = lenr(card)

c          do  i = 1, lr
c            write(*,'(2i5,1x,a1,1x,l8)')
c     &         i, ichar(card(i:i)),card(i:i),.not. notaa1(card(i:i))
c          end do

c ------- should I get the sequence from a PDB coordinate file (when the
c         first character in the sequence is ENDSEQ ('*')):
          if (ires .eq. 0 .and. card(1:1) .eq. endseq) then
c --------- find the full PDB filename; stop on error, not verbose:
            call pdbnam(atmdir,pdbcod(iseq),fname,ipdbfnd,cmpr,1,0)
c --------- read the specified sequence range from the PDB file:
            call rdseqpdb(ioinp2,iolog,fname,maxres,nres,caln(1,iseq),
     &                    resnum,chain,range(1,iseq),alnchn(1,iseq),
     &                    water,hetatm,hydrogen,ierr)
            if (recover(.false., 0)) return
            if (ierr .gt. 0) then
              write(*, '(a,i4)')
     &        'rdpir___E> reading sequence from PDB file: ', ierr
              stop
            end if
            naln = max(naln,nres)
            do  i = 1, nres
              ialn(i,iseq) = i
            end do
            do  i = nres+1, maxres
              ialn(i,iseq) = 0
              caln(i,iseq) = gapchr
            end do
            codein(iseq) = .true.
            go to 10
          end if

          if (card(lr:lr) .eq. endseq) then
            lr1 = lr - 1
          else
            lr1 = lr
          end if
          do  i = 1, lr1
            naln2 = naln2 + 1
            if (naln2 .gt. maxres) then
              write(*,'(a/a,i6)')
     &          'rdpir___E> alignment is too long;',
     &          '  increase MAXRES and recompile; MAXRES: ',maxres
              if (recover(.true., 1)) return
            end if
            caln(naln2,iseq) = card(i:i)
            if (.not. notaa1(card(i:i))) then
              ires = ires + 1
              ialn(naln2,iseq) = ires
            else
              ialn(naln2,iseq) = 0
            end if
          end do
c ------- not finished with this sequence yet?
        if (lr .eq. lr1) go to 15

        naln = max(naln,naln2)
        codein(iseq) = .true.

        do  i = naln2+1, maxres
          caln(i,iseq) = gapchr
          ialn(i,iseq) = 0
        end do

c ----- Could we finish before reading the whole file?
        if (.not.rdall .and. nseq .eq. nseq2) go to 110

        go to 10
110     continue

        if (.not. openned) close(ioinp)

c ----- try to get the segment spec from the PDB file if first residue id is blank:
        do 130  iseq = 1, nseq2
         if (index(prottyp(iseq),'structure') .gt. 0) then
          dopdb = .false.
          if(lenr(range(1,iseq)).lt.1.or.range(1,iseq).eq.'.')then
            range2(1) = anyrid
            dopdb = .true.
          else
            range2(1) = range(1,iseq)
          end if
          if(lenr(range(2,iseq)).lt.1.or.range(2,iseq).eq.'.')then
            range2(2) = norid
            dopdb = .true.
          else
            range2(2) = range(2,iseq)
          end if
          if (alnchn(1,iseq) .eq. '.') then
            alnchn2(1) = anyrid
            dopdb = .true.
          else
            alnchn2(1) = alnchn(1,iseq)
          end if
          if (alnchn(2,iseq) .eq. '.') then
            alnchn2(2) = norid
            dopdb = .true.
          else
            alnchn2(2) = alnchn(2,iseq)
          end if
          if (dopdb) then
c --------- find the full PDB filename; stop on error, not verbose:
            call pdbnam(atmdir,pdbcod(iseq),fname,ipdbfnd,cmpr,1,0)
            call rdseqpdb(ioinp2,iolog,fname,maxres2,nres2,ccaln2,
     &                    resnum,chain,range2,alnchn2,water,hetatm,
     &                    hydrogen,ierr)
c --------- compress caln():
            nres = 0
            do  i = 1, naln
              ires = ialn(i,iseq)
              if (ires .gt. 0) then
                nres = ires
                ccaln(ires) = caln(i,iseq)
              end if
            end do
c --------- test all possible beginnings in PDB for match with alignment sequence ISEQ:
            do 120  i = 1, nres2
             if (i+nres-1 .le. nres2) then
              do  j = 1, nres
                if (ccaln(j) .ne. ccaln2(i+j-1)) go to 120
              end do
c ----------- got it:
              range(1,iseq)  = resnum(i)
              range(2,iseq)  = resnum(i+nres-1)
              alnchn(1,iseq) = chain(i)
              alnchn(2,iseq) = chain(i+nres-1)
              go to 130
             end if
120         continue
            write(iolog, '(a,i3/12x,a)')
     &        'rdpir___E> alignment sequence not found in PDB file: ',
     &        iseq, fname(1:lenr2(fname))
            stop
          end if
         end if
130     continue

        if (rdall) nseq = nseq2

        return

910     write(*,*) 'rdpir___E> reading in alignment'
        stop
      end



cf ----------------------------------------------------------------------------
cf
cf    RDPAP() routine reads the horizontal 'PAPER' alignment.
cf
cf    Format:
cf       Several blocks of sequences, one sequence (or part of it)
cf       in one line, no blank lines within blocks. All gaps, including
cf       overhangs, must have gap characters (see PARAMETER below).
cf       First NCODSPC characters in each sequence line are reserved
cf       for the sequence code. In fact, a sequence line is recognized
cf       by the presence of nonblank characters in this space. It does
cf       not matter how many non-sequence (blank, for example) lines
cf       are between contiguous blocks of sequences. The number of
cf       sequences and their order must be the same in each contiguous
cf       block, but the length of blocks can vary.
cf
cf    Input: io          ... I/O stream
cf           infile*(*)  ... input file name
cf           maxseq      ... maximal number of sequences
cf           maxres      ... maximal length of alignment in residues
cf                           ('all' for all)
cf    Output:
cf           code(i)*(*) ... codes of proteins read in
cf           nseq        ... number of sequences read in
cf           naln        ... length of the alignment
cf           ialn(i,j)   ... residue index at position i, sequence j
cf           caln(i,j)*1 ... residue type at position i, sequence j
cf           codein(i)   ... .T. if sequence read in
cf
cf ----------------------------------------------------------------------------


      subroutine rdpap(ioinp,iolog,infile,maxseq2,maxres,ialn,caln,
     &                 nseq,naln,code,codein,insght,icumul)
        implicit none
#include "mdt_aln.cst"
        integer ioinp,iolog,maxseq2,maxres,nseq,naln,ierr,ncodlen
        integer ipos,lenr,i,ifind2word,nseq2,iseq,lr,ipos0,lencod
        integer ialn(maxres,maxseq2), icumul(maxseq), ifind5word
        character card*(maxlin), infile*(*), code(maxseq2)*(*)
        character cod*(maxcod), caln(maxres,maxseq2)*(*)
        logical rdall, codein(maxseq2), notaa1, recover, cmpr
        logical insght
        external lencod, notaa1, recover, ifind2word, ifind5word

        do  iseq = 1, maxseq2
          icumul(iseq) = 0
        end do

        do  iseq = 1, maxseq2
          codein(iseq) = .false.
        end do

        if (insght) then
          ncodlen = lencod(ioinp,iolog,infile)
        else
          ncodlen = ncodspc
c          ncodlen = min(len(code(1)), ncodspc)
        end if

        call openf4(ioinp,infile,'OLD','SEQUENTIAL','FORMATTED',3,
     &              .true.,ierr,cmpr,iolog)

c ----- read all the sequences?
        rdall = ifind5word('all', code, nseq).gt.0

        naln = 0
c ----- stop on error and report everything:
        call openf4(ioinp,infile,'OLD','SEQUENTIAL','FORMATTED',3,
     &              .true.,ierr,cmpr,iolog)

c ----- get to the next block of aligned sequences:
10      read(ioinp, '(a)', err=910, end=110) card
        if (index(alncmnts, card(1:1)) .gt. 0) go to 10
        cod = card(1:ncodlen)
        if (lenr(cod) .eq. 0) go to 10

c ----- this is the first line of the contiguous block of aligned sequences:
        nseq2 = 0
        iseq = 0
        lr = lenr(card)
        ipos0 = naln - ncodspc
c ----- all the sequences in this block have to have the same number of chars
        naln  = naln + max(0, lr-ncodspc)
        if (naln .gt. maxres) then
          write(*,'(a)') 'rdpap___E> naln>MAXRES; increase MAXRES.'
          if (recover(.true., 1)) return
        end if
30      continue

          iseq = ifind2word(cod,code,nseq)
          if (iseq .eq. 0) then
            if (rdall) iseq = nseq2 + 1
          end if
          if (iseq.gt.maxseq2)
     &      stop 'rdpap____E> MAXSEQ; increase MAXSEQ'

c ------- is the current sequence not specified in CODE()
          if (iseq .lt. 1) go to 50

          nseq2 = nseq2 + 1
          code(iseq) = cod
          codein(iseq) = .true.

          do  i = ncodspc+1, lr
c   ------- alignment position:
            ipos = ipos0 + i
            caln(ipos,iseq) = card(i:i)
            if (.not. notaa1(card(i:i))) then
              icumul(iseq) = icumul(iseq) + 1
              ialn(ipos,iseq) = icumul(iseq)
            else
              ialn(ipos,iseq) = 0
            end if
          end do

50        continue

          read(ioinp, '(a)', err=910, end=110) card
          if (index(alncmnts, card(1:1)) .gt. 0) go to 10
          cod = card(1:ncodlen)

        if (lenr(cod) .gt. 0) go to 30
        go to 10

110     continue

        close(ioinp)

        if (rdall) nseq = nseq2
        return

910     write(*,*) 'rdpap___E>; reading in alignment'
        stop
      end



      integer function lencod(ioinp,iolog,infile)
        implicit none
        integer ioinp, iolog, ierr, l
        character infile*(*), card*255
        logical cmpr

        call openf4(ioinp,infile,'OLD','SEQUENTIAL','FORMATTED',3,
     &              .true.,ierr,cmpr,iolog)
        read(ioinp, *, end=20) card
        close(ioinp)
        l = index(card, ':') + 1
        if (l .lt. 2) then
          write(iolog, '(2a)')
     &      'lencod__E> no : in Insight alignment file: ', infile
          stop
        end if
        lencod = l
        return

20      continue
        write(iolog, '(2a)')
     &    'lencod__E> reading Insight alignment file: ',
     &    infile
        stop

      end

      subroutine rmempty(ialn, caln, maxres, nseq, naln)
        implicit none
#include "reslib.cst"
        integer maxres,nseq,naln,i,j,newaln
        integer ialn(maxres,nseq)
        character caln(maxres,nseq)*(*),gap1,str2chr*(1)

        gap1 = str2chr(gapsym)

        newaln = 0
        do 20  j = 1, naln
          do  i = 1, nseq
            if(caln(j,i).ne.gap1.and.caln(j,i).ne.' ') go to 15
          end do
c ------- at this position, there are gaps all the way through : remove it
          go to 20
c ------- at least one residue occurs at this position: keep it
15        newaln = newaln + 1
          do  i = 1, nseq
            ialn(newaln,i) = ialn(j,i)
            caln(newaln,i) = caln(j,i)
          end do
20      continue
        naln = newaln

        do  i = 1, nseq
          do  j = newaln+1, maxres
            ialn(j,i) = 0
            caln(j,i) = gap1
          end do
        end do

        return
      end



      subroutine rm2empty(ialn, caln, maxres, nseq, naln)
        implicit none
#include "reslib.cst"
        integer maxres,nseq,naln,i,j,newaln
        integer ialn(maxres,nseq)
        character caln(maxres,nseq)*(*), gap1*(1), str2chr*(1)

        gap1 = str2chr(gapsym)

        do  i = 1, nseq
          newaln = 0
          do  j = 1, naln
            if(caln(j,i).ne.gap1) then
              newaln = newaln + 1
              ialn(newaln,i) = ialn(j,i)
              caln(newaln,i) = caln(j,i)
            end if
          end do
          do  j = newaln+1, maxres
            ialn(j,i) = 0
            caln(j,i) = gap1
          end do
          naln = max(newaln,naln)
        end do

        return
      end


      subroutine rmbrks(caln, maxres, nseq, naln)
        implicit none
#include "reslib.cst"
#include "lenf.cst"
#include "reslib.cmn"
        integer nseq, naln, maxres, i, j
        character caln(maxres,nseq)*(*), brk1*(1), int2chr*(1)
        character gap1*(1)

        gap1 = int2chr(igaptyp)
        brk1 = int2chr(ibrktyp)

        do  i = 1, nseq
          do  j = naln, 2, -1
            if (caln(j-1,i).eq.brk1 .and. caln(j,i).eq.brk1)
     &        caln(j,i) = gap1
          end do
        end do

        return
      end


      subroutine cswap(i,j)
      implicit none
      character i*1, j*1, itemp*1
      itemp = i
      i = j
      j = itemp
      return
      end



cf ----------------------------------------------------------------------------
cf
cf    NINVTYP()
cf
cf    subroutine ninvtyp(ialn,caln,maxres,invaln,irestypn,ccaln,naln,
cf   &                   nseq,nresn)
cf
cf ----------------------------------------------------------------------------

      subroutine ninvtyp(ialn,caln,maxres,invaln,irestypn,ccaln,naln,
     &                   nseq,nresn)
        implicit none
        integer maxres
        integer nseq,naln,iseq,ipos,ires,irestypn(maxres,nseq)
        integer ialn(maxres,nseq), invaln(maxres,nseq), nresn(nseq)
        integer ichr2int
        character caln(maxres,nseq)*(*), ccaln(maxres,nseq)*(*)
        do  iseq = 1, nseq
          nresn(iseq) = 0
          ires = 0
          do  ipos = 1, naln
            ires = ialn(ipos,iseq)
            if (ires .gt. 0) then
              nresn(iseq) = ires
              invaln(ires,iseq)   = ipos
              ccaln(ires,iseq)    = caln(ipos,iseq)
              irestypn(ires,iseq) = ichr2int(caln(ipos,iseq))
            end if
          end do
        end do
        return
      end


cf ----------------------------------------------------------------------------
cf
cf    DEFALN() fills in the alignment data arrays with default values.
cf
cf    subroutine defaln(rng,prot,nseq,code,prottyp,pdbcod,name,source,
cf   &                  range,alnchn,resol,rfactr)
cf
cf ----------------------------------------------------------------------------

      subroutine defaln(rng,prot,nseq,code,prottyp,pdbcod,name,source,
     &                  range,alnchn,resol,rfactr)
        implicit none
#include "reslib.cst"
        integer i,nseq
        real resol(nseq),rfactr(nseq)
        character pdbcod(nseq)*(*),name(nseq)*(*)
        character source(nseq)*(*),range(2,nseq)*(*)
        character alnchn(2,nseq)*(*), code(nseq)*(*), prottyp(nseq)*(*)
        character prot*(*)
        logical rng

        do  i = 1, nseq
          prottyp(i)  = prot
          pdbcod(i)   = code(i)
          name(i)     = 'undefined'
          source(i)   = 'undefined'
          resol(i)    = -1.0
          rfactr(i)   = -1.0
          if (rng) then
            alnchn(1,i) = anyrid
            alnchn(2,i) = norid
            range(1,i)  = anyrid
            range(2,i)  = norid
          end if
        end do

        return
      end


cf ---------------------------------------------------------------------------
cf
cf    RDQNT reads an alignment file in the QUANTA format. Chain ID's
cf    are obtained as the first character of the QUANTA segment names.
cf    I am also assuming that the sequences in the QUANTA file are
cf    in fact contiguous residues in the coordinate files.
cf
cf    subroutine rdqnt(ioinp,iolog,infile,maxseq2,maxres,ialn,caln,
cf   &           nseq,naln,code,alnchn,range,iress1n,iress2n,maxseg,
cf   &           nsegmn,segidn,codein)
cf ---------------------------------------------------------------------------

      subroutine rdqnt(ioinp,iolog,infile,maxseq2,maxres,ialn,caln,
     &           nseq,naln,code,alnchn,range,iress1n,iress2n,maxseg,
     &           nsegmn,segidn,codein,iseq,ialn2,ichnbrk,cod,buffr)
        implicit none

#include "mdt_aln.cst"
#include "reslib.cst"
#include "lenf.cst"
#include "reslib.cmn"

        integer maxseq2,iseq(maxseq2),ialn2(maxseq2),ncode,ifirst,iline
        integer ioinp,maxres,nseq,naln,ierr,nbuffr,nchnbrk,nlines
        integer maxseg,ichnbrk(maxseq2),iress1n(maxseg,maxseq2)
        integer iress2n(maxseg,maxseq2),nsegmn(maxseq2),iolog
        integer ialn(maxres,maxseq2),ifind2word,is,nb,ip,icb,ifind5word
        character segidn(maxseg,maxseq2)*(*),segid*(nqntwcol)
        character caln(maxres,maxseq2)*(*), int2chr*(1)
        character card*(mqntlin), infile*(*), code(maxseq2)*(*)
        character cod(maxseq2)*(20), buffr(2*maxseq2)*(20)
        character alnchn(2,maxseq2)*(*),range(2,maxseq2)*(*)
        character str2chr*(1)
        logical rdall, codein(maxseq2), recover, cmpr

c ----- read all the sequences in the file?
        rdall = ifind5word('all', code, nseq).gt.0

c ----- stop on error and report everything:
        call openf4(ioinp,infile,'OLD','SEQUENTIAL','FORMATTED',3,
     &              .true.,ierr,cmpr,iolog)

c ----- read the protein codes in the alignment file:
        ncode = 0
10      read(ioinp, '(a)', err=910, end=910) card
        if (card(1:5) .eq. 'MOL  ') then
          ncode = ncode + 1
          call rootfn(card(6:),cod(ncode))
          go to 10
        end if

c ----- determine which sequences in the QUANTA file were selected and their
c       order in the QUANTA columns (order in MODELLER arrays is given by
c       the CODE array):
        if (rdall) then

          nseq  = ncode
          if (nseq.gt.maxseq2)
     &      write(*, '(a)') 'rdqnt___E> increase MAXSEQ'
          do  is = 1, nseq
            iseq(is)   = is
            codein(is) = .true.
            code(is)   = cod(is)
          end do

        else

          if (nseq.gt.maxseq2)
     &      write(*, '(a)') 'rdqnt___E> increase MAXSEQ'
          do  is = 1, nseq
            ip = ifind2word(code(is), cod, ncode)
            if (ip .gt. 0) then
              iseq(is) = ip
              codein(is) = .true.
            else
              codein(is) = .false.
            end if
          end do

        end if

        naln = 0
        do  is = 1, nseq
          ialn2(is) = 0
          if (codein(is)) then
            nsegmn(is) = 1
            iress1n(1,is) = 1
          else
            nsegmn(is) = 0
          end if
        end do

c ----- read in the whole current alignment position (may be spread over
c       several lines in the QUANTA file):
        if (mod(ncode, nqntcol) .eq. 0) then
          nlines = ncode / nqntcol
        else
          nlines = ncode / nqntcol + 1
        end if
c ----- the first line is already in card:
        call str_sn2(card(6:), buffr, 2*maxseq2, nbuffr)
        ifirst = 2

20      do  iline = ifirst, nlines
          read(ioinp, '(a)', err=910, end=130) card
          call str_sn2(card(6:), buffr(nbuffr+1), 2*maxseq2-nbuffr, nb)
          nbuffr = nbuffr + nb
        end do
        ifirst = 1
        nbuffr = 0

c ----- process the whole current alignment position:

c ----- if any of the chains has a chain break, insert a chain break
c       alignment position:

c ----- get the number of chain breaks and indices of sequences that have it;
c       also update the segment ID's
        nchnbrk = 0
        do  is = 1, nseq
         if (codein(is)) then
          ip = iseq(is)*2 - 1
          if (buffr(ip).ne.qntgap) then
            call rngchn(range(2,is), segid, buffr(ip+1))
            if (ialn2(is) .eq. 0) then
c ----------- remember the segment ID and the residue number of the first
c             residue in this sequence:
              range(1,is)  = range(2,is)
              segidn(1,is) = segid
            else
c ----------- chain break identified by different segment id's:
              if (segidn(nsegmn(is),is).ne.segid) then
                nchnbrk                = nchnbrk + 1
                ichnbrk(nchnbrk)       = is
                iress2n(nsegmn(is),is) = ialn2(is)
                nsegmn(is)             = nsegmn(is) + 1
                segidn(nsegmn(is),is)  = segid
                iress1n(nsegmn(is),is) = ialn2(is) + 1
              end if
            end if
          end if
         end if
        end do

c ----- if any breaks, insert the right residue codes and ialn values:
        if (nchnbrk .gt. 0) then
          naln = naln + 1
          if (naln .gt. maxres) then
            write(*,'(a)') 'rdqnt____E> increase MAXRES'
            if(recover(.true., 1)) return
          end if
          do  is = 1, nseq
            caln(naln,is) = int2chr(igaptyp)
            ialn(naln,is) = 0
          end do
          do  icb = 1, nchnbrk
            is = ichnbrk(icb)
            caln(naln, is)  = int2chr(ibrktyp)
          end do
        end if

        naln = naln + 1
        if (naln .gt. maxres) then
          write(*,'(a)') 'rdqnt____E> increase MAXRES'
          if(recover(.true., 1)) return
        end if
        do  is = 1, nseq
         if (codein(is)) then
          ip = iseq(is)*2 - 1
          if (buffr(ip).eq.qntgap) then
            caln(naln, is) = int2chr(igaptyp)
            ialn(naln,is) = 0
          else
c --------- 3 character PDB residue name to single character residue name:
            caln(naln,is) = str2chr(buffr(ip)(1:3))
            ialn2(is) = ialn2(is) + 1
            ialn(naln,is) = ialn2(is)
          end if
         end if
        end do

        go to 20

130     continue
        close(ioinp)

c ----- assuming there are no residue insertion codes in QUANTA alignment files:
        do  is = 1, nseq
          call rjust(range(1,is)(1:4))
          call rjust(range(2,is)(1:4))
          alnchn(1,is)           = segidn(1,is)
          alnchn(2,is)           = segidn(nsegmn(is),is)
          iress2n(nsegmn(is),is) = ialn2(is)
        end do
        return

910     write(*,*) 'rdqnt___E> reading in alignment'
        stop

      end


cf --------------------------------------------------------------------------
cf
cf    RNGCHN() routine will interpret BUFFR as SEGMENT_ID:RESIDUE_NUMBER
cf    and return the two components in  resnum and segid.
cf
cf    subroutine rngchn(resnum, segid, buffr)
cf
cf --------------------------------------------------------------------------

      subroutine rngchn(resnum, segid, buffr)
        implicit none
        integer i
        character resnum*(*), segid*(*), buffr*(*)

        i = index(buffr, ':')
        if (i.lt.2) then
          write(*,'(2a)')'rngchn___E> no segment_id or no ":"  ', buffr
          stop
        end if
        resnum = buffr(i+1:)
        segid  = buffr(1:i-1)

        return
      end


cf --------------------------------------------------------------------------
cf
cf    DEFSEG() will assign default segment values for the sequences in the
cf    alignment. Note that this does not take into account any segment
cf    delineations that may be apparent from the coordinate files. It only
cf    uses the chain break character in the CALN array.
cf
cf    subroutine defseg(caln,alnchn,iress1n,iress2n,maxseg,nseq,
cf   &                  nsegmn,segidn,maxres,ialn,naln)
cf
cf --------------------------------------------------------------------------

      subroutine defseg(caln,alnchn,iress1n,iress2n,maxseg,nseq,
     &                  nsegmn,segidn,maxres,ialn,naln)
        implicit none
#include "reslib.cst"
#include "lenf.cst"
#include "reslib.cmn"
        integer maxseg,nseq,iress1n(maxseg,nseq),iress2n(maxseg,nseq)
        integer maxres,nsegmn(nseq),i,ialn(maxres,nseq),j,nres,naln
        character alnchn(2,nseq)*(*),segidn(maxseg,nseq)*(*)
        character caln(maxres,nseq)*(*),chnbrk*(1), int2chr*(1)

        chnbrk = int2chr(ibrktyp)

        do  i = 1, nseq
          nsegmn(i) = 1
          iress1n(1,i) = 1
          segidn(1,i) = alnchn(1,i)
          nres = 0
          do  j = 1, naln
            if (ialn(j,i).gt.0) then
              nres = nres + 1
            else
              if (caln(j,i) .eq. chnbrk) then
                iress2n(nsegmn(i),i) = nres
                nsegmn(i) = nsegmn(i) + 1
                if (nsegmn(i) .gt. maxseg) then
                  write(*,'(a)') 'defseg__E> increase MAXSEG'
                  stop
                end if
                iress1n(nsegmn(i),i) = nres+1
              end if
            end if
          end do
          iress2n(nsegmn(i),i) = nres
        end do

        return
      end



cf ----------------------------------------------------------------------------
cf
cf    RDSEQPDB routine reads the sequence in the specified range from a
cf    PDB coordinate file.
cf
cf
cf ----------------------------------------------------------------------------

      subroutine rdseqpdb(ioinp,iolog,fname,maxres,nres,caln,resnum,
     &                    chain,rng,chnrng,water,hetatm,hydrogen,ierr)
        implicit none
#include "reslib.cst"
#include "lenf.cst"
#include "reslib.cmn"
        integer maxres,ioinp,ierr,nres,iolog
        character rng(2)*(*),chnrng(2)*(*),fname*(*),card*30
        character caln(maxres)*(*), str2chr*(1)
        character oldnum*(5), oldid*(1),oldnam*(3)
        character newnum*(5), newid*(1),newnam*(3)
        character resnum(maxres)*(*), chain(maxres)*(*)
        logical cmpr,inrange,recover,water,hetatm,hydrogen,p
        logical pdblin, reseqv1
        external pdblin, recover, str2chr, reseqv1

        call openf4(ioinp,fname,'OLD','SEQUENTIAL','FORMATTED',3,
     &              .false.,ierr,cmpr,iolog)

        if (ierr .gt. 0) then
          ierr = 1
          return
        end if

        nres = 0
        oldnum = '%%%%%'
        oldnam = '%%%'
        oldid = '%'
        inrange = .false.

5       continue

          read(ioinp, '(a30)', end=20, err=300)card

          if (card(1:6) .eq. 'ENDMDL') go to 20

          p = pdblin(card(1:6),card(18:20),card(17:17),card(13:16),
     &               water,hydrogen,hetatm,iwattyp)

          if (p) then

            read(card, '(17x,a3,1x,a1,a5)') newnam, newid, newnum

            if(reseqv1(newnum,newid,rng(1),chnrng(1)))inrange=.true.

            if (inrange) then
              if ((newnum.ne.oldnum) .or. (newnam.ne.oldnam)) then
                if (reseqv1(oldnum,oldid,rng(2),chnrng(2))) go to 20
                nres = nres + 1
                if (nres .gt. maxres) then
                  ierr = 3
                  write(*,'(a)') 'rdseqpd_E> too many residues'
                  if (recover(.true., 1)) go to 500
                end if
                caln(nres) = str2chr(newnam)
                resnum(nres) = newnum
                chain(nres) = newid
                oldnum = newnum
                oldnam = newnam
                oldid  = newid
              end if
            end if

          end if
          go to 5
20      continue
        close(ioinp)
        ierr = 0
        go to 500

300     ierr = 5

500     continue
        call unprepfil(fname, cmpr)
        return

      end


      subroutine descaln(alnfil)
        implicit none
#include "io.cst"
#include "mdt_all.cmn"
        integer lenr2, i
        character alnfil*(*)
        logical iowr
        external lenr2, iowr

        if (iowr(2)) then

         write(iolog,'(/2a/a,i5//a/999a1)')
     &  'Read the alignment from file       : ',alnfil(1:lenr2(alnfil)),
     &  'Total number of alignment positions: ',naln,
     &  '  #  Code        #_Res #_Segm PDB_code Name',
     &  ('-', i=1,79)
        do  i = 1, nseq
          write(iolog,'(i3,2x,a9,1x,2i7,1x,a11,1x,a40)')
     &    i,code(i),nresn(i),nsegmn(i),pdbcod(i),name(i)
        end do

        end if

        return
      end




      subroutine defali(nnseq)
        implicit none
#include "io.cst"
#include "mdt_all.cmn"
        integer nnseq

c ----- in case there is no alignment (explicit or default), construct
c       the default alignment fields (not the alignment):
        if (noaln) then
          nseq = nnseq
          call defaln(.true.,'structureX',nseq,code,prottyp,pdbcod,
     &                name,source,range,alnchn,resol,rfactr)
          call defseg(caln,alnchn,iress1n,iress2n,maxseg,nseq,nsegmn,
     &                segidn,maxres,ialn,naln)
c ------- the rest of the default alignment is set in rdabrk()
        end if

        return
      end
