C PROTEIN DATA BANK SOURCE CODE MAKEMOLS C AUTHOR. L. HOLM C ENTRY DATE. 12/94 UNSUPPORTED C LAST REVISION. 12/94 C PURPOSE. WRITE BASIC MOLSCRIPT INPUT FILE. C PURPOSE. THIS PROGRAM USES THE OUTPUT OF C PURPOSE. OF DSSP AS AN INPUT FILE AND C PURPOSE. GENERATES DIRECTIVES FOR USE BY C PURPOSE. MOLSCRIPT. C LANGUAGE. FORTRAN C/// C------------------------------------------------------------------------------- C---- ---- C---- COPYRIGHT (C) 1994 BY ---- C---- L. HOLM ---- C---- ALL RIGHTS RESERVED. ---- C---- ---- C---- This is the source code of the MAKEMOLS program. ---- C---- The MAKEMOLS program is a free software system. It is distributed ---- C---- in the hope that it will be useful, but WITHOUT ANY WARRANTY without ---- C---- even the implied warranty of merchantability or fitness for a ---- C---- particular purpose. ---- C---- The copyright holders and/or other parties provide the program ---- C---- "AS IS",without warranty of any kind, either expressed or implied. ---- C---- The entire risk as to the quality and performance of the program ---- C---- is with you. Should the program prove defective, you assume the cost ---- C---- of all necessary servicing, repair or correction. ---- C---- ---- C---- THIS SOFTWARE IS FURNISHED UNDER A LICENSE AND MAY BE USED AND COPIED ---- C---- ONLY IN ACCORDANCE WITH THE TERMS OF SUCH LICENSE AND WITH THE ---- C---- INCLUSION OF THE ABOVE COPYRIGHT NOTICE. THIS SOFTWARE OR ANY OTHER ---- C---- COPIES THEREOF MAY NOT BE PROVIDED OR OTHERWISE MADE AVAILABLE TO ANY ---- C---- OTHER PERSON. NO TITLE TO AND OWNERSHIP OF THE SOFTWARE IS HEREBY ---- C---- TRANSFERRED. ---- C---- ---- C---- THE INFORMATION IN THIS SOFTWARE IS SUBJECT TO CHANGE WITHOUT NOTICE ---- C---- AND SHOULD NOT BE CONSTRUED AS A COMMITMENT BY THE EMBL OR BY THE ---- C---- AUTHOR. ---- C---- ---- C---- NEITHER THE EMBL NOR THE AUTHOR ASSUME RESPONSIBILITY FOR THE USE ---- C---- OR RELIABILITY OF THIS SOFTWARE PRODUCT. ---- C---- ---- C------------------------------------------------------------------------------- C\\\ program makemols c c writes basic MolScript input file c implicit none integer maxres parameter(maxres=900) c integer nseg,from(200),to(200) character*6 segtype(200) c character chainid,seq(maxres),struc(maxres) character*5 resno(maxres) character*80 header real ca(3,maxres) character*33 dsspinfo(maxres) c integer i,nres character*80 filnam,infile c write(*,*) ' *********************************************************' write(*,*) ' This program writes a simple input file for MolScript. ' write(*,*) ' MolScript ref: P.Kraulis (1991) J.Appl.Cryst.24,946-950.' write(*,*) ' Secondary structure segments are displayed as ribbons.' write(*,*) ' Secondary structure definitions will be read from a DSSP' write(*,*) ' file (Kabsch and Sander (1983) Biopolymers 22,2577-2637).' write(*,*) ' Use a text editor to modify the script. ' write(*,*) ' Help in molscript.doc of your local installation.' write(*,*) ' *********************************************************' write(*,*) ' enter name of dssp file ' read(*,510) infile write(*,*) ' which chain ? (blank or A or B or ...; * for all chains) ' read(*,520) chainid call readdssp(infile,chainid,90,header,nres,resno,seq,struc,ca,dsspinfo) write(*,*) header call segmentparser(nres,struc,nseg,from,to,segtype) write(*,*) ' enter name of MolScript input file ' read(*,510) filnam open(91,file=filnam,status='new') write(91,*) ' plot' write(*,*) ' enter name of pdb file ' read(*,510) infile write(91,*) ' read mol "',infile(1:40),'";' write(91,*) ' transform atom * by centre position atom *;' do i=1,nseg write(*,*) from(i),to(i),segtype(i) write(91,*) ' ',segtype(i),' require from ',resno(from(i)), $ ' to ',resno(to(i)),' and molecule mol;' end do write(91,*) ' end_plot' close(91) write(*,*) ' MolScript input file in ',filnam(1:40) c 500 format(a4,a1) 510 format(a80) 520 format(a1) c end c c c subroutine segmentparser(nres,struc,nseg,from,to,segtype) implicit none integer nres,maxres parameter(maxres=900) character struc(maxres) integer nseg,from(200),to(200) character*6 segtype(200) c c HGI => helix (>= 3 residues) c BE => strand (>= 3 residues) c other => coil c integer isimp c character*6 alfabet data alfabet/'HGIBE?'/ integer i,j,k,tointeger(6),s(maxres) data tointeger/1,1,1,0,2,0/ c do i=1,nres s(i)=isimp(struc(i),6,alfabet,tointeger) end do i=0 do while(i.lt.nres) i=i+1 if(s(i).gt.0) then k=s(i) j=i do while(j.lt.nres.and.s(j+1).eq.k) j=j+1 end do if(j.lt.i+2) then write(*,*) 'annulled',s(i),i,j do k=i,j s(k)=0 end do end if i=j end if end do i=0 do while(i.lt.nres) i=i+1 if(s(i).gt.0) then k=s(i) j=i do while((j.lt.nres).and.(s(j+1).eq.k)) j=j+1 end do if(i.gt.1) then if(s(i-1).ne.0) then nseg=nseg+1 from(nseg)=i-1 to(nseg)=i segtype(nseg)='coil ' end if end if nseg=nseg+1 from(nseg)=i to(nseg)=j if(k.eq.1) segtype(nseg)='helix ' if(k.eq.2) segtype(nseg)='strand' i=j else j=i do while((j.lt.nres).and.(s(j+1).eq.0)) j=j+1 end do nseg=nseg+1 from(nseg)=max(1,i-1) to(nseg)=min(nres,j+1) segtype(nseg)='coil ' i=j end if end do c return end c c----------------------------------------------------------------------- c subroutine readdssp(filnam,chainid,iunit, $ header,nres,resno,seq,struc,ca,dsspinfo) c c frontend to getdssp to transport wanted subset of DSSP information c c parameter: c maxres c c input: filnam c chainid c iunit c c output: header, compnd, source, author -- echo PDB cards c nres c nchain c nss, nssintra, nssinter -- SS-bridges c area -- total accessible surface area c nhb, nhbp -- # backbone H-bonds, same per-cent c resno -- PDB residue number string with appended insertion c indicator, e.g. 1ACX 63A c seq -- one-letter sequence c struc -- one-letter secondary structure c acc -- accessible surface area per residue c dsspinfo -- structure string with renumbered beta-sheet resnos c * marks H-bonds to outside given chain c dsspresno, fsspresno -- local work arrays c tco,kappa,alfa,phi,psi -- backbone conformational angles c ca -- CA coordinates c ierr = 0 -- normal c = -1 -- file not found c = -2 -- too many residues, chain truncated to maxres c implicit none integer maxres parameter(maxres=900) integer iunit,nres,nchain,nss,nssintra,nssinter,nhb,acc(maxres) character*80 filnam,header,compnd,source,author character chainid,seq(maxres),struc(maxres) real nhbp,tco(maxres),kappa(maxres),alfa(maxres),phi(maxres), $ psi(maxres),ca(3,maxres) character*5 resno(maxres) character*33 dsspinfo(maxres) integer area,ierr,dsspresno(maxres) c call getdssp(filnam,chainid,iunit,maxres,header,compnd,source, $ author,nres,nchain,nss,nssintra,nssinter,area,nhb,nhbp, $ resno,seq,struc,acc,dsspinfo,dsspresno, $ tco,kappa,alfa,phi,psi,ca,ierr) if(ierr.eq.0) write(*,*) nres,' residues read ',filnam(1:60) if(ierr.eq.-1) write(*,*) ' DSSP file not found ',filnam(1:60) if(ierr.eq.-2) write(*,*) ' DSSP file truncated at ',maxres,'residues' return end c c----------------------------------------------------------------------- c subroutine getdssp(filnam,chainid,iunit,maxres,header,compnd,source, $ author,nres,nchain,nss,nssintra,nssinter,area,nhb,nhbp, $ resno,seq,struc,acc,dsspinfo,dsspresno, $ tco,kappa,alfa,phi,psi,ca,ierr) c c reads all information from a DSSP file c extracts residues with chainid (* is wildcard) c chain breaks (! residues) are ignored c c use a frontend to transport wanted subset of information c c parameter: c maxres c c input: filnam c chainid c iunit c c output: header, compnd, source, author -- echo PDB cards c nres c nchain c nss, nssintra, nssinter -- SS-bridges c area -- total accessible surface area c nhb, nhbp -- # backbone H-bonds, same per-cent c resno -- PDB residue number string with appended insertion c indicator, e.g. 1ACX 63A c seq -- one-letter sequence c struc -- one-letter secondary structure c acc -- accessible surface area per residue c dsspinfo -- structure string with renumbered beta-sheet resnos c * marks H-bonds to outside given chain c dsspresno, fsspresno -- local work arrays c tco,kappa,alfa,phi,psi -- backbone conformational angles c ca -- CA coordinates c ierr = 0 -- normal c = -1 -- file not found c = -2 -- too many residues, chain truncated to maxres c implicit none integer iunit,maxres,nres,nchain,nss,nssintra,nssinter,nhb,acc(maxres) character*80 filnam,header,compnd,source,author character chainid,seq(maxres),struc(maxres) real nhbp,tco(maxres),kappa(maxres),alfa(maxres),phi(maxres), $ psi(maxres),ca(3,maxres) character*5 resno(maxres) character*33 dsspinfo(maxres) integer area,ierr,dsspresno(maxres),fsspresno(5000) c integer val,sign,s character*4 str4 c integer i,ntot,bp1,bp2,x,j character*128 line logical lheader,lcompnd,lsource,lauthor character se,c character*4 rno c c initialize c ierr=-1 do i=1,80 header(i:i)=' ' compnd(i:i)=' ' source(i:i)=' ' author(i:i)=' ' end do nres=0 ntot=0 nchain=0 nss=0 nssintra=0 nssinter=0 area=0.0 nhb=0 nhbp=0.0 lheader=.false. lcompnd=.false. lsource=.false. lauthor=.false. c open(iunit,file=filnam,status='old',err=999,readonly) ierr=0 c c read header information until start of data (" # " line) c 100 read(iunit,500,end=999) line if((line(1:6).eq.'HEADER').and.(.not.lheader)) then header=line(1:80) lheader=.true. end if if((line(1:6).eq.'COMPND').and.(.not.lcompnd)) then compnd=line(1:80) lcompnd=.true. end if if((line(1:6).eq.'SOURCE').and.(.not.lsource)) then source=line(1:80) lsource=.true. end if if((line(1:6).eq.'AUTHOR').and.(.not.lauthor)) then author=line(1:80) lauthor=.true. end if if(line(19:42).eq.'TOTAL NUMBER OF RESIDUES') then ntot=val(line(1:5),sign) nchain=val(line(6:8),sign) nss=val(line(9:11),sign) nssintra=val(line(12:14),sign) nssinter=val(line(15:17),sign) end if if(line(12:29).eq.'ACCESSIBLE SURFACE') area=val(line(1:6),sign) if(line(30:65).eq.'HYDROGEN BONDS OF TYPE O(I)-->H-N(J)') then nhb=val(line(1:5),sign) nhbp=val(line(6:8),sign)+0.1*val(line(10:10),sign) end if if(line(1:4).ne.' # ') goto 100 c c read residue-information c 200 read(iunit,500,end=299) line c skip breaks se=line(14:14) if(se.eq.'!') goto 200 c check maxres i=val(line(1:5),sign) if((i.ge.5000).or.(nres.eq.maxres)) then ierr=-2 write(*,*) ' Too many residues in getdssp -- skipping ', $ i,line(1:38) goto 299 end if c check chainid c=line(12:12) if(chainid.ne.'*') then if(chainid.ne.c) then fsspresno(i)=0 goto 200 end if end if c new residue nres=nres+1 seq(nres)=se struc(nres)=line(17:17) acc(nres)=val(line(35:38),sign) c append residue-insertion code + rightadjust rno=line(7:10) c=line(11:11) if(c.ne.' ') then do i=2,4 rno(i-1:i-1)=rno(i:i) end do rno(4:4)=c end if c append chainid, remove blanks i=0 resno(nres)=' ' c=line(12:12) if(c.ne.' ') then i=i+1 resno(nres)(i:i)=c end if do j=1,4 c=rno(j:j) if(c.ne.' ') then i=i+1 resno(nres)(i:i)=c end if end do c x=val(line(78:79),s) tco(nres)=s*(x+0.001*val(line(81:83),sign)) x=val(line(84:87),s) kappa(nres)=s*(x+0.1*val(line(89:89),sign)) x=val(line(90:93),s) alfa(nres)=s*(x+0.1*val(line(95:95),sign)) x=val(line(96:99),s) phi(nres)=s*(x+0.1*val(line(101:101),sign)) x=val(line(102:105),s) psi(nres)=s*(x+0.1*val(line(107:107),sign)) c x=val(line(108:112),s) c ca(1,nres)=s*(x+0.1*val(line(114:114),sign)) c x=val(line(115:119),s) c ca(2,nres)=s*(x+0.1*val(line(121:121),sign)) c x=val(line(122:126),s) c ca(3,nres)=s*(x+0.1*val(line(128:128),sign)) read(line(108:114),*) ca(1,nres) read(line(115:121),*) ca(2,nres) read(line(122:128),*) ca(3,nres) c sheet info i=val(line(1:5),sign) dsspresno(nres)=i fsspresno(i)=nres dsspinfo(nres)=line(6:38) c goto 200 c c renumber sheet records; 999 if H-bond partner is in other chain c 299 do i=1,nres bp1=val(dsspinfo(i)(21:24),sign) bp2=val(dsspinfo(i)(25:28),sign) if(bp1.gt.0) then bp1=fsspresno(bp1) if(bp1.gt.0) then dsspinfo(i)(21:24)=str4(bp1) else dsspinfo(i)(21:24)=' 999' end if end if if(bp2.gt.0) then bp2=fsspresno(bp2) if(bp2.gt.0) then dsspinfo(i)(25:28)=str4(bp2) else dsspinfo(i)(25:28)=' 999' end if end if end do 500 format(a128) 999 close(iunit) return end c c----------------------------------------------------------------------- c function onelettercode(aaa) implicit none character*3 aaa character onelettercode,a c c converts residuename to one-letter-code c c COMPATIBLE WITH DSSP c '-' is to be rejected ! c character*50 aasymbol character*150 aminoacid data aasymbol/'ARNDCEQGHILKMFPSTWYVBZXXXXXXXXXXXXXXXX--CCCCIPPPW-'/ c integer i,k c aminoacid(1:60)= $ 'ALAARGASNASPCYSGLUGLNGLYHISILELEULYSMETPHEPROSERTHRTRPTYRVAL' aminoacid(61:120)= $ 'ASXGLXACDALBALIABUAROBASBETHSEHYPHYLORNPCASARTAUTHYUNKACEFOR' aminoacid(121:150) = 'CYHCSHCSSCYXILUPRZPR0CPRTRYHOH' c a='-' do i=1,148,3 if(aminoacid(i:i+2).eq.aaa) then k=(i+2)/3 a=aasymbol(k:k) goto 19 end if end do 19 onelettercode=a return end c c----------------------------------------------------------------------- c c c----------------------------------------------------------------------------- c function val(instring,s) c c converts first digit-segment of instring to an integer, s is sign c implicit none integer val,ic,s character*(*) instring c integer k,l,r,v real m c c ignore leading non-digits c l=1 do while(((instring(l:l).lt.'0').or.(instring(l:l).gt.'9')) $ .and.(l.le.len(instring))) l=l+1 end do c c check sign c s=1 if(l.gt.1) then if(instring(l-1:l-1).eq.'-') s=-1 end if c c ignore trailing non-digits c r=l do while((instring(r+1:r+1).ge.'0').and.(instring(r+1:r+1).le.'9') $ .and.(r.lt.len(instring))) r=r+1 end do c write(*,*) l,r m=0.1 v=0 do k=r,l,-1 m=m*10 v=v+ic(instring(k:k))*m end do val=v return end c c----------------------------------------------------------------------------- c function ic(a) implicit none character a,t(0:9) integer ic,i data t/'0','1','2','3','4','5','6','7','8','9'/ ic=0 do i=0,9 if(a.eq.t(i)) ic=i end do return end c c----------------------------------------------------------------------- c function str4(i) c c converts positive integer to 4-character string c implicit none integer i character*4 str4,s character t(0:9) data t/'0','1','2','3','4','5','6','7','8','9'/ c integer k,i1,i2,i3,i4 c do k=1,3 s(k:k)=' ' end do s(4:4)='0' i1=mod(i,10) i2=(mod(i-i1,100))/10 i3=(mod(i-i2-i1,1000))/100 i4=(mod(i-i3-i2-i1,10000))/1000 if(i.ge.0) s(4:4)=t(i1) if(i.ge.10) s(3:3)=t(i2) if(i.ge.100) s(2:2)=t(i3) if(i4.le.9) then if(i.ge.1000) s(1:1)=t(i4) else s(1:1)='*' end if str4=s return end c c----------------------------------------------------------------------- c function isimp(c,len,fromstring,tointeger) c c converts character c in fromstring to its equivalent index in tointeger c len-th is wildcard c implicit none integer len character*(*) fromstring integer tointeger(*) character c integer isimp c integer i c isimp=tointeger(len) do i=1,len-1 if(c.eq.fromstring(i:i)) then isimp=tointeger(i) goto 99 end if end do 99 return end c c------------------------------------------------------------------------------ c