To: craig@plato.raru.adelaide.edu.au (Craig Kloeden)
From: doak@bioch.ox.ac.uk (David Doak)
Subject: Re: PDB -> Rotater

>Any chance of getting a copy of the perl script for the archive?
>I get quite a few people asking for PDB conversion these days.

I've included it with this email. It's pretty rudimentary - it only copes with one contiguous peptide chain and only deals with backbone atoms. Input and output can be defined on the command line and there is a -h switch to include backbone amide protons, e.g.

    pdb2xyz.perl pdb=myfile out=newfile -h

Also, the last line of the pdb file must be "END".

Clearly it could be made a lot better. I tend to use rotater to eyeball the folds of structures I'm working on and so haven't got around to writing a conversion script which copes with sidechains (also, I'm lazy). The trouble with sidechains is that one has to describe the topology before allocating bonds (lots of tedious code), while with the backbone the topology is simple. I think commercial molecular graphics programs sometimes allocate bonds based on distance criteria (i.e. if 2 atoms are less than a certain distance apart then they are most likely bonded).

Cheers, Dave.

#!/usr/local/bin/perl
# Script to convert PDB files to xyz format
# David Doak
#
# Usage:  pdb2xyz.perl [pdb=INFILE] [out=OUTFILE] [-h]
#
#   pdb=INFILE   PDB file to read                    (default inp.pdb)
#   out=OUTFILE  xyz file to write                   (default out.xyz)
#   -h           also draw the backbone amide protons
#
# Only the backbone atoms (N, CA, C, O and, with -h, the amide proton) of a
# single contiguous peptide chain are converted.  The coordinates are shifted
# so that the mean CA position sits at the origin, then written one point per
# line as "x y z code".  The trailing integer is the per-point code the
# original script emitted (0 wherever a new stroke starts -- presumably
# "move without drawing"; the other values presumably select a pen colour --
# confirm against the Rotater documentation).
#
# Reading stops at the first END record; if the file has none, everything up
# to end-of-file is used.

use strict;
use warnings;

# Command-line settings and their defaults.  Switches are looked up in this
# table rather than being eval'ed into variables, so an argument can no
# longer inject arbitrary Perl code.
my %opt = (
    pdb => "inp.pdb",
    out => "out.xyz",
    h   => 0,
);

foreach my $arg (@ARGV) {
    my ($name, $value);
    if ($arg =~ /^([A-Za-z_]+)=(.*)/) {         # process any FOO=bar switches
        ($name, $value) = ($1, $2);
    }
    elsif ($arg =~ /^-([A-Za-z_]+)/) {          # process any -FOO switches
        ($name, $value) = ($1, 1);
    }

    if (defined $name && exists $opt{$name}) {
        $opt{$name} = $value;
    }
    else {
        warn "ignoring unrecognised argument '$arg'\n";
    }
}

my $use_h = ($opt{h} eq '1');

print "\nReading $opt{pdb}, ";
print $use_h ? "using amide protons\n" : "ignoring amide protons\n";

# PDB atom name (whitespace removed) => internal slot.  The amide proton is
# accepted under both its older name "HN" and the name "H".
my %slot_for = (
    'N'  => 'N',
    'HN' => 'H',
    'H'  => 'H',
    'CA' => 'CA',
    'C'  => 'C',
    'O'  => 'O',
);

my %xyz;        # $xyz{SLOT}{RESNUM} = [ x, y, z ]
my %resname;    # $resname{RESNUM}   = three-letter residue name
my $firstres;   # residue number of the first ATOM record read
my $lastres;    # residue number of the most recent ATOM record read
my $finres;     # residue number at which the chain ends

open(my $pdb_fh, '<', $opt{pdb}) or die "no pdb file ($opt{pdb}): $!";
while (my $line = <$pdb_fh>) {
    chomp $line;
    $line =~ s/\r$//;                           # tolerate DOS line endings

    # The record name occupies a padded six-column field, so trim it
    # before comparing.
    my $record = substr($line, 0, 6);
    $record =~ s/\s+$//;

    if ($record eq 'END') {
        $finres = $lastres;
        last;
    }
    next unless $record eq 'ATOM';
    next if length($line) < 54;                 # too short to hold x, y, z

    my $resnum = substr($line, 22, 4);
    next unless $resnum =~ /^\s*(-?\d+)\s*$/;
    $resnum = $1 + 0;

    $firstres = $resnum unless defined $firstres;
    $lastres  = $resnum;
    $resname{$resnum} = substr($line, 17, 3);

    # The atom name is a padded four-column field; strip the padding so
    # that it can be looked up by its bare name.
    my $aname = substr($line, 12, 4);
    $aname =~ s/\s+//g;
    my $slot = $slot_for{$aname};
    next unless defined $slot;                  # not a backbone atom

    $xyz{$slot}{$resnum} = [ map { substr($line, $_, 8) + 0 } (30, 38, 46) ];
}
close($pdb_fh);

$finres = $lastres unless defined $finres;      # file had no END record
die "no ATOM records found in $opt{pdb}" unless defined $finres;

my $nres = $finres - $firstres + 1;
print "Number of residues = $nres \n";
print "Output = $opt{out} \n";

# Do centre of mass translation: move the mean CA position to the origin.
my @centre = (0, 0, 0);
my $n_ca   = 0;
for my $i ($firstres .. $finres) {
    my $ca = $xyz{CA}{$i} or next;
    $centre[$_] += $ca->[$_] for 0 .. 2;
    $n_ca++;
}
die "no CA atoms found in $opt{pdb}" unless $n_ca;
$_ /= $n_ca for @centre;

for my $atoms (values %xyz) {
    for my $point (values %$atoms) {
        $point->[$_] -= $centre[$_] for 0 .. 2;
    }
}

open(my $out_fh, '>', $opt{out}) or die "cannot write $opt{out}: $!";

for my $i ($firstres .. $finres) {
    my $is_first = ($i == $firstres);
    my $is_last  = ($i == $finres);
    my $is_pro   = (defined $resname{$i} && $resname{$i} eq 'PRO');

    # The first N starts the trace (code 0); every later N continues the
    # stroke from the previous residue's C.
    plot($out_fh, $xyz{N}{$i}, $is_first ? 0 : 2);

    # Amide proton: out to H, then back to N without drawing.  The first
    # residue and prolines are skipped, as is any residue whose proton
    # (or nitrogen) is absent from the file.
    if ($use_h && !$is_first && !$is_pro && $xyz{H}{$i} && $xyz{N}{$i}) {
        plot($out_fh, $xyz{H}{$i}, 7);
        plot($out_fh, $xyz{N}{$i}, 0);
    }

    plot($out_fh, $xyz{CA}{$i}, $is_first ? 3 : 2);
    plot($out_fh, $xyz{C}{$i},  2);

    # Carbonyl oxygen: out to O, then back to C without drawing.  It is
    # not drawn on the final residue of a multi-residue chain.
    unless ($is_last && !$is_first) {
        plot($out_fh, $xyz{O}{$i}, 1);
        plot($out_fh, $xyz{C}{$i}, 0);
    }
}

close($out_fh) or die "error writing $opt{out}: $!";

print "\nDone\n";

# Write one "x y z code" line to $fh.  $point is an [x, y, z] array ref;
# an undefined $point (atom missing from the PDB file) is skipped so that
# no bogus coordinate is emitted.
sub plot {
    my ($fh, $point, $code) = @_;
    return unless $point;
    print {$fh} "@$point $code \n";
    return;
}

__END__
---------------------------------------------------------
David Doak "I was bored before I even began"
doak@bioch.ox.ac.uk
---------------------------------------------------------