There, done! My first actual program that does something useful around the lab.
It's ugly, but it works...
import Tkinter
import tkFileDialog
import re
def read_fasta(lines):
    """Parse FASTA text into a list of ``(header, sequence)`` pairs.

    ``lines`` is any iterable of text lines (an open file works).  The
    header is returned without its leading '>', and all sequence lines of
    a record are joined into one string.  Line endings ('\\n' or '\\r\\n')
    are removed; anything that appears before the first header is ignored.
    """
    records = []
    header = None
    chunks = []
    for line in lines:
        # Strip '\r' as well so files with Windows line endings do not
        # leave carriage returns embedded in the joined sequence.
        line = line.rstrip('\r\n')
        if line.startswith('>'):
            if header is not None:
                records.append((header, ''.join(chunks)))
            header = line[1:]
            chunks = []
        elif header is not None:
            chunks.append(line)
    if header is not None:
        records.append((header, ''.join(chunks)))
    return records


def decoy_header(header):
    """Return the decoy header for ``header`` (given without the '>').

    The accession (the text up to the first space) is prefixed with
    'REV_'.  When the header carries a description, the description is
    replaced by the word 'REVERSED'.
    """
    accession, space, _description = header.partition(' ')
    decoy = 'REV_' + accession
    if space:
        decoy += ' REVERSED'
    return decoy


def build_decoy_records(records):
    """Return the decoy records for ``records``.

    Each sequence is reversed and its header rewritten by
    ``decoy_header``.  The decoys are returned in reverse input order
    (last record first).
    """
    return [(decoy_header(header), sequence[::-1])
            for header, sequence in reversed(records)]


def format_fasta(records):
    """Render ``(header, sequence)`` pairs as FASTA text.

    Every record becomes two lines: '>' + header, then the whole
    sequence on a single line.
    """
    return ''.join('>%s\n%s\n' % (header, sequence)
                   for header, sequence in records)


def make_decoy_database(in_path, out_path='newdatabase.fasta'):
    """Write the records of ``in_path`` followed by their decoys to ``out_path``."""
    # 'with' closes both files even if reading or writing fails.
    with open(in_path) as fasta_file:
        records = read_fasta(fasta_file)
    with open(out_path, 'w') as output:
        output.write(format_fasta(records))
        output.write(format_fasta(build_decoy_records(records)))


def main():
    """Ask for a FASTA file and write 'newdatabase.fasta' next to the script's cwd."""
    root = Tkinter.Tk()
    root.withdraw()  # only the file dialog should be visible
    in_path = tkFileDialog.askopenfilename(parent=root, title='Open file')
    if not in_path:
        # Dialog was cancelled: nothing to do.
        return
    make_decoy_database(in_path)


if __name__ == '__main__':
    main()