示例#1
0
 def test_write_sequences_to_fasta_file(self):
     """Round-trip test: sequences written to FASTA must read back unchanged.

     The sequences from the 'testdata/fasta_test.fa' fixture are written to
     a scratch file and read back; both lists must compare equal.
     """
     import os
     import tempfile

     seqs = st.read_sequences_from_fasta_file('testdata/fasta_test.fa')
     # Use a unique scratch file instead of a fixed /tmp path, so tests that
     # run concurrently (or on platforms without /tmp) cannot collide.
     handle, tmp_path = tempfile.mkstemp(suffix='.fa')
     os.close(handle)
     try:
         with open(tmp_path, 'w') as outputfile:
             st.write_sequences_to_fasta_file(outputfile, seqs)
         seqs2 = st.read_sequences_from_fasta_file(tmp_path)
     finally:
         os.remove(tmp_path)
     # assertEquals is a deprecated alias of assertEqual.
     self.assertEqual(seqs, seqs2)
 def test_write_sequences_to_fasta_file(self):
     """Round-trip test: sequences written to FASTA must read back unchanged.

     The sequences from the 'testdata/fasta_test.fa' fixture are written to
     a scratch file and read back; both lists must compare equal.
     """
     import os
     import tempfile

     seqs = st.read_sequences_from_fasta_file('testdata/fasta_test.fa')
     # Use a unique scratch file instead of a fixed /tmp path, so tests that
     # run concurrently (or on platforms without /tmp) cannot collide.
     handle, tmp_path = tempfile.mkstemp(suffix='.fa')
     os.close(handle)
     try:
         with open(tmp_path, 'w') as outputfile:
             st.write_sequences_to_fasta_file(outputfile, seqs)
         seqs2 = st.read_sequences_from_fasta_file(tmp_path)
     finally:
         os.remove(tmp_path)
     # assertEquals is a deprecated alias of assertEqual.
     self.assertEqual(seqs, seqs2)
示例#3
0
 def test_write_sequences_to_fasta_file_empty_seqs(self):
     """Ensure that only non-empty sequences are written to a FASTA file.

     Two entries are written, one of them with an empty sequence string;
     reading the file back must yield only the non-empty entry.
     """
     import os
     import tempfile

     seqs = [['seq1', 'TATATA'], ['seq2', '']]
     # Use a unique scratch file instead of a fixed /tmp path, so tests that
     # run concurrently (or on platforms without /tmp) cannot collide.
     handle, tmp_path = tempfile.mkstemp(suffix='.fa')
     os.close(handle)
     try:
         with open(tmp_path, 'w') as outputfile:
             st.write_sequences_to_fasta_file(outputfile, seqs)
         seqs2 = st.read_sequences_from_fasta_file(tmp_path)
     finally:
         os.remove(tmp_path)
     # assertEquals is a deprecated alias of assertEqual.
     self.assertEqual(1, len(seqs2))
     self.assertEqual(seqs[0][0], seqs2[0][0])
     self.assertEqual(seqs[0][1], seqs2[0][1])
 def test_write_sequences_to_fasta_file_empty_seqs(self):
     """Ensure that only non-empty sequences are written to a FASTA file.

     Two entries are written, one of them with an empty sequence string;
     reading the file back must yield only the non-empty entry.
     """
     import os
     import tempfile

     seqs = [['seq1', 'TATATA'], ['seq2', '']]
     # Use a unique scratch file instead of a fixed /tmp path, so tests that
     # run concurrently (or on platforms without /tmp) cannot collide.
     handle, tmp_path = tempfile.mkstemp(suffix='.fa')
     os.close(handle)
     try:
         with open(tmp_path, 'w') as outputfile:
             st.write_sequences_to_fasta_file(outputfile, seqs)
         seqs2 = st.read_sequences_from_fasta_file(tmp_path)
     finally:
         os.remove(tmp_path)
     # assertEquals is a deprecated alias of assertEqual.
     self.assertEqual(1, len(seqs2))
     self.assertEqual(seqs[0][0], seqs2[0][0])
     self.assertEqual(seqs[0][1], seqs2[0][1])
示例#5
0
 def test_read_sequences_from_fasta_file(self):
     """Test reading sequences from a file in FASTA format.

     Checks the number of entries read from 'testdata/fasta_test.fa' and
     the name and sequence of the entry at index 6.
     """
     # The fixture was previously also read into an unused local string;
     # that dead read has been dropped.
     seqs = st.read_sequences_from_fasta_file('testdata/fasta_test.fa')
     # assertEquals is a deprecated alias of assertEqual.
     self.assertEqual(7, len(seqs))
     expected_seq = ("CCGAGGAAGACAGACGCAATTTCACATCGAACTCGTGTACGGCATCCTCT" +
                     "TTATTGCCGGCTTTGCTTTTCTCGTCTTCCGCGTCGATCCCCGGGTGGCA" +
                     "GCGTTCGAAGGAGGTCTCGTCATTGGTTACTTATTGAGAATTTAGGGGAA" +
                     "AATGTCAATCTACGAGTGGA")
     self.assertEqual('VNG6198H', seqs[6][0])
     self.assertEqual(expected_seq, seqs[6][1])
 def test_read_sequences_from_fasta_file(self):
     """Test reading sequences from a file in FASTA format.

     Checks the number of entries read from 'testdata/fasta_test.fa' and
     the name and sequence of the entry at index 6.
     """
     # The fixture was previously also read into an unused local string;
     # that dead read has been dropped.
     seqs = st.read_sequences_from_fasta_file('testdata/fasta_test.fa')
     # assertEquals is a deprecated alias of assertEqual.
     self.assertEqual(7, len(seqs))
     expected_seq = ("CCGAGGAAGACAGACGCAATTTCACATCGAACTCGTGTACGGCATCCTCT" +
                     "TTATTGCCGGCTTTGCTTTTCTCGTCTTCCGCGTCGATCCCCGGGTGGCA" +
                     "GCGTTCGAAGGAGGTCTCGTCATTGGTTACTTATTGAGAATTTAGGGGAA" +
                     "AATGTCAATCTACGAGTGGA")
     self.assertEqual('VNG6198H', seqs[6][0])
     self.assertEqual(expected_seq, seqs[6][1])
示例#7
0
def make_sequences( genome_fasta_file, gene_features_file,
        outfile='sequences.csv', distance={'upstream':300,'downstream':100}, from_end=False, fasta=False ):
    """Extract one sequence per gene feature and write them all to `outfile`.

    Parameters:
      genome_fasta_file  -- path handed to st.read_sequences_from_fasta_file;
                            its (name, sequence) pairs are looked up by contig.
      gene_features_file -- path handed to st.read_features_from_file.
      outfile            -- path of the file to write (opened in 'w' mode).
      distance           -- dict with the keys 'upstream' and 'downstream'.
                            It is only read, never mutated, so the shared
                            default dict is safe here.
      from_end           -- if true, st.extract_downstream is called with
                            (upstream, downstream); otherwise
                            st.extract_upstream is called with
                            (-downstream, upstream).
      fasta              -- if true, write via
                            st.write_sequences_to_fasta_file; otherwise write
                            one 'id,sequence' line per feature.

    Returns None. Raises KeyError if a feature refers to a contig that is
    not present in the genome FASTA file.
    """
    if from_end:
        window = (distance['upstream'], distance['downstream'])
        extract = st.extract_downstream
    else:
        # WARNING: as of 2012-03-22, the st.extract functions used flipped
        # distances! E.g. distance[1] is the UPSTREAM distance and distance[0]
        # is the DOWNSTREAM one. CHECK YOUR SEQUENCES after running this!
        # Also, a negative number is expected for DOWNSTREAM. So (-100, 300)
        # must be passed to st.extract_upstream in order to get a sequence
        # from 300 upstream to 100 downstream.
        window = (-1 * distance['downstream'], distance['upstream'])
        extract = st.extract_upstream

    # The FASTA reader returns (name, sequence) pairs; index them by name.
    contig_dict = dict(st.read_sequences_from_fasta_file(genome_fasta_file))
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print('loaded %i contigs' % len(contig_dict))
    print(','.join(['%s: %ibp' % (name, len(seq))
                    for name, seq in contig_dict.items()]))

    features = st.read_features_from_file(gene_features_file)
    print('loaded %i features' % len(features))

    sequences = []
    for feature in features.values():
        location = feature.location()
        # The extract functions return a pair; element [1] is the sequence.
        extracted = extract(contig_dict[location.contig], location, window)
        sequences.append((feature.id(), extracted[1]))

    # 'with' guarantees the output file is closed even if writing fails.
    with open(outfile, 'w') as outf:
        if fasta:
            st.write_sequences_to_fasta_file(outf, sequences)
        else:
            sep = ','
            for feature_id, seq in sequences:
                outf.write('%s%s%s\n' % (feature_id, sep, seq))
示例#8
0
def make_sequences( genome_fasta_file, gene_features_file,
        outfile='sequences.csv', distance={'upstream':300,'downstream':100}, from_end=False, fasta=False ):
    """Extract one sequence per gene feature and write them all to `outfile`.

    Parameters:
      genome_fasta_file  -- path handed to st.read_sequences_from_fasta_file;
                            its (name, sequence) pairs are looked up by contig.
      gene_features_file -- path handed to st.read_features_from_file.
      outfile            -- path of the file to write (opened in 'w' mode).
      distance           -- dict with the keys 'upstream' and 'downstream'.
                            It is only read, never mutated, so the shared
                            default dict is safe here.
      from_end           -- if true, st.extract_downstream is called with
                            (upstream, downstream); otherwise
                            st.extract_upstream is called with
                            (-downstream, upstream).
      fasta              -- if true, write via
                            st.write_sequences_to_fasta_file; otherwise write
                            one 'id,sequence' line per feature.

    Returns None. Raises KeyError if a feature refers to a contig that is
    not present in the genome FASTA file.
    """
    if from_end:
        window = (distance['upstream'], distance['downstream'])
        extract = st.extract_downstream
    else:
        # WARNING: as of 2012-03-22, the st.extract functions used flipped
        # distances! E.g. distance[1] is the UPSTREAM distance and distance[0]
        # is the DOWNSTREAM one. CHECK YOUR SEQUENCES after running this!
        # Also, a negative number is expected for DOWNSTREAM. So (-100, 300)
        # must be passed to st.extract_upstream in order to get a sequence
        # from 300 upstream to 100 downstream.
        window = (-1 * distance['downstream'], distance['upstream'])
        extract = st.extract_upstream

    # The FASTA reader returns (name, sequence) pairs; index them by name.
    contig_dict = dict(st.read_sequences_from_fasta_file(genome_fasta_file))
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print('loaded %i contigs' % len(contig_dict))
    print(','.join(['%s: %ibp' % (name, len(seq))
                    for name, seq in contig_dict.items()]))

    features = st.read_features_from_file(gene_features_file)
    print('loaded %i features' % len(features))

    sequences = []
    for feature in features.values():
        # In this variant location and id are plain attributes, not methods.
        location = feature.location
        # The extract functions return a pair; element [1] is the sequence.
        extracted = extract(contig_dict[location.contig], location, window)
        sequences.append((feature.id, extracted[1]))

    # 'with' guarantees the output file is closed even if writing fails.
    with open(outfile, 'w') as outf:
        if fasta:
            st.write_sequences_to_fasta_file(outf, sequences)
        else:
            sep = ','
            for feature_id, seq in sequences:
                outf.write('%s%s%s\n' % (feature_id, sep, seq))
示例#9
0
                    htmlfile.write('<h3>Interesting motifs (not highest-ranking) can also be:</h3>')
                    has_advice = False

                if len(buckets[i]) > 0:
                    pat_index = 0
                    seekpat = buckets[i][pat_index]
                    if seekpat.score > 0:
                        pat_index += 1
                        if seekpat.usable():
                            output_pattern(htmlfile, seekpat, seqs, reverse)
        htmlfile.write("</body></html>")


if __name__ == '__main__':
    # Script entry point: the first command-line argument names the FASTA
    # file; the mix file is looked up at '<fasta-file>.mix'.
    print("Adviser Python")
    if len(sys.argv) > 1:
        basename = sys.argv[1]
        # NOTE(review): the usage text advertises an optional [S] argument,
        # but nothing here reads it and reverse is always False -- confirm.
        reverse = False
        print("Processing '%s'" % basename)
        seqs = st.read_sequences_from_fasta_file(basename)
        mixfile = '%s.mix' % basename
        print("Running adviser on '%s'" % mixfile)

        # Read the mix entries, group them into buckets, relate and search
        # the patterns, then write out the results.
        mix_entries = read_mixfile(mixfile)
        buckets = make_buckets(mix_entries)
        compute_pattern_relationships(buckets, reverse)
        search_patterns(buckets, seqs, reverse)
        output_results(seqs, mix_entries, buckets, basename, reverse)
    else:
        print("usage: python adviser.py <fasta-file> [S]")