# File: main.py  Project: kpj/DictyPy
def apply_procedure(Classifier):
    """Parse the classifier's data file and group the parsed genes.

    ``Classifier.data_file`` is read with ``FastaParser``; the parsed genes
    are then passed to ``group_genes`` together with ``Classifier`` and the
    output name ``'grouped_genes.json'``.
    """
    parsed_genes = FastaParser(Classifier.data_file).parse()
    group_genes(Classifier, parsed_genes, 'grouped_genes.json')

def main():
    """ Read and extract data

    Parses the file named by the first command-line argument with
    ``FastaParser``.
    """
    farser = FastaParser(sys.argv[1])
    genes = farser.parse()

def main():
    """ Read and extract data

    Parses the file named by the first command-line argument with
    ``FastaParser``, loads expression levels from
    ``data/gene_expression.csv`` and groups the genes by those levels.
    """
    farser = FastaParser(sys.argv[1])
    genes = farser.parse()

    exprs = extract_expression_levels('data/gene_expression.csv')
    groups = group_expression_levels(genes, exprs)
def main():
    """ Generate overview

    Parses the file named by the first command-line argument with
    ``FastaParser``, computes the average codon usage over all parsed genes
    and writes it to ``results/plain_codon_usage_table.txt``.
    """
    farser = FastaParser(sys.argv[1])
    genes = farser.parse()

    dnana = DNAAnalyzer(strict=False)
    codu = dnana.get_avg_codon_usage(genes)

    with open('results/plain_codon_usage_table.txt', 'w') as fd:
        output_data(codu, fd)
def main():
    """ Generate overview

    Parses the file named by the first command-line argument with
    ``FastaParser``, obtains the codon usage (and a plot label) for the
    ``'strong'`` selection, looks up the positions of the rare codons in the
    genes and plots them.
    """
    farser = FastaParser(sys.argv[1])
    genes = farser.parse()

    codu, label = get_codu(genes, 'strong')
    rarest = get_rare_codons(codu)
    pos = get_codon_positions(rarest.values(), genes)

    plot_positions(pos.values(), label)
def main():
    """ Generate overview

    Parses the file named by the first command-line argument with
    ``FastaParser``. If a second argument is given it is passed to
    ``extract_expression_levels`` and the genes are grouped by expression
    level; otherwise all genes form a single group named ``'all'``. The
    per-group codon usage is written to ``results/codon_usage_table.txt``.
    """
    farser = FastaParser(sys.argv[1])
    genes = farser.parse()

    exprs = extract_expression_levels(sys.argv[2]) if len(
        sys.argv) == 3 else None
    if exprs is None:
        # No expression data supplied: treat every gene as one group.
        groups = {'all': genes}
    else:
        # Without this branch the fallback above was always overwritten and
        # group_expression_levels was called with exprs=None.
        groups = group_expression_levels(genes, exprs)

    group_codu = get_codu_per_group(groups)
    with open('results/codon_usage_table.txt', 'w') as fd:
        output_data(group_codu, fd)
def main():
    """Align the sequences of the configured input file and store the result.

    Reads the run options via ``options()``, loads the sequence list from
    ``opt.file_path`` with ``FastaParser``, aligns it with
    ``SequenceAligner`` and writes the result through
    ``write_results_to_file`` into the directory prepared by
    ``make_storage_path``. Progress, the elapsed time in minutes and the
    result path are printed to stdout.
    """
    opt = options()
    fp = FastaParser(opt.file_path)
    sequence_list = fp.get_sequence_list()
    # print(...) with a single argument produces the same output on Python 2
    # and Python 3; the bare print statement is a syntax error on Python 3.
    print("#" * 10)
    print("#" * 10)
    ss = datetime.now()
    print("Starting...")
    sa = SequenceAligner(sequence_list)
    results = sa.get_aligned_sequence()
    print("Process complete. Results on the way....")
    storage_path = make_storage_path(opt.storage_path)
    results_path = write_results_to_file(
        results, opt.results_name, storage_path)
    elapsed = datetime.now() - ss
    print("Job completed after {} min".format(elapsed.total_seconds()/60))
    print("Sequence Alignment results stored --> {}".format(results_path))
    print("#" * 10)
    print("#" * 10)
import os

# Step 2: One should be able to make new instances from this class
# In fact, one should be able to make as many new objects as one wants
# Here we will just make two in this example.
# The class initialization should take one argument: the path of the fasta
# file to parse.
# Use the included fasta example files, or it won't work.
# Both example files are expected below the user's home directory.
# os.getenv() needs the name of a variable and raises TypeError when called
# without one, so the home directory is resolved with expanduser("~") instead.
home_dir = os.path.expanduser("~")
all_contigs_file_path = (
    home_dir
    + "/Desktop/Python_Course/python_ebc_2016/day_05/exercise/all_contigs.fasta")
predicted_genes_file_path = (
    home_dir
    + "/Desktop/Python_Course/python_ebc_2016/day_05/exercise/predicted_genes.fasta")

contigs = FastaParser(all_contigs_file_path)
genes = FastaParser(predicted_genes_file_path)

# print(genes.__getitem__())

# Step 3: What if we give a path, but there is no file there?
# Then your class should complain! It must throw an exception
# of type IOError. To check this, we will use a function from
# the pytest module. It's like assert but for Exceptions.
# If you don't have pytest just install it with "pip install --user pytest"
import pytest
with pytest.raises(IOError):
    not_found = FastaParser('/file_does_not_exist.fasta')

# Step 4: What if we don't give a file path at all when making
# a new instance ? Then your class should complain !
# Once the exercise is completed, upload this file to your GitHub repository
# called "python_homework", in a directory called "day5".

    """ Try to match given pattern in all fasta entries specified in given file
    def match(record):
        """Return ``(record, match_object)`` pairs for every matching pattern.

        Each entry of the enclosing ``patterns`` is searched for in the
        record's sequence (converted with ``str``). Patterns that do not
        match are skipped; the order of ``patterns`` is preserved.
        """
        sequence = str(record.seq)
        hits = ((record, re.search(pattern, sequence)) for pattern in patterns)
        return [(rec, found) for rec, found in hits if found]

    # parse genome data
    farser = FastaParser(data_file)
    genes = farser.parse()

    print('Matching \n > "%s"' % '"\n > "'.join(patterns))

    pbar = ProgressBar(maxval=len(genes))

    # generate result
    res = []
    for i, record in enumerate(genes):
        foo = match(record)
        if foo: res.extend(foo)


    # sort result in natural order
    def natural_keys(text):
        """Split ``text`` into a key for natural (human) ordering.

        Runs of digits become ``int`` values and everything else stays a
        string, e.g. ``"chr10:5"`` -> ``['chr', 10, ':', 5, '']``.
        """
        def atoi(chunk):
            return int(chunk) if chunk.isdigit() else chunk

        # Raw string: '\d' in a plain literal is an invalid escape sequence
        # and triggers a warning on current Python versions.
        return [atoi(c) for c in re.split(r'(\d+)', text)]

    res = sorted(res,
                 key=lambda e: natural_keys(get_position(e[0].description)))

    # save result
    with open('results/regex_lookup.fa',
              'w') as fd, open('results/regex_lookup_fragments.fa',
                               'w') as fd_frag:
        for record, match in res:
            seqs = match.groups()

            name = get_gene_name(record.description)
            pos = get_position(record.description)

            for seq in seqs:
                # save full match
                rec = SeqRecord(Seq(seq, IUPAC.ambiguous_dna),
                                description=record.description + '|' +
                SeqIO.write(rec, fd, 'fasta')

                # save match fragments
                for i, s in enumerate(get_subsequences(seq)):
                    rec_frag = SeqRecord(Seq(s, IUPAC.ambiguous_dna),
                                         description=record.description + '|' +
                                         str(match.span()) + '|' +
                                         ('fragment #%d' % i))

                    SeqIO.write(rec_frag, fd_frag, 'fasta')
# File: gene_stats.py  Project: kpj/DictyPy
def main():
    """Parse the ``dicty_primary_cds`` input with ``FastaParser``."""
    genes = FastaParser('dicty_primary_cds').parse()

Written by Lucas Sinclair.

