Пример #1
0
def pdb_mapgaps(mapgaps_profile, pdb_fname):
    """Align a PDB structure to a MAPGAPS profile.

    Runs ``run_gaps`` with `mapgaps_profile` on the sequence file obtained
    from `pdb_fname`, reads the resulting ``<seqfile>_aln.cma`` alignment and
    uses the sequence selected by `choose_best_aligned` as the reference.

    Returns a 3-tuple ``(ref_record, resnums, inserts)``: the record of the
    reference sequence, followed by the two values (aligned residue numbers
    and inserts) computed for it by `aln_resnums_inserts`.
    """
    from biocma import cma

    with read_pdb_seq(pdb_fname) as (seqfname, seqs):
        call_quiet('run_gaps', mapgaps_profile, seqfname)
    # run_gaps leaves its alignment next to the input, suffixed '_aln.cma'.
    alignment = cma.read(seqfname + '_aln.cma')
    aligned = alignment['sequences']
    hits = {entry['id']: entry['seq'] for entry in aligned}
    head_lengths = {entry['id']: entry['head_len'] for entry in aligned}
    ref_id, ref_aln = choose_best_aligned(hits)
    records_by_id = SeqIO.to_dict(seqs)
    ref_record = records_by_id[ref_id]
    # Numbering offset: the record's annotated start plus the length of the
    # head preceding the aligned region, minus one.
    start = ref_record.annotations['start']
    offset = start + head_lengths[ref_id] - 1
    resnums, inserts = aln_resnums_inserts(ref_record, ref_aln, offset)
    return ref_record, resnums, inserts
Пример #2
0
 def test_get_inserts(self):
     """Check utils.get_inserts against known insert sequences.

     Verifies that one entry is returned per sequence in the example CMA
     block and, for the second and the last sequence, that slicing the
     full-length FASTA sequence with each reported (start, end) range
     (1-based start, inclusive end) yields the expected insert text,
     compared in lower case.
     """
     block = cma.read(EX_CMA)
     inserts = utils.get_inserts(block)
     self.assertEqual(len(inserts), len(block['sequences']))
     fullseqs = SeqIO.to_dict(SeqIO.parse(EX_FASTA, 'fasta'))
     for sequence, targets in (
         (block['sequences'][1], ['n', 't', 'fyklyllkkydsntlfnv']),
         (block['sequences'][-1], ['altkl', 'nkl',
                                   'siptvgfskdgdrlqemykasvcsyteecqg',
                                   'ndndgeylldge', 'eh', 'p',
                                   'epecancneedknmsennhkkdskhkgdsnhksdsnhksdsnhksdsnhksgsnhksdcnhksgsnhksdsnhqsdcnhmsdhnhksdnnhksdsshksdsshksdsshksgsnhksdnnhksdsshksgsnhksdhnhksdsnhksdsnhknesnhknesnhknesnhknesnhknesnhkndsnhksdsnhmsdhnhksdnnhksdhnhmsdhnhksdnnhksdnnhmsdhnhksdnnhksdnnhksdnnhksdhnhmsdhnhksdnnhksdhnhksdsnhmsdhnhmsdhnhksdhnhksdhnhksdnnhksdsnhksdsnhksdhnhksdsnhmsdhnhmsdhnhksdhnhksdnnhksdsnhksdsnhksdhnhksdsnhmsdhnhmsdhnhmsdhnhksdhnhksdnnhksdsnhksdsnhksdsnhksdhnhksdhkhmsdnnhksdnnhksdhnhksdnnhksdhnhksdsnhksdsnhksdsnhksdsnhksdnnhksdhnhnsdsnhmsdhnhksdhnhksdhnhksdnnhksdnnhksdhnhksdhkknnnnnkdnknddnddsdasdavhediellesysdlnkfnemlteqln',
                                   'vt', 'edtrv', 'pmythnl', 'g',
                                   'sfqscqpcv', 'iirehiklkidnpfehlstitdqee',
                                   'yfd', 'ra', 'fqlak'])):
         full = fullseqs[sequence['id']]
         # Convert the full-length sequence to a string once per record
         # rather than once per insert range.
         full_str = str(full.seq)
         ins_ranges = [full_str[start-1:end]
                       for start, end in inserts[sequence['id']]]
         # Single-argument print: same output as the old Python 2 print
         # statement, and also valid syntax on Python 3.
         print("%s %s" % (sequence['id'], ins_ranges))
         self.assertEqual(len(ins_ranges), len(targets))
         for ins, tgt in zip(ins_ranges, targets):
             self.assertEqual(ins.lower(), tgt)
Пример #3
0
def pdb_mapgaps(mapgaps_profile, pdb_fname):
    """Align a PDB structure to a MAPGAPS profile.

    Invokes the external ``run_gaps`` program (raising
    ``subprocess.CalledProcessError`` on a non-zero exit status) on the
    sequence file obtained from `pdb_fname`, then reads the
    ``<seqfile>_aln.cma`` alignment and takes the sequence selected by
    `choose_best_aligned` as the reference.

    Returns a 3-tuple ``(ref_record, resnums, inserts)``: the record of the
    reference sequence, followed by the two values (aligned residue numbers
    and inserts) computed for it by `aln_resnums_inserts`.
    """
    from biocma import cma

    with read_pdb_seq(pdb_fname) as (seqfname, seqs):
        command = ['run_gaps', mapgaps_profile, seqfname]
        subprocess.check_call(command)
    cma_path = seqfname + '_aln.cma'
    pdb_cma = cma.read(cma_path)
    # Index aligned sequences and their head lengths by sequence ID.
    hits, head_lengths = {}, {}
    for row in pdb_cma['sequences']:
        seq_id = row['id']
        hits[seq_id] = row['seq']
        head_lengths[seq_id] = row['head_len']
    ref_id, ref_aln = choose_best_aligned(hits)
    ref_record = SeqIO.to_dict(seqs)[ref_id]
    # Numbering offset: annotated start of the record plus the length of
    # the head preceding the aligned region, minus one.
    first_resnum = ref_record.annotations['start']
    offset = first_resnum + head_lengths[ref_id] - 1
    resnums, inserts = aln_resnums_inserts(ref_record, ref_aln, offset)
    return ref_record, resnums, inserts
Пример #4
0
 def test_read(self):
     """Check that cma.read parses the example CMA file.

     The example is expected to hold 24 sequences, and the block's
     'query_length' must equal the 'length' of its first sequence.
     """
     parsed = cma.read(EX_CMA)
     records = parsed['sequences']
     self.assertEqual(len(records), 24)
     first_record = records[0]
     self.assertEqual(parsed['query_length'], first_record['length'])
Пример #5
0
                graph.add((ruri, MSA.deleted_aln_pos, Literal(i)))
            else:
                graph.add((ruri, RDF.type, MSA.aligned_residue))
                graph.add((ruri, MSA.aln_pos, Literal(i)))
                if unquote(acc) in dedup_eqv[i]:
                    graph.add((ruri, MSA.native_pos, Literal(dedup_eqv[i][unquote(acc)])))
                else:
                    print "shouldn't happen" #deletions taken care of above
                graph.add((ruri, MSA.native_residue, Literal(r)))
        bar.next()
    bar.finish()	
    graph.serialize(destination=outfile, format='pretty-xml')
    return

if __name__ == "__main__":
    # Command-line entry point: load an alignment (CMA, or any other
    # extension via manipulate_fasta) and populate the ontology graph.
    import argparse

    parser = argparse.ArgumentParser(description = 'Populate the Multiple Sequence Alignment Ontology')
    parser.add_argument('infile', metavar='infile', type=str, help='input aligned sequences')
    parser.add_argument('name', metavar='name', type=str, help='Graph name')
    parser.add_argument('--namespace', metavar='namespace', type=str, default='http://localhost/msaont#', help='Graph namespace')
    # type=float keeps a user-supplied value consistent with the float
    # default; without it argparse would pass the value through as a str.
    parser.add_argument('-p', dest='prop', action='store', type=float, default=0.25, help='proportion of inserts allowed in an aligned residue')
    parser.add_argument('-o', dest='outfile', action='store', default='out.rdf', help='outfile')
    args = parser.parse_args()
    # Choose the reader from the file extension: '.cma' files are read
    # directly; everything else goes through manipulate_fasta.
    ext = args.infile.split('.')[-1]
    if ext != 'cma':
        seqs = manipulate_fasta(args.infile, args.prop)
    else:
        seqs = cma.read(args.infile)
    populate(seqs, args.name, args.outfile, args.namespace)