Пример #1
0
 def _fn(x):
     '''Build a record dict from a row holding a symbol and a description.

     ``x`` is indexed positionally: ``x[0]`` is the symbol field and ``x[1]``
     the description field; both must be strings. A field whose stripped
     value is empty or the two-character marker backslash + N is skipped.

     The description is passed through ``SubStr(..., '', ' [Source:')``
     (presumably keeping the text before ' [Source:' -- confirm against the
     helper) and stored under 'name' only when the result is non-empty.

     Returns a dict with zero or more of the keys 'symbol' and 'name'.
     '''
     missing = ('', '\\N')
     record = {}

     symbol = x[0].strip()
     if symbol not in missing:
         record['symbol'] = symbol

     description = x[1].strip()
     if description not in missing:
         label = SubStr(description, '', ' [Source:').strip()
         if label:
             record['name'] = label

     return record
Пример #2
0
 def _fn(x):
     '''Build a record dict from a row of (taxid, symbol, description).

     ``x`` is indexed positionally: ``x[0]`` is converted with ``int()`` and
     always stored under 'taxid'; ``x[1]`` is the symbol field and ``x[2]``
     the description field, both strings. A symbol/description whose stripped
     value is empty or the two-character marker backslash + N is skipped.

     The description is passed through ``SubStr(..., '', ' [Source:')``
     (presumably keeping the text before ' [Source:' -- confirm against the
     helper) and stored under 'name' only when the result is non-empty.

     Returns a dict with 'taxid' and, when present, 'symbol' and 'name'.

     Raises:
         ValueError: if ``x[0]`` cannot be converted to an integer.
     '''
     # The unused function-level ``import logging`` was dropped: nothing in
     # this function logs, and the import statement ran on every call.
     null_values = ('', '\\N')
     out = {'taxid': int(x[0])}

     symbol = x[1].strip()
     if symbol not in null_values:
         out['symbol'] = symbol

     description = x[2].strip()
     if description not in null_values:
         _name = SubStr(description, '', ' [Source:').strip()
         if _name:
             out['name'] = _name

     return out
Пример #3
0
 def get_geneid(self, rec):
     '''Return the GeneID of a record as an integer, or None if not found.

     ``rec`` must provide ``features`` (an iterable of objects with ``type``
     and ``qualifiers`` attributes) and ``id``. The record is required to
     contain exactly one feature whose type is 'gene'; the GeneID is taken
     from that feature's 'db_xref' qualifier, from the single entry that
     starts with 'GeneID:'. None is returned when the qualifier is missing
     or empty, or when it holds zero or several 'GeneID:' entries.

     Raises:
         AssertionError: if ``rec`` does not have exactly one 'gene' feature.
     '''
     gene_features = [feat for feat in rec.features if feat.type == 'gene']

     # Historical note: record NR_001526.1 once triggered this check; NCBI
     # has since fixed it (https://twitter.com/kdpru/status/474673626730741761).
     #
     # Raised explicitly instead of via an ``assert`` statement so the check
     # still runs under ``python -O`` (which strips asserts). AssertionError
     # is kept so existing callers observe the same exception and message.
     if len(gene_features) != 1:
         raise AssertionError(
             '#: {}, id: {}'.format(len(gene_features), rec.id)
         )

     db_xref = gene_features[0].qualifiers.get('db_xref', None)
     if not db_xref:
         return None

     gene_refs = [ref for ref in db_xref if ref.startswith('GeneID:')]
     if len(gene_refs) != 1:
         # Missing or ambiguous GeneID cross-reference.
         return None

     return int(SubStr(gene_refs[0], 'GeneID:'))
Пример #4
0
 def get_summary(self, rec):
     '''Return the summary text of a record, or '' when there is none.

     The text is read from ``rec.annotations['comment']``. When the comment
     is missing, empty, or does not contain 'Summary:', '' is returned.
     Otherwise the part selected by ``SubStr(comment, 'Summary: ')`` has its
     newlines replaced by spaces, is cut at each terminator marker found in
     it (checked in the order listed below), and is stripped of surrounding
     whitespace.
     '''
     comment = rec.annotations.get('comment', None)
     if not comment or comment.find('Summary:') == -1:
         return ''

     # Markers left disabled (not used as terminators):
     #   '[provided by RefSeq].', '[provided by ', '[supplied by ', '[RGD'
     terminators = (
         '##',
         'COMPLETENESS:',
         'Sequence Note:',
         'Transcript Variant:',
         'CCDS Note:',
         'Publication Note:',
         ' ' * 10,  # a run of ten spaces
     )

     text = SubStr(comment, 'Summary: ').replace('\n', ' ')
     # Order matters: each cut is applied to the already-shortened text.
     for marker in terminators:
         if marker in text:
             text = SubStr(text, end_string=marker)

     return text.strip()