示例#1
0
 def export_active_sequences(self):
     """
     Export RNA sequences with active cross-references.

     Every distinct UPI returned by the active sequences query is written
     to the `seq_active` file. The first `self.examples` sequences are
     also written to `seq_example`. Each sequence goes to `nhmmer_db`
     when it consists solely of the characters accepted by `iupac_chars`
     and to `nhmmer_db_excluded` otherwise. For every distinct taxid among
     the xrefs with `deleted='N'`, a record with a `>{upi}_{taxid}` header
     is written to `species_specific`.

     In test mode (`self.test`) the export stops once `self.test_entries`
     sequences have been written. A `psycopg2.Error` is logged through
     `self.log_database_error` and terminates the process with status 1.
     """
     # Loop invariants: build them once instead of once per sequence.
     iupac_chars = re.compile('^[ABCDGHKMNRSTVWXYU]+$', re.IGNORECASE)
     fasta_header = re.compile(r'^>.+?\n')
     template = ">{upi}_{taxid} {description}\n{sequence}"
     try:
         previous_upi = ''
         # Count written sequences, not result rows: rows repeating the
         # previous UPI are skipped, so a row index would make both the
         # test-mode limit and the example quota fall short.
         exported = 0
         with cursor() as cur:
             cur.execute(self.get_active_sequences_sql())
             for result in cur:
                 if self.test and exported >= self.test_entries:
                     return
                 # NOTE(review): only consecutive duplicates are skipped,
                 # which presumably relies on the query ordering rows by
                 # upi - confirm against get_active_sequences_sql().
                 if result['upi'] == previous_upi:
                     continue
                 previous_upi = result['upi']
                 rna = Rna(upi=result['upi'],
                           seq_short=result['seq_short'],
                           seq_long=result['seq_long'])
                 fasta = rna.get_sequence_fasta()
                 self.filehandles['seq_active'].write(fasta)
                 if exported < self.examples:
                     self.filehandles['seq_example'].write(fasta)
                 if iupac_chars.match(rna.get_sequence()):
                     self.filehandles['nhmmer_db'].write(fasta)
                 else:
                     self.filehandles['nhmmer_db_excluded'].write(fasta)
                 # species specific identifiers
                 sequence = fasta_header.sub('', fasta)  # delete first line
                 queryset = rna.xrefs.filter(deleted='N')
                 for taxid in set(queryset.values_list('taxid', flat=True)):
                     description = rna.get_description(taxid=taxid)
                     species_specific_fasta = template.format(
                         upi=result['upi'],
                         taxid=taxid,
                         sequence=sequence,
                         description=description)
                     self.filehandles['species_specific'].write(
                         species_specific_fasta)
                 exported += 1
     except psycopg2.Error as exc:
         self.log_database_error(exc)
         sys.exit(1)
示例#2
0
 def export_active_sequences(self):
     """
     Export RNA sequences with active cross-references.

     Every distinct UPI returned by the active sequences query is written
     to the `seq_active` file. The first `self.examples` sequences are
     also written to `seq_example`. Each sequence goes to `nhmmer_db`
     when it consists solely of the characters accepted by `iupac_chars`
     and to `nhmmer_db_excluded` otherwise. For every distinct taxid among
     the xrefs with `deleted='N'`, a record with a `>{upi}_{taxid}` header
     is written to `species_specific`.

     In test mode (`self.test`) the export stops once `self.test_entries`
     sequences have been written. A `psycopg2.Error` is logged through
     `self.log_database_error` and terminates the process with status 1.
     """
     # Loop invariants: build them once instead of once per sequence.
     iupac_chars = re.compile('^[ABCDGHKMNRSTVWXYU]+$', re.IGNORECASE)
     fasta_header = re.compile(r'^>.+?\n')
     template = ">{upi}_{taxid} {description}\n{sequence}"
     try:
         previous_upi = ''
         # Count written sequences, not result rows: rows repeating the
         # previous UPI are skipped, so a row index would make both the
         # test-mode limit and the example quota fall short.
         exported = 0
         with cursor() as cur:
             cur.execute(self.get_active_sequences_sql())
             for result in cur:
                 if self.test and exported >= self.test_entries:
                     return
                 # NOTE(review): only consecutive duplicates are skipped,
                 # which presumably relies on the query ordering rows by
                 # upi - confirm against get_active_sequences_sql().
                 if result['upi'] == previous_upi:
                     continue
                 previous_upi = result['upi']
                 rna = Rna(upi=result['upi'],
                           seq_short=result['seq_short'],
                           seq_long=result['seq_long'])
                 fasta = rna.get_sequence_fasta()
                 self.filehandles['seq_active'].write(fasta)
                 if exported < self.examples:
                     self.filehandles['seq_example'].write(fasta)
                 if iupac_chars.match(rna.get_sequence()):
                     self.filehandles['nhmmer_db'].write(fasta)
                 else:
                     self.filehandles['nhmmer_db_excluded'].write(fasta)
                 # species specific identifiers
                 sequence = fasta_header.sub('', fasta)  # delete first line
                 queryset = rna.xrefs.filter(deleted='N')
                 for taxid in set(queryset.values_list('taxid', flat=True)):
                     description = rna.get_description(taxid=taxid)
                     species_specific_fasta = template.format(
                         upi=result['upi'],
                         taxid=taxid,
                         sequence=sequence,
                         description=description)
                     self.filehandles['species_specific'].write(
                         species_specific_fasta)
                 exported += 1
     except psycopg2.Error as exc:
         self.log_database_error(exc)
         sys.exit(1)
示例#3
0
        def process_inactive_sequences():
            """
            Create inactive.fasta file.

            Iterates over `self.cursor` and writes one FASTA record per
            distinct UPI to the `seq_inactive` file handle. In test mode
            (`self.test`) at most `self.test_entries` records are written.
            """
            written = 0
            previous_upi = ''

            for row in self.cursor:
                # `>=` stops after exactly `test_entries` records; the
                # previous `>` comparison let one extra record through.
                if self.test and written >= self.test_entries:
                    return
                result = self.row_to_dict(row)
                # Only a repeat of the immediately preceding UPI is skipped.
                # NOTE(review): presumably the query orders rows by upi -
                # confirm.
                if result['upi'] == previous_upi:
                    continue
                previous_upi = result['upi']
                rna = Rna(upi=result['upi'],
                          seq_short=result['seq_short'],
                          seq_long=read_lob(result['seq_long']))
                fasta = rna.get_sequence_fasta()
                self.filehandles['seq_inactive'].write(fasta)
                written += 1
示例#4
0
        def process_inactive_sequences():
            """
            Create inactive.fasta file.

            Iterates over `self.cursor` and writes one FASTA record per
            distinct UPI to the `seq_inactive` file handle. In test mode
            (`self.test`) at most `self.test_entries` records are written.
            """
            written = 0
            previous_upi = ''

            for row in self.cursor:
                # `>=` stops after exactly `test_entries` records; the
                # previous `>` comparison let one extra record through.
                if self.test and written >= self.test_entries:
                    return
                result = self.row_to_dict(row)
                # Only a repeat of the immediately preceding UPI is skipped.
                # NOTE(review): presumably the query orders rows by upi -
                # confirm.
                if result['upi'] == previous_upi:
                    continue
                previous_upi = result['upi']
                rna = Rna(upi=result['upi'],
                          seq_short=result['seq_short'],
                          seq_long=read_lob(result['seq_long']))
                fasta = rna.get_sequence_fasta()
                self.filehandles['seq_inactive'].write(fasta)
                written += 1
示例#5
0
 def export_inactive_sequences(self):
     """
     Export RNA sequences without active cross-references.

     Every distinct UPI returned by the inactive sequences query is
     written as a FASTA record to the `seq_inactive` file handle.

     In test mode (`self.test`) the export stops once `self.test_entries`
     sequences have been written. A `psycopg2.Error` is logged through
     `self.log_database_error` and terminates the process with status 1.
     """
     try:
         previous_upi = ''
         # Count written sequences, not result rows: rows repeating the
         # previous UPI are skipped and must not use up the test quota.
         exported = 0
         with cursor() as cur:
             cur.execute(self.get_inactive_sequences_sql())
             for result in cur:
                 # `>=` stops after exactly `test_entries` sequences; the
                 # previous `>` comparison allowed one extra.
                 if self.test and exported >= self.test_entries:
                     return
                 # NOTE(review): only consecutive duplicates are skipped,
                 # which presumably relies on the query ordering rows by
                 # upi - confirm against get_inactive_sequences_sql().
                 if result['upi'] == previous_upi:
                     continue
                 previous_upi = result['upi']
                 rna = Rna(upi=result['upi'],
                           seq_short=result['seq_short'],
                           seq_long=result['seq_long'])
                 fasta = rna.get_sequence_fasta()
                 self.filehandles['seq_inactive'].write(fasta)
                 exported += 1
     except psycopg2.Error as exc:
         self.log_database_error(exc)
         sys.exit(1)
示例#6
0
 def export_inactive_sequences(self):
     """
     Export RNA sequences without active cross-references.

     Every distinct UPI returned by the inactive sequences query is
     written as a FASTA record to the `seq_inactive` file handle.

     In test mode (`self.test`) the export stops once `self.test_entries`
     sequences have been written. A `psycopg2.Error` is logged through
     `self.log_database_error` and terminates the process with status 1.
     """
     try:
         previous_upi = ''
         # Count written sequences, not result rows: rows repeating the
         # previous UPI are skipped and must not use up the test quota.
         exported = 0
         with cursor() as cur:
             cur.execute(self.get_inactive_sequences_sql())
             for result in cur:
                 # `>=` stops after exactly `test_entries` sequences; the
                 # previous `>` comparison allowed one extra.
                 if self.test and exported >= self.test_entries:
                     return
                 # NOTE(review): only consecutive duplicates are skipped,
                 # which presumably relies on the query ordering rows by
                 # upi - confirm against get_inactive_sequences_sql().
                 if result['upi'] == previous_upi:
                     continue
                 previous_upi = result['upi']
                 rna = Rna(upi=result['upi'],
                           seq_short=result['seq_short'],
                           seq_long=result['seq_long'])
                 fasta = rna.get_sequence_fasta()
                 self.filehandles['seq_inactive'].write(fasta)
                 exported += 1
     except psycopg2.Error as exc:
         self.log_database_error(exc)
         sys.exit(1)
示例#7
0
        def process_active_sequences():
            """
            Write the active sequences to the FASTA output files.

            For each distinct UPI read from `self.cursor` the FASTA record
            goes to `seq_active`, to `seq_example` while fewer than
            `self.examples` records have been written, and to `nhmmer_db`
            or `nhmmer_db_excluded` depending on whether the sequence
            matches the IUPAC character pattern. One `>{upi}_{taxid}`
            record per distinct taxid of the xrefs with `deleted='N'` is
            written to `species_specific`. In test mode the loop stops
            after `self.test_entries` records.
            """
            species_template = ">{upi}_{taxid} {description}\n{sequence}"
            iupac_only = re.compile('^[ABCDGHKMNRSTVWXYU]+$', re.IGNORECASE)  # IUPAC
            last_upi = ''
            written = 0

            for row in self.cursor:
                if self.test and written >= self.test_entries:
                    return
                result = self.row_to_dict(row)
                upi = result['upi']
                # skip rows that repeat the UPI handled just before
                if upi == last_upi:
                    continue
                last_upi = upi

                rna = Rna(upi=upi,
                          seq_short=result['seq_short'],
                          seq_long=read_lob(result['seq_long']))
                fasta = rna.get_sequence_fasta()
                self.filehandles['seq_active'].write(fasta)
                if written < self.examples:
                    self.filehandles['seq_example'].write(fasta)

                # sequences with characters outside the pattern are kept
                # out of the nhmmer database file
                if iupac_only.match(rna.get_sequence()):
                    nhmmer_key = 'nhmmer_db'
                else:
                    nhmmer_key = 'nhmmer_db_excluded'
                self.filehandles[nhmmer_key].write(fasta)

                # species specific identifiers
                sequence = re.sub(r'^>.+?\n', '', fasta)  # drop the header line
                active_xrefs = rna.xrefs.filter(deleted='N')
                taxids = set(active_xrefs.values_list('taxid', flat=True))
                for taxid in taxids:
                    record = species_template.format(
                        upi=upi,
                        taxid=taxid,
                        sequence=sequence,
                        description=rna.get_description(taxid=taxid))
                    self.filehandles['species_specific'].write(record)
                written += 1
 def get_description():
     """
     Return the species-specific description of the current entry.

     Builds an `Rna` from the row's `upi` and asks it for the description
     matching the row's `taxid`.
     """
     # NOTE(review): `row` is not defined in this block; presumably it is
     # supplied by an enclosing scope - confirm.
     return Rna(upi=row['upi']).get_description(taxid=row['taxid'])