def GetContig(prediction):
    """Get the contig sequence for a prediction.

    The genome file name is taken from the module-level
    ``param_genome_file``. If it contains ``%s`` it is treated as a
    pattern and filled in with ``prediction.mSbjctToken``; otherwise it
    is used as is. The sequences of the most recently read file are kept
    in module globals, so the file is only re-read when the resolved file
    name differs from the previous call.

    Arguments:
        prediction: object providing ``mSbjctToken`` (key used to look up
            the sequence) and ``mSbjctStrand``.

    Returns:
        A tuple ``(token, sequence, is_reverse)``. For strand ``"+"`` the
        sequence comes from the forward sequences and ``is_reverse`` is
        False; for any other strand value it comes from the reverse
        sequences and ``is_reverse`` is True.

    Raises:
        IOError: if the genome file cannot be opened.
    """
    global global_last_filename_genome
    global global_forward_sequences
    global global_reverse_sequences

    if "%s" in param_genome_file:
        filename_genome = param_genome_file % prediction.mSbjctToken
    else:
        filename_genome = param_genome_file

    if global_last_filename_genome != filename_genome:
        if param_loglevel >= 2:
            print("# reading genome %s" % filename_genome)

        # String exceptions (``raise "text"``) are rejected by Python 2.6+
        # with a TypeError, so raise a real exception that carries the
        # same message.
        try:
            infile = open(filename_genome, "r")
        except IOError:
            raise IOError("# WARNING: genome %s not found" % filename_genome)

        # Close the handle even if parsing fails.
        try:
            global_forward_sequences, global_reverse_sequences = \
                Genomics.ReadGenomicSequences(infile)
        finally:
            infile.close()

        # Only remember the file name once its sequences are loaded, so
        # that a failed read is retried on the next call.
        global_last_filename_genome = filename_genome

    if prediction.mSbjctStrand == "+":
        return (prediction.mSbjctToken,
                global_forward_sequences[prediction.mSbjctToken],
                False)
    else:
        return (prediction.mSbjctToken,
                global_reverse_sequences[prediction.mSbjctToken],
                True)
param_max_intron = int(a) elif o in ("-c", "--min-coverage-query"): param_min_coverage_query = float(a) elif o in ("-s", "--min-score"): param_min_total_score = float(a) if len(args) > 0: print USAGE, "no arguments required." sys.exit(2) print E.GetHeader() print E.GetParams() # read complete genomic sequence if param_filename_genome: forward_sequences, reverse_sequences = Genomics.ReadGenomicSequences( open(param_filename_genome, "r")) else: forward_sequences = None reverse_sequences = None # read peptide sequences if param_filename_peptides: peptide_sequences = Genomics.ReadPeptideSequences( open(param_filename_peptides, "r")) else: peptide_sequences = {} print HEADER if param_loglevel >= 2: print SHORT_HEADER_SUMMARY
if options.range_peptide: options.range_peptide = map(int, options.range_peptide.split(",")) wrapper = Exonerate(options=options.options, output_options=options.output_options) wrapper.mLogLevel = options.loglevel if options.loglevel >= 2: print "# reading peptide sequence." peptide_sequences = Genomics.ReadPeptideSequences( open(options.input_filename_peptide, "r")) if options.loglevel >= 2: print "# reading genome sequence." genome_sequences = Genomics.ReadGenomicSequences(open( options.input_filename_genome, "r"), do_reverse=0) if not options.id_peptides: options.id_peptides = peptide_sequences.keys() if not options.id_genomes: options.id_genomes = genome_sequences.keys() for x in options.id_peptides: ps = peptide_sequences[x] if options.range_peptide: ps = ps[options.range_peptide[0]:options.range_peptide[1]] for y in options.id_genomes: gs = genome_sequences[y] if options.range_genome: gs = gs[options.range_genome[0]:options.range_genome[1]]
def _ReadWithClosedFile(reader, filename, **kwargs):
    """Open *filename*, hand the handle to *reader* and always close it."""
    infile = open(filename, "r")
    try:
        return reader(infile, **kwargs)
    finally:
        infile.close()


def _ReadTranscriptSequences(filename, mode, transcript_ids):
    """Read the sequences of *transcript_ids* for one side of the comparison."""
    if not filename:
        return {}
    if mode == "indexed":
        # The indexed reader takes the file name, not an open handle.
        return Genomics.ParseFasta2HashFromIndex(filename,
                                                 filter=transcript_ids)
    return _ReadWithClosedFile(Genomics.ReadGenomicSequences,
                               filename,
                               do_reverse=0,
                               filter=transcript_ids,
                               mask=param_mask)


def ReadTranscriptsAndCds(transcript_ids1, transcript_ids2):
    """Read exon boundaries and transcript sequences for two transcript sets.

    Input locations and options come from module-level parameters:
    ``param_filename_cds1``/``param_filename_cds2`` (exon boundaries),
    ``param_filename_transcripts1``/``param_filename_transcripts2``
    (sequences; skipped when empty), ``param_mode_genome1``/
    ``param_mode_genome2`` (``"indexed"`` selects the index-based reader)
    and ``param_mask``. Progress is written to stdout depending on
    ``param_loglevel``.

    Arguments:
        transcript_ids1: identifiers of the left transcripts; used as the
            read filter. ``len()`` and ``.keys()`` are called on it.
        transcript_ids2: same for the right transcripts.

    Returns:
        A tuple ``(transcripts1, transcripts2, cds1, cds2)``. The
        transcript entries are empty dicts when the corresponding
        sequence file name is not set.
    """
    if param_loglevel >= 1:
        print("# reading %i left and %i right transcripts" % (
            len(transcript_ids1), len(transcript_ids2)))
        sys.stdout.flush()

    if param_loglevel >= 1:
        print("# reading exon boundaries.")
        sys.stdout.flush()

    cds1 = _ReadWithClosedFile(Exons.ReadExonBoundaries,
                               param_filename_cds1,
                               filter=transcript_ids1,
                               reset=True)
    cds2 = _ReadWithClosedFile(Exons.ReadExonBoundaries,
                               param_filename_cds2,
                               filter=transcript_ids2,
                               reset=True)

    if param_loglevel >= 1:
        print("# read %i left and %i right cds" % (len(cds1), len(cds2)))
        sys.stdout.flush()

    if param_loglevel >= 2:
        # Report the requested transcripts for which no exon boundaries
        # were found.
        if len(cds1) != len(transcript_ids1):
            print("# missed in left: %s" % ":".join(
                set(transcript_ids1.keys()).difference(cds1.keys())))
        if len(cds2) != len(transcript_ids2):
            print("# missed in right: %s" % ":".join(
                set(transcript_ids2.keys()).difference(cds2.keys())))

    if param_loglevel >= 1:
        print("# reading genomic sequences.")
        sys.stdout.flush()

    transcripts1 = _ReadTranscriptSequences(param_filename_transcripts1,
                                            param_mode_genome1,
                                            transcript_ids1)
    transcripts2 = _ReadTranscriptSequences(param_filename_transcripts2,
                                            param_mode_genome2,
                                            transcript_ids2)

    if param_loglevel >= 1:
        print("# read %i left and %i right transcript sequences" % (
            len(transcripts1), len(transcripts2)))
        sys.stdout.flush()

    return transcripts1, transcripts2, cds1, cds2
def BuildLines(dbhandle, statement, genome_lengths, prefix="", default_color=None): c = dbhandle.cursor() c.execute(statement) if param_loglevel >= 2: print "# received %i results." % c.rowcount sbjct_token = "" sbjct_strand = None sbjct_from = 10000000000000000 sbjct_to = 0 lines = [] nmatches = 0 for line in c.fetchall(): entry = PredictionParser.PredictionParserEntry() entry.FillFromTable(line) if not genome_lengths.has_key(entry.mSbjctToken): filename_genome = param_genome_file % entry.mSbjctToken forward_sequences, reverse_sequences = Genomics.ReadGenomicSequences( open(filename_genome, "r")) genome_lengths[entry.mSbjctToken] = (len( forward_sequences[entry.mSbjctToken]), 0) lgenome, offset = genome_lengths[entry.mSbjctToken] if param_loglevel >= 4: print "# lgenome=%i, offset=%i" % (lgenome, offset) # get cds information exons = [] if param_tablename_exons: cc = dbhandle.cursor() statement = """SELECT exon_from, exon_to, exon_frame, genome_exon_from, genome_exon_to FROM %s WHERE prediction_id = %i""" % ( param_tablename_exons, entry.mPredictionId, ) if param_restrict_good_exons: statement += " AND is_ok = TRUE" try: cc.execute(statement) result = cc.fetchall() except pgdb.DatabaseError, msg: print "# query failed with message", msg result = [] exons = result cc.close() if not exons: if entry.mMapPeptide2Genome: exons = Genomics.Alignment2ExonBoundaries( entry.mMapPeptide2Genome, query_from=entry.mQueryFrom - 1, sbjct_from=entry.mSbjctGenomeFrom, add_stop_codon=1) else: exons = [("", "", 0, entry.mSbjctGenomeFrom, entry.mSbjctGenomeTo)] # select gene id if param_tablename_genes: cc = dbhandle.cursor() statement = """SELECT gene_id FROM %s WHERE prediction_id = %i""" % (param_tablename_genes, entry.mPredictionId) try: cc.execute(statement) result = cc.fetchone() except pgdb.DatabaseError, msg: print "# query failed with message", msg result = None gene_id = result[0] dbhandle.commit() cc.close()