def type_sequences(input, grouping=GROUPING, exon_fofn=None,
                   genomic_reference=None, cDNA_reference=None, loci=None):
    """
    Run the full typing procedure on an input and return the typing summary.

    The steps, in order: rename the sequences, align them against the
    genomic reference, re-orient them using that alignment, extract the
    alleles, re-align the selected alleles against the genomic reference,
    extract their cDNA, align the cDNA against the cDNA reference and
    finally summarize both alignments.

    Args:
        input: passed to get_log_file and get_input_file; per the
            original notes it may be a directory instead of a file.
        grouping: forwarded to extract_alleles as ``method``; a falsy
            value is replaced by GROUPING.
        exon_fofn: exon reference; a falsy value is replaced by the result
            of get_exon_reference().
        genomic_reference: genomic reference; a falsy value is replaced by
            the result of get_genomic_reference().
        cDNA_reference: cDNA reference; a falsy value is replaced by the
            result of get_cDNA_reference().
        loci: forwarded unchanged to extract_alleles.

    Returns:
        Whatever summarize_typing returns for the genomic and cDNA
        alignments.
    """
    initialize_logger(log, log_file=get_log_file(input))

    # Fill in every setting the caller left unset
    if not grouping:
        grouping = GROUPING
    if not exon_fofn:
        exon_fofn = get_exon_reference()
    if not genomic_reference:
        genomic_reference = get_genomic_reference()
    if not cDNA_reference:
        cDNA_reference = get_cDNA_reference()

    # Resolve the actual sequence file (the input may be a directory)
    sequences = get_input_file(input)

    # Rename, align and re-orient; the first alignment is reused for both
    # the orientation and the allele extraction
    renamed = rename_sequences(sequences)
    first_pass = full_align_best_reference(renamed, genomic_reference)
    oriented = orient_sequences(renamed, alignment_file=first_pass)
    alleles = extract_alleles(oriented,
                              alignment_file=first_pass,
                              method=grouping,
                              loci=loci)

    # Align the selected alleles as genomic DNA, then as cDNA
    genomic_hits = full_align_best_reference(alleles, genomic_reference)
    cDNA_seqs = extract_cDNA(alleles, exon_fofn, alignment_file=genomic_hits)
    cDNA_hits = align_by_identity(cDNA_seqs, cDNA_reference)

    return summarize_typing(genomic_hits, cDNA_hits)
def __init__(self):
    """
    Prepare the instance: parse the options, load the configuration file,
    create the output folders and start logging.

    Reads ``args.config_file`` and ``args.output`` once parse_args() has
    run, and sets ``self._config`` and ``self.subfolders``.
    """
    parse_args()

    # Load the configuration named on the command line
    parser = ConfigParser.SafeConfigParser()
    self._config = parser
    parser.read(args.config_file)

    # Create the output folder and its sub-folders
    self.subfolders = _initialize_folders(args.output)

    # Log to a fixed file name inside the output folder
    initialize_logger(
        log,
        log_file=os.path.join(args.output, "HLA_Pipeline.log"),
    )
def __init__(self):
    """
    Set up a new instance from the parsed options.

    Side effects, in order:
      1. parse_args() is called.
      2. ``self._config`` is set to a SafeConfigParser which then reads
         ``args.config_file``.
      3. ``self.subfolders`` is set to the result of
         _initialize_folders(args.output).
      4. Logging is initialized with "HLA_Pipeline.log" under
         ``args.output``.
    """
    parse_args()
    output_dir = args.output

    # Configuration
    self._config = config = ConfigParser.SafeConfigParser()
    config.read(args.config_file)

    # Output folder layout
    self.subfolders = _initialize_folders(output_dir)

    # Logging
    log_path = os.path.join(output_dir, "HLA_Pipeline.log")
    initialize_logger(log, log_file=log_path)
def _align_multiple_sequences(sequences, output=None):
    """
    Align a group of sequences and return the path of the alignment file.

    The sequences are written to a temporary file by _write_temp_fasta,
    handed to multi_sequence_alignment together with the name of a fresh
    temporary ".afa" file, and the temporary input is removed afterwards.

    Args:
        sequences: the records to align, in whatever form
            _write_temp_fasta accepts.
        output: currently ignored; kept so existing callers keep working.

    Returns:
        The name of the temporary ".afa" file.  It is created with
        delete=False, so removing it is left to the caller.
    """
    temp_in = _write_temp_fasta(sequences)

    # Only the file name is needed; close our own handle immediately so the
    # descriptor is not leaked for the lifetime of the process.
    temp_out = NamedTemporaryFile(suffix=".afa", delete=False)
    temp_out.close()

    try:
        multi_sequence_alignment(temp_in, temp_out.name)
    finally:
        # Remove the temporary input even when the alignment fails
        os.unlink(temp_in)
    return temp_out.name


def sorted_sequences(sequence_file):
    """
    Yield successive reads in descending order of NumReads

    Every record produced by AmpliconAnalysisReader for ``sequence_file``
    is read before the first one is yielded, since sorting needs them all.
    """
    # sorted() already builds its own list, so no intermediate copy is made
    records = AmpliconAnalysisReader(sequence_file)
    for seq in sorted(records, key=lambda s: s.num_reads, reverse=True):
        yield seq


if __name__ == "__main__":
    import sys

    from pbhla.log import initialize_logger

    # Usage: <script> QUERY_FILE [DEBUG]
    query_file = sys.argv[1]

    # bool() of any non-empty string is True, so "0" or "false" used to
    # switch debugging on; interpret the text of the argument instead.
    if len(sys.argv) > 2:
        debug_text = sys.argv[2].strip().lower()
        debug_flag = debug_text not in ("", "0", "false", "no", "off")
    else:
        debug_flag = None

    initialize_logger(log_level='INFO')
    cd = ChimeraDetector(debug=debug_flag)
    cd.run(query_file)