def set_FIMO_thresh(self, FIMO_thresh):
    """Validate and store the FIMO threshold given by the user.

    The value must be convertible to a float inside the closed interval
    [0, 1]. It is stored on ``self.FIMO_thresh`` exactly as received
    (it is not converted to float).

    Raises:
        exceptions.WrongArgumentError: if the value cannot be converted
            to a float or lies outside [0, 1] (NaN is rejected too).
    """
    # Only conversion failures are treated as "bad argument"; anything
    # else (KeyboardInterrupt, SystemExit, ...) must propagate untouched.
    try:
        float_thresh = float(FIMO_thresh)
    except (TypeError, ValueError, OverflowError):
        float_thresh = None

    if float_thresh is None or not 0 <= float_thresh <= 1:
        message = (
            "Error: the following value for FIMO threshold <{FIMO_thresh}> "
            "is not correct").format(FIMO_thresh=FIMO_thresh)
        raise exceptions.WrongArgumentError(message)

    self.FIMO_thresh = FIMO_thresh
def set_num_threads(self, num_threads):
    """Sets number of threads according to user input.

    The value must be convertible to a strictly positive integer. It is
    stored on ``self.num_threads`` exactly as received (not converted).

    Raises:
        exceptions.WrongArgumentError: if the value cannot be converted
            to an int or is not greater than zero.
    """
    # Only conversion failures are treated as "bad argument"; anything
    # else (KeyboardInterrupt, SystemExit, ...) must propagate untouched.
    try:
        thread_count = int(num_threads)
    except (TypeError, ValueError, OverflowError):
        thread_count = None

    if thread_count is None or thread_count <= 0:
        message = (
            "Error: the following value for number of threads <{num_threads}> "
            "is not correct").format(num_threads=num_threads)
        raise exceptions.WrongArgumentError(message)

    self.num_threads = num_threads
def set_RNAseq_min_length(self, RNAseq_min_length):
    """Sets minimum length for RNAseq sequences according to user input.

    The value must be convertible to a strictly positive integer. It is
    stored on ``self.RNAseq_min_length`` exactly as received (not
    converted).

    Raises:
        exceptions.WrongArgumentError: if the value cannot be converted
            to an int or is not greater than zero.
    """
    # Only conversion failures are treated as "bad argument"; anything
    # else (KeyboardInterrupt, SystemExit, ...) must propagate untouched.
    try:
        min_length = int(RNAseq_min_length)
    except (TypeError, ValueError, OverflowError):
        min_length = None

    if min_length is None or min_length <= 0:
        message = (
            "Error: the following value for minimum RNAseq length <{min_len}> "
            "is not correct").format(min_len=RNAseq_min_length)
        raise exceptions.WrongArgumentError(message)

    self.RNAseq_min_length = RNAseq_min_length
def set_AME_scoring(self, AME_scoring):
    """Store the AME scoring method after checking it is supported.

    Only ``"max"`` and ``"avg"`` are accepted.

    Raises:
        exceptions.WrongArgumentError: for any other value.
    """
    supported_methods = ("max", "avg")
    if AME_scoring not in supported_methods:
        template = ("Error: the following AME scoring method <{AME_scoring}> "
                    "is not correct")
        raise exceptions.WrongArgumentError(
            template.format(AME_scoring=AME_scoring))
    self.AME_scoring = AME_scoring
def set_rand_sv_ratio(self, rand_sv_ratio):
    """Convert the random-to-SV sequences ratio to int and store it.

    The converted integer is stored on ``self.rand_sv_ratio``.

    Raises:
        exceptions.WrongArgumentError: if the value cannot be converted
            to an int.
    """
    try:
        self.rand_sv_ratio = int(rand_sv_ratio)
    except (TypeError, ValueError, OverflowError):
        # The placeholder name must match the keyword passed to format();
        # a mismatch raises KeyError here and hides the intended error.
        message = (
            "Error: the following random to SV sequences ratio "
            "<{rand_sv_ratio}> is not correct").format(
                rand_sv_ratio=rand_sv_ratio)
        raise exceptions.WrongArgumentError(message)
def set_SV_types(self, SV_types_string):
    """Parse a comma-separated list of SV types and store it as a list.

    Each item must be one of ``inv``, ``dup``, ``tra`` or ``del``; items
    are compared verbatim (no whitespace stripping). The resulting list
    is stored on ``self.SV_types``.

    Raises:
        exceptions.WrongArgumentError: on the first unknown SV type.
    """
    known_types = ("inv", "dup", "tra", "del")
    requested = SV_types_string.split(",")
    for candidate in requested:
        if candidate in known_types:
            continue
        message = (
            "Error: the following SV type <{SV_type}> is incorrect."
        ).format(SV_type=candidate)
        raise exceptions.WrongArgumentError(message)
    self.SV_types = requested
def set_sample_attr(self, sample_attr_string):
    """Parse ``key:value`` pairs into a dict of lists and store it.

    The input is a comma-separated list of ``key:value`` pairs. Values
    that share a key are collected, in input order, in one list. Only
    the text between the first and second colon is used as the value.
    The result is stored on ``self.sample_attr``.

    Raises:
        exceptions.WrongArgumentError: if a pair contains no colon.
    """
    collected = {}
    for entry in sample_attr_string.split(","):
        if ":" not in entry:
            message = "Error: please use the correct format for attribute listing"
            raise exceptions.WrongArgumentError(message)
        pieces = entry.split(":")
        attr_name, attr_value = pieces[0], pieces[1]
        collected.setdefault(attr_name, []).append(attr_value)
    self.sample_attr = collected
def set_population(self, population):
    """Sets population origin of sample according to user input.

    Accepted values are ``asian_pacific_islander``, ``black``,
    ``caucasian``, ``hispanic`` and ``native_american``.

    Raises:
        exceptions.WrongArgumentError: for any other value.
    """
    accepted_populations = (
        "asian_pacific_islander",
        "black",
        "caucasian",
        "hispanic",
        "native_american",
    )
    if population not in accepted_populations:
        template = ("Error: the following population <{population}> "
                    "is not correct")
        raise exceptions.WrongArgumentError(
            template.format(population=population))
    self.population = population
def main():
    """
    The main function:
        - parses through all the command line arguments
        - creates the HLAPipeline object and passes each option to its
          matching setter
        - verifies there are no missing arguments
        - runs the programs and the analysis

    Exits with status 2 when getopt rejects the command line.

    Raises:
        exceptions.WrongArgumentError: on positional (non-paired)
            arguments or on an option that has no handler.
        exceptions.MissingArgumentError: when a required pipeline
            attribute has not been set.
    """
    try:
        opts, args = getopt.getopt(
            sys.argv[1:], "g:e:r:G:l:p:o:t:H:s:K:a:P:L:c:h", [
                "input_WGS=", "input_WES=", "input_RNAseq=",
                # Both spellings are accepted: "genome_dir" was the one
                # registered here, "dir_genome" is the one used by the
                # handler and by the missing-argument message below.
                "genome_dir=", "dir_genome=",
                "RNAseq_min_length=", "population=", "output_dir=",
                "num_threads=", "dir_HLA_HD=", "dir_seq2HLA=",
                "dir_Kourami=",
                # These two take a value (their short forms are "a:" and
                # "c:"), so the long forms need the trailing "=" as well.
                "dir_arcasHLA=", "dir_picard=", "dir_HLA_LA=",
                "correct_HLA=", "help"
            ])
    except getopt.GetoptError as e:
        print(e)
        sys.exit(2)

    if len(args) > 0:
        message = "Error: non-paired arguments are not allowed."
        raise exceptions.WrongArgumentError(message)

    HLA_pipeline = pipeline.HLAPipeline()

    for opt, arg in opts:
        if opt in ("-h", "--help"):
            description()
            sys.exit()
        elif opt in ("-g", "--input_WGS"):
            HLA_pipeline.set_path_WGS(arg)
        elif opt in ("-e", "--input_WES"):
            HLA_pipeline.set_path_WES(arg)
        elif opt in ("-r", "--input_RNAseq"):
            HLA_pipeline.set_path_RNAseq(arg)
        elif opt in ("-G", "--genome_dir", "--dir_genome"):
            HLA_pipeline.set_dir_genome(arg)
        elif opt in ("-l", "--RNAseq_min_length"):
            HLA_pipeline.set_RNAseq_min_length(arg)
        elif opt in ("-p", "--population"):
            HLA_pipeline.set_population(arg)
        elif opt in ("-o", "--output_dir"):
            HLA_pipeline.set_output_dir(arg)
        elif opt in ("-t", "--num_threads"):
            HLA_pipeline.set_num_threads(arg)
        elif opt in ("-H", "--dir_HLA_HD"):
            HLA_pipeline.set_dir_HLA_HD(arg)
        elif opt in ("-s", "--dir_seq2HLA"):
            HLA_pipeline.set_dir_seq2HLA(arg)
        elif opt in ("-K", "--dir_Kourami"):
            HLA_pipeline.set_dir_Kourami(arg)
        elif opt in ("-a", "--dir_arcasHLA"):
            HLA_pipeline.set_dir_arcasHLA(arg)
        elif opt in ("-P", "--dir_picard"):
            HLA_pipeline.set_dir_picard(arg)
        elif opt in ("-L", "--dir_HLA_LA"):
            HLA_pipeline.set_dir_HLA_LA(arg)
        elif opt in ("-c", "--correct_HLA"):
            HLA_pipeline.set_path_correct_HLA(arg)
        else:
            message = "Error: {opt} is not a valid option".format(opt=opt)
            raise exceptions.WrongArgumentError(message)

    # NOTE(review): the names checked here ("input_WGS", ...) differ from
    # the attributes read further down ("path_WGS", "path_WES"); confirm
    # against HLAPipeline which attribute names its setters really create.
    required_attrs = [
        "input_WGS", "input_WES", "input_RNAseq", "RNAseq_min_length",
        "population", "output_dir", "dir_genome", "dir_HLA_HD",
        "dir_arcasHLA", "dir_seq2HLA", "dir_Kourami", "dir_picard",
        "dir_HLA_LA"
    ]
    for pipeline_attr in required_attrs:
        if not hasattr(HLA_pipeline, pipeline_attr):
            message = ("Error: you must indicate --{attr}.").format(
                attr=pipeline_attr)
            raise exceptions.MissingArgumentError(message)

    # Kourami and HLA_LA are run once per DNA input (WGS, then WES).
    for path in (HLA_pipeline.path_WGS, HLA_pipeline.path_WES):
        runprogram.Kourami(HLA_pipeline, path)
        runprogram.HLA_LA(HLA_pipeline, path)

    runprogram.seq2HLA(HLA_pipeline)
    runprogram.HLA_HD(HLA_pipeline)
    runprogram.arcasHLA(HLA_pipeline)

    analysis.extract_results(HLA_pipeline)
    analysis.calculate_accuracy(HLA_pipeline)
def main():
    '''Reads input from terminal and coordinates pipeline.

    Parses the command line with getopt, configures a
    pipeline.MotifPipeline, loads the parsl configuration named by
    --config from the "configs" directory next to this file (default
    "local"), then runs the extraction, FIMO/AME and reporting steps.

    Exits with status 2 when getopt rejects the command line.

    Raises:
        exceptions.WrongArgumentError: on positional (non-paired)
            arguments or on an option that has no handler.
        exceptions.MissingArgumentError: when a required option is
            missing, or when only one of --sampleinfo_table and
            --sample_attr is given.
        exceptions.IncorrectPathError: when the config module cannot be
            loaded; the underlying error is chained as its cause.
    '''
    try:
        opts, args = getopt.getopt(
            sys.argv[1:], "i:o:f:l:e:m:a:t:s:r:F:A:c:p:h", [
                "input_dir=", "output_dir=", "genome_fasta=", "genome_len=",
                "genome_include=", "motif_path=", "sample_attr=",
                "sampleinfo_table=", "SV_types=", "rand_sv_ratio=",
                "FIMO_thresh=", "AME_scoring=", "config=", "prefix=", "help"
            ])
    except getopt.GetoptError as e:
        print(e)
        sys.exit(2)

    if len(args) > 0:
        message = "Error: non-paired arguments are not allowed."
        raise exceptions.WrongArgumentError(message)

    motif_pipeline = pipeline.MotifPipeline()
    sample_attr_path = None
    genome_fasta = None
    genome_len = None
    genome_include = None
    prefix = None
    config_name = "local"

    for opt, arg in opts:
        if opt in ("-h", "--help"):
            description()
            sys.exit()
        elif opt in ("-i", "--input_dir"):
            motif_pipeline.set_input_dir(arg)
        elif opt in ("-o", "--output_dir"):
            motif_pipeline.set_output_dir(arg)
        elif opt in ("-f", "--genome_fasta"):
            genome_fasta = arg
        elif opt in ("-l", "--genome_len"):
            genome_len = arg
        elif opt in ("-e", "--genome_include"):
            genome_include = arg
        elif opt in ("-m", "--motif_path"):
            motif_pipeline.set_motif_path(arg)
        elif opt in ("-a", "--sample_attr"):
            motif_pipeline.set_sample_attr(arg)
        elif opt in ("-t", "--sampleinfo_table"):
            sample_attr_path = arg
        elif opt in ("-s", "--SV_types"):
            motif_pipeline.set_SV_types(arg)
        elif opt in ("-r", "--rand_sv_ratio"):
            motif_pipeline.set_rand_sv_ratio(arg)
        elif opt in ("-F", "--FIMO_thresh"):
            motif_pipeline.set_FIMO_thresh(arg)
        elif opt in ("-A", "--AME_scoring"):
            motif_pipeline.set_AME_scoring(arg)
        elif opt in ("-c", "--config"):
            config_name = arg
        elif opt in ("-p", "--prefix"):
            prefix = arg
        else:
            message = "Error: {opt} is not a valid option".format(opt=opt)
            raise exceptions.WrongArgumentError(message)

    # --sampleinfo_table and --sample_attr only make sense together.
    # sample_attr equal to "all" is treated as "no --sample_attr given".
    has_table = sample_attr_path is not None
    has_attr_filter = not motif_pipeline.sample_attr == "all"
    if has_table != has_attr_filter:
        message = ("Error: you must indicate both --sampleinfo_table "
                   "and --sample_attr, or neither.")
        raise exceptions.MissingArgumentError(message)

    if genome_fasta is None:
        message = "Error: you must indicate --genome_fasta."
        raise exceptions.MissingArgumentError(message)
    if genome_len is None:
        message = "Error: you must indicate --genome_len."
        raise exceptions.MissingArgumentError(message)
    if genome_include is None:
        message = "Error: you must indicate --genome_include."
        raise exceptions.MissingArgumentError(message)

    for pipeline_attr in ["input_dir", "output_dir", "motif_path"]:
        if not hasattr(motif_pipeline, pipeline_attr):
            message = ("Error: you must indicate --{attr}.").format(
                attr=pipeline_attr)
            raise exceptions.MissingArgumentError(message)

    motif_pipeline.set_subdir_name(prefix)
    motif_pipeline.write_description()
    motif_pipeline.set_list_bedpe(sample_attr_path)

    reference_genome = refgenome.ReferenceGenome(genome_fasta, genome_len,
                                                 genome_include)

    # Config modules live in the "configs" directory next to this file.
    base_dir = os.path.dirname(os.path.abspath(__file__))
    config = os.path.join(base_dir, 'configs', '{}.py'.format(config_name))
    try:
        spec = importlib.util.spec_from_file_location('', config)
        module = importlib.util.module_from_spec(spec)
        spec.loader.exec_module(module)
        parsl.load(module.config)
    except Exception as err:
        # Catch Exception, not everything, so Ctrl-C and sys.exit() are
        # not swallowed; chain the cause so a broken config file is not
        # mistaken for a missing one.
        raise exceptions.IncorrectPathError(
            "Cannot find the config file <{config_name}>.".format(
                config_name=config_name)) from err

    if not os.path.isdir(motif_pipeline.output_dir + "bed_files"):
        os.mkdir(motif_pipeline.output_dir + "bed_files")

    for file_name in motif_pipeline.list_bedpe:
        sv_types_to_run = get_SV_types(motif_pipeline, file_name)
        if sv_types_to_run:
            extractdata.bedpe_to_bed(reference_genome, motif_pipeline,
                                     file_name, sv_types_to_run)
    # Block until every submitted parsl task has finished before merging.
    parsl.wait_for_current_tasks()

    runprogram.merge(motif_pipeline)
    motif_pipeline.set_num_SV_breakpoints()
    runprogram.bedtools(motif_pipeline, reference_genome)
    runprogram.FIMO(motif_pipeline)
    runprogram.AME(motif_pipeline)

    extractdata.extract_list_sequences_AME(motif_pipeline)
    extractdata.extract_output_FIMO(motif_pipeline)
    extractdata.extract_output_AME(motif_pipeline)
    graphs.generate_histogram(motif_pipeline)