def main():
	"""Command-line entry point: fetch scores for a list of SNVs.

	Reads mutations from ``muts_path`` (``-`` for stdin), queries the
	database for each one and writes a TSV line per matching row with the
	selected columns, annotations and predictor scores to ``out_path``
	(``-`` for stdout). Lines that are empty, comments (``#``) or fail to
	parse are skipped and counted as failures.
	"""
	parser = argparse.ArgumentParser(
		description="Fetch Condel scores")

	cmd = DefaultCommandHelper(parser)

	cmd.add_db_args()

	parser.add_argument("muts_path", metavar="SNVS_PATH",
						help="SNV's to check. Use - for standard input.")

	parser.add_argument("out_path", metavar="OUTPUT_PATH",
						help="The results path. Use - for standard output.")

	cmd.add_selected_predictors_args()

	cmd.add_selected_annotations_args()

	cmd.add_selected_columns_args()

	args, logger = cmd.parse_args("fetch")

	db = cmd.open_db()

	predictors = cmd.get_selected_predictors()

	annotations = cmd.get_selected_annotations()

	columns = cmd.get_selected_columns()

	logger.info("Reading {} ...".format(args.muts_path if args.muts_path != "-" else "from standard input"))

	try:
		progress = RatedProgress(logger, name="SNVs")

		with tsv.open(args.muts_path) as f:
			with tsv.open(args.out_path, "w") as wf:
				# Header: ID, selected columns, annotations, then one column per predictor.
				tsv.write_line(wf, "ID", *[c.upper() for c in columns] + [a.upper() for a in annotations] + predictors)

				hit = fail = 0

				mutparser = DnaAndProtMutationParser()
				for line_num, line in enumerate(f, start=1):
					line = line.rstrip(" \n\r")
					if len(line) == 0 or line.startswith("#"):
						continue

					try:
						# FIX: capture the parsed mutation instead of reusing the
						# parser object itself as the mutation (the original called
						# mut.parse(line) and discarded the result). fetch_iter()
						# in this module shows parse() returns the mutation.
						mut = mutparser.parse(line)
					except PrematureEnd:
						logger.error("Missing fields at line {}".format(line_num))
						fail += 1
						continue
					except UnexpectedToken as ex:
						logger.error("Unexpected field '{}' at line {}".format(ex.args[0], line_num))
						fail += 1
						continue

					exists = False
					# NOTE(review): the only query_mutation visible in this file is
					# nested inside fetch_iter(); this call assumes a module-level
					# helper of the same name exists — confirm.
					for row in query_mutation(logger, db, mut, annotations, predictors):
						exists = True

						ann = row["annotations"]
						scores = row["scores"]

						tsv.write_line(wf, mut.identifier,
									   *[row[c] for c in columns]
									   + [ann[a] for a in annotations]
									   + [scores[p] for p in predictors])

					progress.update()

					# A mutation counts as a hit when at least one row was found.
					if exists:
						hit += 1
					else:
						fail += 1

		progress.log_totals()

		logger.info("Finished. total={}, hits={}, fails={}, elapsed={}".format(hit + fail, hit, fail, progress.elapsed_time))
	except:
		# Project convention: cmd.handle_error() is the top-level error boundary.
		return cmd.handle_error()
	finally:
		db.close()
def fetch_iter(db, muts_path, maps=None, predictors=None, muts_header=False, state=None, logger=None):
	"""
	Iterator that fetches scores from the database from the mutations in a file.

	Yields one score row per database match; parse failures and mutations
	without matches are counted in ``state`` rather than raised.

	:param db: FannsDb interface.
	:param muts_path: The input path for mutations.
	:param maps: Map transcript/protein ensembl identifiers with external identifiers (swissprot_id, ...)
	:param predictors: Predictors for which to obtain the scores.
	:param muts_header: Whether the muts_path has a header or not.
	:param state: The state of the iteration: hits, fails. Updated in place;
	              also receives the current line number, raw line and mutation.
	:param logger: Logger to use. If not specified a new one is created.
	"""

	def query_mutation(logger, db, mut, maps, predictors):
		# Dispatch on the mutation's coordinate system and stream matching rows.
		if mut.coord == Mutation.GENOMIC:
			if logger.isEnabledFor(logging.DEBUG):
				logger.debug(" Querying {} {} {} {} {} {} {} ...".format(
					mut.chr, mut.start, mut.end or "*", mut.ref or "*",
					mut.alt, mut.strand or "*", mut.identifier or "*"))

			for row in db.query_scores(chr=mut.chr, start=mut.start,
									   ref=mut.ref, alt=mut.alt, strand=mut.strand,
									   predictors=predictors, maps=maps):
				yield row

		elif mut.coord == Mutation.PROTEIN:
			if logger.isEnabledFor(logging.DEBUG):
				logger.debug(" Querying {} {} {} {} {} ...".format(
					mut.protein, mut.start, mut.ref or "*",
					mut.alt, mut.identifier or "*"))

			for row in db.query_scores(protein=mut.protein, aa_pos=mut.start,
									   aa_ref=mut.ref, aa_alt=mut.alt,
									   predictors=predictors, maps=maps):
				yield row
		else:
			# FIX: Logger.warn() is deprecated; warning() is the supported name.
			logger.warning("Unknown coordinates system: {}".format(mut.line))

	if logger is None:
		logger = logging.getLogger("fannsdb.fetch")

	state = state if state is not None else {}
	state[STATE_HITS] = state[STATE_FAILS] = 0

	maps = maps if maps is not None else []
	predictors = predictors if predictors is not None else []

	logger.info("Reading {} ...".format(os.path.basename(muts_path) if muts_path != "-" else "from standard input"))

	progress = RatedProgress(logger, name="SNVs")

	with tsv.open(muts_path) as f:
		if muts_header:
			# Returns the first non-empty, non-comment line (the header), which
			# is discarded here.
			tsv.skip_comments_and_empty(f)

		mutparser = DnaAndProtMutationParser()
		for line_num, line in enumerate(f, start=1):
			line = line.rstrip(" \n\r")
			if len(line) == 0 or line.startswith("#"):
				continue

			try:
				mut = mutparser.parse(line)
			except PrematureEnd:
				logger.error("Missing fields at line {}".format(line_num))
				state[STATE_FAILS] += 1
				continue
			except UnexpectedToken as ex:
				logger.error("Unexpected field '{}' at line {}".format(ex.args[0], line_num))
				state[STATE_FAILS] += 1
				continue

			# Expose the current position to the caller through the shared state.
			state.update({
				STATE_LINE_NUM : line_num,
				STATE_LINE : line,
				STATE_MUTATION : mut})

			exists = False
			for row in query_mutation(logger, db, mut, maps, predictors):
				exists = True
				yield row

			progress.update()

			if exists:
				state[STATE_HITS] += 1
			else:
				state[STATE_FAILS] += 1

	progress.log_totals()

	hits, fails = [state[k] for k in [STATE_HITS, STATE_FAILS]]
	logger.info("Finished. total={}, hits={}, fails={}, elapsed={}".format(hits + fails, hits, fails, progress.elapsed_time))