def addLociDataFromFiles(msi_spl, in_loci_list, method_name, keys, res_cls):
    """
    Read the per-locus metrics files listed in an index file and store the selected values on the loci of a sample.

    :param msi_spl: The sample receiving the results. Loci missing from the sample are created on the fly.
    :type msi_spl: MSISample
    :param in_loci_list: Path to the index of metrics files by locus (format: TSV). The columns used are Locus_position, Locus_name and Filepath. Each path in "Filepath" is forwarded to addLociResult together with the corresponding locus.
    :type in_loci_list: str
    :param method_name: Name of the method under which the locus results are stored.
    :type method_name: str
    :param keys: Mapping forwarded to addLociResult describing which metrics are extracted and under which names they are stored.
    :type keys: dict
    :param res_cls: Class used to store the locus result.
    :type res_cls: LocusRes or one of its subclasses
    """
    with HashedSVIO(in_loci_list) as loci_reader:
        for row in loci_reader:  # One metrics file per locus
            locus_pos = row["Locus_position"]
            # Declare the locus in the sample the first time it is met
            if locus_pos not in msi_spl.loci:
                new_locus = MSILocus(locus_pos, row["Locus_name"])
                msi_spl.addLocus(new_locus)
            # Attach the result and its data to the locus
            addLociResult(
                msi_spl.loci[locus_pos],
                row["Filepath"],
                method_name,
                keys,
                res_cls
            )
def getNbOccur(in_profile, nb_distinct_reads):
    """
    Return the number of future occurrences for each distinct reads.

    :param in_profile: Path to the file containing the percentage of distinct sequences by number of duplications (format: TSV). Header line must start with "#" and must contain "duplication_level" and "%_distinct".
    :type in_profile: str
    :param nb_distinct_reads: The duplication will be applied on this number of distinct reads.
    :type nb_distinct_reads: int
    :return: The number of future occurrences for each distinct reads, in random order. When rounding of the profile percentages yields fewer than nb_distinct_reads entries, the list is completed with reads occurring once.
    :rtype: list
    """
    # Get profile
    with HashedSVIO(in_profile, title_starter="#") as FH_in:
        profile = FH_in.read()
    # Get nb_occurences
    nb_occurences = []
    for category in profile:
        nb_reads_at_dup_lvl = int(
            round(float(category["%_distinct"]) * nb_distinct_reads / 100, 0))
        if nb_reads_at_dup_lvl > 0:
            # The level is converted only for categories that are really used
            dup_lvl = int(category["duplication_level"])
            nb_occurences.extend([dup_lvl] * nb_reads_at_dup_lvl)
    # Complete with unduplicated reads: rounding by category can leave the
    # list shorter than the requested number of distinct reads
    nb_missing = nb_distinct_reads - len(nb_occurences)
    if nb_missing > 0:
        nb_occurences.extend([1] * nb_missing)
    # Shuffle nb_occurences
    random.shuffle(nb_occurences)
    return nb_occurences
def getVariantsProfile(profile_path, min_allele_freq=None):
    """
    Return the variants profiles described in a TSV file.

    :param profile_path: Path to the profiles file (format: TSV). The header line starts with "#" and the columns used are Type, Occurence, Freq_min, Freq_max, Lg_min and Lg_max.
    :type profile_path: str
    :param min_allele_freq: When provided, each profile is checked against this value: its minimum allele frequency must not be lower and its allele frequency bounds must not have more decimal digits than this value requires. Violations are only logged as errors; the profile is still returned.
    :type min_allele_freq: float
    :return: One dictionary per profile with the keys "type", "occurence", "AF" ({"min", "max"}) and "length" ({"min", "max"}).
    :rtype: list
    """
    import math

    # Number of decimal digits needed to write min_allele_freq (0.01 => 2).
    # The precision check is skipped for non-positive thresholds, for which a
    # number of digits cannot be derived.
    af_ndigits = None
    if min_allele_freq is not None and min_allele_freq > 0:
        af_ndigits = max(0, int(math.ceil(-math.log10(min_allele_freq))))
    profiles = list()
    with HashedSVIO(profile_path, title_starter="#") as FH_profile:
        for record in FH_profile:
            # Type  Occurence  Freq_min  Freq_max  Lg_min  Lg_max
            curr_profile = {
                "type": record["Type"],
                "occurence": float(record["Occurence"]),
                "AF": {
                    "min": float(record["Freq_min"]),
                    "max": float(record["Freq_max"])
                },
                "length": {
                    "min": int(record["Lg_min"]),
                    "max": int(record["Lg_max"])
                }
            }
            profiles.append(curr_profile)
            if min_allele_freq is not None:
                if min_allele_freq > curr_profile["AF"]["min"]:
                    log.error(
                        "The minimum allele frequency in {} must be >= {}.".format(
                            profile_path, min_allele_freq
                        )
                    )
                if af_ndigits is not None:
                    for bound in ("min", "max"):
                        allele_freq = curr_profile["AF"][bound]
                        if allele_freq != round(allele_freq, af_ndigits):
                            log.error(
                                "The allele frequency precision must be >= {}.".format(
                                    min_allele_freq
                                )
                            )
    return profiles
def getStatus(in_annotations, samples):
    """
    Load, for each expected sample, its status and the status of each of its loci.

    :param in_annotations: Path to the file containing status by locus by sample (format: TSV). The columns "sample" and "sample_status" are required; every other column is kept as a locus status.
    :type in_annotations: str
    :param samples: Names of the expected samples.
    :type samples: list
    :return: By sample name, the status by locus. The sample status itself is stored under the key "sample".
    :rtype: dict
    :raises Exception: When an expected sample is missing from the annotations file.
    """
    not_loci_titles = ["sample", "sample_status"]
    samples = set(samples)
    status_by_spl = {}
    with HashedSVIO(in_annotations, title_starter="") as reader:
        for row in reader:
            spl_name = getSplFromLibName(row["sample"])
            if spl_name not in samples:
                continue  # Annotation of a sample that is not requested
            spl_status = {}
            for title, status in row.items():
                if title not in not_loci_titles:
                    spl_status[title] = status
            spl_status["sample"] = row["sample_status"]
            status_by_spl[spl_name] = spl_status
    # Every expected sample must have been found
    for spl in samples:
        if spl not in status_by_spl:
            raise Exception("Sample {} has no expected data.".format(spl))
    return status_by_spl
def loadMitelman(db_path, db_version, fusions_by_partners, aliases_by_symbol, annotation_symbols):
    """
    Set fusions partners data from Mitelman database: MBCA.TXT.DATA,REF.TXT.DATA.

    :param db_path: Path to the Mitelman database MBCA.TXT.DATA, optionally followed by a comma and the path to REF.TXT.DATA (format: TSV). Without the REF file no Pubmed ID is added.
    :type db_path: str
    :param db_version: Database version to traceback sources in fusions_by_partners.
    :type db_version: str
    :param fusions_by_partners: By partners (upGene_@_downGene) the ids of fusions by source (chimerakb, cosmic, mitelman, pubmed, ...). This dictionary is updated in place.
    :type fusions_by_partners: dict
    :param aliases_by_symbol: Gene name aliases by symbol.
    :type aliases_by_symbol: dict
    :param annotation_symbols: List of genes names known in genes annotations file.
    :type annotation_symbols: set
    """
    mbca_path = db_path
    pubmed_by_fusion = {}
    if "," in db_path:
        mbca_path, ref_path = db_path.split(",")
        pubmed_by_fusion = pubmedByFusion(ref_path)
    source = "mitelman_{}".format(db_version)
    # MolClin  RefNo  InvNo  Morph  Topo  Immunology  GeneLength  GeneShort  GeneLong  KaryLength  KaryShort  KaryLong
    with HashedSVIO(mbca_path) as reader:
        for record in reader:
            if record["GeneShort"] == "":
                continue
            for fusion in record["GeneShort"].split(","):
                if "/" not in fusion:
                    continue  # Not a fusion
                genes = fusion.replace("+", "").split("/")  # PDRG1/ARF3/RUNX1 => fusion between 3 genes
                for up_gene, down_gene in zip(genes, genes[1:]):  # For each breakpoint
                    try:
                        up_gene = selectAnnotSymbol(up_gene, annotation_symbols, aliases_by_symbol)
                        down_gene = selectAnnotSymbol(down_gene, annotation_symbols, aliases_by_symbol)
                    except Exception:
                        log.warning(
                            "Error to parse gene names [{}, {}] from Mitelman (PMID: {}).".format(
                                up_gene, down_gene, record["RefNo"]
                            )
                        )
                        continue
                    fusion_partners = "{}_@_{}".format(up_gene, down_gene)
                    entries_by_src = fusions_by_partners.setdefault(fusion_partners, {})
                    entries_by_src.setdefault(source, set()).add(int(record["RefNo"]))
                    pubmed_ids = entries_by_src.setdefault("PMID", set())
                    # The reference may be unknown: REF file not provided or
                    # RefNo missing from it
                    for pmid in pubmed_by_fusion.get(record["RefNo"], ()):
                        pubmed_ids.add(int(pmid))
def process(args, log):
    """
    Convert MSI status file (splA<tab>status_locus_1<tab>status_locus_2) in MSI annotation file.

    :param args: The namespace extracted from the script arguments. The attributes used are input_targets, input_status, output_annotations and loci_by_id.
    :type args: Namespace
    :param log: The logger of the script.
    :type log: logging.Logger
    :raises Exception: When a targeted locus is missing from the status file or when a status is not an authorized value.
    """
    # Get targeted loci IDs and names
    loci_in_bed = []
    id_by_name = {}
    with BEDIO(args.input_targets) as FH_in:
        for record in FH_in:
            locus_id = "{}:{}-{}".format(record.chrom, record.start - 1, record.end)
            id_by_name[record.name] = locus_id
            loci_in_bed.append(locus_id)
    if not args.loci_by_id:
        # Status file columns are loci names instead of loci IDs
        loci_in_bed = sorted(id_by_name.keys())
    # Write annotation file
    with HashedSVIO(args.input_status, title_starter="") as FH_in:
        loci_in_status = {elt for elt in FH_in.titles if elt != "sample"}
        missing_loci = set(loci_in_bed) - loci_in_status
        if missing_loci:
            msg = "The following loci are defined in targets but are missing from status file: {}".format(
                sorted(missing_loci))
            log.error(msg)
            raise Exception(msg)
        authorized_values = Status.authorizedValues()
        with MSIAnnot(args.output_annotations, "w") as FH_out:
            for record in FH_in:
                for locus in loci_in_bed:
                    if record[locus] not in authorized_values:
                        msg = 'The status "{}" of the locus {} in sample {} is invalid. It must be: {}'.format(
                            record[locus], locus, record["sample"], authorized_values)
                        log.error(msg)
                        raise Exception(msg)
                    FH_out.write({
                        "sample": record["sample"],
                        "locus_position": locus if args.loci_by_id else id_by_name[locus],
                        "method_id": "model",
                        "key": "status",
                        "value": record[locus],
                        "type": "str"
                    })
def sourcesBySymbols(in_known):
    """
    Load the sources descriptions of known fusions, indexed by fusion ID.

    :param in_known: Path to the file containing known fusions (format: TSV). The columns used are 5prim_gene, 3prim_gene and sources. The value of sources is kept as is (expected form: db1name:entryId,entryId|db2name:entryId).
    :type in_known: str
    :return: Sources descriptions by fusion ID (5primSymbol_@_3primSymbol). When a fusion ID is repeated the last line wins.
    :rtype: dict
    """
    with HashedSVIO(in_known) as reader:
        return {
            "{}_@_{}".format(row["5prim_gene"], row["3prim_gene"]): row["sources"]
            for row in reader
        }
def aliasesBySymbols(in_aliases):
    """
    Load genes names aliases by symbol, selecting the parser from the file header.

    The file is handled as an Ensembl export when its titles contain both "Gene name" and "Gene Synonym"; otherwise it is handled as an NCBI file.

    :param in_aliases: Path to genes synonyms (format: TSV).
    :type in_aliases: str
    :return: Names aliases by each gene symbol.
    :rtype: dict
    """
    with HashedSVIO(in_aliases) as reader:
        is_ensembl = "Gene name" in reader.titles and "Gene Synonym" in reader.titles
    if is_ensembl:
        return aliasesBySymbolsFromEnsembl(in_aliases)
    return aliasesBySymbolsFromNCBI(in_aliases)
def process(args):
    """
    Classify the stability of the loci and of the sample from the lengths distribution on loci and write the report.

    :param args: The namespace extracted from the script arguments. The attributes used are sample_name, output_report, input_combined_list, input_models and min_support.
    :type args: Namespace
    """
    # Sample name: given by the user or deduced from the report filename
    spl_name = args.sample_name
    if spl_name is None:
        report_filename = os.path.basename(args.output_report)
        spl_name = report_filename.split(".")[0]
        if spl_name.endswith("_report"):
            spl_name = spl_name[:-7]
    msi_spl = MSISample(spl_name)
    # Load lengths metrics of each locus
    with HashedSVIO(args.input_combined_list) as loci_reader:
        for row in loci_reader:
            with open(row["Filepath"]) as metrics_handle:
                metrics = json.load(metrics_handle)
            locus_desc = {
                "name": row["Locus_name"],
                "position": row["Locus_position"],
                "results": {
                    "PairsCombi": {
                        "_class": "LocusResPairsCombi",
                        "status": Status.none,
                        "data": {
                            "nb_by_length": metrics["nb_by_length"],
                            "nb_pairs_aligned": metrics["nb_uncombined_pairs"] + metrics["nb_combined_pairs"]
                        }
                    }
                }
            }
            msi_spl.addLocus(MSILocus.fromDict(locus_desc))
    # Set status of each locus then of the sample
    msi_models = MSIReport.parse(args.input_models)
    evaluated_samples = [msi_spl]
    for locus_id in msi_spl.loci:
        PairsCombiProcessor(
            locus_id, msi_models, evaluated_samples, args.min_support
        ).setLocusStatus()
    msi_spl.setStatus("PairsCombi")
    # Write report
    MSIReport.write([msi_spl], args.output_report)
def pubmedByFusion(in_ref):
    """
    Load the Pubmed IDs of each reference number from the Mitelman REF.TXT.DATA file.

    :param in_ref: Path to the REF.TXT.DATA from Mitelman database (format: TSV). The columns used are RefNo and Pubmed.
    :type in_ref: str
    :return: Pubmed IDs (as found in the file) by RefNo. A RefNo without Pubmed ID is associated with an empty set.
    :rtype: dict
    """
    # RefNo  TitleLength  TitleShort  TitleLong  Volume  Year  Journal  Text  Abbreviation  AuthorsLength  AuthorsShort  AuthorsLong  Flag  Pubmed
    pubmed_by_fusion = {}
    with HashedSVIO(in_ref) as reader:
        for row in reader:
            # The reference is registered even when it has no Pubmed ID
            ref_pubmed = pubmed_by_fusion.setdefault(row["RefNo"], set())
            if row["Pubmed"] != "":
                ref_pubmed.add(row["Pubmed"])
    return pubmed_by_fusion
def writePartnersDb(db_path, fusions_by_partners):
    """
    Write the known fusions partners database, one line per couple of partners.

    :param db_path: Path to the written fusions partners database (format: TSV). The columns are 5prim_gene, 3prim_gene and sources.
    :type db_path: str
    :param fusions_by_partners: By partners (upGene_@_downGene) the ids of fusions by source. Each source is written as name:id,id with sorted ids, and sources are separated by "|".
    :type fusions_by_partners: dict
    """
    with HashedSVIO(db_path, "w") as writer:
        writer.titles = ["5prim_gene", "3prim_gene", "sources"]
        for partners, entries_by_src in fusions_by_partners.items():
            up_gene, down_gene = partners.split("_@_")
            sources = []
            for src, ids in entries_by_src.items():
                ids_field = ",".join(str(elt) for elt in sorted(ids))
                sources.append(src + ":" + ids_field)
            row = {
                "5prim_gene": up_gene,
                "3prim_gene": down_gene,
                "sources": "|".join(sources)
            }
            writer.write(row)
def loadGeneric(db_path, db_name, db_version, fusions_by_partners, aliases_by_symbol, annotation_symbols, up_title="up_gene", down_title="down_gene"):
    """
    Add to fusions_by_partners the fusions of a database described by two columns of genes names.

    :param db_path: Path to fusions database (format: TSV).
    :type db_path: str
    :param db_name: Database name.
    :type db_name: str
    :param db_version: Database version. The source is stored as dbName_dbVersion.
    :type db_version: str
    :param fusions_by_partners: By partners (upGene_@_downGene) the ids of fusions by source. This dictionary is updated in place; the id stored for this source is the partners string itself.
    :type fusions_by_partners: dict
    :param aliases_by_symbol: Gene name aliases by symbol.
    :type aliases_by_symbol: dict
    :param annotation_symbols: List of genes names known in genes annotations file.
    :type annotation_symbols: set
    :param up_title: Title of column containing gene name of first partner.
    :type up_title: str
    :param down_title: Title of column containing gene name of second partner.
    :type down_title: str
    """
    source = "{}_{}".format(db_name, db_version)
    with HashedSVIO(db_path) as reader:
        for row in reader:
            up_gene = down_gene = None
            try:
                up_gene = selectAnnotSymbol(row[up_title], annotation_symbols, aliases_by_symbol)
                down_gene = selectAnnotSymbol(row[down_title], annotation_symbols, aliases_by_symbol)
            except Exception:
                log.warning(
                    "Error to parse gene names [{}, {}] from {}.".format(
                        row[up_title], row[down_title], db_name
                    )
                )
            if not (up_gene and down_gene):
                continue  # At least one partner cannot be linked to the annotation
            fusion_partners = "{}_@_{}".format(up_gene, down_gene)
            entries_by_src = fusions_by_partners.setdefault(fusion_partners, {})
            entries_by_src.setdefault(source, set()).add(fusion_partners)
def loadCosmic(db_path, db_version, fusions_by_partners, aliases_by_symbol, annotation_symbols):
    """
    Set fusions partners data from cosmic database.

    :param db_path: Path to the cosmic database (format: TSV). The columns used are "Translocation Name", "Fusion ID" and "Pubmed_PMID".
    :type db_path: str
    :param db_version: Database version to traceback sources in fusions_by_partners.
    :type db_version: str
    :param fusions_by_partners: By partners (upGene_@_downGene) the ids of fusions by source (chimerakb, cosmic, mitelman, pubmed, ...). This dictionary is updated in place.
    :type fusions_by_partners: dict
    :param aliases_by_symbol: Gene name aliases by symbol.
    :type aliases_by_symbol: dict
    :param annotation_symbols: List of genes names known in genes annotations file.
    :type annotation_symbols: set
    """
    source = "cosmic_{}".format(db_version)
    # ENST00000324093.4(PLXND1):r.1_2864_ENST00000393238.3(TMCC1):r.918_5992
    translocation_regexp = re.compile(r"ENS.+\((.+)\):.+_ENS.+\((.+)\):.+")
    # Sample ID  Sample name  Primary site  Site subtype 1  Site subtype 2  Site subtype 3  Primary histology  Histology subtype 1  Histology subtype 2  Histology subtype 3  Fusion ID  Translocation Name  5'_CHROMOSOME  5'_GENOME_START_FROM  5'_GENOME_START_TO  5'_GENOME_STOP_FROM  5'_GENOME_STOP_TO  5'_STRAND  3'_CHROMOSOME  3'_GENOME_START_FROM  3'_GENOME_START_TO  3'_GENOME_STOP_FROM  3'_GENOME_STOP_TO  3'_STRAND  Fusion type  Pubmed_PMID
    with HashedSVIO(db_path) as reader:
        for record in reader:
            if record["Translocation Name"] == "":
                continue
            matches = translocation_regexp.fullmatch(record["Translocation Name"])
            if matches is None:
                log.warning(
                    "Error to parse gene names {} from cosmic (PMID: {}).".format(
                        record["Translocation Name"], record["Pubmed_PMID"]
                    )
                )
                continue
            raw_up_gene, raw_down_gene = matches.groups()
            # As in the other loaders, an entry whose symbols cannot be
            # selected is reported and skipped instead of stopping the load
            try:
                up_gene = selectAnnotSymbol(raw_up_gene, annotation_symbols, aliases_by_symbol)
                down_gene = selectAnnotSymbol(raw_down_gene, annotation_symbols, aliases_by_symbol)
            except Exception:
                log.warning(
                    "Error to parse gene names [{}, {}] from cosmic (PMID: {}).".format(
                        raw_up_gene, raw_down_gene, record["Pubmed_PMID"]
                    )
                )
                continue
            fusion_partners = "{}_@_{}".format(up_gene, down_gene)
            entries_by_src = fusions_by_partners.setdefault(fusion_partners, {})
            entries_by_src.setdefault(source, set()).add(int(record["Fusion ID"]))
            if record["Pubmed_PMID"] != "":
                entries_by_src.setdefault("PMID", set()).add(int(record["Pubmed_PMID"]))
def loadChimerdb(db_path, db_version, fusions_by_partners, aliases_by_symbol, annotation_symbols):
    """
    Add to fusions_by_partners the fusions described in the chimerdb database.

    :param db_path: Path to the chimerdb database (format: TSV). The columns used are id, H_gene, T_gene and PMID (comma separated Pubmed IDs).
    :type db_path: str
    :param db_version: Database version. The source is stored as chimerdb_dbVersion.
    :type db_version: str
    :param fusions_by_partners: By partners (upGene_@_downGene) the ids of fusions by source (chimerakb, cosmic, mitelman, pubmed, ...). This dictionary is updated in place.
    :type fusions_by_partners: dict
    :param aliases_by_symbol: Gene name aliases by symbol.
    :type aliases_by_symbol: dict
    :param annotation_symbols: List of genes names known in genes annotations file.
    :type annotation_symbols: set
    """
    source = "chimerdb_{}".format(db_version)
    # id  Source  webSource  Fusion_pair  H_gene  H_chr  H_position  H_strand  T_gene  T_chr  T_position  T_strand  Breakpoint_Type  Genome_Build_Version  PMID  Disease  Validation  Kinase  Oncogene  Tumor_suppressor  Receptor  Transcription_Factor  ChimerPub  ChimerSeq
    with HashedSVIO(db_path) as reader:
        for row in reader:
            up_gene = down_gene = None
            try:
                up_gene = selectAnnotSymbol(row["H_gene"], annotation_symbols, aliases_by_symbol)
                down_gene = selectAnnotSymbol(row["T_gene"], annotation_symbols, aliases_by_symbol)
            except Exception:
                log.warning(
                    "Error to parse gene names [{}, {}] from chimerDB (PMID: {}).".format(
                        row["H_gene"], row["T_gene"], row["PMID"]
                    )
                )
            if not (up_gene and down_gene):
                continue  # At least one partner cannot be linked to the annotation
            fusion_partners = "{}_@_{}".format(up_gene, down_gene)
            entries_by_src = fusions_by_partners.setdefault(fusion_partners, {})
            entries_by_src.setdefault(source, set()).add(int(row["id"]))
            if row["PMID"] != "":
                known_pubmed = entries_by_src.setdefault("PMID", set())
                row_pubmed = {int(pmid) for pmid in row["PMID"].split(",")}
                # A new set is stored, as a union of the previous IDs and the row IDs
                entries_by_src["PMID"] = known_pubmed.union(row_pubmed)
def getGroupsData(groups_path, samples, sample_tag="Sample", group_tag="Group", separator="\t"):
    """
    @summary: Return group name by sample, samples by group and samples without group from separated value file.
    @param groups_path: [str] Path to the separated value file describing links between samples and groups. The header line starts with "#".
    @param samples: [list] The list of all samples.
    @param sample_tag: [str] The title of column used to store the samples names.
    @param group_tag: [str] The title of column used to store the groups names.
    @param separator: [str] The separator used between fields in the input file.
    @return: [tuple] The first element is a dictionary representing the group name by sample. The second element is a dictionary representing the list of samples by group name. The last element is a dictionary whose keys are the samples without group (all values are True).
    @raise Exception: When a sample found in the groups file is not in the expected samples.
    """
    group_by_spl = {}
    spl_by_group = {}
    without_group = {}
    processed_by_spl = {spl: False for spl in samples}
    # Parse groups information
    with HashedSVIO(groups_path, separator=separator, title_starter="#") as FH_gp:
        for record in FH_gp:
            sample = record[sample_tag]
            group = record[group_tag]
            # The sample must be validated before it is flagged: flagging
            # first would insert it in processed_by_spl and hide the error
            if sample not in processed_by_spl:
                raise Exception(
                    'The sample "{}" found in {} does not exist in expected samples.'
                    .format(sample, groups_path))
            processed_by_spl[sample] = True
            group_by_spl[sample] = group
            spl_by_group.setdefault(group, []).append(sample)
    # Store samples without group
    for spl, is_in_gp in processed_by_spl.items():
        if not is_in_gp:
            without_group[spl] = True
    # Return
    return group_by_spl, spl_by_group, without_group
def getNoise(input_noise):
    """
    Load the noise rate of artifactual variants, indexed by variant id ("chrom:pos=ref/alt").

    :param input_noise: The path to the file containing artifactual variants with their noise rate (format: TSV). The header line of the file must be "#Chromosome<tab>Position<tab>Reference_allele<tab>Alternative_allele<tab>Noise_rate".
    :type input_noise: str
    :return: By variant id ("chrom:pos=ref/alt") the noise rate as float.
    :rtype: dict
    :raises Exception: When the titles of the file are not exactly the expected ones.
    """
    expected_titles = ["Chromosome", "Position", "Reference_allele", "Alternative_allele", "Noise_rate"]
    with HashedSVIO(input_noise, title_starter="#") as reader:
        # Titles and their order must strictly match the expected header
        if reader.titles != expected_titles:
            raise Exception(
                'The header line in "{}" does not correpond to "#{}".'.format(
                    input_noise, "\t".join(expected_titles)
                )
            )
        noise_by_var = dict()
        for row in reader:
            variant_id = "{}:{}={}/{}".format(
                row["Chromosome"],
                row["Position"],
                row["Reference_allele"],
                row["Alternative_allele"]
            )
            noise_by_var[variant_id] = float(row["Noise_rate"])
    return noise_by_var
def aliasesBySymbolsFromEnsembl(in_aliases):
    """
    Load genes names aliases by symbol from an Ensembl biomart export.

    Each line links a gene name and one synonym. The link is stored in both directions: the synonym is added to the names of the gene and the gene is added to the names of the synonym. The first element of each list is the key itself.

    :param in_aliases: Path to genes synonyms from Ensembl (format: TSV). The columns used are "Gene name" and "Gene Synonym".
    :type in_aliases: str
    :return: Names aliases by each gene symbol.
    :rtype: dict
    """
    aliases_by_symbol = {}
    with HashedSVIO(in_aliases) as reader:
        for row in reader:
            gene_name = row["Gene name"]
            synonym = row["Gene Synonym"]
            # Gene name => synonym
            aliases_by_symbol.setdefault(gene_name, [gene_name]).append(synonym)
            # Synonym => gene name
            aliases_by_symbol.setdefault(synonym, [synonym]).append(gene_name)
    return aliases_by_symbol
def aliasesBySymbolsFromNCBI(in_aliases):
    """
    Load genes names aliases by symbol from NCBI RefSeq gene_info.

    For each line, the synonyms are added to the names of the symbol. Each synonym met for the first time is associated with the symbol followed by all the synonyms of the line; a synonym already known only receives the symbol.

    :param in_aliases: Path to genes synonyms from gene_info (format: TSV). The columns used are Symbol and Synonyms ("|" separated).
    :type in_aliases: str
    :return: Names aliases by each gene symbol.
    :rtype: dict
    """
    aliases_by_symbol = {}
    with HashedSVIO(in_aliases) as reader:
        for row in reader:
            symbol = row["Symbol"]
            synonyms = row["Synonyms"].split("|")
            # Symbol => synonyms
            aliases_by_symbol.setdefault(symbol, [symbol]).extend(synonyms)
            # Synonyms => symbol
            for synonym in synonyms:
                known_names = aliases_by_symbol.get(synonym)
                if known_names is None:
                    aliases_by_symbol[synonym] = [symbol] + synonyms
                else:
                    known_names.append(symbol)
    return aliases_by_symbol
help='Path to the merged variants file (format: VCF).') args = parser.parse_args() # Logger logging.basicConfig( format= '%(asctime)s -- [%(filename)s][pid:%(process)d][%(levelname)s] -- %(message)s' ) log = logging.getLogger(os.path.basename(__file__)) log.setLevel(logging.INFO) log.info("Command: " + " ".join(sys.argv)) # Get accession by chromosome ID acc_by_chrom = {} if args.input_assembly_accessions: with HashedSVIO(args.input_assembly_accessions, title_starter=None) as FH: for record in FH: acc_by_chrom[ record["sequence_id"]] = record["RefSeq_accession"] # Connect to HGVS mapper if args.input_sequence_repository is not None: os.environ["HGVS_SEQREPO_DIR"] = args.input_sequence_repository hgvs_mapper = getAssemblyMapper(args.assembly_version, args.input_UTA_config) # Write nb_records = { "analysed": 0, "fixed_HGVSg": 0, "fixed_HGVSc": 0,
args = parser.parse_args() # Logger logging.basicConfig(format='%(asctime)s -- [%(filename)s][pid:%(process)d][%(levelname)s] -- %(message)s') log = logging.getLogger(os.path.basename(__file__)) log.setLevel(logging.INFO) log.info("Command: " + " ".join(sys.argv)) # Load annotations log.info("Load model from {}.".format(args.input_annotations)) tr_by_id = {tr.annot["id"]: tr for tr in loadModel(args.input_annotations, "transcripts")} # Parse and convert domains data log.info("Parse and convert domains data from {}.".format(args.input_domains)) domains_by_tr_id = dict() with HashedSVIO(args.input_domains) as reader: for record in reader: if record['Interpro ID'] != "": record['Interpro start'] = int(record['Interpro start']) record['Interpro end'] = int(record['Interpro end']) tr_id = record['Transcript stable ID version'].split(".", 1)[0] if tr_id not in tr_by_id: log.warning("The transcript {} is missing in {}.".format(tr_id, args.input_annotations)) else: domain_id = record['Interpro ID'] # Get genomic coordinates transcript = tr_by_id[tr_id] protein = transcript.proteins[0] if len(transcript.proteins) > 1: msg = "The transcript {} is linked with several proteins {}.".format(tr_id, [prot.annot["id"] for prot in transcript.proteins]) log.error(msg)
default=".", help='Path to the output folder. [Default: %(default)s]') args = parser.parse_args() # Logger logging.basicConfig( format= '%(asctime)s -- [%(filename)s][pid:%(process)d][%(levelname)s] %(message)s' ) log = logging.getLogger() log.setLevel(logging.INFO) log.info("Command: " + " ".join(sys.argv)) # Get status by locus status_by_spl = {} with HashedSVIO(args.input_status, title_starter="") as FH_in: for record in FH_in: status_by_spl[record["sample"]] = { locus: status for locus, status in record.items() if locus not in ["sample", "sample_status"] } # Get min and max amplicon size by locus range_by_locus = {} for filename in os.listdir(args.input_data): filepath = os.path.join(args.input_data, filename) report = MSIReport.parse(filepath) for spl in report: for locus_id, locus in spl.loci.items(): if locus_id not in range_by_locus: