def test_get_mongo_variant(setup_database, vcf_case, get_institute):
    """Convert the first parsed variant to a mongo variant and check its fields."""
    parser = VCFParser(infile=one_variant)

    # Map every individual id reported by the parser onto itself.
    sample_ids = parser.individuals
    scout_individuals = {sample_id: sample_id for sample_id in sample_ids}

    # Exhaust the parser, then keep only the first variant for the checks.
    parsed_variants = list(parser)
    first_variant = parsed_variants[0]

    mongo_variant = get_mongo_variant(
        variant=first_variant,
        variant_type='clinical',
        individuals=scout_individuals,
        case=vcf_case,
        institute=get_institute,
        variant_count=100,
    )

    assert mongo_variant.chromosome == '14'
    assert mongo_variant.reference == 'C'
    assert mongo_variant.alternative == 'A'
    assert len(mongo_variant.genes) == 2
    assert len(mongo_variant.compounds) == 3
def add_variants(self, vcf_file, variant_type, case,
                 variant_number_treshold=5000, rank_score_threshold=0):
    """Add variants from a vcf file to the mongo database.

    Any existing variants of ``variant_type`` for the case are deleted
    before the new ones are inserted. Parsing stops at the first variant
    whose rank score is not above ``rank_score_threshold``, or as soon as
    ``variant_number_treshold`` variants have been inserted.

    Args:
        vcf_file(str): Path to a vcf file
        variant_type(str): 'research' or 'clinical'
        case(Case): The case for which the variants should be uploaded
        variant_number_treshold(int): Maximum number of variants to
            insert. A falsy value (0 or None) disables the limit.
        rank_score_threshold(int): Only variants with a rank score
            strictly above this value are inserted.
    """
    case_id = case.case_id

    logger.info("Setting up a variant parser")
    variant_parser = VCFParser(infile=vcf_file)
    nr_of_variants = 0

    self.delete_variants(case_id, variant_type)
    institute = self.institute(institute_id=case.owner)
    start_inserting_variants = datetime.now()

    # Check which individuals exist in the vcf file.
    # Save the individuals in a dictionary with individual ids as keys
    # and display names as values.
    individuals = {}
    vcf_individuals = variant_parser.individuals
    logger.info("Checking which individuals in ped file exists in vcf")
    for individual in case.individuals:
        individual_id = individual.individual_id
        display_name = individual.display_name
        logger.debug("Checking individual {0}".format(individual_id))
        if individual_id in vcf_individuals:
            logger.debug("Individual {0} found".format(individual_id))
            individuals[individual_id] = display_name
        else:
            logger.warning("Individual {0} is present in ped file but"
                           " not in vcf".format(individual_id))

    logger.info('Start parsing variants')

    # Rank scores are looked up per case, keyed on the case display name.
    case_name = case.display_name

    for variant in variant_parser:
        logger.debug("Parsing variant {0}".format(variant['variant_id']))

        # Stop at the first variant that is not above the rank score
        # threshold.
        # NOTE(review): breaking here presumably relies on the vcf being
        # sorted by descending rank score - confirm with the producer.
        if not float(variant['rank_scores'][case_name]) > rank_score_threshold:
            logger.info("Lower rank score threshold reached after {0}"
                        " variants".format(nr_of_variants))
            break

        # The count is checked before it is incremented, so '>=' is needed
        # to insert at most `variant_number_treshold` variants ('>' would
        # let one extra variant through).
        if variant_number_treshold and nr_of_variants >= variant_number_treshold:
            logger.info("Variant number threshold reached. \n({0})".format(
                variant_number_treshold))
            break

        nr_of_variants += 1
        mongo_variant = get_mongo_variant(
            variant=variant,
            variant_type=variant_type,
            individuals=individuals,
            case=case,
            institute=institute,
            variant_count=nr_of_variants,
        )

        logger.debug("Saving variant {0}".format(mongo_variant.display_name))
        mongo_variant.save()

        # Progress report every 1000 inserted variants.
        if nr_of_variants % 1000 == 0:
            logger.info('{0} variants parsed'.format(nr_of_variants))

    logger.info("Parsing variants done")
    logger.info("{0} variants inserted".format(nr_of_variants))
    logger.info("Time to insert variants: {0}".format(
        datetime.now() - start_inserting_variants))