def sample_variants_csv(self, sample, type):
    """Write the loaded variants of one sample to a CSV file.

    The file is created at ``<self.output_files_dir>/<sample>.csv`` and
    contains one header row followed by one row per variant returned by
    ``variants_mongo.get_sample_vars(sample, type, db)``. Every value is
    converted with ``str`` before writing, so with ``QUOTE_NONNUMERIC``
    every field ends up quoted.

    :param sample: sample name; also used as the CSV file's base name.
    :param type: variant type forwarded to the ``variants_mongo`` helpers
        (the name shadows the builtin but is kept for caller compatibility).
    :return: path of the written CSV, or ``None`` when the sample is unknown
        or has no variants loaded for ``type`` (that case is only logged).
    """
    if not sampleinfo_mongo.is_sample(sample) or not variants_mongo.is_sample_loaded(sample, type):
        self.__log_sample_doesnt_exist()
        return

    out_path = "%s/%s.csv" % (self.output_files_dir, sample)
    # Parenthesised single-argument form prints identically on Python 2 and 3.
    print(out_path)

    header = ['CHROM', 'POS', 'REF', 'ALT', 'GT', 'RSID', 'Gene', 'ExonicFunc', 'AAChange', 'FREQ',
              'QC_Final', 'QC_Cov', 'QC_AF', 'In_Hotspot']

    # The context manager guarantees the file is flushed and closed before the
    # path is handed back to the caller, including when a row raises.
    with open(out_path, "w") as out_file:
        csv_writer = csv.writer(out_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_NONNUMERIC)
        csv_writer.writerow(header)

        _client, db = mongo.get_connection()  # only the db handle is used here
        total_loaded_samples = variants_mongo.count_samples()

        for var in variants_mongo.get_sample_vars(sample, type, db):
            chrom, pos, ref, alt = var['CHROM'], var['POS'], var['REF'], var['ALT']
            al1, al2 = genotypetools.get_genotype_alleles(ref, alt, var['GT_calc'])
            new_variant = {'CHROM': chrom, 'POS': pos, 'REF': ref,
                           'ALT': ",".join(alt), 'GT': "/".join([al1, al2])}

            hotspot = hotspot_mongo.get_variant(chrom, pos, ref, alt, db)
            annot = hotspot['ANNOTATION'][0]
            new_variant.update({'RSID': annot['snp137NonFlagged'],
                                'Gene': annot['Gene_refGene'],
                                'ExonicFunc': annot['ExonicFunc_refGene'],
                                'AAChange': annot['AAChange_refGene']})
            # Keep only the text after the first 'p.' marker, up to the next comma.
            if 'p.' in new_variant['AAChange']:
                new_variant['AAChange'] = new_variant['AAChange'].split('p.')[1].split(",")[0]

            # FREQ = (het + het_alt + hom counts of the hotspot) / number of loaded samples.
            zygosity = hotspot['orig_stats']['zygosity']
            carrier_count = sum([zygosity['het_count'], zygosity['het_alt_count'], zygosity['hom_count']])
            freq = carrier_count / float(total_loaded_samples)

            in_hotspot = "TRUE" if hotspot['orig_stats']['qc']['final_qc_count'] > 0 else "FALSE"
            new_variant.update({"FREQ": freq,
                                "QC_Final": var['FINAL_QC'],
                                "QC_Cov": var['COV_QC'],
                                "QC_AF": var['AF_QC'],
                                "In_Hotspot": in_hotspot})

            csv_writer.writerow([str(new_variant[field]) for field in header])

    return out_path
def __reconcile(self, chrom, pos, ref, alt, db):
    """Rewrite a hotspot variant and its loaded 'orig' variants to use ``alt``.

    Looks up hotspot documents at (chrom, pos, ref) whose ALT contains every
    allele in ``alt``. When exactly one matches, its ALT is replaced by
    ``alt`` and each matching 'orig' variant document gets its ALT, GT_calc,
    GT_orig, FAO and AF_calc rewritten against the new allele order.
    Anything unexpected is reported via ``__log_serious_hotspot_discrepancy``.
    """
    hotspot_filter = {'CHROM': chrom, 'POS': pos, 'REF': ref, 'ALT': {'$all': alt}}
    hotspots = hotspot_mongo.get_collection(db)
    variants = variants_mongo.get_collection(db)

    hits = list(hotspots.find(hotspot_filter))
    if len(hits) != 1:
        self.__log_serious_hotspot_discrepancy('THE NUMBER OF DOCUMENTS WITH MATCHING THE '
                                               'ALLELE GROUP IS NOT 1')
        return

    self.__log_reconciling_variant(chrom, pos, ref, alt)

    # Step 1: store the new allele order on the hotspot document itself.
    hotspot_result = hotspots.update_one(hotspot_filter, {'$set': {'ALT': alt}})
    if hotspot_result.modified_count != 1:
        self.__log_serious_hotspot_discrepancy('THERE WAS A PROBLEM MODIFYING THE OLD HOTSPOT DOCUMENT')

    # Step 2: bring every previously loaded variant in line with it.
    # The cursor is materialised first because the documents are updated below.
    variant_filter = {'TYPE': 'orig', 'CHROM': chrom, 'POS': pos, 'REF': ref, 'ALT': {'$all': alt}}
    for loaded in list(variants.find(variant_filter)):
        old_ref, old_alt = loaded['REF'], loaded['ALT']
        gt_calc, gt_orig = loaded['GT_calc'], loaded['GT_orig']
        fao, af_calc = loaded['FAO'], loaded['AF_calc']
        allele_order = [ref] + alt

        # Re-express the calculated genotype as indices into the new allele order.
        if gt_calc == './.':
            new_gt_calc = './.'
        else:
            first, second = genotypetools.get_genotype_alleles(old_ref, old_alt, gt_calc)
            first_idx = allele_order.index(first)
            second_idx = allele_order.index(second)
            new_gt_calc = "/".join(str(idx) for idx in sorted([first_idx, second_idx]))

        # Same translation for the original genotype call.
        if gt_orig == './.':
            new_gt_orig = './.'
        else:
            first, second = genotypetools.get_genotype_alleles(old_ref, old_alt, gt_orig)
            first_idx = allele_order.index(first)
            second_idx = allele_order.index(second)
            new_gt_orig = "/".join(str(idx) for idx in sorted([first_idx, second_idx]))

        # Only the two-entry case is handled: both per-allele lists are flipped.
        # Correcting AF_calc and FAO for more than two alternate alleles was
        # deliberately deferred because downstream analysis does not need it.
        if len(fao) == 2:
            fao = list(reversed(fao))
            af_calc = list(reversed(af_calc))

        sample_filter = {'TYPE': 'orig', 'SAMPLE': loaded['SAMPLE'], 'CHROM': chrom, 'POS': pos,
                         'REF': ref, 'ALT': {'$all': alt}}
        changes = {'$set': {'ALT': alt, 'GT_calc': new_gt_calc, 'FAO': fao,
                            'AF_calc': af_calc, 'GT_orig': new_gt_orig}}
        variant_result = variants.update_one(sample_filter, changes)
        if variant_result.modified_count != 1:
            self.__log_serious_hotspot_discrepancy('THERE WAS A PROBLEM MODIFYING THE ORIGINAL VARIANTS '
                                                   'DOCUMENT')
def __reconcile(self, chrom, pos, ref, alt, db):
    """Reconcile a hotspot variant and its loaded variants with a new ALT order.

    Finds hotspot documents at (chrom, pos, ref) whose ALT array contains all
    alleles of ``alt``. If exactly one exists, its ALT is set to ``alt`` and
    every 'orig' variant document matching the same query is updated so that
    ALT, GT_calc, GT_orig, FAO and AF_calc agree with the new allele order.
    Failures are reported through ``__log_serious_hotspot_discrepancy``;
    nothing is returned.

    :param chrom: chromosome of the variant.
    :param pos: position of the variant.
    :param ref: reference allele.
    :param alt: list of alternate alleles in the order that should be stored.
    :param db: database handle passed to the ``*_mongo.get_collection`` helpers.
    """
    query = {"CHROM": chrom, "POS": pos, "REF": ref, "ALT": {"$all": alt}}
    hotspot_coll = hotspot_mongo.get_collection(db)
    variants_coll = variants_mongo.get_collection(db)

    matched_documents = list(hotspot_coll.find(query))
    if len(matched_documents) != 1:
        # Message restored: it had been garbled to "DOCUMENTSG THE ALLELE GROUP".
        self.__log_serious_hotspot_discrepancy(
            "THE NUMBER OF DOCUMENTS WITH MATCHING THE " "ALLELE GROUP IS NOT 1"
        )
        return

    self.__log_reconciling_variant(chrom, pos, ref, alt)

    # Updating the alternate allele for the hotspot variant.
    modified_count = hotspot_coll.update_one(query, {"$set": {"ALT": alt}}).modified_count
    if modified_count != 1:
        self.__log_serious_hotspot_discrepancy("THERE WAS A PROBLEM MODIFYING THE OLD HOTSPOT DOCUMENT")

    # Index 0 is the reference allele, 1..n are the alternates in the new order.
    new_alleles = [ref] + list(alt)

    def renumber_genotype(var_ref, var_alt, genotype):
        """Translate a genotype string into indices of ``new_alleles``."""
        if genotype == "./.":
            return "./."
        al1, al2 = genotypetools.get_genotype_alleles(var_ref, var_alt, genotype)
        al_nums = sorted([new_alleles.index(al1), new_alleles.index(al2)])
        return "/".join([str(val) for val in al_nums])

    # Updating all the previously loaded variants for the hotspot variant.
    # The cursor is read fully up front because the documents are modified below.
    variant_query = {"TYPE": "orig", "CHROM": chrom, "POS": pos, "REF": ref, "ALT": {"$all": alt}}
    loaded_variants = list(variants_coll.find(variant_query))
    for var in loaded_variants:
        var_ref, var_alt, var_gt, var_gt_orig = var["REF"], var["ALT"], var["GT_calc"], var["GT_orig"]
        var_fao, var_af = var["FAO"], var["AF_calc"]

        corrected_gt_calc = renumber_genotype(var_ref, var_alt, var_gt)
        corrected_gt_orig = renumber_genotype(var_ref, var_alt, var_gt_orig)

        # With two entries, both lists are simply flipped.
        # NOTE(review): this flips even when the document's ALT is already in
        # the new order - confirm that case cannot occur.
        # TODO: correcting AF_calc and FAO for more than two alternate alleles
        # was put off because it is not needed for downstream analysis.
        if len(var_fao) == 2:
            var_fao = list(reversed(var_fao))
            var_af = list(reversed(var_af))

        sample_query = {
            "TYPE": "orig",
            "SAMPLE": var["SAMPLE"],
            "CHROM": chrom,
            "POS": pos,
            "REF": ref,
            "ALT": {"$all": alt},
        }
        update = {
            "$set": {
                "ALT": alt,
                "GT_calc": corrected_gt_calc,
                "FAO": var_fao,
                "AF_calc": var_af,
                "GT_orig": corrected_gt_orig,
            }
        }
        modified_count = variants_coll.update_one(sample_query, update).modified_count
        if modified_count != 1:
            self.__log_serious_hotspot_discrepancy(
                "THERE WAS A PROBLEM MODIFYING THE ORIGINAL VARIANTS " "DOCUMENT"
            )