def get_replicates(gemini_db):
    """
    Build a dataframe describing every sample in a database.

    The sample names come from gem_ops.get_samples(gemini_db); each name is
    passed to split_id() and the pieces it returns are placed next to the
    original name, one row per sample.

    Resulting columns, in order:
        full_name  -- the sample name exactly as returned by get_samples
        sample, plate, tissue, replicate -- the values produced by
            split_id(full_name), in the order split_id returns them

    gemini_db is handed to gem_ops.get_samples unchanged (presumably the
    database path or handle -- confirm against gem_ops).
    """
    column_names = ["full_name", "sample", "plate", "tissue", "replicate"]

    # One row per sample: the full name followed by its split-out components.
    # split_id must yield exactly four values for the row to fit the columns.
    rows = []
    for full_name in gem_ops.get_samples(gemini_db):
        rows.append([full_name] + list(split_id(full_name)))

    return pd.DataFrame(rows, columns=column_names)
min_allele_freq = args.min_allele_freq min_depth = args.min_depth min_alt_depth = args.min_alt_depth max_num_het = args.max_num_het max_aaf_all = args.max_aaf_all tissue = args.tissue.split(",") sample_pattern = args.sample_pattern annotations = args.annotations.split(",") results_file = args.results_file add_joint = args.add_joint if operation == "find_all": # Get all variants in a set of samples. # Get samples to process. samples = [sample for sample in gem_ops.get_samples(gemini_db) if re.search(sample_pattern, sample) > 0] # Get sample variants. all_vars_df = get_variants_in_samples(gemini_db, samples, annotations, min_allele_freq, min_alt_depth, min_depth, max_aaf_all, somatic=False) # Write results to file. if sample_pattern == ".*": sample_pattern = "all" out_filename = "find_vars_results_%s_minaf%.2f_ad%i_d%i.txt" % (sample_pattern, min_allele_freq, min_alt_depth, min_depth) out_file = open(out_filename, "w") out_file.write( all_vars_df.to_csv(sep="\t", index=False, float_format='%.3f') ) out_file.close() print "Wrote results to file %s" % out_filename elif operation == "augment_vars":