def ranked_vaccine_peptides_with_metadata_from_parsed_args(args):
    """
    Computes all the data needed for report generation.

    If `args` has an `input_json_file` attribute, the data is loaded from that
    JSON file (saved by a previous run) instead of being recomputed.

    Parameters
    ----------
    args : Namespace
        Parsed user args from this run

    Returns a dictionary containing 3 items:
    - 'variants': ranked variant/vaccine peptide list, limited to the first
      `args.max_mutations_in_report` entries
    - 'args': a dictionary of command-line arguments used to generate it
    - 'patient_info': patient info object
    """
    if hasattr(args, 'input_json_file'):
        with open(args.input_json_file) as f:
            data = serializable.from_json(f.read())

        # the JSON data from the previous run will have the older args saved,
        # which may need to be overridden with args from this run (which
        # should all be output related)
        data['args'].update(vars(args))

        # if we need to truncate the variant list based on
        # max_mutations_in_report, do that here. A limit of None means
        # "no limit", matching the slicing behavior of the non-JSON path
        # below; comparing an int against None would raise TypeError.
        max_mutations = args.max_mutations_in_report
        if max_mutations is not None and len(data['variants']) > max_mutations:
            data['variants'] = data['variants'][:max_mutations]
        return data

    # get various things from user args
    mhc_alleles = mhc_alleles_from_args(args)
    logger.info("MHC alleles: %s", mhc_alleles)

    variants = variant_collection_from_args(args)
    logger.info("Variants: %s", variants)

    vaxrank_results = run_vaxrank_from_parsed_args(args)

    variants_count_dict = vaxrank_results.variant_counts()
    # sanity check: the results object must account for every input variant
    assert len(variants) == variants_count_dict['num_total_variants'], \
        "Len(variants) is %d but variants_count_dict came back with %d" % (
            len(variants), variants_count_dict['num_total_variants'])

    # optionally dump per-variant metadata to a CSV file
    if args.output_passing_variants_csv:
        variant_metadata_dicts = vaxrank_results.variant_properties(
            gene_pathway_check=GenePathwayCheck())
        df = pd.DataFrame(variant_metadata_dicts)
        df.to_csv(args.output_passing_variants_csv, index=False)

    ranked_variants_with_vaccine_peptides = vaxrank_results.ranked_vaccine_peptides
    ranked_variants_with_vaccine_peptides_for_report = \
        ranked_variants_with_vaccine_peptides[:args.max_mutations_in_report]

    patient_info = PatientInfo(
        patient_id=args.output_patient_id,
        vcf_paths=variants.sources,
        bam_path=args.bam,
        mhc_alleles=mhc_alleles,
        num_somatic_variants=variants_count_dict['num_total_variants'],
        num_coding_effect_variants=variants_count_dict[
            'num_coding_effect_variants'],
        num_variants_with_rna_support=variants_count_dict[
            'num_variants_with_rna_support'],
        num_variants_with_vaccine_peptides=variants_count_dict[
            'num_variants_with_vaccine_peptides'])

    # return variants, patient info, and command-line args
    data = {
        # TODO:
        #  change this field to 'ranked_variants_with_vaccine_peptides'
        #  but figure out how to do it in a backwards compatible way
        'variants': ranked_variants_with_vaccine_peptides_for_report,
        'patient_info': patient_info,
        'args': vars(args),
    }
    logger.info('About to save args: %s', data['args'])

    # save JSON data if necessary. as of time of writing, vaxrank takes ~25 min
    # to run, most of which is core logic. the formatting is super fast, and it
    # can be useful to save the data to be able to iterate just on the
    # formatting
    if args.output_json_file:
        with open(args.output_json_file, 'w') as f:
            f.write(serializable.to_json(data))
            logger.info('Wrote JSON report data to %s', args.output_json_file)

    return data
def ranked_variant_list_with_metadata(args):
    """
    Computes all the data needed for report generation.

    If `args` has an `input_json_file` attribute, the data is loaded from that
    JSON file (saved by a previous run) instead of being recomputed.

    Parameters
    ----------
    args : Namespace
        Parsed user args from this run

    Returns a dictionary containing 3 items:
    - 'variants': ranked variant/vaccine peptide list, limited to the first
      `args.max_mutations_in_report` entries
    - 'args': a dictionary of command-line arguments used to generate it
    - 'patient_info': patient info object
    """
    if hasattr(args, 'input_json_file'):
        with open(args.input_json_file) as f:
            data = serializable.from_json(f.read())

        # the JSON data from the previous run will have the older args saved,
        # which may need to be overridden with args from this run (which
        # should all be output related)
        data['args'].update(vars(args))

        # if we need to truncate the variant list based on
        # max_mutations_in_report, do that here. A limit of None means
        # "no limit", matching the slicing behavior of the non-JSON path
        # below; comparing an int against None would raise TypeError.
        max_mutations = args.max_mutations_in_report
        if max_mutations is not None and len(data['variants']) > max_mutations:
            data['variants'] = data['variants'][:max_mutations]
        return data

    # get various things from user args
    mhc_alleles = mhc_alleles_from_args(args)
    logger.info("MHC alleles: %s", mhc_alleles)

    variants = variant_collection_from_args(args)
    logger.info("Variants: %s", variants)

    # generator that for each variant gathers all RNA reads, both those
    # supporting the variant and reference alleles
    reads_generator = allele_reads_generator_from_args(args)
    mhc_predictor = mhc_binding_predictor_from_args(args)

    ranked_list, variants_count_dict = ranked_vaccine_peptides(
        reads_generator=reads_generator,
        mhc_predictor=mhc_predictor,
        vaccine_peptide_length=args.vaccine_peptide_length,
        padding_around_mutation=args.padding_around_mutation,
        max_vaccine_peptides_per_variant=args.max_vaccine_peptides_per_mutation,
        min_alt_rna_reads=args.min_alt_rna_reads,
        min_variant_sequence_coverage=args.min_variant_sequence_coverage,
        min_epitope_score=args.min_epitope_score,
        num_mutant_epitopes_to_keep=args.num_epitopes_per_peptide,
        variant_sequence_assembly=args.variant_sequence_assembly)

    ranked_list_for_report = ranked_list[:args.max_mutations_in_report]

    patient_info = PatientInfo(
        patient_id=args.output_patient_id,
        vcf_paths=variants.sources,
        bam_path=args.bam,
        mhc_alleles=mhc_alleles,
        num_somatic_variants=len(variants),
        num_coding_effect_variants=variants_count_dict[
            'num_coding_effect_variants'],
        num_variants_with_rna_support=variants_count_dict[
            'num_variants_with_rna_support'],
        num_variants_with_vaccine_peptides=variants_count_dict[
            'num_variants_with_vaccine_peptides'])

    # return variants, patient info, and command-line args
    data = {
        'variants': ranked_list_for_report,
        'patient_info': patient_info,
        'args': vars(args),
    }
    logger.info('About to save args: %s', data['args'])

    # save JSON data if necessary. as of time of writing, vaxrank takes ~25 min
    # to run, most of which is core logic. the formatting is super fast, and it
    # can be useful to save the data to be able to iterate just on the
    # formatting
    if args.output_json_file:
        with open(args.output_json_file, 'w') as f:
            f.write(serializable.to_json(data))
            logger.info('Wrote JSON report data to %s', args.output_json_file)

    return data
def ranked_variant_list_with_metadata(args):
    """
    Computes all the data needed for report generation.

    If `args` has an `input_json_file` attribute, the data is loaded from that
    JSON file (saved by a previous run) instead of being recomputed.

    Parameters
    ----------
    args : Namespace
        Parsed user args from this run

    Returns a dictionary containing 3 items:
    - 'variants': ranked variant/vaccine peptide list, limited to the first
      `args.max_mutations_in_report` entries
    - 'args': a dictionary of command-line arguments used to generate it
    - 'patient_info': patient info object
    """
    if hasattr(args, 'input_json_file'):
        with open(args.input_json_file) as f:
            data = serializable.from_json(f.read())

        # the JSON data from the previous run will have the older args saved,
        # which may need to be overridden with args from this run (which
        # should all be output related)
        data['args'].update(vars(args))

        # if we need to truncate the variant list based on
        # max_mutations_in_report, do that here. Skip the length comparison
        # when no limit was given: the non-JSON path below slices with the
        # same value, where None keeps the whole list, whereas comparing an
        # int against None would raise TypeError.
        report_limit = args.max_mutations_in_report
        if report_limit is not None and len(data['variants']) > report_limit:
            data['variants'] = data['variants'][:report_limit]
        return data

    # get various things from user args
    mhc_alleles = mhc_alleles_from_args(args)
    logger.info("MHC alleles: %s", mhc_alleles)

    variants = variant_collection_from_args(args)
    logger.info("Variants: %s", variants)

    # generator that for each variant gathers all RNA reads, both those
    # supporting the variant and reference alleles
    reads_generator = allele_reads_generator_from_args(args)
    mhc_predictor = mhc_binding_predictor_from_args(args)

    ranked_list, variants_count_dict = ranked_vaccine_peptides(
        reads_generator=reads_generator,
        mhc_predictor=mhc_predictor,
        vaccine_peptide_length=args.vaccine_peptide_length,
        padding_around_mutation=args.padding_around_mutation,
        max_vaccine_peptides_per_variant=args.max_vaccine_peptides_per_mutation,
        min_alt_rna_reads=args.min_alt_rna_reads,
        min_variant_sequence_coverage=args.min_variant_sequence_coverage,
        min_epitope_score=args.min_epitope_score,
        num_mutant_epitopes_to_keep=args.num_epitopes_per_peptide,
        variant_sequence_assembly=args.variant_sequence_assembly)

    ranked_list_for_report = ranked_list[:args.max_mutations_in_report]

    patient_info = PatientInfo(
        patient_id=args.output_patient_id,
        vcf_paths=variants.sources,
        bam_path=args.bam,
        mhc_alleles=mhc_alleles,
        num_somatic_variants=len(variants),
        num_coding_effect_variants=variants_count_dict['num_coding_effect_variants'],
        num_variants_with_rna_support=variants_count_dict['num_variants_with_rna_support'],
        num_variants_with_vaccine_peptides=variants_count_dict['num_variants_with_vaccine_peptides']
    )

    # return variants, patient info, and command-line args
    data = {
        'variants': ranked_list_for_report,
        'patient_info': patient_info,
        'args': vars(args),
    }
    logger.info('About to save args: %s', data['args'])

    # save JSON data if necessary. as of time of writing, vaxrank takes ~25 min
    # to run, most of which is core logic. the formatting is super fast, and it
    # can be useful to save the data to be able to iterate just on the
    # formatting
    if args.output_json_file:
        with open(args.output_json_file, 'w') as f:
            f.write(serializable.to_json(data))
            logger.info('Wrote JSON report data to %s', args.output_json_file)

    return data