def test_serialization():
    """Round-trip a VariantCollection with metadata through pickle and JSON."""
    collection = VariantCollection([
        Variant(1, start=10, ref="AA", alt="AAT", ensembl=77),
        Variant(10, start=15, ref="A", alt="G"),
        Variant(20, start=150, ref="", alt="G"),
    ])
    collection.metadata[collection[0]] = {"a": "b"}
    collection.metadata[collection[2]] = {"bar": 2}

    # Computing effects makes the variants' ensembl objects open a SQL
    # connection, after which the ensembl object itself can no longer be
    # serialized. Calling it first verifies that serialization does not try
    # to dump the ensembl object directly.
    collection.effects()

    def check_matches_original(restored):
        # The restored collection, its first variant and that variant's
        # metadata must all equal their counterparts in the original.
        eq_(collection, restored)
        eq_(restored[0], collection[0])
        eq_(
            restored.metadata[collection[0]],
            collection.metadata[collection[0]])

    # Pickle round trip.
    pickled = pickle.dumps(collection)
    check_matches_original(pickle.loads(pickled))

    # JSON round trip.
    json_text = collection.to_json()
    check_matches_original(VariantCollection.from_json(json_text))
def variant_collection_from_args(args):
    """Build one VariantCollection from parsed command-line arguments.

    Variants are gathered, in this order, from every path in ``args.vcf``,
    every path in ``args.maf``, the ``(chromosome, position, ref, alt)``
    tuples in ``args.variant`` and every JSON file listed in
    ``args.json_variant_files``.

    Parameters
    ----------
    args : namespace
        Object exposing the attributes ``reference_name``, ``vcf``, ``maf``,
        ``variant`` and ``json_variant_files``.

    Returns
    -------
    The single loaded collection when exactly one was loaded; otherwise a
    new VariantCollection containing the variants of all loaded collections.

    Raises
    ------
    ValueError
        If ``args.variant`` is given but no genome could be obtained from
        ``args.reference_name``, or if no collection was loaded at all.
    """
    if args.reference_name:
        genome = genome_for_reference_name(args.reference_name)
    else:
        # no genome specified, assume it can be inferred from the file(s)
        # we're loading
        genome = None

    variant_collections = [
        load_vcf(vcf_path, genome=genome) for vcf_path in args.vcf
    ]
    variant_collections.extend(load_maf(maf_path) for maf_path in args.maf)

    if args.variant:
        if not genome:
            raise ValueError(
                "--reference-name must be specified when using --variant")
        variants = [
            Variant(
                chromosome,
                start=position,
                ref=ref,
                alt=alt,
                ensembl=genome)
            for (chromosome, position, ref, alt) in args.variant
        ]
        variant_collections.append(VariantCollection(variants))

    for json_path in args.json_variant_files:
        with open(json_path, 'r') as f:
            json_string = f.read()
        variant_collections.append(VariantCollection.from_json(json_string))

    # Check for "nothing loaded" only after every input source, including
    # the JSON files, has been consulted; checking earlier would reject
    # invocations that supply JSON variant files only.
    if not variant_collections:
        raise ValueError(
            "No variants loaded "
            "(use --maf, --vcf, --variant, or --json-variants options)")

    if len(variant_collections) == 1:
        return variant_collections[0]

    combined_variants = [
        variant
        for variant_collection in variant_collections
        for variant in variant_collection
    ]
    return VariantCollection(combined_variants)
def variant_collection_from_args(args):
    """Build one VariantCollection from parsed command-line arguments.

    Variants are gathered, in this order, from every path in ``args.vcf``,
    every path in ``args.maf``, the ``(chromosome, position, ref, alt)``
    tuples in ``args.variant`` and every JSON file listed in
    ``args.json_variant_files``.

    Parameters
    ----------
    args : namespace
        Object exposing the attributes ``reference_name``, ``vcf``, ``maf``,
        ``variant`` and ``json_variant_files``.

    Returns
    -------
    The single loaded collection when exactly one was loaded; otherwise a
    new VariantCollection containing the variants of all loaded collections.

    Raises
    ------
    ValueError
        If ``args.variant`` is given but no genome could be obtained from
        ``args.reference_name``, or if no collection was loaded at all.
    """
    if args.reference_name:
        genome = genome_for_reference_name(args.reference_name)
    else:
        # no genome specified, assume it can be inferred from the file(s)
        # we're loading
        genome = None

    variant_collections = []
    for vcf_path in args.vcf:
        variant_collections.append(load_vcf(vcf_path, genome=genome))
    for maf_path in args.maf:
        variant_collections.append(load_maf(maf_path))

    if args.variant:
        if not genome:
            raise ValueError(
                "--reference-name must be specified when using --variant")
        variants = [
            Variant(
                chromosome,
                start=position,
                ref=ref,
                alt=alt,
                ensembl=genome)
            for (chromosome, position, ref, alt) in args.variant
        ]
        variant_collections.append(VariantCollection(variants))

    for json_path in args.json_variant_files:
        with open(json_path, 'r') as f:
            json_string = f.read()
        variant_collections.append(VariantCollection.from_json(json_string))

    # The emptiness check must come after the JSON files are read: raising
    # before that point would make JSON-only invocations fail even though
    # they do provide variants.
    if not variant_collections:
        raise ValueError(
            "No variants loaded "
            "(use --maf, --vcf, --variant, or --json-variants options)")

    if len(variant_collections) == 1:
        return variant_collections[0]

    combined_variants = []
    for variant_collection in variant_collections:
        combined_variants.extend(variant_collection)
    return VariantCollection(combined_variants)
def test_variant_collection_serialization():
    """Check that a VariantCollection survives pickle and JSON round trips."""
    variants = [
        Variant(1, start=10, ref="AA", alt="AAT"),
        Variant(10, start=15, ref="A", alt="G"),
        Variant(20, start=150, ref="", alt="G"),
    ]
    # Every variant gets its own (equal but distinct) metadata dictionary.
    per_variant_metadata = {}
    for variant in variants:
        per_variant_metadata[variant] = {"a": "b", "bar": 2}
    collection = VariantCollection(
        variants,
        source_to_metadata_dict={"test_data": per_variant_metadata})

    # Computing effects makes the variants' ensembl objects open a SQL
    # connection, which leaves the ensembl object non-serializable. Doing
    # this first checks that serialization never dumps that object directly.
    collection.effects()

    first_variant = collection[0]
    expected_metadata = collection.metadata

    # Pickle round trip of the collection itself.
    unpickled = pickle.loads(pickle.dumps(collection))
    eq_(collection, unpickled)
    eq_(unpickled[0], first_variant)
    eq_(
        unpickled.metadata[first_variant],
        expected_metadata[first_variant])

    # Pickle round trip of a collection produced by intersection.
    intersected = collection.intersection(collection)
    unpickled_intersected = pickle.loads(pickle.dumps(intersected))
    eq_(intersected, unpickled_intersected)

    # JSON round trip.
    from_json = VariantCollection.from_json(collection.to_json())
    eq_(collection, from_json)
    eq_(from_json[0], first_variant)

    # pylint: disable=no-member
    eq_(
        from_json.metadata[first_variant],
        expected_metadata[first_variant])
def test_variant_collection_serialization():
    """Verify pickle and JSON serialization of a VariantCollection."""
    source_variants = [
        Variant(1, start=10, ref="AA", alt="AAT"),
        Variant(10, start=15, ref="A", alt="G"),
        Variant(20, start=150, ref="", alt="G"),
    ]
    # Build the per-variant metadata up front; each variant is mapped to a
    # separate dictionary with identical contents.
    metadata_by_variant = dict(
        (variant, {"a": "b", "bar": 2}) for variant in source_variants)
    source_collection = VariantCollection(
        source_variants,
        source_to_metadata_dict={"test_data": metadata_by_variant})

    # Requesting effects forces the variants' ensembl objects to make a SQL
    # connection, which makes them non-serializable. Running it before
    # serializing confirms the ensembl object is not serialized directly.
    source_collection.effects()

    head_variant = source_collection[0]
    source_metadata = source_collection.metadata

    # --- pickle ---
    pickled_bytes = pickle.dumps(source_collection)
    from_pickle = pickle.loads(pickled_bytes)
    eq_(source_collection, from_pickle)
    eq_(from_pickle[0], head_variant)
    eq_(from_pickle.metadata[head_variant], source_metadata[head_variant])

    self_intersection = source_collection.intersection(source_collection)
    self_intersection_from_pickle = pickle.loads(
        pickle.dumps(self_intersection))
    eq_(self_intersection, self_intersection_from_pickle)

    # --- JSON ---
    json_text = source_collection.to_json()
    from_json = VariantCollection.from_json(json_text)
    eq_(source_collection, from_json)
    eq_(from_json[0], head_variant)

    # pylint: disable=no-member
    eq_(from_json.metadata[head_variant], source_metadata[head_variant])