示例#1
0
def test_serialization():
    """Round-trip a VariantCollection with metadata through pickle and JSON."""
    variants = [
        Variant(1, start=10, ref="AA", alt="AAT", ensembl=77),
        Variant(10, start=15, ref="A", alt="G"),
        Variant(20, start=150, ref="", alt="G"),
    ]
    original = VariantCollection(variants)
    original.metadata[original[0]] = {"a": "b"}
    original.metadata[original[2]] = {"bar": 2}

    # Computing effects forces the variants' ensembl objects to open a SQL
    # connection, after which those objects can no longer be serialized.
    # Doing it up front verifies that serialization never tries to dump the
    # ensembl object directly.
    original.effects()

    first_variant = original[0]

    # Pickle round trip.
    unpickled = pickle.loads(pickle.dumps(original))
    eq_(original, unpickled)
    eq_(unpickled[0], first_variant)
    eq_(unpickled.metadata[first_variant], original.metadata[first_variant])

    # JSON round trip.
    from_json = VariantCollection.from_json(original.to_json())
    eq_(original, from_json)
    eq_(from_json[0], first_variant)
    eq_(from_json.metadata[first_variant], original.metadata[first_variant])
示例#2
0
def variant_collection_from_args(args):
    """Build a single VariantCollection from parsed command-line arguments.

    Variants are gathered from every source named on ``args``:

    * ``args.vcf`` and ``args.maf``: iterables of file paths,
    * ``args.variant``: iterable of ``(chromosome, position, ref, alt)``
      tuples (requires ``args.reference_name``),
    * ``args.json_variant_files``: iterable of paths to files holding a
      JSON-serialized variant collection.

    Returns:
        The loaded collection itself when exactly one source produced one,
        otherwise a new VariantCollection holding the variants of every
        loaded collection.

    Raises:
        ValueError: if ``args.variant`` is given without a reference genome,
            or if none of the sources produced a collection.
    """
    if args.reference_name:
        genome = genome_for_reference_name(args.reference_name)
    else:
        # no genome specified, assume it can be inferred from the file(s)
        # we're loading
        genome = None

    variant_collections = [
        load_vcf(vcf_path, genome=genome) for vcf_path in args.vcf
    ]
    variant_collections.extend(load_maf(maf_path) for maf_path in args.maf)

    if args.variant:
        if not genome:
            raise ValueError(
                "--reference-name must be specified when using --variant")

        variants = [
            Variant(
                chromosome,
                start=position,
                ref=ref,
                alt=alt,
                ensembl=genome)
            for (chromosome, position, ref, alt) in args.variant
        ]
        variant_collections.append(VariantCollection(variants))

    for json_path in args.json_variant_files:
        with open(json_path, 'r') as f:
            json_string = f.read()
        variant_collections.append(VariantCollection.from_json(json_string))

    # The emptiness check must come after *all* sources, including the JSON
    # files, have been read; checking earlier rejects JSON-only invocations.
    if not variant_collections:
        raise ValueError(
            "No variants loaded "
            "(use --maf, --vcf, --variant, or --json-variants options)")

    if len(variant_collections) == 1:
        return variant_collections[0]

    # NOTE(review): only the variants themselves are carried over into the
    # combined collection; confirm whether per-source metadata should be too.
    combined_variants = [
        variant
        for variant_collection in variant_collections
        for variant in variant_collection
    ]
    return VariantCollection(combined_variants)
示例#3
0
def variant_collection_from_args(args):
    """Assemble one VariantCollection from the variant sources on ``args``.

    Sources read, in order: each path in ``args.vcf``, each path in
    ``args.maf``, the ``(chromosome, position, ref, alt)`` tuples in
    ``args.variant`` (which need ``args.reference_name`` to be set), and each
    path in ``args.json_variant_files`` (a JSON-serialized collection).

    Returns:
        The single loaded collection when only one was produced, otherwise a
        new VariantCollection built from the variants of all of them.

    Raises:
        ValueError: if ``args.variant`` is used without a reference genome,
            or if no source produced any collection.
    """
    variant_collections = []

    if args.reference_name:
        genome = genome_for_reference_name(args.reference_name)
    else:
        # no genome specified, assume it can be inferred from the file(s)
        # we're loading
        genome = None

    for vcf_path in args.vcf:
        variant_collections.append(load_vcf(vcf_path, genome=genome))
    for maf_path in args.maf:
        variant_collections.append(load_maf(maf_path))

    if args.variant:
        if not genome:
            raise ValueError(
                "--reference-name must be specified when using --variant")

        variants = [
            Variant(chromosome,
                    start=position,
                    ref=ref,
                    alt=alt,
                    ensembl=genome)
            for (chromosome, position, ref, alt) in args.variant
        ]
        variant_collections.append(VariantCollection(variants))

    for json_path in args.json_variant_files:
        with open(json_path, 'r') as f:
            json_string = f.read()
        variant_collections.append(VariantCollection.from_json(json_string))

    # Validate only once every source (JSON files included) has been read.
    # A check placed before the JSON loop would raise for JSON-only input and
    # make this final check unreachable.
    if not variant_collections:
        raise ValueError(
            "No variants loaded "
            "(use --maf, --vcf, --variant, or --json-variants options)")

    if len(variant_collections) == 1:
        return variant_collections[0]

    # NOTE(review): the merged collection is built from variants alone;
    # confirm whether metadata from the individual collections is needed.
    combined_variants = []
    for variant_collection in variant_collections:
        combined_variants.extend(variant_collection)
    return VariantCollection(combined_variants)
示例#4
0
def test_variant_collection_serialization():
    """Verify a VariantCollection with source metadata survives pickle/JSON."""
    variants = [
        Variant(1, start=10, ref="AA", alt="AAT"),
        Variant(10, start=15, ref="A", alt="G"),
        Variant(20, start=150, ref="", alt="G"),
    ]
    # Every variant gets its own (equal-valued) metadata dictionary.
    metadata_by_variant = {v: {"a": "b", "bar": 2} for v in variants}
    collection = VariantCollection(
        variants,
        source_to_metadata_dict={"test_data": metadata_by_variant})

    # Computing effects forces the variants' ensembl objects to open a SQL
    # connection, after which they cannot be serialized. Doing so first
    # checks that serialization does not dump the ensembl object directly.
    collection.effects()

    first_variant = collection[0]
    metadata = collection.metadata

    # Pickle round trip of the collection itself.
    pickled = pickle.dumps(collection)
    unpickled = pickle.loads(pickled)
    eq_(collection, unpickled)
    eq_(unpickled[0], first_variant)
    eq_(unpickled.metadata[first_variant], metadata[first_variant])

    # Pickle round trip of a collection produced by a set operation.
    intersected = collection.intersection(collection)
    intersected_unpickled = pickle.loads(pickle.dumps(intersected))
    eq_(intersected, intersected_unpickled)

    # JSON round trip.
    json_text = collection.to_json()
    from_json = VariantCollection.from_json(json_text)
    eq_(collection, from_json)
    eq_(from_json[0], first_variant)

    # pylint: disable=no-member
    eq_(from_json.metadata[first_variant], metadata[first_variant])
def test_variant_collection_serialization():
    """Round-trip a metadata-carrying VariantCollection via pickle and JSON."""
    variant_list = [
        Variant(1, start=10, ref="AA", alt="AAT"),
        Variant(10, start=15, ref="A", alt="G"),
        Variant(20, start=150, ref="", alt="G"),
    ]
    source_metadata = {
        "test_data": {
            variant: {"a": "b", "bar": 2}
            for variant in variant_list
        },
    }
    original = VariantCollection(
        variant_list, source_to_metadata_dict=source_metadata)

    # Calling effects() makes each variant's ensembl object open a SQL
    # connection, which renders that object non-serializable; this ensures
    # the serializers below never try to dump the ensembl object itself.
    original.effects()

    first = original[0]
    expected_metadata = original.metadata

    # --- pickle ---
    restored = pickle.loads(pickle.dumps(original))
    eq_(original, restored)
    eq_(restored[0], first)
    eq_(restored.metadata[first], expected_metadata[first])

    self_intersection = original.intersection(original)
    restored_intersection = pickle.loads(pickle.dumps(self_intersection))
    eq_(self_intersection, restored_intersection)

    # --- JSON ---
    decoded = VariantCollection.from_json(original.to_json())
    eq_(original, decoded)
    eq_(decoded[0], first)

    # pylint: disable=no-member
    eq_(decoded.metadata[first], expected_metadata[first])