Пример #1
0
def test_rank_models():
    """Check dict-entry lookup of a string annotation in the INFO field.

    The INFO entry ``GeneticModels=1:AD|AD_dn,2:AR_hom`` is read with
    ``dict_key='1'`` from both the dict and the raw-line representation of
    the variant; ``'AD'`` is expected in both cases.
    """
    info = "GeneticModels=1:AD|AD_dn,2:AR_hom"
    variant_dict = get_variant_dict(info=info)
    variant_line = get_variant_line(info=info)

    # 'AD' is given a higher string-rule weight than 'AD_dn'.
    model_rules = {'AD_dn': 1, 'AD': 2}
    plugin = Plugin(
        name='genetic_models',
        field='INFO',
        data_type='string',
        info_key="GeneticModels",
        separators=[',', ':', '|'],
        dict_entry=True,
        string_rules=model_rules,
    )

    from_dict = plugin.get_value(variant_dict=variant_dict, dict_key='1')
    from_line = plugin.get_value(variant_line=variant_line, dict_key='1')

    assert from_dict == 'AD'
    assert from_line == 'AD'
Пример #2
0
def test_pos():
    """Check that a POS plugin returns the position unchanged.

    The same position string is placed in a variant line and a variant dict
    and must come back as-is from both.
    """
    expected = '1000'
    plugin = Plugin(name='Pos', field='POS')

    as_line = get_variant_line(pos=expected)
    as_dict = get_variant_dict(pos=expected)

    assert expected == plugin.get_value(variant_line=as_line)
    assert expected == plugin.get_value(variant_dict=as_dict)
Пример #3
0
def test_chrom():
    """Check that a CHROM plugin returns the raw chromosome.

    Both the line-based and the dict-based lookup must give back the
    chromosome string the variant was built with.
    """
    expected = '10'
    plugin = Plugin(name='Chrom', field='CHROM')

    # Build both representations first, then query them in the same order.
    sources = (
        {'variant_line': get_variant_line(chrom=expected)},
        {'variant_dict': get_variant_dict(chrom=expected)},
    )
    for source in sources:
        assert plugin.get_value(**source) == expected
Пример #4
0
def test_id_flag():
    """Check that an ID plugin without a data type returns the raw ID.

    A single database id is stored in the variant and must be returned
    unchanged from both the line and the dict representation.
    """
    expected_id = 'rs001'
    plugin = Plugin(name='ID', field='ID')

    as_line = get_variant_line(db_id=expected_id)
    as_dict = get_variant_dict(db_id=expected_id)

    assert expected_id == plugin.get_value(variant_line=as_line)
    assert expected_id == plugin.get_value(variant_dict=as_dict)
Пример #5
0
def test_multiple_id_no_rule():
    """Check a flag-typed ID plugin on a variant carrying two ids.

    The ID column holds ``rs001;rs002`` and no record rule is configured;
    with ``data_type='flag'`` the plugin is expected to report ``True`` for
    both the line and the dict representation.
    """
    plugin = Plugin(name='ID', field='ID', data_type='flag')

    # Two database ids joined with ';' in the ID column.
    combined_id = ";".join(['rs001', 'rs002'])
    as_line = get_variant_line(db_id=combined_id)
    as_dict = get_variant_dict(db_id=combined_id)

    found_in_line = plugin.get_value(variant_line=as_line)
    assert found_in_line == True
    found_in_dict = plugin.get_value(variant_dict=as_dict)
    assert found_in_dict == True
Пример #6
0
def test_flag():
    """Check that a flag-typed INFO plugin reports the ``DB`` flag as set.

    The variants are built with the fixture defaults.
    NOTE(review): presumably the default INFO field contains ``DB`` --
    confirm against ``get_variant_line`` / ``get_variant_dict``.
    """
    plugin_settings = {
        'name': 'DB',
        'field': 'INFO',
        'info_key': "DB",
        'data_type': 'flag',
    }
    plugin = Plugin(**plugin_settings)

    as_line = get_variant_line()
    as_dict = get_variant_dict()

    assert plugin.get_value(variant_line=as_line) == True
    assert plugin.get_value(variant_dict=as_dict) == True
Пример #7
0
def test_rank_score_with_key():
    """Check dict-entry lookup of an integer annotation in the INFO field.

    ``RankScore=1:12,2:11`` is queried with ``dict_key='1'``; the integer
    ``12`` is expected from both the dict and the line representation.
    """
    info = "RankScore=1:12,2:11"
    variant_dict = get_variant_dict(info=info)
    variant_line = get_variant_line(info=info)

    plugin = Plugin(
        name='rank_score',
        data_type='integer',
        field='INFO',
        info_key="RankScore",
        separators=[',', ':'],
        dict_entry=True,
    )

    from_dict = plugin.get_value(variant_dict=variant_dict, dict_key='1')
    from_line = plugin.get_value(variant_line=variant_line, dict_key='1')

    assert from_dict == 12
    assert from_line == 12
Пример #8
0
def test_1000G_float():
    """Check that a float-typed INFO plugin returns ``1000GAF`` as a float.

    The variants are built with the fixture defaults.
    NOTE(review): presumably the default INFO field carries
    ``1000GAF=0.744609`` -- confirm against the fixtures.
    """
    expected = 0.744609
    plugin = Plugin(
        name='thousand_g',
        field='INFO',
        info_key="1000GAF",
        separators=[','],
        data_type='float',
    )

    # Build both representations first, then query them in the same order.
    sources = (
        {'variant_line': get_variant_line()},
        {'variant_dict': get_variant_dict()},
    )
    for source in sources:
        assert plugin.get_value(**source) == expected
Пример #9
0
def test_1000G_record_rule():
    """Check that ``record_rule='min'`` picks the smallest float entry.

    ``1000GAF`` holds two comma separated values (0.744609 and 0.02); the
    plugin is expected to return 0.02 for both the line and the dict
    representation.
    """
    info = "1000GAF=0.744609,0.02;AC=2;AF=1.00;AN=2"
    expected_minimum = 0.02

    plugin = Plugin(
        name='thousand_g',
        field='INFO',
        info_key="1000GAF",
        separators=[','],
        data_type='float',
        record_rule='min',
    )

    as_line = get_variant_line(info=info)
    as_dict = get_variant_dict(info=info)

    assert plugin.get_value(variant_line=as_line) == expected_minimum
    assert plugin.get_value(variant_dict=as_dict) == expected_minimum
Пример #10
0
def test_filter_min_rule():
    """Check that ``record_rule='min'`` picks the lowest-ranked FILTER string.

    The FILTER column holds ``PASS;NOT_PASS``; with ``NOT_PASS`` weighted 1
    and ``PASS`` weighted 2, ``'NOT_PASS'`` is expected from both the line
    and the dict representation.
    """
    filter_weights = {'PASS': 2, 'NOT_PASS': 1}
    plugin = Plugin(
        name='Filter',
        field='FILTER',
        data_type='string',
        string_rules=filter_weights,
        record_rule='min',
    )

    filter_column = "PASS;NOT_PASS"
    as_line = get_variant_line(filt=filter_column)
    as_dict = get_variant_dict(filt=filter_column)

    lowest = 'NOT_PASS'
    assert plugin.get_value(variant_line=as_line) == lowest
    assert plugin.get_value(variant_dict=as_dict) == lowest
Пример #11
0
def filter(variant_file, annotation, threshold, discard, greater, silent,
           outfile):
    """
    Filter vcf variants.

    Filter variants based on their annotation

    The VCF header is parsed first. Every following line is treated as a
    variant and is printed only if the value of ``annotation`` in its INFO
    field passes the threshold test. When an annotation holds several comma
    separated values the smallest one is used (``record_rule='min'``).

    Args:
        variant_file: Handed to ``get_file_handle``; the result is iterated
            line by line.
        annotation: INFO key to filter on. It has to be declared in the VCF
            header, otherwise the process exits with status 1.
        threshold: Value the annotation is compared against.
        discard: If true, variants without the annotation are dropped;
            otherwise they are kept.
        greater: If true, keep variants whose value is strictly greater
            than ``threshold``; otherwise keep those strictly below it.
        silent: Forwarded to ``print_headers`` and ``print_variant``.
        outfile: Forwarded to ``print_headers`` and ``print_variant``.
    """
    logger.info("Running genmod filter version {0}".format(__version__))

    # Use one explicit iterator so that the header loop and the variant loop
    # below consume the very same stream.
    lines = iter(get_file_handle(variant_file))

    logger.info("Initializing a Header Parser")
    head = HeaderParser()

    # Consume header lines; remember the first non-header line (if any).
    # Tracking it explicitly avoids treating the last header line as a
    # variant when the file has no variants, and avoids a NameError on an
    # empty input.
    first_variant = None
    for line in lines:
        line = line.rstrip()
        if line.startswith('##'):
            head.parse_meta_data(line)
        elif line.startswith('#'):
            head.parse_header_line(line)
        else:
            first_variant = line
            break

    if first_variant is None:
        # Only header lines (or nothing at all): the stream is exhausted.
        variants = lines
    else:
        # Put the first variant back in front of the remaining lines.
        variants = itertools.chain([first_variant], lines)

    if annotation not in head.info_dict:
        logger.warning(
            "Annotation {0} not specified in header".format(annotation))
        logger.info("Please check VCF file")
        logger.info("Exiting...")
        sys.exit(1)

    logger.info(
        "Building a plugin from extract_vcf for {0}".format(annotation))
    annotation_plugin = Plugin(name=annotation,
                               field='INFO',
                               info_key=annotation,
                               separators=[','],
                               record_rule='min',
                               data_type='float')
    logger.debug("Plugin=(field={0},info_key={1},separators={2},record_rule={3}"\
    ",data_type={4})".format('INFO', annotation, "','", 'min', 'float'))

    print_headers(head=head, outfile=outfile, silent=silent)

    nr_of_variants = 0
    nr_of_passed_variants = 0
    for variant in variants:
        nr_of_variants += 1
        value = annotation_plugin.get_value(variant_line=variant)
        logger.debug("Found value {0}".format(value))

        # Compare against None rather than truthiness so that a real value
        # of 0.0 is still tested against the threshold.
        # NOTE(review): relies on get_value returning None when the
        # annotation is missing -- confirm against extract_vcf.
        if value is None:
            keep_variant = not discard
        elif greater:
            keep_variant = value > threshold
        else:
            keep_variant = value < threshold

        if keep_variant:
            logger.debug("Keeping variant")
            nr_of_passed_variants += 1
            print_variant(variant_line=variant,
                          outfile=outfile,
                          mode='vcf',
                          silent=silent)
        else:
            logger.debug("Discarding variant")

    logger.info("Number of variants in file {0}".format(nr_of_variants))
    logger.info(
        "Number of variants passing filter {0}".format(nr_of_passed_variants))
    logger.info(
        "Number of variants filtered {0}".format(nr_of_variants -
                                                 nr_of_passed_variants))
Пример #12
0
def filter(variant_file, annotation, threshold, discard, greater, silent, outfile):
    """
    Filter vcf variants.

    Filter variants based on their annotation

    The VCF header is parsed first. Every following line is treated as a
    variant and is printed only if the value of ``annotation`` in its INFO
    field passes the threshold test. When an annotation holds several comma
    separated values the smallest one is used (``record_rule='min'``).

    Args:
        variant_file: Iterable of VCF lines (header lines first).
        annotation: INFO key to filter on. It has to be declared in the VCF
            header, otherwise the process exits with status 1.
        threshold: Value the annotation is compared against.
        discard: If true, variants without the annotation are dropped;
            otherwise they are kept.
        greater: If true, keep variants whose value is strictly greater
            than ``threshold``; otherwise keep those strictly below it.
        silent: Forwarded to ``print_headers`` and ``print_variant``.
        outfile: Forwarded to ``print_headers`` and ``print_variant``.
    """
    logger.info("Running genmod filter version {0}".format(__version__))

    # Use one explicit iterator so that the header loop and the variant loop
    # below consume the very same stream.
    lines = iter(variant_file)

    logger.info("Initializing a Header Parser")
    head = HeaderParser()

    # Consume header lines; remember the first non-header line (if any).
    # Tracking it explicitly avoids treating the last header line as a
    # variant when the file has no variants, and avoids a NameError on an
    # empty input.
    first_variant = None
    for line in lines:
        line = line.rstrip()
        if line.startswith('##'):
            head.parse_meta_data(line)
        elif line.startswith('#'):
            head.parse_header_line(line)
        else:
            first_variant = line
            break

    if first_variant is None:
        # Only header lines (or nothing at all): the stream is exhausted.
        variants = lines
    else:
        # Put the first variant back in front of the remaining lines.
        variants = itertools.chain([first_variant], lines)

    if annotation not in head.info_dict:
        logger.warning("Annotation {0} not specified in header".format(annotation))
        logger.info("Please check VCF file")
        logger.info("Exiting...")
        sys.exit(1)

    logger.info("Building a plugin from extract_vcf for {0}".format(annotation))
    annotation_plugin = Plugin(
        name=annotation,
        field='INFO',
        info_key=annotation,
        separators=[','],
        record_rule='min',
        data_type='float'
    )
    logger.debug("Plugin=(field={0},info_key={1},separators={2},record_rule={3}"\
    ",data_type={4})".format('INFO', annotation, "','", 'min', 'float'))

    print_headers(head=head, outfile=outfile, silent=silent)

    nr_of_variants = 0
    nr_of_passed_variants = 0
    for variant in variants:
        nr_of_variants += 1
        value = annotation_plugin.get_value(variant_line=variant)
        logger.debug("Found value {0}".format(value))

        # Compare against None rather than truthiness so that a real value
        # of 0.0 is still tested against the threshold.
        # NOTE(review): relies on get_value returning None when the
        # annotation is missing -- confirm against extract_vcf.
        if value is None:
            keep_variant = not discard
        elif greater:
            keep_variant = value > threshold
        else:
            keep_variant = value < threshold

        if keep_variant:
            logger.debug("Keeping variant")
            nr_of_passed_variants += 1
            print_variant(
                variant_line=variant,
                outfile=outfile,
                mode='vcf',
                silent=silent
            )
        else:
            logger.debug("Discarding variant")

    logger.info("Number of variants in file {0}".format(nr_of_variants))
    logger.info("Number of variants passing filter {0}".format(nr_of_passed_variants))
    logger.info("Number of variants filtered {0}".format(
        nr_of_variants - nr_of_passed_variants))