Exemplo n.º 1
0
 def test_can_find_chrom_using_legacy_argument_name(self):
     """The chromosome column may be supplied as `chr_col` (the legacy keyword, per this test's name)."""
     options = {
         'chr_col': 1,  # Deliberately not `chrom_col`: the older spelling is the thing under test
         'pos_col': 2,
         'ref_col': 3,
         'alt_col': 4,
         'pvalue_col': 5,
         'is_neg_log_pvalue': True,
         'delimiter': '\t',
     }
     parse_line = parsers.GenericGwasLineParser(**options)
     row = '1\t100\tA\tC\t1'
     variant = parse_line(row)
     assert variant.chrom == '1'
Exemplo n.º 2
0
def standard_gwas_parser_basic():
    """
    Build a tab-delimited line parser that reads chrom, pos, ref, alt and p-value from columns 1-5.

    The p-value column is flagged as already holding -log p-values (`is_neg_log_pvalue=True`).
    """
    options = {
        'chrom_col': 1,
        'pos_col': 2,
        'ref_col': 3,
        'alt_col': 4,
        'pvalue_col': 5,
        'is_neg_log_pvalue': True,
        'delimiter': '\t',
    }
    return parsers.GenericGwasLineParser(**options)
Exemplo n.º 3
0
 def test_can_convert_to_neglogpvalue(self):
     """
     With `is_neg_log_pvalue=True`, the column value is exposed as `neg_log_pvalue` and converted for `pvalue`.

     The sample line carries a value of 1 in the p-value column, which should be reported as
     neg_log_pvalue ~= 1 and pvalue ~= 0.1.
     """
     line = '1\t100\tA\tC\t1'
     special_parser = parsers.GenericGwasLineParser(chrom_col=1, pos_col=2, ref_col=3, alt_col=4,
                                                    pvalue_col=5, is_neg_log_pvalue=True,
                                                    delimiter='\t')
     p = special_parser(line)
     # Each assertion carries its own message so that a failure names the check that actually broke
     assert p.neg_log_pvalue == pytest.approx(1), 'Parses -logp as is'
     assert p.pvalue == pytest.approx(0.1), 'Converts -log to pvalue'
Exemplo n.º 4
0
 def test_can_convert_to_logpvalue_using_legacy_argument_names(self):
     """The p-value options may be supplied as `pval_col` / `is_log_pval` (legacy names, per this test's name)."""
     options = {
         'chrom_col': 1,
         'pos_col': 2,
         'ref_col': 3,
         'alt_col': 4,
         # The two older keyword spellings exercised by this test
         'pval_col': 5,
         'is_log_pval': True,
         'delimiter': '\t',
     }
     parse_line = parsers.GenericGwasLineParser(**options)
     row = '1\t100\tA\tC\t1'
     variant = parse_line(row)
     assert variant.neg_log_pvalue == pytest.approx(1), 'Parses -logp as is'
     assert variant.pvalue == pytest.approx(0.1), 'Converts -log to pvalue'
Exemplo n.º 5
0
 def test_gets_marker_info_from_hybrid_fields(self):
     """
     Chrom and pos come from the marker while ref and alt come from their own columns.

     The marker in the sample line carries `NA` placeholders for the alleles; the expected ref/alt
     are the values in columns 2 and 3.
     """
     options = {'marker_col': 1, 'ref_col': 2, 'alt_col': 3, 'pval_col': 4}
     parse_line = parsers.GenericGwasLineParser(**options)
     row = 'chr2:100_NA_NA\tA\tC\t.05'
     variant = parse_line(row)
     assert variant.chrom == '2', 'Read chrom from marker'
     assert variant.pos == 100, 'Read pos from marker'
     assert variant.ref == 'A', 'Read ref from column and ignored marker value'
     assert variant.alt == 'C', 'Read alt from column and ignored marker value'
Exemplo n.º 6
0
 def test_warns_about_incorrect_delimiter(self):
     """
     Regression test: a tab-separated line read with a space delimiter must raise a LineParseException
     whose message mentions "delimiter". Mixed tabs and spaces in hand-edited files are easy to miss.
     """
     tab_separated_row = 'chr2:100:A:C_anno\t.05'
     # Parser is configured for spaces, which the row above does not use as a separator
     parse_line = parsers.GenericGwasLineParser(marker_col=1, pvalue_col=2, delimiter=' ')
     expect_failure = pytest.raises(exceptions.LineParseException, match="delimiter")
     with expect_failure:
         parse_line(tab_separated_row)
Exemplo n.º 7
0
 def test_parses_marker_to_clean_format(self):
     """A marker such as `chr2:100:A:C_anno` is split into its parts and re-emitted as `2:100_A/C`."""
     options = {'marker_col': 1, 'pvalue_col': 2, 'delimiter': '\t'}
     parse_line = parsers.GenericGwasLineParser(**options)
     row = 'chr2:100:A:C_anno\t.05'
     variant = parse_line(row)
     assert variant.chrom == '2', 'Finds chromosome'
     assert variant.pos == 100, 'Finds position'
     assert variant.ref == 'A', 'Finds ref'
     assert variant.alt == 'C', 'Finds alt'
     assert variant.marker == '2:100_A/C', 'Turns a messy marker into a cleaned standardized format'
Exemplo n.º 8
0
 def test_parses_rsid_to_clean_format(self):
     """Check how the rsid column is normalized for each (input line, expected rsid) pair below."""
     parse_line = parsers.GenericGwasLineParser(chrom_col=1, pos_col=2, pvalue_col=3, rsid_col=4, delimiter='\t')
     cases = (
         # A well-formed rsid is passed through untouched
         ('chrx\t100\t.05\trs12', 'rs12'),
         # `NA` is reported as a missing value
         ('chrx\t100\t.05\tNA', None),
         # A bare number gains the `rs` prefix
         ('chrx\t100\t.05\t99', 'rs99'),
     )
     for raw_line, wanted in cases:
         found = parse_line(raw_line).rsid
         assert found == wanted, 'Found correct rsid from: {}'.format(raw_line)
Exemplo n.º 9
0
def standard_gwas_parser():
    """
    Build a tab-delimited line parser covering the variant fields plus effect-size and frequency columns.

    Columns 1-5 hold chrom, pos, ref, alt and p-value (flagged as -log p-values); columns 6-8 hold
    beta, its standard error and the allele frequency, with `is_alt_effect=True`.
    """
    options = {
        'chrom_col': 1,
        'pos_col': 2,
        'ref_col': 3,
        'alt_col': 4,
        'pvalue_col': 5,
        'is_neg_log_pvalue': True,
        'beta_col': 6,
        'stderr_beta_col': 7,
        'allele_freq_col': 8,
        'is_alt_effect': True,
        'delimiter': '\t',
    }
    return parsers.GenericGwasLineParser(**options)
Exemplo n.º 10
0
def main(source: ty.Union[str, ty.Iterable],
         out_fn: ty.Union[str, None],
         parser_options: dict,
         auto_config=False,
         skip_rows=None,
         skip_errors=True,
         max_errors=100,
         make_tabix: bool = False):
    """
    Read GWAS data from `source`, write the converted result, and log the outcome.

    :param source: Handed to the sniffer as the data source; `None` is replaced with `sys.stdin`.
    :param out_fn: Forwarded to `reader.write`. If that call returns a falsy value, the destination is
        reported as 'console'.
    :param parser_options: Keyword arguments for `GenericGwasLineParser`; also forwarded to the sniffer.
    :param auto_config: When false, both `skip_rows` and a valid parser configuration must be supplied.
    :param skip_rows: Forwarded to the sniffer.
    :param skip_errors: Forwarded to the sniffer.
    :param max_errors: Forwarded to the sniffer.
    :param make_tabix: Forwarded to `reader.write`.
    :raises SystemExit: (exit code 1) when `auto_config` is false and the required options are incomplete.
    """
    # An incomplete configuration is not fatal at this point: it just means no explicit parser is available
    try:
        parser = parsers.GenericGwasLineParser(**parser_options)
    except exceptions.ConfigurationException:
        parser = None

    source = sys.stdin if source is None else source

    fully_specified = skip_rows is not None and parser is not None
    if not (auto_config or fully_specified):
        logger.error(
            'Please provide all options required to parse the file, or use the --auto flag to guess'
        )
        sys.exit(1)

    # The sniffer receives both the explicit parser (possibly None) and the raw options it was built from
    sniffer_kwargs = {
        'skip_rows': skip_rows,
        'parser': parser,
        'parser_options': parser_options,
        'skip_errors': skip_errors,
        'max_errors': max_errors,
    }
    reader = sniffers.guess_gwas_generic(source, **sniffer_kwargs)

    try:
        written_to = reader.write(out_fn, make_tabix=make_tabix)
        dest_fn = written_to or 'console'
    except exceptions.TooManyBadLinesException:
        logger.error('ERROR: Too many lines failed to parse; stopping.')
    except Exception:
        logger.exception('Conversion failed due to unknown error')
    else:
        success_message = 'Conversion succeeded! Results written to: {}'.format(dest_fn)
        logger.info(success_message)
    finally:
        # Rejected rows are reported whether or not the conversion as a whole succeeded
        for row_number, why, _ in reader.errors:
            row_message = 'Excluded row {} from output due to parse error: {}'.format(
                row_number, why)
            logger.error(row_message)
0
    def test_validates_frequency_fields(self):
        """Invalid combinations of allele frequency options are rejected when the parser is constructed."""
        # Supplying both a count column and a frequency column is reported as 'mutually exclusive'
        count_and_freq = dict(marker_col=1, pvalue_col=2, allele_count_col=3, allele_freq_col=4)
        with pytest.raises(exceptions.ConfigurationException, match='mutually exclusive'):
            parsers.GenericGwasLineParser(**count_and_freq)

        # A count column without a sample-size column yields an error mentioning 'n_samples'
        count_without_n = dict(marker_col=1, pvalue_col=2, allele_count_col=3, n_samples_col=None)
        with pytest.raises(exceptions.ConfigurationException, match='n_samples'):
            parsers.GenericGwasLineParser(**count_without_n)
Exemplo n.º 12
0
 def test_validates_that_allele_spec_is_none_or_both(self):
     """A configuration with `ref_col` but no `alt_col` raises a ConfigurationException ('all required')."""
     # NOTE(review): `pvalue_col=None` is passed here as well, so this may be tripping a missing-p-value
     #   check rather than an allele check -- confirm against the parser's validation logic.
     incomplete_options = dict(marker_col=1, ref_col=3, pvalue_col=None)
     expect_failure = pytest.raises(exceptions.ConfigurationException, match='all required')
     with expect_failure:
         parsers.GenericGwasLineParser(**incomplete_options)
Exemplo n.º 13
0
 def test_validates_arguments_required_fields(self):
     """A configuration with a marker column but `pvalue_col=None` raises a ConfigurationException."""
     incomplete_options = dict(marker_col=1, pvalue_col=None)
     expect_failure = pytest.raises(exceptions.ConfigurationException, match='all required')
     with expect_failure:
         parsers.GenericGwasLineParser(**incomplete_options)
Exemplo n.º 14
0
 def test_parses_freq_from_freq(self):
     """With `is_alt_effect=True`, the value in the frequency column is reported as `alt_allele_freq`."""
     options = {
         'marker_col': 1,
         'pvalue_col': 2,
         'allele_freq_col': 3,
         'is_alt_effect': True,
     }
     parse_line = parsers.GenericGwasLineParser(**options)
     row = 'chr2:100:A:C_anno\t.05\t0.25'
     variant = parse_line(row)
     assert variant.alt_allele_freq == 0.25, "Parses frequency as is"
Exemplo n.º 15
0
 def test_parses_freq_from_counts(self):
     """
     Allele frequency is derived from a count column and a sample-size column.

     The sample line has a count of 25 and 100 samples with `is_alt_effect=False`; the expected
     `alt_allele_freq` is 0.875.
     """
     options = {
         'marker_col': 1,
         'pvalue_col': 2,
         'allele_count_col': 3,
         'n_samples_col': 4,
         'is_alt_effect': False,
     }
     parse_line = parsers.GenericGwasLineParser(**options)
     row = 'chr2:100:A:C_anno\t.05\t25\t100'
     variant = parse_line(row)
     assert variant.alt_allele_freq == 0.875, "Calculates frequency from counts and orients to alt allele"
Exemplo n.º 16
0
 def test_parses_chr_to_clean_format(self):
     """A chromosome label of `chrx` is reported as `X`."""
     options = {'chrom_col': 1, 'pos_col': 2, 'pvalue_col': 3, 'delimiter': '\t'}
     parse_line = parsers.GenericGwasLineParser(**options)
     row = 'chrx\t100\t.05'
     variant = parse_line(row)
     assert variant.chrom == 'X', 'Strips prefix from chromosome labels and always uses uppercase letters'