Exemplo n.º 1
0
def test_parse_accession_taxa_table5(ncbi_taxa_file):
    """Check that passing ``acc_ids`` yields exactly those accessions.

    The table is parsed with column 0 as key and column 1 as value, and
    the accessions of the yielded pairs must equal the requested ones.
    """
    acc_ids = {'XM_006695435'}
    records = parse_accession_taxa_table(
        ncbi_taxa_file,
        key=0,
        value=1,
        acc_ids=acc_ids,
    )
    # Keep only the accession of each (accession, taxon_id) pair.
    found_accessions = [acc_id for acc_id, _taxon_id in records]
    assert list(acc_ids) == found_accessions
Exemplo n.º 2
0
def filter_taxa_command(verbose, table, taxonomy, include_taxon_id,
                        include_taxon_name, exclude_taxon_id,
                        exclude_taxon_name, progress, input_file, output_file):
    """Filter a GFF file or an accession/taxon table by taxon.

    Records are kept only if they pass the include filter (when any
    include taxa are given) and the exclude filter (when any exclude taxa
    are given), checked in that order.

    Args:
        verbose: if truthy the log level is DEBUG, otherwise INFO.
        table: if truthy *input_file* is read with
            ``blast.parse_accession_taxa_table`` (columns 0 and 1);
            otherwise it is read with ``gff.parse_gff``.
        taxonomy: argument passed to ``taxon.Taxonomy``.
        include_taxon_id: taxon IDs, checked with ``validate_taxon_ids``.
        include_taxon_name: taxon names, checked with
            ``validate_taxon_names``.
        exclude_taxon_id: taxon IDs, checked with ``validate_taxon_ids``.
        exclude_taxon_name: taxon names, checked with
            ``validate_taxon_names``.
        progress: if truthy the record iterator is wrapped in ``tqdm``.
        input_file: source handed to the selected parser.
        output_file: destination; in table mode ASCII-encoded bytes are
            written to it, in GFF mode it is passed to
            ``annotation.to_file``.
    """
    mgkit.logger.config_log(level=logging.DEBUG if verbose else logging.INFO)

    LOG.info('Writing to file (%s)',
             getattr(output_file, 'name', repr(output_file)))

    taxonomy = taxon.Taxonomy(taxonomy)

    exclude_ids = validate_taxon_ids(exclude_taxon_id, taxonomy) | \
        validate_taxon_names(exclude_taxon_name, taxonomy)

    include_ids = validate_taxon_ids(include_taxon_id, taxonomy) | \
        validate_taxon_names(include_taxon_name, taxonomy)

    if exclude_ids:
        LOG.info("Excluding Taxa: %s", exclude_ids)
    exclude_func = _make_taxon_filter(exclude_ids, taxonomy, exclude=True)

    if include_ids:
        LOG.info("Only include Taxa: %s", include_ids)
    include_func = _make_taxon_filter(include_ids, taxonomy, exclude=False)

    # The include filter is evaluated before the exclude filter; a record
    # is dropped as soon as one of the active filters rejects it.
    active_filters = [
        func for func in (include_func, exclude_func) if func is not None
    ]

    if table:
        iterator = blast.parse_accession_taxa_table(input_file,
                                                    key=0,
                                                    value=1,
                                                    num_lines=None)
        if progress:
            iterator = tqdm(iterator)
        for acc_id, taxon_id in iterator:
            if not _passes_taxon_filters(taxon_id, active_filters):
                continue
            output_file.write("{}\t{}\n".format(acc_id,
                                                taxon_id).encode('ascii'))
    else:
        iterator = gff.parse_gff(input_file)
        if progress:
            iterator = tqdm(iterator)
        for annotation in iterator:
            taxon_id = annotation.taxon_id
            # Annotations without a taxon cannot be filtered; skip them.
            if taxon_id is None:
                continue
            if not _passes_taxon_filters(taxon_id, active_filters):
                continue
            annotation.to_file(output_file)


def _make_taxon_filter(taxon_ids, taxonomy, exclude):
    """Build the predicate for one filter list, or None if it is empty.

    The predicate is ``filter_taxon_by_id_list`` with *taxon_ids* as its
    ``filter_list``, the given *exclude* flag and ``taxon.is_ancestor``
    bound to *taxonomy* as ``func``, wrapped with ``memoize``.
    """
    if not taxon_ids:
        return None
    predicate = functools.partial(
        filter_taxon_by_id_list,
        filter_list=taxon_ids,
        exclude=exclude,
        func=functools.partial(taxon.is_ancestor, taxonomy),
    )
    return memoize(predicate)


def _passes_taxon_filters(taxon_id, filters):
    """Return True if every predicate in *filters* accepts *taxon_id*."""
    return all(check(taxon_id) for check in filters)
Exemplo n.º 3
0
def test_parse_accession_taxa_table1(ncbi_taxa_file):
    """With ``no_zero=False`` the first key yielded is 'KH113978'."""
    records = parse_accession_taxa_table(
        ncbi_taxa_file,
        key=0,
        value=1,
        no_zero=False,
    )
    first_record = next(records)
    assert first_record[0] == 'KH113978'
Exemplo n.º 4
0
def test_parse_accession_taxa_table3(ncbi_taxa_file):
    """With ``no_zero=True`` the first key yielded is 'XM_006695435'."""
    records = parse_accession_taxa_table(
        ncbi_taxa_file,
        key=0,
        value=1,
        no_zero=True,
    )
    first_record = next(records)
    assert first_record[0] == 'XM_006695435'
Exemplo n.º 5
0
def test_parse_accession_taxa_table2(ncbi_taxa_file):
    """With ``no_zero=False`` the first value yielded is 0."""
    records = parse_accession_taxa_table(
        ncbi_taxa_file,
        key=0,
        value=1,
        no_zero=False,
    )
    first_record = next(records)
    assert first_record[1] == 0