def test_get_first_id(self):
        """get_first_id should return the set of leading ids from fasta headers"""
        # Four records; the expected ids are the first whitespace-delimited
        # token of each '>' header line, without the '>' itself.
        fasta_text = """>S74_1 E86FECS01CEVAV orig_bc=ACATGTCACGTG new_bc=ACATGTCACGTG bc_diffs=0
CTCCTC
>Unassigned_2 E86FECS01EKKMF orig_bc=AGCGCTGATGTA new_bc=None bc_diffs=1
GGTGCCTCCCTCGC
>S80_3 E86FECS01EKKMF orig_bc=AGCGCTGATGTA new_bc=None bc_diffs=1
GGTGCCTCCCTCGC
>S80_4 E86FECS01CW66X orig_bc=AGTCCATAGCTG new_bc=AGTCCATAGCTG bc_diffs=0
GTCCTGGCAG"""
        fasta_lines = fasta_text.splitlines()
        observed_ids = get_first_id(fasta_lines)
        expected_ids = set(["S74_1", "Unassigned_2", "S80_3", "S80_4"])
        self.assertEqual(observed_ids, expected_ids)
    def test_get_first_id(self):
        """get_first_id should identify first id in fasta file"""
        lines = """>S74_1 E86FECS01CEVAV orig_bc=ACATGTCACGTG new_bc=ACATGTCACGTG bc_diffs=0
CTCCTC
>Unassigned_2 E86FECS01EKKMF orig_bc=AGCGCTGATGTA new_bc=None bc_diffs=1
GGTGCCTCCCTCGC
>S80_3 E86FECS01EKKMF orig_bc=AGCGCTGATGTA new_bc=None bc_diffs=1
GGTGCCTCCCTCGC
>S80_4 E86FECS01CW66X orig_bc=AGTCCATAGCTG new_bc=AGTCCATAGCTG bc_diffs=0
GTCCTGGCAG""".splitlines()
        self.assertEqual(get_first_id(lines), set(['S74_1','Unassigned_2','S80_3','S80_4']))
Exemplo n.º 3
0
def main():
    """Write one ``<key>.txt`` file of sorted ids per group into the output dir.

    Steps:

    1. Parse the command line (``script_info`` drives the option parser).
    2. If ``--screened_rep_seqs`` is given, collect the "bad" OTU ids from
       that file with ``get_first_id`` and expand them, via the OTU map in
       ``options.otus``, into the set of sequence ids to exclude.  Each OTU
       map line is split on whitespace: the first field is the OTU id, the
       remaining fields are the sequence ids belonging to it.
    3. Call ``get_ids`` on the input fasta file to obtain a mapping of
       key -> list of ids, passing along the ids to exclude.
    4. Write every list, sorted and newline-joined, to
       ``<outdir>/<key>.txt``.  An ``Unassigned`` entry is always present so
       that ``Unassigned.txt`` is created even when it is empty.

    Raises:
        RuntimeError: if a screening file is supplied without an OTU file.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    # NOTE(review): the command line is parsed a second time here and the
    # ``opts`` returned above is never used -- presumably ``opts`` and
    # ``options`` are equivalent; confirm before collapsing the two calls.
    options, args = option_parser.parse_args()
    if options.debug:
        # Single-argument print(...) behaves the same as the print statement
        # on Python 2 and is also valid Python 3 syntax.
        print("PRODUCING DEBUG OUTPUT")

    bad_seq_ids = set()
    bad_otu_ids = None

    # if we got a file to screen against, find the relevant ids and delete them
    if options.screened_rep_seqs:
        # Validate the arguments before touching any file so a missing
        # OTU map is reported immediately.
        if not options.otus:
            raise RuntimeError(
                "Must specify an OTU file if performing a screen.")
        with open(options.screened_rep_seqs, 'U') as screened_file:
            bad_otu_ids = get_first_id(screened_file)
        with open(options.otus, 'U') as otu_file:
            for line in otu_file:
                fields = line.split()
                if not fields:
                    # Blank line: nothing to look up (and fields[0] would
                    # raise IndexError).
                    continue
                if fields[0] in bad_otu_ids:
                    bad_seq_ids.update(fields[1:])

    if options.debug:
        if bad_otu_ids is not None:
            print("Found %s bad otu ids: %s" % (len(bad_otu_ids), bad_otu_ids))
        print("Found %s bad seq ids: %s" % (len(bad_seq_ids), bad_seq_ids))

    with open(options.in_fasta, 'U') as fasta_file:
        ids = get_ids(fasta_file, options.field, bad_seq_ids, options.debug)

    # add empty unassigned ids for file creation
    if 'Unassigned' not in ids:
        ids['Unassigned'] = []

    if not exists(options.outdir):
        makedirs(options.outdir)
    for k, idlist in ids.items():
        # The context manager closes the file even if the write fails.
        with open(join(options.outdir, k + '.txt'), 'w') as outfile:
            outfile.write('\n'.join(sorted(idlist)))