def main():
    """Command-line harness: build the chosen model and scan the input rows.

    Parses the command line, reads the input rows with ``read_fields``,
    constructs either a ``D2z`` or a ``HexMCD`` model depending on the
    sub-command, and hands the options, the model and the rows to
    ``row_search``.

    Top-level options (all field numbers are given 1-based on the command
    line, per their help strings):
        --input   input file, opened in binary mode; defaults to stdin
        -a        field number of the A (training) sequences (default 6)
        -b        field number of the B (test) sequences (default 8)
        -c        field number of the test coordinates (default 5)
        --valid   field number of the valid test coordinates (default 9)
        -l        length of the scanning window (default None)

    Sub-commands: ``d2z`` and ``hexmcd``; their own options are registered
    by ``add_d2z_arguments`` and ``add_hexmcd_arguments``.

    Exits through ``parser.error`` when no sub-command is supplied.
    """
    import argparse

    # The first positional parameter of ArgumentParser is ``prog``; the text
    # is a description, so it must be passed by keyword or it ends up in the
    # "usage:" line in place of the program name.
    parser = argparse.ArgumentParser(
        description='Harness for alignment free homology.',
        add_help=False,
        epilog='')
    parser.add_argument('--input', type=argparse.FileType('rb'),
                        default=sys.stdin,
                        help='Input file e.g. hg18.toDanRer5.seqs.txt.')
    parser.add_argument('-a', type=int, default=6,
                        help='Field number of A (training) sequences.')
    parser.add_argument('-b', type=int, default=8,
                        help='Field number of B (test) sequences.')
    parser.add_argument('-c', type=int, default=5,
                        help='Field number of test coordinates.')
    parser.add_argument('--valid', type=int, default=9,
                        help='Field number of valid test coordinates.')
    parser.add_argument('-l', type=int, default=None,
                        help='Length of scanning window. Defaults to the '
                             'average of training sequences.')

    subparsers = parser.add_subparsers(help='Model algorithm to use.',
                                       dest='model')
    d2z_parser = subparsers.add_parser('d2z', help='D2z scoring metric.')
    d2z_parser = add_d2z_arguments(d2z_parser)
    hexmcd_parser = subparsers.add_parser('hexmcd', help='HexMCD algorithm.')
    hexmcd_parser = add_hexmcd_arguments(hexmcd_parser)
    # Add more parsers here.

    OPTS = parser.parse_args()

    # On Python 3 a sub-command is optional unless explicitly required, in
    # which case OPTS.model is None. Fail with a usage error before reading
    # any input instead of hitting an unbound model variable later.
    if OPTS.model not in ('d2z', 'hexmcd'):
        parser.error('a model sub-command is required: d2z or hexmcd')

    line_tups = read_fields(f=OPTS.input)

    # Every parsed option (including ``input`` and ``model``) is forwarded to
    # the model constructor as keyword arguments.
    if OPTS.model == 'd2z':
        m = D2z(**vars(OPTS))
    else:
        # The background list is taken from index 6 (the seventh field) of
        # every input row.
        # NOTE(review): if add_hexmcd_arguments registers an option named
        # ``bg_list`` or ``smoothing`` this call raises TypeError for a
        # duplicated keyword -- confirm against that helper.
        m = HexMCD(bg_list=[l[6] for l in line_tups],
                   smoothing='ones',
                   **vars(OPTS))

    row_search(OPTS, m, line_tups)
def main():
    """Plot a single CNE together with its valid coordinate window.

    Command line:
        identifier     name of the CNE to plot
        -f / --file    score file (defaults to stdin)
        --extra_data   extra data file (defaults to hg18.toDanRer5.seqs.txt)
        --valid        1-based field number of the valid test coordinates

    The score file is read with ``read_fields`` and parsed with
    ``parse_dat``. For every row of the extra data file, the coordinates in
    index 4 and in the ``--valid`` field are parsed with ``parse_coords``,
    and the valid start/end are stored as offsets from the index-4 start,
    keyed by the row's first field. The entry for ``identifier`` is then
    passed to ``plot_cne``.
    """
    import argparse

    cli = argparse.ArgumentParser()
    cli.add_argument(
        'identifier',
        help='Identifier name e.g. cne.100899.FST.')
    cli.add_argument(
        '-f', '--file',
        nargs='?',
        type=argparse.FileType('r'),
        default=sys.stdin,
        help='Input file e.g. d2z.dat.')
    cli.add_argument(
        '--extra_data',
        type=argparse.FileType('r'),
        default='hg18.toDanRer5.seqs.txt',
        help='Extra data file e.g. hg18.toDanRer5.seqs.txt.')
    cli.add_argument(
        '--valid',
        type=int,
        default=9,
        help='Field number of valid test coordinates.')
    args = cli.parse_args()

    scores_by_cne = parse_dat(read_fields(f=args.file))
    extra_rows = read_fields(f=args.extra_data)

    def relative_window(row):
        """Return (start, end) of the valid span relative to the region start."""
        region = parse_coords(row[4])
        valid = parse_coords(row[args.valid - 1])
        return (valid['start'] - region['start'],
                valid['end'] - region['start'])

    # Keyed by the first field of each row; a later row with the same key
    # replaces an earlier one.
    windows_by_cne = {row[0]: relative_window(row) for row in extra_rows}

    wanted = args.identifier
    plot_cne(wanted, scores_by_cne[wanted], windows_by_cne[wanted])
def main():
    """Score the CNEs in a data file and print one result row per CNE.

    Command line:
        -f / --file    score file (defaults to stdin)
        --extra_data   extra data file (defaults to hg18.toDanRer5.seqs.txt)
        scoring        scoring method: ``ranked_peaks`` or ``overlap``

    The score file is read with ``read_fields`` and parsed with
    ``parse_dat``; the extra data file is read with ``read_fields`` and
    parsed with ``parse_extra_data``. Both are handed to the selected scoring
    function, which returns a mapping from CNE name to a result with
    ``'rank'`` and ``'places'`` entries.

    Output (stdout), sorted by CNE name, tab separated:
        <cne> <rank> <places>
    """
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('-f', '--file', nargs='?',
                        type=argparse.FileType('r'), default=sys.stdin,
                        help='Input file e.g. d2z.dat.')
    parser.add_argument('--extra_data', type=argparse.FileType('r'),
                        default='hg18.toDanRer5.seqs.txt',
                        help='Extra data file e.g. hg18.toDanRer5.seqs.txt.')
    parser.add_argument('scoring', choices=['ranked_peaks', 'overlap'])
    OPTS = parser.parse_args()

    line_tups = read_fields(f=OPTS.file)
    cne_dict = parse_dat(line_tups)
    extra = parse_extra_data(read_fields(f=OPTS.extra_data))

    # ``choices`` above restricts OPTS.scoring to exactly these two values.
    if OPTS.scoring == 'ranked_peaks':
        results = ranked_peaks(cne_dict, extra)
    else:
        results = overlap(cne_dict, extra)

    # dict.iteritems() exists only on Python 2. Iterating the sorted keys
    # gives the same order on Python 2 and 3 and never compares the result
    # values themselves.
    for cne in sorted(results):
        result = results[cne]
        fields = [cne, str(result['rank']), str(result['places'])]
        sys.stdout.write('\t'.join(fields) + '\n')
def main():
    """Fit a D2z model on the A sequences and print a score per row.

    The command-line options are registered by ``add_d2z_arguments`` (called
    with ``main=True``). Rows come from ``read_fields()`` called without
    arguments. The ``-a`` field of every row is used to fit the model; then,
    for each row, the ``-b`` field is scanned on its own and the first
    element of the scan result is written to stdout as::

        <first field of the row> TAB <score>
    """
    import argparse

    base_parser = argparse.ArgumentParser(description='Compute d2z scores.')
    cli = add_d2z_arguments(base_parser, main=True)
    OPTS = cli.parse_args()

    rows = read_fields()

    # Field numbers on the command line are 1-based; row indices are 0-based.
    training_seqs = [row[OPTS.a - 1] for row in rows]
    model = D2z()
    model.fit(training_seqs)

    for row in rows:
        label = row[0]
        test_seq = row[OPTS.b - 1]
        # scan() is given a one-element list; only its first result is used.
        score = model.scan([test_seq])[0]
        sys.stdout.write('%s\t%s\n' % (label, score))