Exemplo n.º 1
0
def report(args):
    """Create an HTML report for the clusters described in ``args.json``.

    Loads the cluster JSON, renders the expression profile into
    ``<args.out>/html``, stores the result in a ``seqcluster.db``
    database under ``args.out``, and copies the static template assets
    (stylesheet + table-sorter JS) next to the generated HTML.

    :param args: argparse namespace; uses ``args.json`` (input JSON
        produced by the cluster step) and ``args.out`` (output directory).
    """
    logger.info("reading sequences")
    data = load_data(args.json)
    out_dir = os.path.join(args.out, "html")
    safe_dirs(out_dir)

    logger.info("create profile")
    data = make_profile(data, out_dir, args)
    logger.info("create database")
    make_database(data, "seqcluster.db", args.out)

    # Locate the bundled template directory that ships with the package.
    path_template = os.path.normpath(
        os.path.dirname(os.path.realpath(templates.__file__)))
    # Copy each asset individually so a partially-copied set is completed
    # on re-run (the original only checked the CSS file and would never
    # restore a missing JS file once the CSS existed).
    for asset in ("info.css", "jquery.tablesorter.min.js"):
        target = os.path.join(out_dir, asset)
        if not os.path.exists(target):
            shutil.copy(os.path.join(path_template, asset), target)
    logger.info(
        "Done. Download https://github.com/lpantano/seqclusterViz/archive/master.zip to browse the output."
    )
Exemplo n.º 2
0
def report(args):
    """Create an HTML report for the clusters described in ``args.json``.

    Loads the cluster JSON, renders per-cluster profiles into
    ``<args.out>/profiles``, and stores the result in a
    ``seqcluster.db`` database under ``args.out``.

    :param args: argparse namespace; uses ``args.json`` (input JSON
        produced by the cluster step) and ``args.out`` (output directory).
    """
    logger.info("reading sequences")
    data = load_data(args.json)

    logger.info("create profile")
    data = make_profile(data, os.path.join(args.out, "profiles"), args)
    logger.info("create database")
    make_database(data, "seqcluster.db", args.out)

    logger.info(
        "Done. Download https://github.com/lpantano/seqclusterViz/archive/master.zip to browse the output."
    )
Exemplo n.º 3
0
def predictions(args):
    """Create predictions for the clusters described in ``args.json``.

    Loads the cluster JSON and, when ``args.coral`` is set, runs the
    CoRaL classifier over the clusters, writing into
    ``<args.out>/predictions``.

    :param args: argparse namespace; uses ``args.json``, ``args.out``
        and the boolean flag ``args.coral``.
    """
    logger.info("reading sequences")
    data = load_data(args.json)
    out_dir = os.path.join(args.out, "predictions")
    safe_dirs(out_dir)

    logger.info("make predictions")
    # Generic prediction step intentionally disabled in this version:
    # make_predictions(data, out_dir, args)

    if args.coral:
        logger.info("make CoRaL predictions")
        run_coral(data, out_dir, args)

    logger.info("Done")
Exemplo n.º 4
0
def explore(args):
    """Map the sequences of two clusters onto each other's precursor loci.

    Extracts the sequences and precursor loci of the two cluster ids
    given in ``args.names`` (comma-separated) and aligns every sequence
    against every locus, writing the result to ``<args.out>/map.tsv``.

    :param args: argparse namespace; uses ``args.json``, ``args.names``
        (e.g. ``"12,34"``) and ``args.out``.
    """
    logger.info("reading sequences")
    data = load_data(args.json)
    logger.info("get sequences from json")
    c1, c2 = args.names.split(",")
    seqs, names = get_sequences_from_cluster(c1, c2, data[0])
    loci = get_precursors_from_cluster(c1, c2, data[0])
    logger.info("map all sequences to all loci")
    # Python-3 print call (the original used the Python-2 print
    # statement, a SyntaxError under Python 3).
    print("%s" % (loci))
    map_to_precursors(seqs, names, loci, os.path.join(args.out, "map.tsv"), args)
    logger.info("plot sequences on loci")
    # TODO: plotting of sequence positions on loci not implemented yet:
    # get_matrix_position(); plot_sequences()
    logger.info("Done")
Exemplo n.º 5
0
def predictions(args):
    """Create predictions for the clusters described in ``args.json``.

    Annotates clusters as tRNA-derived (``is_tRNA``), optionally runs
    the CoRaL classifier, and writes the annotated JSON next to the
    input as ``<input>_prediction.json``.

    :param args: argparse namespace; uses ``args.json``, ``args.out``
        and the boolean flag ``args.coral``.
    """
    logger.info(args)
    logger.info("reading sequences")
    out_file = os.path.abspath(os.path.splitext(args.json)[0] + "_prediction.json")
    data = load_data(args.json)
    out_dir = os.path.abspath(safe_dirs(os.path.join(args.out, "predictions")))

    logger.info("make predictions")
    data = is_tRNA(data, out_dir, args)

    if args.coral:
        logger.info("make CoRaL predictions")
        run_coral(data, out_dir, args)
    write_data(data[0], out_file)
    logger.info("Done")
Exemplo n.º 6
0
def explore(args):
    """Map the sequences of two clusters onto each other's precursor loci.

    Extracts the sequences and precursor loci of the two cluster ids
    given in ``args.names`` (comma-separated) and aligns every sequence
    against every locus, writing the result to ``<args.out>/map.tsv``.

    :param args: argparse namespace; uses ``args.json``, ``args.names``
        (e.g. ``"12,34"``) and ``args.out``.
    """
    logger.info("reading sequences")
    data = load_data(args.json)
    logger.info("get sequences from json")
    c1, c2 = args.names.split(",")
    seqs, names = get_sequences_from_cluster(c1, c2, data[0])
    loci = get_precursors_from_cluster(c1, c2, data[0])
    logger.info("map all sequences to all loci")
    print("%s" % (loci))
    map_to_precursors(seqs, names, loci, os.path.join(args.out, "map.tsv"),
                      args)
    logger.info("plot sequences on loci")
    # TODO: plotting of sequence positions on loci not implemented yet:
    # get_matrix_position(); plot_sequences()
    logger.info("Done")
Exemplo n.º 7
0
def report(args):
    """Create an HTML report for the clusters described in ``args.json``.

    Loads the cluster JSON, renders the expression profile into
    ``<args.out>/html`` and copies the static template assets
    (stylesheet + table-sorter JS) next to the generated HTML.

    :param args: argparse namespace; uses ``args.json`` (input JSON
        produced by the cluster step) and ``args.out`` (output directory).
    """
    logger.info("reading sequences")
    data = load_data(args.json)
    out_dir = os.path.join(args.out, "html")
    safe_dirs(out_dir)

    logger.info("create profile")
    # NOTE(review): the return value of make_profile is discarded here;
    # no database step follows in this version, so nothing downstream
    # needs it.
    make_profile(data, out_dir, args)

    # Copy each bundled template asset individually so a partially
    # copied set is completed on re-run (the original only checked the
    # CSS file and would never restore a missing JS file).
    path_template = os.path.normpath(os.path.dirname(os.path.realpath(templates.__file__)))
    for asset in ("info.css", "jquery.tablesorter.min.js"):
        target = os.path.join(out_dir, asset)
        if not os.path.exists(target):
            shutil.copy(os.path.join(path_template, asset), target)
    logger.info("Done")
Exemplo n.º 8
0
def report(args):
    """Create an HTML report for the clusters described in ``args.json``.

    Loads the cluster JSON, renders the expression profile into
    ``<args.out>/html``, stores the result in a ``seqcluster.db``
    database under ``args.out``, and copies the static template assets
    (stylesheet + table-sorter JS) next to the generated HTML.

    :param args: argparse namespace; uses ``args.json`` (input JSON
        produced by the cluster step) and ``args.out`` (output directory).
    """
    logger.info("reading sequences")
    data = load_data(args.json)
    out_dir = os.path.join(args.out, "html")
    safe_dirs(out_dir)

    logger.info("create profile")
    data = make_profile(data, out_dir, args)
    logger.info("create database")
    make_database(data, "seqcluster.db", args.out)

    # Copy each bundled template asset individually so a partially
    # copied set is completed on re-run (the original only checked the
    # CSS file and would never restore a missing JS file).
    path_template = os.path.normpath(os.path.dirname(os.path.realpath(templates.__file__)))
    for asset in ("info.css", "jquery.tablesorter.min.js"):
        target = os.path.join(out_dir, asset)
        if not os.path.exists(target):
            shutil.copy(os.path.join(path_template, asset), target)
    logger.info("Done. Download https://github.com/lpantano/seqclusterViz/archive/master.zip to browse the output.")
Exemplo n.º 9
0
def _append_read_stats(y, l, step, read_stats_file):
    """Log per-sample totals and append one row per sample to the stats TSV.

    :param y: dict mapping sample name -> read count at this step.
    :param l: number of distinct sequences at this step.
    :param step: label identifying the pipeline stage (e.g. ``'aligned'``).
    :param read_stats_file: path of the TSV accumulating per-step stats.
    """
    logger.info("counts after: %s" % sum(y.values()))
    logger.info("# sequences after: %s" % l)
    dt = pd.DataFrame({'sample': y.keys(), 'counts': y.values()})
    dt['step'] = step
    # Append without header so the file accumulates one section per step.
    dt.to_csv(read_stats_file, sep="\t", index=False, header=False, mode='a')


def cluster(args):
    """Run the full small-RNA clustering pipeline.

    Cleans the alignment, parses the count matrix, builds clusters,
    resolves multi-mapping reads across the cluster network, annotates
    the result and writes a JSON (and optionally a database) under
    ``args.dir_out``. Per-step read statistics are accumulated in
    ``read_stats.tsv``.

    :param args: argparse namespace validated by ``_check_args``.
    """
    args = _check_args(args)
    read_stats_file = op.join(args.dir_out, "read_stats.tsv")
    # Start from a clean stats file: every step below appends to it.
    if file_exists(read_stats_file):
        os.remove(read_stats_file)

    bam_file, seq_obj = _clean_alignment(args)

    logger.info("Parsing matrix file")
    seqL, y, l = parse_ma_file(seq_obj, args.ffile)
    _append_read_stats(y, l, 'aligned', read_stats_file)

    if len(seqL) < 10:
        logger.error("It seems you have low coverage. Please check your fastq files have enough sequences.")
        raise ValueError("So few sequences.")

    logger.info("Cleaning bam file")
    y, l = _total_counts(seqL.keys(), seqL)
    _append_read_stats(y, l, 'cleaned', read_stats_file)

    clusL = _create_clusters(seqL, bam_file, args)
    y, l = _total_counts(clusL.seq.keys(), clusL.seq, aligned=True)
    _append_read_stats(y, l, 'clusters', read_stats_file)

    logger.info("Solving multi-mapping events in the network of clusters")
    clusLred = _cleaning(clusL, args.dir_out)
    y, l = _total_counts(clusLred.clus, seqL)
    _append_read_stats(y, l, 'meta-cluster', read_stats_file)
    logger.info("Clusters up to %s" % (len(clusLred.clus.keys())))

    if args.show:
        logger.info("Creating sequences alignment to precursor")
        clusLred = show_seq(clusLred, args.index)

    clusLred = peak_calling(clusLred)

    clusLred = _annotate(args, clusLred)
    logger.info("Creating json and count matrix")

    json_file = _create_json(clusLred, args)
    logger.info("Output file in: %s" % args.dir_out)

    if args.db:
        name = args.db + ".db"
        logger.info("Create database: database/" + name)
        data = load_data(json_file)
        out_dir = op.join(args.dir_out, "database")
        make_database(data, name, out_dir)
    logger.info("Finished")
Exemplo n.º 10
0
def cluster(args):
    """Run the full small-RNA clustering pipeline.

    Cleans the alignment, parses the count matrix, builds clusters,
    resolves multi-mapping reads across the cluster network, annotates
    the result and writes a JSON (and optionally a database) under
    ``args.dir_out``. Per-step read statistics are accumulated in
    ``read_stats.tsv``.

    :param args: argparse namespace validated by ``_check_args``.
    """
    args = _check_args(args)
    read_stats_file = op.join(args.dir_out, "read_stats.tsv")
    # Start from a clean stats file: every step below appends to it.
    if file_exists(read_stats_file):
        os.remove(read_stats_file)

    bam_file, seq_obj = _clean_alignment(args)

    logger.info("Parsing matrix file")
    seqL, y, l = parse_ma_file(seq_obj, args.ffile)
    logger.info("counts after: %s" % sum(y.values()))
    logger.info("# sequences after: %s" % l)
    dt = pd.DataFrame({'sample': y.keys(), 'counts': y.values()})
    dt['step'] = 'aligned'
    dt.to_csv(read_stats_file, sep="\t", index=False, header=False, mode='a')

    if len(seqL.keys()) < 10:
        # Grammar fixed ("you have so low coverage") to match the
        # wording used elsewhere in the project.
        logger.error("It seems you have low coverage. Please check your fastq files have enough sequences.")
        raise ValueError("So few sequences.")

    logger.info("Cleaning bam file")
    y, l = _total_counts(seqL.keys(), seqL)
    logger.info("counts after: %s" % sum(y.values()))
    logger.info("# sequences after: %s" % l)
    dt = pd.DataFrame({'sample': y.keys(), 'counts': y.values()})
    dt['step'] = 'cleaned'
    dt.to_csv(read_stats_file, sep="\t", index=False, header=False, mode='a')

    clusL = _create_clusters(seqL, bam_file, args)
    y, l = _total_counts(clusL.seq.keys(), clusL.seq, aligned=True)
    logger.info("counts after: %s" % sum(y.values()))
    logger.info("# sequences after: %s" % l)
    dt = pd.DataFrame({'sample': y.keys(), 'counts': y.values()})
    dt['step'] = 'clusters'
    dt.to_csv(read_stats_file, sep="\t", index=False, header=False, mode='a')

    logger.info("Solving multi-mapping events in the network of clusters")
    clusLred = _cleaning(clusL, args.dir_out)
    y, l = _total_counts(clusLred.clus, seqL)
    logger.info("counts after: %s" % sum(y.values()))
    logger.info("# sequences after: %s" % l)
    dt = pd.DataFrame({'sample': y.keys(), 'counts': y.values()})
    dt['step'] = 'meta-cluster'
    dt.to_csv(read_stats_file, sep="\t", index=False, header=False, mode='a')
    logger.info("Clusters up to %s" % (len(clusLred.clus.keys())))

    if args.show:
        logger.info("Creating sequences alignment to precursor")
        clusLred = show_seq(clusLred, args.index)

    clusLred = peak_calling(clusLred)

    clusLred = _annotate(args, clusLred)
    logger.info("Creating json and count matrix")

    json_file = _create_json(clusLred, args)
    logger.info("Output file in: %s" % args.dir_out)

    if args.db:
        name = args.db + ".db"
        logger.info("Create database: database/" + name)
        data = load_data(json_file)
        out_dir = op.join(args.dir_out, "database")
        make_database(data, name, out_dir)
    logger.info("Finished")