Пример #1
0
def main():
    """Command-line entry point of the PhySketch Dataset annotation tool.

    Parses the command line, stores the output directories and the start
    offset on ``cfg``, configures logging, and then launches the first
    selected process. Precedence is annotator, cropper, viewer, generator.
    The annotator and cropper are instantiated and ``run()``; the viewer and
    scene generator are only instantiated. When no process flag is given an
    error is logged.
    """
    cli = argparse.ArgumentParser(description='Ferramenta de anotação do PhySketch Dataset')

    # One boolean switch per process, registered in the original help order.
    process_switches = (
        ("-a", "--annotator", "Executa processo de anotação"),
        ("-c", "--cropper", "Executa processo de recorte de base"),
        ("-z", "--viewer", "Executa processo de visualização de base"),
        ("-g", "--generator", "Executa processo de geração de cenário"),
    )
    for short_flag, long_flag, help_text in process_switches:
        cli.add_argument(short_flag, long_flag, help=help_text, action='store_true')

    cli.add_argument("-i", "--input", help="Pasta contendo estrutura /Dataset", required=True)
    cli.add_argument("-s", "--startAt", help="Pula -s imagens", default=0, type=int)
    # The -o/--output (destination folder) option is disabled:
    #cli.add_argument("-o", "--output", help="Pasta de destino", required=True)
    cli.add_argument("-v", "--verbose", help="Verbose", action='store_true')
    opts = cli.parse_args()

    dataset_root = opts.input
    cfg.OUTPUT_DIR = os.path.join(dataset_root, "annotated")
    cfg.OUTPUT_CROP_DIR = os.path.join(dataset_root, "cropped")
    cfg.START_AT = opts.startAt

    # The level is passed only in verbose mode, so the non-verbose call is
    # exactly basicConfig(format=...).
    log_options = {"format": "%(levelname)s: %(message)s"}
    if opts.verbose:
        log_options["level"] = log.DEBUG
    log.basicConfig(**log_options)
    if opts.verbose:
        log.info("Verbose output.")

    # Each branch imports its module first, then points cfg.INPUT_DIR at the
    # folder that process reads, and only then builds the object.
    if opts.annotator:
        import annotator as an
        cfg.INPUT_DIR = os.path.join(dataset_root, "cropped")
        annotation_tool = an.Annotator()
        annotation_tool.run()
        return

    if opts.cropper:
        import cropper as cr
        cfg.INPUT_DIR = os.path.join(dataset_root, "raw")
        crop_tool = cr.Cropper()
        crop_tool.run()
        return

    if opts.viewer:
        import viewer as vw
        cfg.INPUT_DIR = os.path.join(dataset_root)
        viewer_tool = vw.Viewer()
        return

    if opts.generator:
        import scene_generator as sc
        cfg.INPUT_DIR = os.path.join(dataset_root)
        generator_tool = sc.SceneGenerator()
        return

    log.error("ERRO: SELECIONE UM PROCESSO")
Пример #2
0
def main():
    """Build and pickle the annotation artifacts for one assembly.

    Reads the command-line arguments through ``get_args()``, loads the GTF
    with ``get_gtf()`` and, depending on the ``save_*`` switches below,
    pickles the exon bounds, the splices and an ``annotator.Annotator`` into
    ``<wrapper_path>annotators/``. The path of every written file is printed.
    """
    # Switches selecting which artifacts are generated by this run.
    save_splices = False
    save_exon_bounds = True
    save_ann = True

    args = get_args()
    wrapper_path = "/oak/stanford/groups/horence/Roozbeh/single_cell_project/scripts/STAR_wrapper/"
    # NOTE: a pyensembl.Genome-based annotator (pickled as
    # "pyensembl_<assembly>.pkl") is a disabled alternative to the project
    # Annotator used below.
    annotator_path = "{}annotators/{}.pkl".format(wrapper_path, args.assembly)
    print(annotator_path)

    gtf_df = get_gtf(args.gtf_path)

    if save_exon_bounds:
        exon_bounds = get_exon_bounds(gtf_df)
        exon_bounds_path = "{}annotators/{}_exon_bounds.pkl".format(
            wrapper_path, args.assembly)
        # Context manager guarantees the pickle is flushed and closed.
        with open(exon_bounds_path, "wb") as out_file:
            pickle.dump(exon_bounds, out_file)
        print(exon_bounds_path)

    if save_splices:
        splices = get_splices(gtf_df)
        splices_path = "{}annotators/{}_splices.pkl".format(
            wrapper_path, args.assembly)
        with open(splices_path, "wb") as out_file:
            pickle.dump(splices, out_file)
        print(splices_path)

    if save_ann:
        ann = annotator.Annotator(args.gtf_path)
        print("got annotator")
        with open(annotator_path, "wb") as out_file:
            pickle.dump(ann, out_file)
        print("dumped annotator to {}".format(annotator_path))
Пример #3
0
def main():
    """Create the class input file from STAR BAM alignments.

    Steps:

    1. Parse the command line.
    2. Load the pickled annotator for ``--assembly`` from the wrapper's
       ``annotators/`` folder, or build it from ``--gtf_path`` and cache it.
    3. Parse the BAM file(s) with ``STAR_parseBAM``. Single-ended runs read
       ``<input_path>2Aligned.out.bam`` (or ``--input_file``) as ``r1``.
       Paired-end runs read ``<input_path>1Aligned.out.bam`` as ``r1`` and
       ``<input_path>2Aligned.out.bam`` as ``r2``.
    4. With ``--test``, pickle the two junction dictionaries next to the
       input. Otherwise write ``<input_path>class_input_WithinBAM.tsv``
       through ``write_class_file`` and print the genomic alignments.

    ``--input_file`` names only one BAM file, so it must be combined with
    ``--single``. Without ``--single`` no second BAM was ever defined and the
    run used to crash after the first file had already been parsed; that
    combination is now rejected up front with a usage error (exit status 2).
    """
    t0 = time.time()
    parser = argparse.ArgumentParser(description="create class input file")
    parser.add_argument("-g",
                        "--gtf_path",
                        help="the path to the gtf file to use for annotation")
    parser.add_argument(
        "-a",
        "--assembly",
        help=
        "The name of the assembly to pre-load annotation (so, mm10 for the 10th mouse assembly)"
    )
    parser.add_argument(
        "-i",
        "--input_path",
        help=
        "the prefix to the STAR Aligned.out.sam and Chimeric.out.sam directory"
    )
    parser.add_argument("-I",
                        "--input_file",
                        help="specify file name of different format",
                        default="")
    parser.add_argument(
        "-s",
        "--single",
        action="store_true",
        help="use this flag if the reads you are running on are single-ended")
    parser.add_argument(
        "-t",
        "--tenX",
        action="store_true",
        help="indicate whether this is 10X data (with UMIs and barcodes)")
    parser.add_argument("-T",
                        "--test",
                        action="store_true",
                        help="save dictionaries and don't write class input")
    parser.add_argument("-n",
                        "--include_one_read",
                        action="store_true",
                        help="also save reads where only r1 maps")

    args = parser.parse_args()

    # Fail fast: an explicit input file only provides the r1 BAM, so the
    # paired-end branch below would have no second file to parse.
    if args.input_file != "" and not args.single:
        parser.error(
            "--input_file provides a single BAM file and requires --single")

    wrapper_path = "/oak/stanford/groups/horence/Roozbeh/single_cell_project/scripts/STAR_wrapper/"
    annotator_path = "{}annotators/{}.pkl".format(wrapper_path, args.assembly)

    if os.path.exists(annotator_path):
        # NOTE(review): pickle.load executes code embedded in the file; this
        # is acceptable only while the cache location is trusted.
        with open(annotator_path, "rb") as cache_file:
            ann = pickle.load(cache_file)
    else:
        ann = annotator.Annotator(args.gtf_path)
        with open(annotator_path, "wb") as cache_file:
            pickle.dump(ann, cache_file)
    fastqIdStyle = "complete"

    print("initiated annotator: {}".format(time.time() - t0))

    # NOTE: the earlier SAM-based parsing (Chimeric.out.sam / Aligned.out.sam
    # with the "priorityAlign" / "priorityChimeric" regimes) is disabled;
    # only the Aligned.out.bam files are parsed.
    read_junc_dict = {}
    junc_read_dict = {}

    bamFile2 = None
    if args.input_file != "":
        bamFile1 = args.input_file
    elif args.single:
        bamFile1 = "{}2Aligned.out.bam".format(args.input_path)
    else:
        bamFile1 = "{}1Aligned.out.bam".format(args.input_path)
        bamFile2 = "{}2Aligned.out.bam".format(args.input_path)

    # r1 is always parsed; its genomic alignments are the ones reported.
    read_junc_dict, junc_read_dict, genomic_alignments = STAR_parseBAM(
        bamFile1, "r1", read_junc_dict, junc_read_dict, fastqIdStyle, ann)
    if not args.single:
        # The genomic alignments returned for r2 are intentionally discarded.
        read_junc_dict, junc_read_dict, _ = STAR_parseBAM(
            bamFile2, "r2", read_junc_dict, junc_read_dict, fastqIdStyle, ann)

    if args.test:
        with open("{}read_junc_dict.pkl".format(args.input_path),
                  "wb") as out_file:
            pickle.dump(read_junc_dict, out_file)
        with open("{}junc_read_dict.pkl".format(args.input_path),
                  "wb") as out_file:
            pickle.dump(junc_read_dict, out_file)
    else:
        write_class_file(
            junc_read_dict,
            "{}class_input_{}.tsv".format(args.input_path, "WithinBAM"),
            args.single, genomic_alignments, args.tenX, args.include_one_read)
        print("genomic alignments", genomic_alignments)
Пример #4
0
import flask
import json

import annotator
import article
import config
import reader
import writer
import numpy as np

# Flask application object; the route handlers below are registered on it.
application = flask.Flask(__name__)

# Annotator built at import time from the reader and writer selected in
# config: get_reader/get_writer return callables, which are invoked with
# config.reader_params / config.writer_params as keyword arguments.
anne = annotator.Annotator(reader.get_reader(config.reader)(**config.reader_params),
                           writer.get_writer(config.writer)(**config.writer_params))

# User names loaded as strings from the comma-delimited file usernames.txt
# (path is relative to the working directory); start() checks the submitted
# user id against this collection.
valid_users = np.loadtxt('usernames.txt', delimiter = ',', dtype = 'str')

"""
Display the main page.
"""
@application.route('/', methods=['GET'])
def index():
    """Serve the main page by rendering the ``index.html`` template."""
    main_page = flask.render_template('index.html')
    return main_page

"""
Start the program.
"""
@application.route('/start/', methods=['GET', 'POST'])
def start():
    userid = flask.request.form['userid']
    if not(userid in valid_users):
Пример #5
0
# Start of the wall-clock timer used for the progress message below.
t0 = time.time()

wrapper_path = "/oak/stanford/groups/horence/Roozbeh/single_cell_project/scripts/STAR_wrapper/"
# NOTE: a pyensembl.Genome-based annotator (pickled as
# "pyensembl_<assembly>.pkl") is a disabled alternative to the project
# Annotator used below.
annotator_path = "{}annotators/{}.pkl".format(wrapper_path, args.assembly)

# Reuse the pickled annotator when it exists; otherwise build it from the GTF
# and cache it. The context managers make sure the file handles are closed
# (and the written pickle flushed) deterministically.
if os.path.exists(annotator_path):
    # NOTE(review): pickle.load executes code embedded in the file; this is
    # acceptable only while the cache location is trusted.
    with open(annotator_path, "rb") as annotator_file:
        ann = pickle.load(annotator_file)
else:
    ann = annotator.Annotator(args.gtf_path)
    with open(annotator_path, "wb") as annotator_file:
        pickle.dump(ann, annotator_file)

print("initiated annotator: {}".format(time.time() - t0))

# First file number handled by the loop that follows (it iterates
# range(l, 3)): only file 2 for single-ended runs, files 1 and 2 otherwise.
l = 2 if args.single else 1
for i in range(l, 3):
    SJ_df = pd.read_csv("{}{}SJ.out.tab".format(args.input_path, i),
                        sep="\t",
                        names=[
                            "donor_chromosome", "first_intron_base",
                            "last_intron_base", "strand", "intron_motif",
                            "annotated", "num_uniquely_mapping",