def main():
    parser = argparse.ArgumentParser(description='PhySketch Dataset annotation tool')
    parser.add_argument("-a", "--annotator", help="Run the annotation process", action='store_true')
    parser.add_argument("-c", "--cropper", help="Run the dataset cropping process", action='store_true')
    parser.add_argument("-z", "--viewer", help="Run the dataset viewing process", action='store_true')
    parser.add_argument("-g", "--generator", help="Run the scene generation process", action='store_true')
    parser.add_argument("-i", "--input", help="Folder containing the /Dataset structure", required=True)
    parser.add_argument("-s", "--startAt", help="Skip the first -s images", default=0, type=int)
    # parser.add_argument("-o", "--output", help="Destination folder", required=True)
    parser.add_argument("-v", "--verbose", help="Verbose", action='store_true')
    args = parser.parse_args()

    cfg.OUTPUT_DIR = os.path.join(args.input, "annotated")
    cfg.OUTPUT_CROP_DIR = os.path.join(args.input, "cropped")
    cfg.START_AT = args.startAt

    if args.verbose:
        log.basicConfig(format="%(levelname)s: %(message)s", level=log.DEBUG)
        log.info("Verbose output.")
    else:
        log.basicConfig(format="%(levelname)s: %(message)s")

    if args.annotator:
        import annotator as an
        cfg.INPUT_DIR = os.path.join(args.input, "cropped")
        ant = an.Annotator()
        ant.run()
    elif args.cropper:
        import cropper as cr
        cfg.INPUT_DIR = os.path.join(args.input, "raw")
        ant = cr.Cropper()
        ant.run()
    elif args.viewer:
        import viewer as vw
        cfg.INPUT_DIR = os.path.join(args.input)
        vw = vw.Viewer()
    elif args.generator:
        import scene_generator as sc
        cfg.INPUT_DIR = os.path.join(args.input)
        sc = sc.SceneGenerator()
    else:
        log.error("ERROR: SELECT A PROCESS")
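# A minimal entry-point sketch, assuming this main() lives in a standalone script.
# The guard and the example invocation below are not part of the original excerpt,
# and the script name "physketch_tool.py" is hypothetical:
#
#   python physketch_tool.py -i /path/to/Dataset -a -v
#
if __name__ == "__main__":
    main()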
def main():
    save_splices = False
    save_exon_bounds = True
    save_ann = True
    args = get_args()

    wrapper_path = "/oak/stanford/groups/horence/Roozbeh/single_cell_project/scripts/STAR_wrapper/"
    # annotator_path = "{}annotators/pyensembl_{}.pkl".format(wrapper_path, args.assembly)
    annotator_path = "{}annotators/{}.pkl".format(wrapper_path, args.assembly)
    print(annotator_path)

    gtf_df = get_gtf(args.gtf_path)

    if save_exon_bounds:
        exon_bounds = get_exon_bounds(gtf_df)
        pickle.dump(
            exon_bounds,
            open("{}annotators/{}_exon_bounds.pkl".format(wrapper_path, args.assembly), "wb"))
        print("{}annotators/{}_exon_bounds.pkl".format(wrapper_path, args.assembly))

    if save_splices:
        splices = get_splices(gtf_df)
        pickle.dump(
            splices,
            open("{}annotators/{}_splices.pkl".format(wrapper_path, args.assembly), "wb"))
        print("{}annotators/{}_splices.pkl".format(wrapper_path, args.assembly))

    # if os.path.exists(annotator_path):
    #     ann = pickle.load(open(annotator_path, "rb"))
    # else:
    #     ann = pyensembl.Genome(reference_name=args.assembly,
    #                            annotation_name="my_genome_features",
    #                            gtf_path_or_url=args.gtf_path)
    #     ann.index()

    if save_ann:
        ann = annotator.Annotator(args.gtf_path)
        print("got annotator")
        pickle.dump(ann, open(annotator_path, "wb"))
        print("dumped annotator to {}".format(annotator_path))
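# A hedged sketch of the get_args() helper called above; it is not shown in the
# original excerpt, so the exact flags and help strings here are assumptions based
# only on the attributes the code reads (args.gtf_path and args.assembly).
def get_args():
    import argparse
    parser = argparse.ArgumentParser(description="pre-build and pickle annotation objects")
    parser.add_argument("-g", "--gtf_path", help="path to the GTF file used for annotation")
    parser.add_argument("-a", "--assembly", help="assembly name used to label the pickled files")
    return parser.parse_args()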
def main():
    t0 = time.time()
    parser = argparse.ArgumentParser(description="create class input file")
    parser.add_argument("-g", "--gtf_path",
                        help="the path to the gtf file to use for annotation")
    parser.add_argument("-a", "--assembly",
                        help="The name of the assembly to pre-load annotation (so, mm10 for the 10th mouse assembly)")
    parser.add_argument("-i", "--input_path",
                        help="the prefix to the STAR Aligned.out.sam and Chimeric.out.sam directory")
    parser.add_argument("-I", "--input_file",
                        help="specify file name of different format", default="")
    parser.add_argument("-s", "--single", action="store_true",
                        help="use this flag if the reads you are running on are single-ended")
    parser.add_argument("-t", "--tenX", action="store_true",
                        help="indicate whether this is 10X data (with UMIs and barcodes)")
    parser.add_argument("-T", "--test", action="store_true",
                        help="save dictionaries and don't write class input")
    parser.add_argument("-n", "--include_one_read", action="store_true",
                        help="also save reads where only r1 maps")
    # include_one_read = True
    args = parser.parse_args()

    # gtf_data = pyensembl.Genome(reference_name='hg38', annotation_name='my_genome_features',
    #                             gtf_path_or_url='/scratch/PI/horence/JuliaO/single_cell/STAR_output/mm10_files/mm10.gtf')
    # gtf_data.index()
    wrapper_path = "/oak/stanford/groups/horence/Roozbeh/single_cell_project/scripts/STAR_wrapper/"
    # annotator_path = "{}annotators/pyensembl_{}.pkl".format(wrapper_path, args.assembly)
    annotator_path = "{}annotators/{}.pkl".format(wrapper_path, args.assembly)

    # Load a cached annotator if one exists; otherwise build it from the GTF and cache it.
    if os.path.exists(annotator_path):
        ann = pickle.load(open(annotator_path, "rb"))
    else:
        ann = annotator.Annotator(args.gtf_path)
        # ann = pyensembl.Genome(reference_name=args.assembly,
        #                        annotation_name="my_genome_features",
        #                        gtf_path_or_url=args.gtf_path)
        # ann.index()
        pickle.dump(ann, open(annotator_path, "wb"))

    fastqIdStyle = "complete"
    print("initiated annotator: {}".format(time.time() - t0))

    # gtf_file = "/scratch/PI/horence/JuliaO/single_cell/STAR_output/mm10_files/mm10.gtf"
    # ann = annotator.Annotator(gtf_file, 10000)
    ## gtf_dict = get_gtf_dict(gtf_file, 10000)
    # print("loaded annotator")
    # regimes = ["priorityAlign", "priorityChimeric"]
    # for regime in regimes:
    read_junc_dict = {}
    junc_read_dict = {}

    # fastq_ids = ["SRR65462{}".format(x) for x in range(73,85)]
    # fastq_ids = ["SRR65462{}".format(x) for x in range(79,84)]
    # fastq_ids = ["SRR6546284"]
    # for fastq_id in fastq_ids:
    #     print("{}: {}".format(fastq_id, time.time() - t0))
    # if args.single:
    #     samFile1 = "{}2Chimeric.out.sam".format(args.input_path)
    #     samFile2 = "{}2Aligned.out.sam".format(args.input_path)
    # else:
    #     samFile1 = "{}1Chimeric.out.sam".format(args.input_path)
    #     samFile2 = "{}1Aligned.out.sam".format(args.input_path)
    #     samFile3 = "{}2Chimeric.out.sam".format(args.input_path)
    #     samFile4 = "{}2Aligned.out.sam".format(args.input_path)

    if args.input_file != "":
        bamFile1 = args.input_file
    elif args.single:
        bamFile1 = "{}2Aligned.out.bam".format(args.input_path)
    else:
        bamFile1 = "{}1Aligned.out.bam".format(args.input_path)
        bamFile2 = "{}2Aligned.out.bam".format(args.input_path)

    # Parse the STAR BAM output for R1 (and for R2 when the data are paired-end).
    if args.single:
        read_junc_dict, junc_read_dict, genomic_alignments = STAR_parseBAM(
            bamFile1, "r1", read_junc_dict, junc_read_dict, fastqIdStyle, ann)
    else:
        read_junc_dict, junc_read_dict, genomic_alignments = STAR_parseBAM(
            bamFile1, "r1", read_junc_dict, junc_read_dict, fastqIdStyle, ann)
        read_junc_dict, junc_read_dict, _ = STAR_parseBAM(
            bamFile2, "r2", read_junc_dict, junc_read_dict, fastqIdStyle, ann)

    # if regime == "priorityAlign":
    #     read_junc_dict, junc_read_dict = STAR_parseSam(samFile2, "r1align", read_junc_dict, junc_read_dict, fastqIdStyle, ann)
    #     print("parsed r1align", time.time() - t0)
    #     read_junc_dict, junc_read_dict = STAR_parseSam(samFile1, "r1chim", read_junc_dict, junc_read_dict, fastqIdStyle, ann)
    #     print("parsed r1chim", time.time() - t0)
    # elif regime == "priorityChimeric":
    #     read_junc_dict, junc_read_dict = STAR_parseSam(samFile1, "r1chim", read_junc_dict, junc_read_dict, fastqIdStyle, ann)
    #     print("parsed r1chim", time.time() - t0)
    #     read_junc_dict, junc_read_dict = STAR_parseSam(samFile2, "r1align", read_junc_dict, junc_read_dict, fastqIdStyle, ann)
    #     print("parsed r1align", time.time() - t0)
    # if not args.single:
    #     if regime == "priorityAlign":
    #         read_junc_dict, junc_read_dict = STAR_parseSam(samFile4, "r2align", read_junc_dict, junc_read_dict, fastqIdStyle, ann)
    #         print("parsed r2align", time.time() - t0)
    #         read_junc_dict, junc_read_dict = STAR_parseSam(samFile3, "r2chim", read_junc_dict, junc_read_dict, fastqIdStyle, ann)
    #         print("parsed r2chim", time.time() - t0)
    #     elif regime == "priorityChimeric":
    #         read_junc_dict, junc_read_dict = STAR_parseSam(samFile3, "r2chim", read_junc_dict, junc_read_dict, fastqIdStyle, ann)
    #         print("parsed r2chim", time.time() - t0)
    #         read_junc_dict, junc_read_dict = STAR_parseSam(samFile4, "r2align", read_junc_dict, junc_read_dict, fastqIdStyle, ann)
    #         print("parsed r2align", time.time() - t0)
    # print("len(junc_read_dict)", len(junc_read_dict))
    # print("parsed all {}".format(regime), time.time() - t0)
    # write_class_file(junc_read_dict, "/scratch/PI/horence/JuliaO/single_cell/scripts/output/create_class_input/{}.tsv".format(fastq_id))

    # In test mode only the intermediate dictionaries are pickled; otherwise the class input file is written.
    if args.test:
        pickle.dump(read_junc_dict, open("{}read_junc_dict.pkl".format(args.input_path), "wb"))
        pickle.dump(junc_read_dict, open("{}junc_read_dict.pkl".format(args.input_path), "wb"))
    else:
        write_class_file(
            junc_read_dict,
            "{}class_input_{}.tsv".format(args.input_path, "WithinBAM"),
            args.single, genomic_alignments, args.tenX, args.include_one_read)
    print("genomic alignments", genomic_alignments)
import flask
import json
import annotator
import article
import config
import reader
import writer
import numpy as np

application = flask.Flask(__name__)
anne = annotator.Annotator(reader.get_reader(config.reader)(**config.reader_params),
                           writer.get_writer(config.writer)(**config.writer_params))
valid_users = np.loadtxt('usernames.txt', delimiter=',', dtype='str')


""" Display the main page. """
@application.route('/', methods=['GET'])
def index():
    return flask.render_template('index.html')


""" Start the program. """
@application.route('/start/', methods=['GET', 'POST'])
def start():
    userid = flask.request.form['userid']
    if not(userid in valid_users):
t0 = time.time()
wrapper_path = "/oak/stanford/groups/horence/Roozbeh/single_cell_project/scripts/STAR_wrapper/"
# annotator_path = "{}annotators/pyensembl_{}.pkl".format(wrapper_path, args.assembly)
annotator_path = "{}annotators/{}.pkl".format(wrapper_path, args.assembly)

if os.path.exists(annotator_path):
    ann = pickle.load(open(annotator_path, "rb"))
else:
    # ann = pyensembl.Genome(reference_name=args.assembly,
    #                        annotation_name="my_genome_features",
    #                        gtf_path_or_url=args.gtf_path)
    # ann.index()
    ann = annotator.Annotator(args.gtf_path)
    pickle.dump(ann, open(annotator_path, "wb"))
print("initiated annotator: {}".format(time.time() - t0))

if args.single:
    l = 2
else:
    l = 1

for i in range(l, 3):
    SJ_df = pd.read_csv("{}{}SJ.out.tab".format(args.input_path, i), sep="\t",
                        names=["donor_chromosome", "first_intron_base", "last_intron_base",
                               "strand", "intron_motif", "annotated", "num_uniquely_mapping",