def new_selection(self, project_id, s, job_name, job_description, *args, **kw):
    '''
    Called by the browser. Transform a selection to a new track.

    :param project_id: id used to look up the Project row.
    :param s: JSON text; once decoded it is iterated as a mapping from
        chromosome name to a list of marquees, each indexed by 'start'
        and 'end'.
    :param job_name: name of the job; also the first part of the track name.
    :param job_description: description of the job; also the second part
        of the track name.
    :returns: a dict with a single 'error' key when the project does not
        exist or the datatype is not supported, otherwise a dict with
        'job_id', 'job_name', 'job_description' and 'status'.
    '''
    user = handler.user.get_user_in_session(request)
    sels = json.loads(s)

    project = DBSession.query(Project).filter(Project.id == project_id).first()
    if project is None:
        return {'error': "project id %s doesn't exist" % project_id}

    # Write every marquee of the selection into a temporary sql track.
    path = track.common.temporary_path()
    with track.new(path, 'sql') as t:
        t.fields = simple_fields
        for chromosome in sels:
            t.write(chromosome,
                    ((marquee['start'], marquee['end'], 0, '', 0, '')
                     for marquee in sels[chromosome]))
        t.datatype = constants.FEATURES
        t.assembly = project.sequence.name

    # Only the task id is used below; the track id is deliberately ignored.
    task_id, _track_id = handler.track.create_track(
        user.id, project.sequence, f=path,
        trackname='%s %s' % (job_name, job_description), project=project)
    if task_id == constants.NOT_SUPPORTED_DATATYPE:
        # The message has no placeholder: applying "% project_id" to it
        # raised TypeError instead of returning this error to the caller.
        return {'error': "not supported datatype"}

    job_id = handler.job.new_sel(user.id, project.id, job_description,
                                 job_name, task_id=task_id)
    return {'job_id': job_id,
            'job_name': job_name,
            'job_description': job_description,
            'status': 'RUNNING'}
def export(self, path, format=None):
    """Copy this track into a new track at ``path``, then close this one.

    Each chromosome obtained by iterating over ``self`` is read and written
    to the new track; ``chrmeta`` and ``info`` are copied afterwards.

    :param path: passed to ``track.new`` as the destination.
    :param format: passed to ``track.new`` unchanged (``None`` by default).
    """
    with track.new(path, format) as exported:
        for chromosome in self:
            features = self.read(chromosome)
            exported.write(chromosome, features)
        exported.chrmeta = self.chrmeta
        exported.info = self.info
    self.close()
def runTest(self):
    """Write one feature on chromosomes '0'..'4', then run raw SQL.

    After the writes, a cursor obtained from the track is used to create a
    ``tmp`` table and insert one row into it.
    """
    sql_path = temporary_path('.sql')
    with track.new(sql_path) as new_track:
        for index in range(5):
            chrom_name = str(index)
            new_track.write(chrom_name, [(0, 10, 'A', 0.0, -1)])
        cursor = new_track.cursor()
        cursor.execute("CREATE table tmp (koopa text,troopa text)")
        cursor.execute("INSERT into tmp values (?,?)", (1, 2))
def create_repeat_annotation(sequence, fname):
    """Write a 'repeat' annotation for ``sequence`` to the bed file ``fname``.

    Nothing is done when ``checkfile(fname)`` is truthy. Otherwise each item
    of ``sequence`` is flagged with ``isupper()``, the flags are turned into
    intervals by the ``simulator`` helpers, and the intervals are written
    on "chr1".

    :param sequence: iterable of items supporting ``isupper()``.
    :param fname: path of the bed track to create.
    """
    if not checkfile(fname):
        uppercase_flags = (symbol.isupper() for symbol in sequence)
        flagged = simulator.get_sequence(uppercase_flags, sequence)
        repeat_intervals = simulator.sequence_to_intervals(flagged, "repeat")
        with track.new(fname, "bed") as bed:
            bed.fields = ["start", "end", "name"]
            bed.write("chr1", repeat_intervals)
def runTest(self):
    """Populate a fresh track, then create and fill a table through its cursor.

    One feature is written for each of the chromosomes named '0' to '4'
    before the ``tmp`` table is created and a single row inserted.
    """
    destination = temporary_path('.sql')
    with track.new(destination) as fresh:
        for number in range(5):
            features = [(0, 10, 'A', 0.0, -1)]
            fresh.write(str(number), features)
        db_cursor = fresh.cursor()
        db_cursor.execute("CREATE table tmp (koopa text,troopa text)")
        db_cursor.execute("INSERT into tmp values (?,?)", (1, 2))
def runTest(self):
    """Copy a sample track chromosome by chromosome and compare 'chrI'.

    The copy is removed from disk once the comparison has been made.
    """
    source_path = samples['small_signals'][7]['sql']
    copy_path = temporary_path('.sql')
    with track.load(source_path) as source, track.new(copy_path) as copy:
        for name in source:
            copy.write(name, source.read(name))
        # Both tracks must still be open while they are read back.
        written = list(copy.read('chrI'))
        original = list(source.read('chrI'))
        self.assertEqual(written, original)
    os.remove(copy_path)
def create_repeat_annotation(sequence, fname):
    """Create the bed file ``fname`` holding 'repeat' intervals of ``sequence``.

    Returns immediately when ``checkfile(fname)`` is truthy. Otherwise the
    ``isupper()`` flag of every item of ``sequence`` is passed, together
    with the sequence, through ``simulator.get_sequence`` and
    ``simulator.sequence_to_intervals``; the result is written on "chr1".

    :param sequence: iterable of items supporting ``isupper()``.
    :param fname: path of the bed track to create.
    """
    already_there = checkfile(fname)
    if already_there:
        return

    flags = (char.isupper() for char in sequence)
    annotated = simulator.get_sequence(flags, sequence)
    intervals = simulator.sequence_to_intervals(annotated, 'repeat')

    with track.new(fname, 'bed') as out:
        out.fields = ['start', 'end', 'name']
        out.write("chr1", intervals)
def ucsc_geneid_fix(in_gtf, out_gtf, remote=None, local=None):
    """Updates 'gene_id' entries in GTF files downloaded from UCSC
    Table Browser to contain gene IDs instead of transcript IDs.

    If the output GTF file name already exists, it will be
    overwritten.

    Exactly one of `remote` and `local` must be supplied.

    :param in_gtf: path to input GTF file
    :type in_gtf: str
    :param out_gtf: path to output GTF file
    :type out_gtf: str
    :param remote: UCSC database and annotation source to use; the
                   keys read are 'db' and 'annot'
    :type remote: dict('db': str, 'annot': str)
    :param local: two-column file name containing transcript-gene
                  mapping, only when `remote` is None
    :type local: str
    :returns: None
    :raises ValueError: if neither or both of `remote` and `local` are
                        given, if `remote` lacks 'db' or 'annot', or if
                        the annotation source is not a key of QUERIES

    """
    # remote not defined
    if remote is None:
        # then local must be defined
        if local is None:
            raise ValueError("Missing `remote` or `local` arguments")
        mapping = get_local_transcript_gene_mapping(local)

    # remote defined
    else:
        # then local can not be defined
        if local is not None:
            raise ValueError("Only supply `remote` or `local` argument, "
                             "not both.")
        # remote must have 'db'
        if "db" not in remote:
            raise ValueError("Missing remote database name")
        # and 'annot'
        if "annot" not in remote:
            raise ValueError("Missing remote annotation source name")

        db = remote["db"]
        annot = remote["annot"]
        # direct membership test; no need to materialise the key list
        if annot not in QUERIES:
            raise ValueError("Invalid annotation source "
                             "name: {0}".format(annot))

        mapping = get_ucsc_transcript_gene_mapping(annot, db, cred=CRED)

    # remove output file if it exists
    if os.path.exists(out_gtf):
        os.remove(out_gtf)

    with track.load(in_gtf, readonly=True) as in_track, \
            track.new(out_gtf, format="gtf") as out_track:
        # since GTF has custom fields, need to set the out_track to use
        # in_track's fields
        out_track.fields = in_track.fields
        for chrom in in_track.chromosomes:
            chrom_rec = in_track.read(chrom)
            out_track.write(chrom, update_gene_id_attr(chrom_rec, mapping))
def newTrack(self, info=None, name=None):
    """Close the current track, if any, and start a new one.

    The next path from ``self.file_paths`` is recorded in ``self.tracks``
    and a track created on it becomes ``self.current_track``.

    :param info: optional mapping merged into the new track's ``info``.
    :param name: accepted but not used by this method.
    """
    # Close previous track #
    if self.current_track:
        self.closeCurrentTrack()
    # Get a file name #
    # builtin next() works on Python 2.6+ and 3, unlike the .next() method
    path = next(self.file_paths)
    # Add it to the result #
    self.tracks.append(path)
    # Create it #
    self.current_track = track.new(path)
    # Add the metadata #
    if info:
        self.current_track.info.update(info)
def runTest(self):
    """Write only 'start' and 'end' into a track using the default fields.

    The first feature read back from the new track is expected to be
    ``(0, 10, None, None, None)``. The output file is removed at the end.
    """
    in_path = samples['small_features'][1]['sql']
    out_path = temporary_path('.sql')
    chrom = 'chrI'
    with track.load(in_path) as i:
        with track.new(out_path) as o:
            o.fields = track.default_fields
            o.write(chrom, i.read(chrom, ('start', 'end')))
            # builtin next() instead of the Python-2-only .next() method
            got = tuple(next(o.read(chrom)))
    expected = (0, 10, None, None, None)
    self.assertEqual(got, expected)
    os.remove(out_path)
def runTest(self):
    """Check the first feature of a track written with two of five fields.

    Only 'start' and 'end' are copied from the sample track into a new
    track whose fields are ``track.default_fields``; the first feature read
    back is expected to be ``(0, 10, None, None, None)``. The output file
    is removed at the end.
    """
    in_path = samples['small_features'][1]['sql']
    out_path = temporary_path('.sql')
    chrom = 'chrI'
    with track.load(in_path) as i:
        with track.new(out_path) as o:
            o.fields = track.default_fields
            o.write(chrom, i.read(chrom, ('start', 'end')))
            # next(iterator) is portable; iterator.next() exists on Python 2 only
            first_feature = next(o.read(chrom))
            got = tuple(first_feature)
    expected = (0, 10, None, None, None)
    self.assertEqual(got, expected)
    os.remove(out_path)
def new_selection(self, project_id, s, job_name, job_description, *args,
                  **kw):
    '''
    Called by the browser. Transform a selection to a new track.

    :param project_id: id used to look up the Project row.
    :param s: JSON text; once decoded it is iterated as a mapping from
        chromosome name to a list of marquees, each indexed by 'start'
        and 'end'.
    :param job_name: name of the job; also the first part of the track name.
    :param job_description: description of the job; also the second part
        of the track name.
    :returns: a dict with a single 'error' key when the project does not
        exist or the datatype is not supported, otherwise a dict with
        'job_id', 'job_name', 'job_description' and 'status'.
    '''
    user = handler.user.get_user_in_session(request)
    sels = json.loads(s)

    project = DBSession.query(Project).filter(
        Project.id == project_id).first()
    if project is None:
        return {'error': "project id %s doesn't exist" % project_id}

    # Store the selected regions in a temporary sql track.
    path = track.common.temporary_path()
    with track.new(path, 'sql') as t:
        t.fields = simple_fields
        for chromosome in sels:
            t.write(chromosome,
                    ((marquee['start'], marquee['end'], 0, '', 0, '')
                     for marquee in sels[chromosome]))
        t.datatype = constants.FEATURES
        t.assembly = project.sequence.name

    # The second element of the result (the track id) is not needed here.
    task_id, _track_id = handler.track.create_track(
        user.id,
        project.sequence,
        f=path,
        trackname='%s %s' % (job_name, job_description),
        project=project)
    if task_id == constants.NOT_SUPPORTED_DATATYPE:
        # No "% project_id" here: the message has no placeholder, so the
        # formatting raised TypeError and the error was never returned.
        return {'error': "not supported datatype"}

    job_id = handler.job.new_sel(user.id,
                                 project.id,
                                 job_description,
                                 job_name,
                                 task_id=task_id)
    return {
        'job_id': job_id,
        'job_name': job_name,
        'job_description': job_description,
        'status': 'RUNNING'
    }
def runTest(self):
    """Smooth a three-feature score track with ``window_smoothing(t, 2)``.

    The result is exported, loaded again and its 'chrI' features are
    compared with the expected list. Both temporary files are removed at
    the end.
    """
    in_path = temporary_path('.sql')
    out_path = temporary_path('.sql')
    with track.new(in_path) as t:
        t.fields = ('start', 'end', 'score')
        t.assembly = 'sacCer2'
        t.write('chrI', [(0, 2, 10), (2, 4, 20), (6, 8, 10)])
        result = window_smoothing(t, 2)
        result.export(out_path)
    with track.load(out_path) as t:
        data = t.read('chrI')
        # Build a real list while the track is open: on Python 3 map() is a
        # lazy iterator, which never compares equal to the expected list.
        got = [tuple(feature) for feature in data]
    expected = [(0, 1, 8.0),
                (1, 3, 12.0),
                (3, 5, 10.0),
                (5, 6, 8.0),
                (6, 9, 4.0),
                (9, 10, 2.0)]
    self.assertEqual(got, expected)
    os.remove(in_path)
    os.remove(out_path)
def main(n, datadir='data/train_sequences/', fname='simulated_alignment'):
    """Simulate an alignment with gene annotations and write it to ``datadir``.

    For each of ``n`` positions a gene state and one DNA character per
    sequence are drawn for three sequences (named human, mouse and horse
    in the code). The output consists of:

    * a FASTA file ``<fname>.fa`` with sequence1 and sequence2,
    * one bed annotation track per sequence for sequence1 and sequence2,
    * a ``<fname>.js`` file holding ``annotations.toJSON()``.

    The third (horse) sequence is still simulated and "sequence3" is still
    registered with ``addSequences``, but its annotation file, FASTA record
    and bed track are commented out below.

    :param n: number of positions to simulate; replaced by
        ``int(sys.argv[1])`` when a first command line argument is present.
    :param datadir: directory that receives every output file.
    :param fname: base name of the output files; replaced by
        ``sys.argv[2]`` when a second command line argument is present.
    """
    s1name = "sequence1"
    s2name = "sequence2"
    s3name = "sequence3"
    annotation_name = 'gene'
    alignment_extension = ".fa"
    annotations_extension = ".bed"
    config_extension = ".js"

    # Command line arguments, when present, take precedence over the
    # values passed by the caller.
    if len(sys.argv) > 1:
        n = int(sys.argv[1])
    if len(sys.argv) > 2:
        fname = sys.argv[2]

    master_gene_sequence = MarkovChain(P_START_GENE, P_STOP_GENE)
    human_delete_sequence = MarkovChain(P_START_DELETE, P_STOP_DELETE)
    mouse_delete_sequence = MarkovChain(P_START_DELETE, P_STOP_DELETE)
    horse_delete_sequence = MarkovChain(P_START_DELETE, P_STOP_DELETE)
    mutator_coin = BiasedCoin(P_NOT_MUTATE_GENE)

    # One entry is appended to each of these lists per simulated position.
    master_gene = list()
    human_gene = list()
    mouse_gene = list()
    horse_gene = list()
    human_dna = list()
    mouse_dna = list()
    horse_dna = list()

    # NOTE(review): the order of the random draws below determines the
    # output for a given random state - keep it when editing.
    for i in range(n):
        # create master_gene item
        g = g2 = g3 = g4 = master_gene_sequence.get_state()

        # mutate master_gene item
        # (only when the master state is truthy; otherwise all three
        # sequences keep the master state)
        if g:
            g2 = mutator_coin.flip()
            g3 = mutator_coin.flip()
            g4 = mutator_coin.flip()

        dna_mutation_coin = create_dna_mutation_coin(g2 + g3)
        dna_mutation_coin2 = create_dna_mutation_coin(g2 + g4)

        # create DNA item
        c = c2 = c3 = random.randint(0, 3)
        c2 = mutate(c2, g2 + g3)
        # Turn the three indices into characters. On Python 2 the
        # comprehension variable rebinds the loop variable `i`, which is
        # not used anywhere else in the loop body.
        c, c2, c3 = [DNA_CHARS[i] for i in (c, c2, c3)]
        # When the coin comes up falsy, replace the character by a
        # different one: draw an index in 0..2 and fall back to index 3 if
        # the draw equals the current character.
        if not dna_mutation_coin.flip():
            char_index = random.randint(0, 2)
            if DNA_CHARS[char_index] == c2:
                char_index = 3
            c2 = DNA_CHARS[char_index]

        if not dna_mutation_coin2.flip():
            char_index = random.randint(0, 2)
            if DNA_CHARS[char_index] == c3:
                char_index = 3
            c3 = DNA_CHARS[char_index]

        # delete DNA item
        # (a deleted position is written as '-')
        if human_delete_sequence.get_state():
            c = '-'
        if mouse_delete_sequence.get_state():
            c2 = '-'
        if horse_delete_sequence.get_state():
            c3 = '-'

        # add items to sequence
        master_gene.append(g)
        human_gene.append(g2)
        mouse_gene.append(g3)
        horse_gene.append(g4)
        human_dna.append(c)
        mouse_dna.append(c2)
        horse_dna.append(c3)

    # output
    # Existing annotation files are removed before new ones are written,
    # including the one for sequence3 although it is not rewritten below.
    s1fname = os.path.join(
        datadir,
        fname + '_' + s1name + '_' + annotation_name + annotations_extension)
    if os.path.isfile(s1fname):
        os.remove(s1fname)
    s2fname = os.path.join(
        datadir,
        fname + '_' + s2name + '_' + annotation_name + annotations_extension)
    if os.path.isfile(s2fname):
        os.remove(s2fname)
    s3fname = os.path.join(
        datadir,
        fname + '_' + s3name + '_' + annotation_name + annotations_extension)
    if os.path.isfile(s3fname):
        os.remove(s3fname)

    intervals1 = sequence_to_intervals(get_sequence(human_gene, human_dna),
                                       annotation_name)
    intervals2 = sequence_to_intervals(get_sequence(mouse_gene, mouse_dna),
                                       annotation_name)
    # intervals3 is computed but unused while the sequence3 track is
    # commented out below.
    intervals3 = sequence_to_intervals(get_sequence(horse_gene, horse_dna),
                                       annotation_name)

    annotations = Annotations()
    annotations.setAnnotations([annotation_name])
    annotations.addSequences([s1name, s2name, s3name])
    annotations.addAnnotationFile(s1name, annotation_name, s1fname)
    annotations.addAnnotationFile(s2name, annotation_name, s2fname)
    # annotations.addAnnotationFile(s3name, annotation_name, s3fname)

    Fasta.save(
        [
            (s1name, ''.join(human_dna)),
            (s2name, ''.join(mouse_dna)),
            # (s3name, ''.join(horse_dna))
        ],
        os.path.join(datadir, fname + alignment_extension))

    with track.new(s1fname, 'bed') as t:
        t.fields = ['start', 'end', 'name']
        t.write("chr1", intervals1)

    with track.new(s2fname, 'bed') as t:
        t.fields = ['start', 'end', 'name']
        t.write("chr1", intervals2)

    # with track.new(s3fname, 'bed') as t:
    #     t.fields = ['start', 'end', 'name']
    #     t.write("chr1", intervals3)

    # NOTE(review): `Open` (capitalised) is not the builtin; presumably a
    # project helper - confirm against the file's imports.
    with Open(os.path.join(datadir, fname + config_extension), "w") as f:
        json.dump(annotations.toJSON(), f)