def run_one(case, t):
    """Run one test description against a test case.

    ``t`` is a dictionary describing the call to make:

    * ``t['fn']``       -- the callable under test; its result is consumed
      with ``list()``.
    * ``t['expected']`` -- the list the result is compared with.
    * ``t['tracks']``   -- optional mapping of keyword name to either a list
      of features or something ``track.load`` accepts.
    * ``t['fields']``   -- optional mapping of keyword name to the fields to
      read when the corresponding track is loaded from disk.
    * ``t['input']``    -- extra keyword arguments; when ``t['tracks']`` is
      absent or empty it must contain ``'list_of_tracks'``.

    The description itself is never modified: the one-shot iterators are
    placed in a fresh keyword dictionary, so the same ``t`` can be run again.

    :param case: object providing ``assertEqual`` (a ``unittest.TestCase``).
    :param t: the test description (see above).
    """
    if t.get('tracks'):
        fields = t.get('fields') or {}
        # Copy the extra inputs so that adding the tracks does not leak
        # into the caller's dictionary.
        kwargs = dict(t.get('input', {}))
        for name, source in t['tracks'].items():
            kwargs[name] = _chr1_iterator(source, fields.get(name))
    else:
        kwargs = dict(t['input'])
        kwargs['list_of_tracks'] = [
            _chr1_iterator(source) for source in t['input']['list_of_tracks']
        ]
    # Run it #
    case.assertEqual(list(t['fn'](**kwargs)), t['expected'])


def _chr1_iterator(source, fields=None):
    """Return a fresh iterator over the 'chr1' features of *source*.

    A list is iterated as is; anything else is opened with ``track.load``
    and fully read before the track is closed.
    """
    if isinstance(source, list):
        return iter(source)
    with track.load(source) as x:
        if fields:
            return iter(list(x.read('chr1', fields=fields)))
        return iter(list(x.read('chr1')))
def fix_sqlite_db(database):
    """Clean up a SQLite track and return a de-duplicated copy of it.

    Three passes are made:

    1. Rows of ``chrNames`` in *database* that have no table of the same
       name are deleted; this change is committed to *database* itself.
    2. Every feature of *database* is copied into a temporary quantitative
       track. Features whose strand field equals 1 have their coordinates
       mirrored against ``chrmeta[chrom]``.
    3. A second quantitative track is filled from the temporary one, keeping
       for every start position only the row(s) carrying the highest score.

    The temporary track file created by pass 2 is left on disk.

    :param database: path of the SQLite track to fix.
    :returns: the file name of the de-duplicated track.
    """
    # Pass 1: drop chromosome names that have no feature table.
    db = sqlite3.connect(database)
    try:
        cursor = db.cursor()
        chr_names = cursor.execute("SELECT name FROM chrNames").fetchall()
        for chromosome in chr_names:
            # Values (unlike identifiers) can be bound as parameters.
            exist = cursor.execute(
                "SELECT COUNT(name) FROM sqlite_master WHERE type='table' AND name=?;",
                (chromosome[0],)
            ).fetchone()[0]
            if exist == 0:
                cursor.execute("DELETE FROM chrNames WHERE name=?", (chromosome[0],))
        db.commit()
        cursor.close()
    finally:
        db.close()

    track_scanned_signal_tmp = unique_filename_in()
    track_scanned_signal = unique_filename_in()

    # Pass 2: temporary result, may still contain duplicate elements.
    with new(track_scanned_signal_tmp, format="sql", datatype="quantitative") as t:
        with load(database) as b:
            t.chrmeta = b.chrmeta
            for chrom in b:
                for value in b.read(chrom, fields=['start', 'end', 'score', 'strand']):
                    if value[3] == 1:
                        # Mirror the coordinates; presumably chrmeta[chrom] is
                        # the chromosome length -- TODO confirm.
                        # NOTE(review): this branch passes a flat 3-tuple while
                        # the other passes a 1-tuple holding a feature; confirm
                        # which shape ``write`` expects.
                        t.write(chrom, (t.chrmeta[chrom]-(value[1]+1), t.chrmeta[chrom]-(value[0]), value[2],))
                    else:
                        t.write(chrom, (value[0:3],))

    # Pass 3: result without duplicates; when several elements share a start,
    # the one(s) with the highest score are kept.
    with new(track_scanned_signal, format="sql", datatype="quantitative") as t:
        with load(database) as b:
            t.chrmeta = b.chrmeta

    db1 = sqlite3.connect(track_scanned_signal)
    db2 = sqlite3.connect(track_scanned_signal_tmp)
    try:
        cursor1 = db1.cursor()
        cursor2 = db2.cursor()
        chr_names = cursor2.execute("SELECT name FROM chrNames").fetchall()
        for chromosome in chr_names:
            # Table names cannot be bound as parameters, hence the
            # concatenation for the identifiers below.
            cursor1.execute("CREATE TABLE '"+chromosome[0]+"' (start INTEGER, end INTEGER, score REAL);")
            values = cursor2.execute("""
                SELECT t.start, t.end, t.score
                FROM '"""+chromosome[0]+"""' t
                INNER JOIN (
                    SELECT start, end, MAX(score) AS MAXSCORE
                    FROM '"""+chromosome[0]+"""'
                    GROUP BY start
                ) groupedt
                ON t.start=groupedt.start AND t.score=MAXSCORE;
                """)
            for v in values.fetchall():
                cursor1.execute("INSERT INTO '"+chromosome[0]+"' VALUES (?,?,?) ", (v[0], v[1], v[2],))
        db1.commit()
        db2.commit()
        cursor1.close()
        cursor2.close()
    finally:
        # Close both connections (the original closed db2 twice and never db1).
        db1.close()
        db2.close()
    return track_scanned_signal
def __iter__(self):
    """Iterate over the data covered by this object's selection.

    What is yielded depends on ``self.selection['type']``:

    * ``'chr'``      -- one item: the result of reading that chromosome.
    * ``'all'``      -- one read result per chromosome of ``self.track.chrs``.
    * ``'regions'``  -- one read result per entry of the ``'regions'`` list.
    * ``'trackchr'`` -- every item produced by ``self.make_overlap`` for the
      selected chromosome, using the track named by
      ``self.request['selected_regions']``.
    * ``'track'``    -- the same, for every chromosome of ``self.track.chrs``.

    Any other type yields nothing.
    """
    kind = self.selection['type']

    if kind == 'chr':
        yield self.track.read(self.selection['chr'], self.fields)
        return

    if kind == 'all':
        for chrom in self.track.chrs:
            yield self.track.read(chrom, self.fields)
        return

    if kind == 'regions':
        for region in self.selection['regions']:
            yield self.track.read(region, self.fields)
        return

    if kind == 'trackchr':
        with load(self.request['selected_regions'], readonly=True) as regions_track:
            for overlap in self.make_overlap(regions_track, self.selection['chr']):
                yield overlap
        return

    if kind == 'track':
        with load(self.request['selected_regions'], readonly=True) as regions_track:
            for chrom in self.track.chrs:
                for overlap in self.make_overlap(regions_track, chrom):
                    yield overlap
def runTest(self):
    """Check that a 'bool_not' manipulation keeps the chromosome metadata.

    The third 'Validation' track is run through the ``genomic_manip``
    operation ``bool_not``; the ``chrmeta`` of the first produced file must
    equal the ``chrmeta`` of the reference SQL track loaded with the yeast
    chromosome file.
    """
    sql_path = track_collections['Validation'][2]['path_sql']
    bed_path = track_collections['Validation'][2]['path']
    files = run(
        track1=bed_path,
        track1_name='Validation track two',
        track1_chrs=yeast_chr_file,
        operation_type='genomic_manip',
        manipulation='bool_not',
        output_location=tempfile.gettempdir(),
    )
    try:
        with load(sql_path, chrmeta=yeast_chr_file, readonly=True) as sql:
            with load(files[0]) as bed:
                self.assertEqual(sql.chrmeta, bed.chrmeta)
    finally:
        # Remove the generated file even when loading or the assertion
        # fails, so a red test does not leave output in the temp directory.
        os.remove(files[0])
def runTest(self):
    """Check the descriptive-statistics characteristics on a validation track.

    The 'chr1' features of the second 'Validation' track are read once and
    stored on the collection entry (``t["track"]`` and ``t["data"]``). Each
    characteristic is then called with the features reduced to the fields
    it declares in its ``fields`` attribute, and its result is compared with
    the expected value.
    """
    t = track_collections["Validation"][1]
    with track.load(t["path_sql"]) as t["track"]:
        t["data"] = list(t["track"].read("chr1"))

    characteristic = desc_stat.gmCharacteristic
    checks = [
        {"fn": characteristic.number_of_features, "input": t["data"], "expected": 12},
        {"fn": characteristic.base_coverage, "input": t["data"], "expected": 85},
        {
            "fn": characteristic.length,
            "input": t["data"],
            "expected": [10, 6, 10, 5, 5, 10, 10, 10, 10, 20, 10, 10],
        },
        {
            "fn": characteristic.score,
            "input": t["data"],
            "expected": [10.0, 0.0, 10.0, 0.0, 0.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 5.0],
        },
    ]

    for check in checks:
        fn = check["fn"]
        # Keep, for every feature, only the columns the characteristic asks for.
        projected = [
            [feature[track.Track.qualitative_fields.index(name)] for name in fn.fields]
            for feature in check["input"]
        ]
        self.assertEqual(fn(projected), check["expected"])
def track_cut_down(request, track):
    """Yield the ``gmSubtrack`` objects a request asks for on one track.

    The shape of the output depends on ``request['selected_regions']``
    (empty, a list of regions, or anything else, which is treated as a track
    to load) and on ``request['per_chromosome']``. When a selection is
    present and ``request['compare_parents']`` is true, every selected
    subtrack is immediately followed by its parent subtrack (the whole
    track, or the whole chromosome in per-chromosome mode).

    :param request: the request dictionary.
    :param track: the track to cut down; its ``chrs`` attribute is iterated.
    """
    def with_parent(selection, parent_selection):
        # The selection itself, then (optionally) what it is compared with.
        yield gmSubtrack(request, track, selection, False)
        if request['compare_parents']:
            yield gmSubtrack(request, track, parent_selection, True)

    regions = request['selected_regions']
    per_chromosome = request['per_chromosome']

    #--- NO SELECTION ---#
    if not regions:
        if per_chromosome:
            for chrom in track.chrs:
                yield gmSubtrack(request, track, {'type': 'chr', 'chr': chrom})
        else:
            yield gmSubtrack(request, track, {'type': 'all'})
        return

    #--- STRING SELECTION ---#
    if type(regions) is list:
        if not per_chromosome:
            for sub in with_parent({'type': 'regions', 'regions': regions},
                                   {'type': 'all'}):
                yield sub
            return
        for chrom in track.chrs:
            matching = [region for region in regions if region['chr'] == chrom]
            if not matching:
                continue
            for sub in with_parent({'type': 'regions', 'regions': matching},
                                   {'type': 'chr', 'chr': chrom}):
                yield sub
        return

    #--- TRACK SELECTION ---#
    if not per_chromosome:
        for sub in with_parent({'type': 'track', 'track': request['selected_regions']},
                               {'type': 'all'}):
            yield sub
        return
    with load(request['selected_regions'], readonly=True) as selection_track:
        for chrom in track.chrs:
            # Chromosomes absent from the selection track are skipped.
            if chrom not in selection_track:
                continue
            for sub in with_parent({'type': 'trackchr', 'chr': chrom},
                                   {'type': 'chr', 'chr': chrom}):
                yield sub
def run(**request):
    """Execute a request and return whatever the chosen operation returns.

    The operation named by ``request['operation_type']`` is looked up on
    ``operations`` (and imported from ``gMiner.operations`` first when it is
    not there yet). The output location is validated, the optional
    ``selected_regions`` and ``wanted_chromosomes`` entries are defaulted to
    ``''`` and parsed, and the tracks described by the request are opened
    read-only. Each track receives a ``number`` and a ``chrs`` attribute
    before the operation's ``run`` function is called.

    :raises Exception: when the operation cannot be imported, when no output
        location is given, or when the output location is not a directory.
    """
    # Import the correct operation #
    operation = request['operation_type']
    if not hasattr(operations, operation):
        try:
            __import__('gMiner.operations.' + operation)
        except ImportError as err:
            raise Exception("The operation " + operation + " could not be imported because: " + str(err))
    run_op = getattr(operations, operation).run

    # Mandatory request variables #
    if not request.get('output_location'):
        raise Exception("There does not seem to be an output location specified in the request.")
    output_dir = request['output_location'].rstrip('/')
    if not os.path.isdir(output_dir):
        raise Exception("The output location '" + output_dir + "' specified is not a directory.")

    # Optional request variables #
    request.setdefault('selected_regions', '')
    parse_regions(request)
    request.setdefault('wanted_chromosomes', '')
    parse_chrlist(request)

    # Prepare the tracks #
    contexts = []
    for description in parse_tracks(request):
        contexts.append(track.load(description['path'],
                                   name=description['name'],
                                   chrmeta=description.get('chrs'),
                                   readonly=True))

    with nested(*contexts) as tracks:
        # Assign numbers #
        for number, t in enumerate(tracks):
            t.number = number
        # Determine final chromosome list #
        wanted = request['wanted_chromosomes']
        for t in tracks:
            if wanted:
                t.chrs = (set(t.all_chrs) & set(wanted))
            else:
                t.chrs = t.all_chrs
        # Run it #
        return run_op(request, tracks, output_dir)
def create_bins(X, num_of_bins=10):
    """Split every feature of *X* into ``num_of_bins`` consecutive bins.

    Bin edges are computed with integer arithmetic so that the bins tile the
    feature exactly: the first bin starts at the feature start, the last bin
    ends at the feature end, and bin widths differ by at most one when the
    length is not a multiple of ``num_of_bins``. (Dividing the length first
    truncated the bin width and silently dropped the tail of the feature.)

    :param X: iterable of features; items 0 and 1 are the start and end
        (assumed to be integers), items 2, 3 and 4 are copied to every bin.
    :param num_of_bins: number of bins per feature.
    :returns: a generator of ``(start, end, x[2], x[3], x[4])`` tuples.
    """
    for x in X:
        start = x[0]
        span = x[1] - x[0]
        edges = [start + (i * span) // num_of_bins for i in range(num_of_bins + 1)]
        for i in range(num_of_bins):
            yield (edges[i], edges[i + 1], x[2], x[3], x[4])


# Example: mean score of one track over the bins of another.
# Guarded so that importing this file does not touch the track files.
if __name__ == '__main__':
    from bbcflib import track
    from gMiner.operations.genomic_manip.scores import mean_score_by_feature

    manip = mean_score_by_feature()
    with track.load('/scratch/genomic/tracks/pol2.sql') as a:
        with track.load('/scratch/genomic/tracks/ribosome_proteins.sql') as b:
            with track.new('/tmp/manual.sql') as r:
                for chrom in a:
                    r.write(chrom, manip(a.read(chrom), create_bins(b.read(chrom))))
                r.meta_chr = a.meta_chr
                r.meta_track = {'datatype': 'qualitative', 'name': 'Mean score per bin', 'created_by': 'gMiner example script'}
def run_request(case, t):
    """Run a gMiner request and compare its 'chr1' output with expectations.

    ``t['kwargs']`` is passed to ``gMiner.run``; the first file it returns is
    loaded with the yeast chromosome file, its 'chr1' features are read, the
    file is deleted, and the features are compared with ``t['expected']``.

    :param case: object providing ``assertEqual``.
    :param t: dictionary with the keys ``'kwargs'`` and ``'expected'``.
    """
    output_path = gMiner.run(**t['kwargs'])[0]
    with track.load(output_path, chrmeta=yeast_chr_file) as result:
        features = list(result.read('chr1'))
    # Delete the output before asserting so a failure leaves nothing behind.
    os.remove(output_path)
    case.assertEqual(features, t['expected'])