def _find_trf_driver(args):
    """Return a TRFDriver for an executable listed in ``args.trf``, or None.

    A driver is built for every path in ``args.trf`` that exists on disk.
    There is deliberately no early exit (the loop this replaces had its
    ``break`` commented out), so the last existing path wins. A missing
    (None) or empty ``args.trf`` yields None.
    """
    driver = None
    for trf_executable in args.trf or ():
        if os.path.exists(trf_executable):
            driver = TRFDriver(trf_executable, mathType=args.mathType)
    return driver


def compute_annotations(args, alignment_filename, model):
    """Compute the annotation tracks requested in ``args.tracks``.

    Args:
        args: options object; reads ``tracks`` (membership-tested for the
            track names below), ``trf`` (paths of candidate TRF executables,
            may be None) and ``mathType``.
        alignment_filename: path of the alignment file handed to the
            drivers; ``alignment_filename + '.repeats'`` is read for the
            'original_repeats' track.
        model: passed through to ``HMMDriver`` for the 'hmm' track.

    Returns:
        dict keyed by track name. Possible keys:
          * 'trf'              -- result of ``TRFDriver.run``.
          * 'original_repeats' -- sequence name -> list of ``Repeat`` built
                                  from the JSON records in the .repeats file.
          * 'trf_cons'         -- sequence name -> set of the ``consensus``
                                  attributes of the repeats found by TRF.
          * 'hmm'              -- result of ``HMMDriver.run``.
        A TRF-based track is silently omitted when no usable TRF driver is
        found.
    """
    annotations = dict()

    if 'trf' in args.tracks:
        trf = _find_trf_driver(args)
        if trf:
            annotations['trf'] = trf.run(alignment_filename)

    if 'original_repeats' in args.tracks:
        # Close the file deterministically instead of leaking the handle.
        with Open(alignment_filename + '.repeats', 'r') as repeats_file:
            raw_repeats = json.load(repeats_file)
        annotations['original_repeats'] = {
            name: [
                Repeat(rec[0], rec[1], rec[2], rec[3], rec[4])
                for rec in records
            ]
            for name, records in raw_repeats.items()
        }

    if 'trf_cons' in args.tracks:
        trf = _find_trf_driver(args)
        if trf:
            repeats = trf.run(alignment_filename)
            annotations['trf_cons'] = {
                seq_name: {repeat.consensus for repeat in repeats[seq_name]}
                for seq_name in repeats
            }

    if 'hmm' in args.tracks:
        # HMMDriver receives None rather than an empty list of TRF paths.
        paths = None
        if args.trf is not None and len(args.trf) > 0:
            paths = args.trf
        driver = HMMDriver(paths, args.mathType, model)
        if driver:
            annotations['hmm'] = driver.run(alignment_filename)

    perf.msg("Hints computed in {time} seconds.")
    perf.replace()
    return annotations
def realign_file(args, model, output_filename, alignment_filename):
    """Realign every alignment of ``alignment_filename`` into ``output_filename``.

    Args:
        args: options object; reads ``expand_model``, ``tracks``,
            ``annotation_model``, ``alignment_regexp``, ``sequence_regexp``,
            ``draw``, ``mask_repeats`` and ``algorithm``.
        model: model used for realignment (expanded per alignment with the
            TRF consensuses when ``args.expand_model`` is set). It is also
            the model used for annotations unless ``args.annotation_model``
            is truthy.
        output_filename: path the realigned alignments are written to.
        alignment_filename: path of the input alignments.

    Returns:
        1 if an alignment with fewer than two sequences is encountered (an
        error is written to stderr and processing stops); otherwise None.
    """
    # HACK: model expansion needs the 'trf_cons' annotations, so that track
    # is switched on for the compute_annotations call only. Remember whether
    # it was added here: the previous code kept an alias of the very same
    # set, so its "restore" step could never detect (or undo) the addition.
    added_trf_cons = bool(args.expand_model) and 'trf_cons' not in args.tracks
    if added_trf_cons:
        args.tracks.add('trf_cons')
    annotation_model = args.annotation_model or model
    try:
        annotations = compute_annotations(
            args, alignment_filename, annotation_model)
    finally:
        if added_trf_cons:
            args.tracks.discard('trf_cons')
    if args.expand_model:
        consensuses = annotations['trf_cons']
    # end of HACK

    should_draw = len(args.draw) > 0

    with Open(output_filename, 'w') as output_file_object:
        alignments = Fasta.load(
            alignment_filename,
            args.alignment_regexp,
            Alignment,
            sequence_selectors=args.sequence_regexp)
        for aln in alignments:
            if len(aln.sequences) < 2:
                sys.stderr.write("ERROR: not enough sequences in file\n")
                return 1

            if should_draw:
                drawer = AlignmentCanvas()
                drawer.add_original_alignment(aln)
            else:
                # Nothing will be drawn: use the brainwash()-ed canvas class.
                drawer = brainwash(AlignmentCanvas)()

            aln, unmask_repeats = args.mask_repeats(aln, annotations)
            seq1, seq2 = [Fasta.alnToSeq(seq) for seq in aln.sequences[:2]]
            perf.msg("Data loaded in {time} seconds.")
            perf.replace()

            if args.expand_model:
                # Collect the consensuses of both sequences for the model.
                first_cons = consensuses[aln.names[0]]
                second_cons = consensuses[aln.names[1]]
                cons = list(first_cons.union(second_cons))
                real_model = model.expandModel({'consensus': cons})
            else:
                real_model = model

            realigner = args.algorithm()
            realigner.setDrawer(drawer)
            realigner.prepareData(seq1, aln.names[0], seq2, aln.names[1],
                                  aln, real_model, annotations, args)
            aln = realigner.realign(0, len(seq1), 0, len(seq2))
            aln = unmask_repeats(aln)
            perf.msg("Sequence was realigned in {time} seconds.")
            perf.replace()

            if should_draw:
                drawer.add_sequence('X', seq1)
                drawer.add_sequence('Y', seq2)
                drawer.add_alignment_line(
                    101, (255, 0, 255, 255), 2,
                    AlignmentPositionGenerator(Alignment([aln[0], aln[2]])))
                drawer.draw(args.draw, 2000, 2000)
                perf.msg("Image was drawn in {time} seconds.")

            # Save the realigned piece to the output file.
            Fasta.saveAlignmentPiece(aln, output_file_object)
def getTable(self, X, x, dx, Y, y, dy, tables,
             forwardTable=None, backwardTable=None, positionGenerator=None):
    """Feed forward rows and the backward table to each table builder.

    Args:
        X, x, dx, Y, y, dy: forwarded unchanged to the forward/backward
            table computations and to every ``processRow`` call; ``dx`` also
            sizes the flattened backward table (``dx + 1`` entries).
        tables: iterable of callables invoked as ``table(dx, self)``; each
            result must provide ``processRow(...)`` and ``getData()``.
        forwardTable: optional iterable of ``(i, row)`` pairs; computed via
            ``self.getForwardTableGenerator`` when None.
        backwardTable: optional iterable of ``(i, B)`` pairs with
            ``0 <= i <= dx``; computed via ``self.getBackwardTable`` when
            None.
        positionGenerator: iterable of items whose first element is compared
            against the forward row index ``i``. It is materialised into a
            list. Presumably ordered by that first element -- TODO confirm.
            NOTE(review): the None default fails at ``len()`` as soon as the
            forward table yields a row, so callers effectively must pass it.

    Returns:
        list with ``getData()`` of every builder, in the order of ``tables``.
    """
    # Roadmap notes (translated from Slovak):
    #  - first, a version that assumes both tables are large;
    #  - then one that computes the missing parts on demand;
    #  - then a switch enabling optimisations, which will need to know
    #    whether to proceed from the front or from the back.
    if positionGenerator is not None:
        positionGenerator = list(positionGenerator)

    perf.push()

    # Fetch the tables if they were not provided. jcpoint receives a thunk,
    # a label, self.io_files and self.mathType -- presumably a checkpoint
    # helper that computes or reloads the value; verify against jcpoint.
    def _forward():
        if forwardTable is not None:
            return forwardTable
        return self.getForwardTableGenerator(
            X, x, dx, Y, y, dy, positionGenerator=positionGenerator)

    forward_rows = jcpoint(
        _forward, 'forward_table', self.io_files, self.mathType)
    perf.msg('Forward table was computed in {time} seconds.')
    perf.replace()

    def _backward():
        if backwardTable is not None:
            return backwardTable
        return self.getBackwardTable(
            X, x, dx, Y, y, dy, positionGenerator=positionGenerator)

    backward_rows = jcpoint(
        _backward, 'backward_table', self.io_files, self.mathType)
    perf.msg('Backward table was computed in {time} seconds.')
    perf.replace()

    # Sort by first element (just in case). The result must be kept:
    # sorted() returns a new list, and discarding it would both skip the
    # sort and exhaust a one-shot iterator before the loop below.
    backward_rows = sorted(backward_rows, key=lambda entry: entry[0])
    perf.msg('Tables were sorted in {time} seconds.')
    perf.replace()

    # Flatten the backward table into a list indexed by row.
    bt = [dict() for _ in range(dx + 1)]
    for i, B in backward_rows:
        bt[i] = B
    perf.msg('Backward table was flattened in {time} seconds.')
    perf.replace()

    States = [table(dx, self) for table in tables]
    index = 0
    for i, row in forward_rows:
        # Slice out the positions belonging to row i: skip those before it,
        # then take the run whose first element does not exceed i.
        while (index < len(positionGenerator)
               and positionGenerator[index][0] < i):
            index += 1
        start = index
        while (index < len(positionGenerator)
               and positionGenerator[index][0] <= i):
            index += 1
        row_positions = positionGenerator[start:index]
        for state in States:
            state.processRow(X, x, dx, Y, y, dy, i, row, bt, row_positions)

    ret = [state.getData() for state in States]
    perf.msg('Posterior table was computed in {time} seconds.')
    perf.pop()
    return ret
def getTable(self, X, x, dx, Y, y, dy, tables,
             forwardTable=None, backwardTable=None, positionGenerator=None):
    """Feed forward rows and the backward table to each table builder.

    Args:
        X, x, dx, Y, y, dy: forwarded unchanged to the forward/backward
            table computations and to every ``processRow`` call; ``dx`` also
            sizes the flattened backward table (``dx + 1`` entries).
        tables: iterable of callables invoked as ``table(dx, self)``; each
            result must provide ``processRow(...)`` and ``getData()``.
        forwardTable: optional iterable of ``(i, row)`` pairs; computed via
            ``self.getForwardTableGenerator`` when None.
        backwardTable: optional iterable of ``(i, B)`` pairs with
            ``0 <= i <= dx``; computed via ``self.getBackwardTable`` when
            None.
        positionGenerator: iterable of items whose first element is compared
            against the forward row index ``i``. It is materialised into a
            list. Presumably ordered by that first element -- TODO confirm.
            NOTE(review): the None default fails at ``len()`` as soon as the
            forward table yields a row, so callers effectively must pass it.

    Returns:
        list with ``getData()`` of every builder, in the order of ``tables``.
    """
    # Roadmap notes (translated from Slovak):
    #  - first, a version that assumes both tables are large;
    #  - then one that computes the missing parts on demand;
    #  - then a switch enabling optimisations, which will need to know
    #    whether to proceed from the front or from the back.
    if positionGenerator is not None:
        positionGenerator = list(positionGenerator)

    perf.push()

    # Fetch the tables if they were not provided. jcpoint receives a thunk,
    # a label, self.io_files and self.mathType -- presumably a checkpoint
    # helper that computes or reloads the value; verify against jcpoint.
    def _forward():
        if forwardTable is not None:
            return forwardTable
        return self.getForwardTableGenerator(
            X, x, dx, Y, y, dy, positionGenerator=positionGenerator)

    forward_rows = jcpoint(
        _forward, 'forward_table', self.io_files, self.mathType)
    perf.msg('Forward table was computed in {time} seconds.')
    perf.replace()

    def _backward():
        if backwardTable is not None:
            return backwardTable
        return self.getBackwardTable(
            X, x, dx, Y, y, dy, positionGenerator=positionGenerator)

    backward_rows = jcpoint(
        _backward, 'backward_table', self.io_files, self.mathType)
    perf.msg('Backward table was computed in {time} seconds.')
    perf.replace()

    # Sort by first element (just in case). The result must be kept:
    # sorted() returns a new list, and discarding it would both skip the
    # sort and exhaust a one-shot iterator before the loop below.
    backward_rows = sorted(backward_rows, key=lambda entry: entry[0])
    perf.msg('Tables were sorted in {time} seconds.')
    perf.replace()

    # Flatten the backward table into a list indexed by row.
    bt = [dict() for _ in range(dx + 1)]
    for i, B in backward_rows:
        bt[i] = B
    perf.msg('Backward table was flattened in {time} seconds.')
    perf.replace()

    States = [table(dx, self) for table in tables]
    index = 0
    for i, row in forward_rows:
        # Slice out the positions belonging to row i: skip those before it,
        # then take the run whose first element does not exceed i.
        while (index < len(positionGenerator)
               and positionGenerator[index][0] < i):
            index += 1
        start = index
        while (index < len(positionGenerator)
               and positionGenerator[index][0] <= i):
            index += 1
        row_positions = positionGenerator[start:index]
        for state in States:
            state.processRow(X, x, dx, Y, y, dy, i, row, bt, row_positions)

    ret = [state.getData() for state in States]
    perf.msg('Posterior table was computed in {time} seconds.')
    perf.pop()
    return ret