示例#1
0
def realign_file(args, model, output_filename, alignment_filename):
    """Realign each alignment loaded from a file and write the results.

    Args:
        args: Options object. The attributes read here are ``expand_model``,
            ``tracks``, ``annotation_model``, ``alignment_regexp``,
            ``sequence_regexp``, ``draw``, ``mask_repeats`` and
            ``algorithm``; the object itself is also handed to the realigner.
        model: Model handed to the realigner. When ``args.expand_model`` is
            set, it is expanded for every alignment with the ``trf_cons``
            consensuses of the first two sequences.
        output_filename: File the realigned alignments are written to.
        alignment_filename: File the input alignments are loaded from.

    Returns:
        ``1`` as soon as an alignment with fewer than two sequences is met
        (an error message is written to stderr); ``None`` otherwise.
    """
    # begin of HACK: model expansion needs the 'trf_cons' annotation track,
    # so it is requested only for the duration of compute_annotations().
    # Whether the track was added is remembered explicitly: args.tracks is
    # mutated in place, so comparing against an alias of it afterwards can
    # never detect the addition.
    track_added = False
    if args.expand_model and 'trf_cons' not in args.tracks:
        args.tracks.add('trf_cons')
        track_added = True
    if args.annotation_model:
        annotation_model = args.annotation_model
    else:
        annotation_model = model
    try:
        annotations = compute_annotations(
            args, alignment_filename, annotation_model)
    finally:
        # Restore the caller's track selection even if annotation fails.
        if track_added:
            args.tracks.discard('trf_cons')
    if args.expand_model:
        consensuses = annotations['trf_cons']
    # end of HACK
    with Open(output_filename, 'w') as output_file_object:
        alignments = Fasta.load(
            alignment_filename,
            args.alignment_regexp,
            Alignment,
            sequence_selectors=args.sequence_regexp)
        for aln in alignments:
            if len(aln.sequences) < 2:
                sys.stderr.write("ERROR: not enough sequences in file\n")
                return 1
            draw_requested = len(args.draw) > 0
            if draw_requested:
                drawer = AlignmentCanvas()
                drawer.add_original_alignment(aln)
            else:
                # Nothing will be drawn, so a brainwash()-wrapped canvas
                # class is instantiated instead of a real canvas.
                drawer = brainwash(AlignmentCanvas)()
            aln, unmask_repeats = args.mask_repeats(aln, annotations)
            seq1, seq2 = tuple(map(Fasta.alnToSeq, aln.sequences[:2]))
            perf.msg("Data loaded in {time} seconds.")
            perf.replace()
            if args.expand_model:
                # The consensuses of both sequences are needed.
                first_cons = consensuses[aln.names[0]]
                second_cons = consensuses[aln.names[1]]
                cons = list(first_cons.union(second_cons))
                real_model = model.expandModel({'consensus': cons})
            else:
                real_model = model
            realigner = args.algorithm()
            realigner.setDrawer(drawer)
            realigner.prepareData(seq1, aln.names[0], seq2, aln.names[1], aln,
                                  real_model, annotations, args)

            aln = realigner.realign(0, len(seq1), 0, len(seq2))
            aln = unmask_repeats(aln)
            perf.msg("Sequence was realigned in {time} seconds.")
            perf.replace()
            if draw_requested:
                drawer.add_sequence('X', seq1)
                drawer.add_sequence('Y', seq2)
                # Only rows 0 and 2 of the realigned result are drawn.
                drawer.add_alignment_line(101, (255, 0, 255, 255), 2,
                                          AlignmentPositionGenerator(
                                              Alignment([aln[0], aln[2]])))
                drawer.draw(args.draw, 2000, 2000)
                perf.msg("Image was drawn in {time} seconds.")
            # Save output_file
            Fasta.saveAlignmentPiece(aln, output_file_object)
示例#2
0
def main(files_filename, output_filename, suffix, base_dir):
    """Join alignment pieces back into whole alignments.

    ``files_filename`` is a JSON list of piece filenames in which an empty
    string ends a group; at that point the rows accumulated so far are
    written to ``output_filename``. Progress is printed to stdout before
    every entry for which the number of processed pieces is a multiple
    of 100.

    Args:
        files_filename: JSON file holding the list of piece filenames.
        output_filename: File the joined alignments are written to.
        suffix: Replaces the last two characters of a piece filename to
            obtain the name of its replacement file.
        base_dir: Directory in which to look for replacement files, or
            ``None`` to look next to the original piece.
    """
    # Rows are collected as lists of fragments and joined once per group.
    x_parts = []
    y_parts = []
    a_parts = []
    x_name = y_name = a_name = None
    with Open(output_filename, "w") as ff:
        with Open(files_filename) as files_file:
            files = json.load(files_file)
        total = len(files)
        done = 0
        for filename in files:
            if done % 100 == 0:
                print("{}/{} {:.2f}%".format(
                    done, total, 100.0 * done / total))
            if filename == "":
                # End of a group. A group without any piece has no names
                # and no data, so there is nothing to write for it.
                if x_parts:
                    Fasta.saveAlignmentPiece(
                        [(x_name, "".join(x_parts)),
                         (y_name, "".join(y_parts)),
                         (a_name, "".join(a_parts))],
                        ff)
                x_parts = []
                y_parts = []
                a_parts = []
                continue
            done += 1
            old_filename = filename
            keep = False

            if "keep" not in filename:
                # Try the replacement file first.
                filename = filename[:-2] + suffix
                if base_dir is not None:
                    filename = base_dir + "/" + filename.split("/")[-1]
                try:
                    with Open(filename, "r") as f:
                        content_length = len("".join(f).strip())
                    if content_length == 0:
                        # Blank replacement: fall back to the original.
                        filename = old_filename
                        keep = True
                except IOError:
                    # Unreadable replacement: fall back to the original.
                    filename = old_filename
                    keep = True
            if "keep" in filename:
                keep = True
            aln = list(Fasta.load(filename, ""))[0]
            assert len(aln) == 3
            assert len(aln[0][1]) == len(aln[1][1]) == len(aln[2][1])
            x_parts.append(aln[0][1])
            if keep:
                # The middle row of a kept piece is replaced by dots.
                a_parts.append("." * len(aln[0][1]))
            else:
                a_parts.append(aln[1][1])
            y_parts.append(aln[2][1])
            x_name = aln[0][0]
            a_name = aln[1][0]
            y_name = aln[2][0]
        # NOTE(review): pieces that follow the last empty entry are never
        # written - presumably the list always ends with "". Confirm.
示例#3
0
def main(files_filename, output_filename, suffix, base_dir):
    """Join alignment pieces back into whole alignments.

    ``files_filename`` is a JSON list of piece filenames in which an empty
    string ends a group; at that point the rows accumulated so far are
    written to ``output_filename``. Progress is printed to stdout before
    every entry for which the number of processed pieces is a multiple
    of 100.

    Args:
        files_filename: JSON file holding the list of piece filenames.
        output_filename: File the joined alignments are written to.
        suffix: Replaces the last two characters of a piece filename to
            obtain the name of its replacement file.
        base_dir: Directory in which to look for replacement files, or
            ``None`` to look next to the original piece.
    """
    # Rows are collected as lists of fragments and joined once per group.
    x_parts = []
    y_parts = []
    a_parts = []
    x_name = y_name = a_name = None
    with Open(output_filename, 'w') as ff:
        with Open(files_filename) as files_file:
            files = json.load(files_file)
        total = len(files)
        done = 0
        for filename in files:
            if done % 100 == 0:
                print('{}/{} {:.2f}%'.format(
                    done, total, 100.0 * done / total))
            if filename == '':
                # End of a group. A group without any piece has no names
                # and no data, so there is nothing to write for it.
                if x_parts:
                    Fasta.saveAlignmentPiece(
                        [(x_name, ''.join(x_parts)),
                         (y_name, ''.join(y_parts)),
                         (a_name, ''.join(a_parts))],
                        ff)
                x_parts = []
                y_parts = []
                a_parts = []
                continue
            done += 1
            old_filename = filename
            keep = False

            if 'keep' not in filename:
                # Try the replacement file first.
                filename = filename[:-2] + suffix
                if base_dir is not None:
                    filename = base_dir + '/' + filename.split('/')[-1]
                try:
                    with Open(filename, 'r') as f:
                        content_length = len(''.join(f).strip())
                    if content_length == 0:
                        # Blank replacement: fall back to the original.
                        filename = old_filename
                        keep = True
                except IOError:
                    # Unreadable replacement: fall back to the original.
                    filename = old_filename
                    keep = True
            if 'keep' in filename:
                keep = True
            aln = list(Fasta.load(filename, ''))[0]
            assert len(aln) == 3
            assert len(aln[0][1]) == len(aln[1][1]) == len(aln[2][1])
            x_parts.append(aln[0][1])
            if keep:
                # The middle row of a kept piece is replaced by dots.
                a_parts.append('.' * len(aln[0][1]))
            else:
                a_parts.append(aln[1][1])
            y_parts.append(aln[2][1])
            x_name = aln[0][0]
            a_name = aln[1][0]
            y_name = aln[2][0]
        # NOTE(review): pieces that follow the last empty entry are never
        # written - presumably the list always ends with ''. Confirm.
示例#4
0
def main(input_file, output_file):
    """Write each two-sequence alignment together with repeat annotations.

    A ``TRFDriver`` is created for an existing path from ``trf_paths`` and
    run on ``input_file``. For every alignment loaded from ``input_file``
    that has exactly two sequences, five rows are saved to ``output_file``:
    the first sequence, the consensus row of the first sequence, a row with
    ``R`` on every column covered by a repeat of either sequence, the
    consensus row of the second sequence, and the second sequence.
    Alignments with another number of sequences are reported with
    ``error`` on stdout and skipped.

    Args:
        input_file: File with the alignments; also the input of the TRF run.
        output_file: File the annotated alignments are written to.

    Raises:
        RuntimeError: If none of the paths in ``trf_paths`` exists.
    """
    trf = None
    for trf_executable in trf_paths:
        if os.path.exists(trf_executable):
            # No break here: the last existing path is the one used.
            trf = TRFDriver(trf_executable)
    if trf is None:
        raise RuntimeError("No trf found")
    repeats = trf.run(input_file)

    with open(output_file, 'w') as f:
        for alignment in Fasta.load(input_file, r'\.[0-9]*$', Alignment):
            if len(alignment.sequences) != 2:
                print('error')
                continue
            width = len(alignment.sequences[0])
            annotation = ['.'] * width
            annotationX = ['.'] * width
            annotationY = ['.'] * width
            for index, seq_name in enumerate(alignment.names):
                # translator turns repeat coordinates into indices of the
                # annotation rows.
                translator = alignment.seq_to_aln[index]
                revtranslator = alignment.aln_to_seq[index]
                # The first sequence fills the X row, the other the Y row.
                consensus_row = annotationX if index == 0 else annotationY
                for repeat in repeats[seq_name]:
                    first_column = translator[repeat.start]
                    consensus = repeat.consensus
                    for i in range(first_column, translator[repeat.end]):
                        annotation[i] = 'R'
                        # NOTE(review): revtranslator is indexed with the
                        # offset from the repeat's first column, not with
                        # column i itself - confirm this is intended.
                        j = i - first_column
                        consensus_row[i] = consensus[
                            revtranslator[j] % len(consensus)]

            nm = alignment.names[0]
            aln = [
                (alignment.names[0], alignment.sequences[0].replace('.', '-')),
                ('consensusX' + nm, ''.join(annotationX)),
                ('annotation' + nm, ''.join(annotation)),
                ('consensusY' + nm, ''.join(annotationY)),
                (alignment.names[1], alignment.sequences[1].replace('.', '-')),
            ]
            Fasta.saveAlignmentPiece(aln, f, -1)
示例#5
0
def main(input_file, output_file):
    """Write each two-sequence alignment together with repeat annotations.

    A ``TRFDriver`` is created for an existing path from ``trf_paths`` and
    run on ``input_file``. For every alignment loaded from ``input_file``
    that has exactly two sequences, five rows are saved to ``output_file``:
    the first sequence, the consensus row of the first sequence, a row with
    ``R`` on every column covered by a repeat of either sequence, the
    consensus row of the second sequence, and the second sequence.
    Alignments with another number of sequences are reported with
    ``error`` on stdout and skipped.

    Args:
        input_file: File with the alignments; also the input of the TRF run.
        output_file: File the annotated alignments are written to.

    Raises:
        RuntimeError: If none of the paths in ``trf_paths`` exists.
    """
    trf = None
    for trf_executable in trf_paths:
        if os.path.exists(trf_executable):
            # No break here: the last existing path is the one used.
            trf = TRFDriver(trf_executable)
    if trf is None:
        raise RuntimeError("No trf found")
    repeats = trf.run(input_file)

    with open(output_file, 'w') as f:
        for alignment in Fasta.load(input_file, r'\.[0-9]*$', Alignment):
            if len(alignment.sequences) != 2:
                print('error')
                continue
            width = len(alignment.sequences[0])
            annotation = ['.'] * width
            annotationX = ['.'] * width
            annotationY = ['.'] * width
            for index, seq_name in enumerate(alignment.names):
                # translator turns repeat coordinates into indices of the
                # annotation rows.
                translator = alignment.seq_to_aln[index]
                revtranslator = alignment.aln_to_seq[index]
                # The first sequence fills the X row, the other the Y row.
                consensus_row = annotationX if index == 0 else annotationY
                for repeat in repeats[seq_name]:
                    first_column = translator[repeat.start]
                    consensus = repeat.consensus
                    for i in range(first_column, translator[repeat.end]):
                        annotation[i] = 'R'
                        # NOTE(review): revtranslator is indexed with the
                        # offset from the repeat's first column, not with
                        # column i itself - confirm this is intended.
                        j = i - first_column
                        consensus_row[i] = consensus[
                            revtranslator[j] % len(consensus)]

            nm = alignment.names[0]
            aln = [
                (alignment.names[0], alignment.sequences[0].replace('.', '-')),
                ('consensusX' + nm, ''.join(annotationX)),
                ('annotation' + nm, ''.join(annotation)),
                ('consensusY' + nm, ''.join(annotationY)),
                (alignment.names[1], alignment.sequences[1].replace('.', '-')),
            ]
            Fasta.saveAlignmentPiece(aln, f, -1)