Пример #1
0
    def create_merged_seq(self):
        """Write the merged, target-ordered query sequences to ``merged_seq.fa``.

        Reads ``longest_segments.txt`` from ``self.out_dir``, orders its rows
        by ``target_name`` and then ``target_final_start``, and writes one
        ``>target`` record per distinct target name into ``merged_seq.fa`` in
        the same directory.  Each row contributes the sequence fetched for its
        ``query_name`` (passed through ``futil.reverse_complement`` when
        ``query_strand`` is ``"-"``) followed by a spacer of 1000 ``N``.
        """
        align = pd.read_table(os.path.join(self.out_dir, "longest_segments.txt"))
        # DataFrame.sort(columns=...) no longer exists in pandas; sort_values
        # expresses the same ordering.
        align = align.sort_values(by=["target_name", "target_final_start"])

        spacer = "N" * 1000
        curr_target_name = ""
        start = 0
        # The context manager guarantees the FASTA file is flushed and closed,
        # even if a sequence lookup fails part-way through.
        with open(os.path.join(self.out_dir, "merged_seq.fa"), "w") as out:
            for _, row in align.iterrows():
                query = row['query_name']
                target = row['target_name']
                fa = FastaHack(self.query_fas[query])

                if curr_target_name != target:
                    if curr_target_name != "":
                        # Finish the previous record's last sequence line so
                        # the next header starts on a line of its own.
                        out.write("\n")
                    # NOTE(review): `start` looks like format_fasta's running
                    # position within the current record; restart it for each
                    # new record, as main() does -- confirm against futil.
                    start = 0
                    out.write(">{0}\n".format(target))
                    curr_target_name = target

                fasub = fa.get_sequence(query)
                if row['query_strand'] == "-":
                    fasub = futil.reverse_complement(fasub)
                formatted_seq, start = futil.format_fasta(fasub, start)
                out.writelines(formatted_seq)

                # Spacer separating consecutive query sequences.
                formatted_spacer, start = futil.format_fasta(spacer, start)
                out.writelines(formatted_spacer)

            out.write("\n")
Пример #2
0
def main(argv):
    """Convert the alignment file named by ``argv[1]`` into a FASTA file.

    The output path is the input path with its extension replaced by ``.fa``.
    Only lines starting with ``s`` are used, and they are consumed in
    alternating target/query order: the first supplies the target name
    (field 1), the next supplies the query strand (field 4) and the query
    alignment text (field 6).  A ``>target`` header is written whenever the
    target name changes; each query's text, with ``-`` gap characters removed
    and passed through ``futil.reverse_complement`` when the strand is ``-``,
    is appended with ``futil.write_segment``.

    NOTE(review): the ``a``/``s`` line layout looks like MAF -- confirm.
    """
    in_path = argv[1]
    out_path = os.path.splitext(in_path)[0] + ".fa"

    expect_target = True
    curr_target_name = ""
    start = 0
    # Context managers make sure both files are closed (and the buffered
    # output flushed) even if parsing fails on a malformed line.
    with open(in_path) as infile, open(out_path, "w") as outfile:
        for line in infile:
            if not line.startswith("s"):
                continue
            sline = line.split()
            if expect_target:
                expect_target = False
                target = sline[1]
                if curr_target_name != target:
                    if curr_target_name != "":
                        # Close the previous record and restart the running
                        # position handed to write_segment.
                        outfile.write("\n")
                        start = 0
                    outfile.write(">{0}\n".format(target))
                    curr_target_name = target
            else:
                expect_target = True
                # str.replace strips gaps on both Python 2 and Python 3;
                # translate(None, "-") only works on Python 2 byte strings.
                seq = sline[6].replace("-", "")
                if sline[4] == "-":
                    seq = futil.reverse_complement(seq)
                start = futil.write_segment(seq, start, outfile)
        outfile.write("\n")