Пример #1
0
def makeclust(ID, datatype, WORK):
    """Merge tier 1 and tier 2 cluster hits and write one gzipped .clust file.

    Inputs (all paths are built by plain string concatenation onto WORK):
      * ``WORK + "prefix/cat.u"``      -- tier 2 hit table
      * ``WORK + "prefix/cat.u_*"``    -- tier 1 hit tables
      * ``WORK + "prefix/cat._tempU"`` -- sequence file; lines containing ">"
        name the tier 2 seeds
      * ``WORK + "clust" + ID + "/cat.consens_*.gz"`` -- consensus sequences,
        read two lines (name, sequence) at a time

    In the hit tables, tab-separated field 0 is the hit name, field 1 the
    seed name and field 4 the match direction.

    Output: ``WORK + "prefix/cat.clust_.gz"``.  Each cluster is the seed
    record followed by its member records and a closing ``//`` line.
    Members whose direction is "-" are written as ``comp(seq)[::-1]``
    (presumably the reverse complement -- ``comp`` is defined elsewhere).

    Parameters:
        ID: string inserted into the consensus directory name.
        datatype: not used by this function; kept for the caller's interface.
        WORK: path prefix of the working directory.

    Returns None.  Raises KeyError if a seed or hit has no consensus sequence.
    """

    def load_hits(path, table):
        # group (hit name, direction) tuples under their seed name
        with open(path) as infile:
            for line in infile:
                fields = line.split("\t")
                table.setdefault(fields[1], []).append((fields[0], fields[4]))

    # tier 2 hits, keyed by seed
    Fseeds = {}
    load_hits(WORK + "prefix/cat.u", Fseeds)

    # tier 1 hits, keyed by seed
    Useeds = {}
    for path in glob.glob(WORK + "prefix/cat.u_*"):
        load_hits(path, Useeds)

    # one dictionary combining each tier 2 seed with its own tier 1 hits,
    # its tier 2 hits, and the tier 1 hits of those tier 2 hits
    D = {}
    for seed in Fseeds:
        Fhits = Useeds.get(seed)
        Mhits = []
        for hit in Fseeds[seed]:
            Mhits.append(hit)
            children = Useeds.get(hit[0])
            if children:
                if hit[1] == "-":
                    # the parent matched in "-" direction, so every child
                    # direction is passed through flip()
                    Mhits.extend((child[0], flip(child[1]))
                                 for child in children)
                else:
                    Mhits.extend(children)
        if Fhits:
            D[(seed, 's')] = Fhits + Mhits
        else:
            D[(seed, 's')] = Mhits

    # seeds listed in _tempU that matched nothing in tier 2 still carry
    # their tier 1 hits
    with open(WORK + "prefix/cat._tempU") as tempu:
        for line in tempu:
            if ">" in line:
                name = line.strip()[1:]
                if (name, 's') not in D:
                    hits = Useeds.get(name)
                    if hits:
                        D[(name, 's')] = hits

    # consensus sequences keyed by their ">name" header line
    Seqs = {}
    for path in glob.glob(WORK + "clust" + ID + "/cat.consens_*.gz"):
        with gzip.open(path) as consens:
            for header, sequence in itertools.izip(*[iter(consens)] * 2):
                Seqs[header.strip()] = sequence.strip()

    # write clust file
    with gzip.open(WORK + "prefix/cat.clust_.gz", 'w') as outfile:
        for key in D:
            seedname = key[0]
            outfile.write(">" + seedname + '\n' +
                          Seqs[">" + seedname].upper() + '\n')
            # Track names already written.  The previous check compared the
            # forward-strand record against stored records, which never
            # matched members stored reverse-complemented, so "-" hits were
            # not de-duplicated.
            written = set([seedname])
            for name, direction in D[key]:
                sequence = Seqs[">" + name].upper()
                if name in written:
                    continue
                written.add(name)
                if direction == "-":
                    sequence = comp(sequence)[::-1]
                outfile.write(">" + name + '\n' + sequence + '\n')
            outfile.write("//\n")
Пример #2
0
def makeclust(ID, datatype, WORK):
    """Merge tier 1 and tier 2 cluster hits and write one gzipped .clust file.

    Inputs (all paths are built by plain string concatenation onto WORK):
      * ``WORK + "prefix/cat.u"``      -- tier 2 hit table
      * ``WORK + "prefix/cat.u_*"``    -- tier 1 hit tables
      * ``WORK + "prefix/cat._tempU"`` -- sequence file; lines containing ">"
        name the tier 2 seeds
      * ``WORK + "clust" + ID + "/cat.consens_*.gz"`` -- consensus sequences,
        read two lines (name, sequence) at a time

    In the hit tables, tab-separated field 0 is the hit name, field 1 the
    seed name and field 4 the match direction.

    Output: ``WORK + "prefix/cat.clust_.gz"``.  Each cluster is the seed
    record followed by its member records and a closing ``//`` line.
    Members whose direction is "-" are written as ``comp(seq)[::-1]``
    (presumably the reverse complement -- ``comp`` is defined elsewhere).

    Parameters:
        ID: string inserted into the consensus directory name.
        datatype: not used by this function; kept for the caller's interface.
        WORK: path prefix of the working directory.

    Returns None.  Raises KeyError if a seed or hit has no consensus sequence.
    """

    def load_hits(path, table):
        # group (hit name, direction) tuples under their seed name
        with open(path) as infile:
            for line in infile:
                fields = line.split("\t")
                table.setdefault(fields[1], []).append((fields[0], fields[4]))

    # tier 2 hits, keyed by seed
    Fseeds = {}
    load_hits(WORK + "prefix/cat.u", Fseeds)

    # tier 1 hits, keyed by seed
    Useeds = {}
    for path in glob.glob(WORK + "prefix/cat.u_*"):
        load_hits(path, Useeds)

    # one dictionary combining each tier 2 seed with its own tier 1 hits,
    # its tier 2 hits, and the tier 1 hits of those tier 2 hits
    D = {}
    for seed in Fseeds:
        Fhits = Useeds.get(seed)
        Mhits = []
        for hit in Fseeds[seed]:
            Mhits.append(hit)
            children = Useeds.get(hit[0])
            if children:
                if hit[1] == "-":
                    # the parent matched in "-" direction, so every child
                    # direction is passed through flip()
                    Mhits.extend((child[0], flip(child[1]))
                                 for child in children)
                else:
                    Mhits.extend(children)
        if Fhits:
            D[(seed, 's')] = Fhits + Mhits
        else:
            D[(seed, 's')] = Mhits

    # seeds listed in _tempU that matched nothing in tier 2 still carry
    # their tier 1 hits
    with open(WORK + "prefix/cat._tempU") as tempu:
        for line in tempu:
            if ">" in line:
                name = line.strip()[1:]
                if (name, 's') not in D:
                    hits = Useeds.get(name)
                    if hits:
                        D[(name, 's')] = hits

    # consensus sequences keyed by their ">name" header line
    Seqs = {}
    for path in glob.glob(WORK + "clust" + ID + "/cat.consens_*.gz"):
        with gzip.open(path) as consens:
            for header, sequence in itertools.izip(*[iter(consens)] * 2):
                Seqs[header.strip()] = sequence.strip()

    # write clust file
    with gzip.open(WORK + "prefix/cat.clust_.gz", 'w') as outfile:
        for key in D:
            seedname = key[0]
            outfile.write(">" + seedname + '\n' +
                          Seqs[">" + seedname].upper() + '\n')
            # Track names already written.  The previous check compared the
            # forward-strand record against stored records, which never
            # matched members stored reverse-complemented, so "-" hits were
            # not de-duplicated.
            written = set([seedname])
            for name, direction in D[key]:
                sequence = Seqs[">" + name].upper()
                if name in written:
                    continue
                written.add(name)
                if direction == "-":
                    sequence = comp(sequence)[::-1]
                outfile.write(">" + name + '\n' + sequence + '\n')
            outfile.write("//\n")
Пример #3
0
def rawedit(WORK, infile, CUT, pN, trimkeep, strict, Q, datatype):
    """Quality-filter one fastq file and append kept reads to an .edit file.

    For every 4-line record of ``infile``:
    (1) the first ``len(CUT1)`` bases are overwritten with the cut site and
        every later base whose quality (``ord(char) - int(Q)``) is below 20
        is replaced with "N",
    (2) if ``strict`` is set, ``Afilter`` is asked where the adapter starts
        and the read is cut there,
    (3) if ``datatype`` is 'merged' the last base is dropped.

    A read is kept when it has at most ``pN`` Ns and is at least
    ``max(32, trimkeep)`` long.  Untrimmed reads are named ``>name_i_r1``,
    trimmed ones ``>name_i_c1``; trimmed reads are only written to disk when
    ``trimkeep`` is truthy.

    Parameters:
        WORK: path prefix; output goes to ``WORK + 'edits/' + name + ".edit"``.
        infile: fastq path, opened with gzip when it contains ".gz".
        CUT: cut site, or "cut1,cut2".
        pN: maximum number of Ns allowed in a kept read.
        trimkeep: minimum length for trimmed reads; falsy discards them.
        strict: passed through to ``Afilter``; falsy disables adapter trimming.
        Q: quality score offset (anything ``int()`` accepts).
        datatype: only the value 'merged' changes behaviour.

    Returns:
        [sample name, reads seen, full-length reads kept, trimmed reads kept],
        the three counts as strings.  The last is "0" when trimkeep is falsy.
    """

    if "," in CUT:
        CUT1, CUT2 = CUT.split(',')
    else:
        CUT1 = CUT2 = CUT

    # sample name: file name without "_R1" and without fastq/gz suffixes
    n = str(infile.split('/')[-1]).replace("_R1.", ".")
    while n.split(".")[-1] in ["fastq", "fastQ", "gz", "fq", "FastQ"]:
        n = n.replace('.' + n.split(".")[-1], "")
    handle = WORK + 'edits/' + str(n) + ".edit"

    def flush(reads, fragments):
        # append buffered records; fragments only when trimkeep is set
        with open(handle, 'a') as outfile:
            outfile.write("".join(reads))
            if trimkeep:
                outfile.write("".join(fragments))

    # Loop invariants, computed once instead of per base / per read.
    # assumes unambar() and comp() are pure functions -- TODO confirm
    offset = int(Q)
    ncut = len(CUT1)
    resolved = unambar(CUT1)
    cutsite = resolved[0] if resolved else CUT1
    adapter = comp(CUT2)[::-1] if strict else None

    writing_r = []
    writing_c = []
    orig = keep = keepcut = 0

    if ".gz" in infile:
        f = gzip.open(infile, 'r')
    else:
        f = open(infile, 'r')
    try:
        # read infile 4 lines (one fastq record) at a time
        for d in itertools.izip(*[iter(f)] * 4):
            orig += 1
            SS = d[1].strip()
            phred = [ord(char) - offset for char in d[3].strip('\n')]

            seq = []
            for base, qual in enumerate(phred):
                if base < ncut:
                    # don't quality check the cut site, restore it instead
                    seq.append(cutsite[base])
                elif qual >= 20 and base < len(SS):
                    seq.append(SS[base])
                else:
                    # low quality, or quality line longer than the read
                    seq.append("N")

            # periodically empty the buffers to bound memory use
            if not orig % 5000:
                flush(writing_r, writing_c)
                writing_r = []
                writing_c = []

            s = "".join(seq)
            wheretocut1 = None
            if strict:
                wheretocut1 = Afilter(adapter, s, strict)
                s = s[:wheretocut1]

            if datatype == 'merged':
                # remove extra forward base so forwards match reverse length
                s = s[:-1]

            if s.count("N") <= pN and len(s) >= max(32, trimkeep):
                if wheretocut1:
                    # the read was trimmed
                    writing_c.append(">" + n + "_" + str(keepcut) + "_c1" +
                                     "\n" + s + "\n")
                    keepcut += 1
                else:
                    writing_r.append(">" + n + "_" + str(keep) + "_r1" +
                                     "\n" + s + "\n")
                    keep += 1
    finally:
        # close the input even when a record fails to parse
        f.close()

    flush(writing_r, writing_c)

    sys.stderr.write(".")
    if not trimkeep:
        keepcut = 0
    return [
        handle.split("/")[-1].replace(".edit", ""),
        str(orig),
        str(keep),
        str(keepcut)
    ]
Пример #4
0
def rawedit(WORK, infile, CUT, pN, trimkeep, strict, Q, datatype):
    """Quality-filter one fastq file and append kept reads to an .edit file.

    For every 4-line record of ``infile``:
    (1) the first ``len(CUT1)`` bases are overwritten with the cut site and
        every later base whose quality (``ord(char) - int(Q)``) is below 20
        is replaced with "N",
    (2) if ``strict`` is set, ``Afilter`` is asked where the adapter starts
        and the read is cut there,
    (3) if ``datatype`` is 'merged' the last base is dropped.

    A read is kept when it has at most ``pN`` Ns and is at least
    ``max(32, trimkeep)`` long.  Untrimmed reads are named ``>name_i_r1``,
    trimmed ones ``>name_i_c1``; trimmed reads are only written to disk when
    ``trimkeep`` is truthy.

    Parameters:
        WORK: path prefix; output goes to ``WORK + 'edits/' + name + ".edit"``.
        infile: fastq path, opened with gzip when it contains ".gz".
        CUT: cut site, or "cut1,cut2".
        pN: maximum number of Ns allowed in a kept read.
        trimkeep: minimum length for trimmed reads; falsy discards them.
        strict: passed through to ``Afilter``; falsy disables adapter trimming.
        Q: quality score offset (anything ``int()`` accepts).
        datatype: only the value 'merged' changes behaviour.

    Returns:
        [sample name, reads seen, full-length reads kept, trimmed reads kept],
        the three counts as strings.  The last is "0" when trimkeep is falsy.
    """

    if "," in CUT:
        CUT1, CUT2 = CUT.split(',')
    else:
        CUT1 = CUT2 = CUT

    # sample name: file name without "_R1" and without fastq/gz suffixes
    n = str(infile.split('/')[-1]).replace("_R1.", ".")
    while n.split(".")[-1] in ["fastq", "fastQ", "gz", "fq", "FastQ"]:
        n = n.replace('.' + n.split(".")[-1], "")
    handle = WORK + 'edits/' + str(n) + ".edit"

    def flush(reads, fragments):
        # append buffered records; fragments only when trimkeep is set
        with open(handle, 'a') as outfile:
            outfile.write("".join(reads))
            if trimkeep:
                outfile.write("".join(fragments))

    # Loop invariants, computed once instead of per base / per read.
    # assumes unambar() and comp() are pure functions -- TODO confirm
    offset = int(Q)
    ncut = len(CUT1)
    resolved = unambar(CUT1)
    cutsite = resolved[0] if resolved else CUT1
    adapter = comp(CUT2)[::-1] if strict else None

    writing_r = []
    writing_c = []
    orig = keep = keepcut = 0

    if ".gz" in infile:
        f = gzip.open(infile, 'r')
    else:
        f = open(infile, 'r')
    try:
        # read infile 4 lines (one fastq record) at a time
        for d in itertools.izip(*[iter(f)] * 4):
            orig += 1
            SS = d[1].strip()
            phred = [ord(char) - offset for char in d[3].strip('\n')]

            seq = []
            for base, qual in enumerate(phred):
                if base < ncut:
                    # don't quality check the cut site, restore it instead
                    seq.append(cutsite[base])
                elif qual >= 20 and base < len(SS):
                    seq.append(SS[base])
                else:
                    # low quality, or quality line longer than the read
                    seq.append("N")

            # periodically empty the buffers to bound memory use
            if not orig % 5000:
                flush(writing_r, writing_c)
                writing_r = []
                writing_c = []

            s = "".join(seq)
            wheretocut1 = None
            if strict:
                wheretocut1 = Afilter(adapter, s, strict)
                s = s[:wheretocut1]

            if datatype == 'merged':
                # remove extra forward base so forwards match reverse length
                s = s[:-1]

            if s.count("N") <= pN and len(s) >= max(32, trimkeep):
                if wheretocut1:
                    # the read was trimmed
                    writing_c.append(">" + n + "_" + str(keepcut) + "_c1" +
                                     "\n" + s + "\n")
                    keepcut += 1
                else:
                    writing_r.append(">" + n + "_" + str(keep) + "_r1" +
                                     "\n" + s + "\n")
                    keep += 1
    finally:
        # close the input even when a record fails to parse
        f.close()

    flush(writing_r, writing_c)

    sys.stderr.write(".")
    if not trimkeep:
        keepcut = 0
    return [
        handle.split("/")[-1].replace(".edit", ""),
        str(orig),
        str(keep),
        str(keepcut)
    ]
Пример #5
0
def rawedit(WORK, infile, CUT, pN, trimkeep, strict, Q):
    """Quality-filter and end-trim one fastq file of merged/trimmed reads.

    The file is read twice:
    (1) a calibration pass collects, for up to 1000 reads that carry CUT1 at
        the start and ``comp(CUT2)[::-1]`` near the end, how many bases
        follow the second cutter; the most common value (if <= 3) becomes
        the right-end offset ``Roffset``,
    (2) the main pass replaces interior bases of quality below 20
        (``ord(char) - int(Q)``) with "N", drops reads with more than ``pN``
        Ns, trims the read ends around the cut sites / adapter fragments,
        and appends surviving reads to ``WORK + 'edits/' + name + ".edit"``.

    Reads whose header contains "trim" are treated as non-merged reads; all
    others as merged reads.  A read is kept when its trimmed length is
    greater than ``trimkeep`` and at least ``max(36, trimkeep)``.

    Parameters:
        WORK: path prefix of the working directory.
        infile: fastq path, opened with gzip when it contains ".gz".
        CUT: cut site, or "cut1,cut2".
        pN: maximum number of Ns allowed in a kept read.
        trimkeep: minimum read length after trimming.
        strict: not used by this version; kept for the caller's interface.
        Q: quality score offset (anything ``int()`` accepts).

    Returns:
        [sample name, reads seen, reads kept, "0"] -- counts as strings; the
        last entry is always "0" because this version keeps no separate
        count of trimmed reads.
    """

    if "," in CUT:
        CUT1, CUT2 = CUT.split(',')
    else:
        CUT1 = CUT2 = CUT

    # remove name suffix
    n = str(infile.split('/')[-1]).replace("_R1.", ".")
    while n.split(".")[-1] in ["fastq", "fastQ", "gz", "fq", "FastQ"]:
        n = n.replace('.' + n.split(".")[-1], "")
    handle = WORK + 'edits/' + str(n) + ".edit"

    # cutters as they are expected to appear in the read
    # assumes comp() is a pure function -- TODO confirm
    find1 = CUT1
    find2 = comp(CUT2)[::-1]
    offset = int(Q)
    ncut1 = len(CUT1)
    ncut2 = len(CUT2)

    def open_reads():
        # a fresh handle on the input for each pass
        if ".gz" in infile:
            return gzip.open(infile, 'r')
        return open(infile, 'r')

    # pass 1: look for extra bases on the right end of correctly merged reads
    rightend = []
    f = open_reads()
    try:
        for d in itertools.izip(*[iter(f)] * 4):
            s = d[1].strip()
            a = s[:len(find1)]
            b = s[-len(find2) - 2:]  ## w/ wiggle room
            if (find1 in a) and (find2 in b):
                rightend.append(len(s) - (s.rindex(find2) + len(find2)))
                if len(rightend) >= 1000:
                    break
    finally:
        # this handle used to be leaked when f was rebound for pass 2
        f.close()

    # most common overhang; anything larger than 3 is not trusted
    Roffset = 0
    if rightend:
        a = most_common(rightend)
        if a <= 3:
            Roffset = a

    writing_r = []
    orig = keep = keepcut = 0

    # pass 2: iterate over each read
    f = open_reads()
    try:
        for d in itertools.izip(*[iter(f)] * 4):
            orig += 1
            SS = d[1].strip()

            # apply Phred Q filter, but don't quality check the cut sites
            phred = [ord(char) - offset for char in d[3].strip('\n')]
            nqual = len(phred)
            seq = []
            for base, qual in enumerate(phred):
                interior = (base >= ncut1) and (base < nqual - ncut2)
                if base >= len(SS) or (interior and qual < 20):
                    seq.append("N")
                else:
                    seq.append(SS[base])

            # periodically empty the buffer to bound memory use
            if not orig % 5000:
                with open(handle, 'a') as outfile:
                    outfile.write("".join(writing_r))
                writing_r = []

            s = "".join(seq)

            # filter for N
            if s.count("N") > pN:
                continue

            # [left cut, right cut, reason]; None means "do not cut"
            wheretocut = [None, None, None]

            if "trim" in d[0]:
                # non-merged, trimmed reads
                # NOTE(review): the second slice starts at len(find2)-2, not
                # -len(find2)-2 as in the merged branch, so it searches most
                # of the read rather than its end -- confirm this is intended
                if not ((find1 in s[:len(find1)]) or
                        (find2 in s[len(find2) - 2:])):
                    # CUT1 rarely missing, CUT2 sometimes missing
                    s = s[:-len(CUT2) - Roffset]
            else:
                # merged reads. Are cutters found on both ends?
                a = s[:len(find1)]
                b = s[-len(find2) - 2:]  ## w/ wiggle room
                if (find1 in a) and (find2 in b):
                    wheretocut = [None, len(s) - Roffset, 'complete']
                else:
                    # look for CUT2 from right side
                    if find2 in s[len(s) // 2:]:
                        a = s.rindex(find2) + len(find2)
                        wheretocut = [None, a, 'find2 in s']
                    elif 'AGATCG' in s:
                        # couldn't find cut2, maybe has error: cut before
                        # the adapter.  Clamp at 0, because a negative value
                        # would be read by slicing as "from the right end"
                        # and keep an almost-all-adapter read.
                        a = max(0, s.rindex('AGATCG') - len(CUT2))
                        wheretocut = [None, a, 'AGATCG in s']
                    elif "TCGGAAG" in s:
                        a = max(0, s.rindex('TCGGAAG') - len(CUT2) - 3)
                        wheretocut = [None, a, 'TCGGAAG in s']
                    else:
                        # no sign of overshoot to the right
                        wheretocut = [None, len(s) - Roffset, "None"]

                    # look for CUT1 from left side
                    if CUT1 in s:
                        wheretocut[0] = s.index(CUT1)
                    else:
                        # exclude read: equal cut points give an empty slice
                        wheretocut[0] = wheretocut[1]

            w1, w2, reason = wheretocut
            if len(s[w1:w2]) > trimkeep:
                s = s[w1:w2]
            else:
                s = ""

            if len(s) >= max(36, trimkeep):
                writing_r.append(">" + n + "_" + str(keep) + "_r1" + "\n" +
                                 s + "\n")
                keep += 1
    finally:
        f.close()

    with open(handle, 'a') as outfile:
        outfile.write("".join(writing_r))

    sys.stderr.write(".")
    if not trimkeep:
        keepcut = 0
    return [
        handle.split("/")[-1].replace(".edit", ""),
        str(orig),
        str(keep),
        str(keepcut)
    ]
Пример #6
0
def rawedit(WORK, infile, CUT, pN, trimkeep, strict, Q):
    """Quality-filter and end-trim one fastq file of merged/trimmed reads.

    The file is read twice:
    (1) a calibration pass collects, for up to 1000 reads that carry CUT1 at
        the start and ``comp(CUT2)[::-1]`` near the end, how many bases
        follow the second cutter; the most common value (if <= 3) becomes
        the right-end offset ``Roffset``,
    (2) the main pass replaces interior bases of quality below 20
        (``ord(char) - int(Q)``) with "N", drops reads with more than ``pN``
        Ns, trims the read ends around the cut sites / adapter fragments,
        and appends surviving reads to ``WORK + 'edits/' + name + ".edit"``.

    Reads whose header contains "trim" are treated as non-merged reads; all
    others as merged reads.  A read is kept when its trimmed length is
    greater than ``trimkeep`` and at least ``max(36, trimkeep)``.

    Parameters:
        WORK: path prefix of the working directory.
        infile: fastq path, opened with gzip when it contains ".gz".
        CUT: cut site, or "cut1,cut2".
        pN: maximum number of Ns allowed in a kept read.
        trimkeep: minimum read length after trimming.
        strict: not used by this version; kept for the caller's interface.
        Q: quality score offset (anything ``int()`` accepts).

    Returns:
        [sample name, reads seen, reads kept, "0"] -- counts as strings; the
        last entry is always "0" because this version keeps no separate
        count of trimmed reads.
    """

    if "," in CUT:
        CUT1, CUT2 = CUT.split(',')
    else:
        CUT1 = CUT2 = CUT

    # remove name suffix
    n = str(infile.split('/')[-1]).replace("_R1.", ".")
    while n.split(".")[-1] in ["fastq", "fastQ", "gz", "fq", "FastQ"]:
        n = n.replace('.' + n.split(".")[-1], "")
    handle = WORK + 'edits/' + str(n) + ".edit"

    # cutters as they are expected to appear in the read
    # assumes comp() is a pure function -- TODO confirm
    find1 = CUT1
    find2 = comp(CUT2)[::-1]
    offset = int(Q)
    ncut1 = len(CUT1)
    ncut2 = len(CUT2)

    def open_reads():
        # a fresh handle on the input for each pass
        if ".gz" in infile:
            return gzip.open(infile, 'r')
        return open(infile, 'r')

    # pass 1: look for extra bases on the right end of correctly merged reads
    rightend = []
    f = open_reads()
    try:
        for d in itertools.izip(*[iter(f)] * 4):
            s = d[1].strip()
            a = s[:len(find1)]
            b = s[-len(find2) - 2:]  ## w/ wiggle room
            if (find1 in a) and (find2 in b):
                rightend.append(len(s) - (s.rindex(find2) + len(find2)))
                if len(rightend) >= 1000:
                    break
    finally:
        # this handle used to be leaked when f was rebound for pass 2
        f.close()

    # most common overhang; anything larger than 3 is not trusted
    Roffset = 0
    if rightend:
        a = most_common(rightend)
        if a <= 3:
            Roffset = a

    writing_r = []
    orig = keep = keepcut = 0

    # pass 2: iterate over each read
    f = open_reads()
    try:
        for d in itertools.izip(*[iter(f)] * 4):
            orig += 1
            SS = d[1].strip()

            # apply Phred Q filter, but don't quality check the cut sites
            phred = [ord(char) - offset for char in d[3].strip('\n')]
            nqual = len(phred)
            seq = []
            for base, qual in enumerate(phred):
                interior = (base >= ncut1) and (base < nqual - ncut2)
                if base >= len(SS) or (interior and qual < 20):
                    seq.append("N")
                else:
                    seq.append(SS[base])

            # periodically empty the buffer to bound memory use
            if not orig % 5000:
                with open(handle, 'a') as outfile:
                    outfile.write("".join(writing_r))
                writing_r = []

            s = "".join(seq)

            # filter for N
            if s.count("N") > pN:
                continue

            # [left cut, right cut, reason]; None means "do not cut"
            wheretocut = [None, None, None]

            if "trim" in d[0]:
                # non-merged, trimmed reads
                # NOTE(review): the second slice starts at len(find2)-2, not
                # -len(find2)-2 as in the merged branch, so it searches most
                # of the read rather than its end -- confirm this is intended
                if not ((find1 in s[:len(find1)]) or
                        (find2 in s[len(find2) - 2:])):
                    # CUT1 rarely missing, CUT2 sometimes missing
                    s = s[:-len(CUT2) - Roffset]
            else:
                # merged reads. Are cutters found on both ends?
                a = s[:len(find1)]
                b = s[-len(find2) - 2:]  ## w/ wiggle room
                if (find1 in a) and (find2 in b):
                    wheretocut = [None, len(s) - Roffset, 'complete']
                else:
                    # look for CUT2 from right side
                    if find2 in s[len(s) // 2:]:
                        a = s.rindex(find2) + len(find2)
                        wheretocut = [None, a, 'find2 in s']
                    elif 'AGATCG' in s:
                        # couldn't find cut2, maybe has error: cut before
                        # the adapter.  Clamp at 0, because a negative value
                        # would be read by slicing as "from the right end"
                        # and keep an almost-all-adapter read.
                        a = max(0, s.rindex('AGATCG') - len(CUT2))
                        wheretocut = [None, a, 'AGATCG in s']
                    elif "TCGGAAG" in s:
                        a = max(0, s.rindex('TCGGAAG') - len(CUT2) - 3)
                        wheretocut = [None, a, 'TCGGAAG in s']
                    else:
                        # no sign of overshoot to the right
                        wheretocut = [None, len(s) - Roffset, "None"]

                    # look for CUT1 from left side
                    if CUT1 in s:
                        wheretocut[0] = s.index(CUT1)
                    else:
                        # exclude read: equal cut points give an empty slice
                        wheretocut[0] = wheretocut[1]

            w1, w2, reason = wheretocut
            if len(s[w1:w2]) > trimkeep:
                s = s[w1:w2]
            else:
                s = ""

            if len(s) >= max(36, trimkeep):
                writing_r.append(">" + n + "_" + str(keep) + "_r1" + "\n" +
                                 s + "\n")
                keep += 1
    finally:
        f.close()

    with open(handle, 'a') as outfile:
        outfile.write("".join(writing_r))

    sys.stderr.write(".")
    if not trimkeep:
        keepcut = 0
    return [
        handle.split("/")[-1].replace(".edit", ""),
        str(orig),
        str(keep),
        str(keepcut)
    ]