Пример #1
0
    def GetExec(self, optList, frame):
        """Build the MUSCLE command line from the plugin's GUI widgets.

        Args:
            optList: Not used by this method; kept for the call signature.
            frame: GUI frame. Its ``paramBoxes``, ``abet`` and ``options``
                attributes are read, and it is stored on ``self.frame``.

        Returns:
            The assembled command line as a string (``str(cline)``).

        Side effects:
            Sets ``self.frame``, ``self.outfile`` and ``self.outtype``.
        """
        # Respond to the "muscle" command.
        self.frame = frame
        boxes = self.frame.paramBoxes
        plugin_exe = r"C:/Program Files (x86)/py27/Lib/site-packages/Muscle.exe"
        seq_path = r"C:\Users\francis\Documents\Monguis\BioGui\plugins\my_seq.fasta"
        self.outfile = r".\plugins\muscle.txt"
        self.outtype = "fasta"
        cline = MuscleCommandline(plugin_exe, out=self.outfile)

        if '1ProfileCheck' in boxes:
            if boxes['1ProfileCheck'].GetValue():
                # Profile mode takes two inputs; both point at the same file.
                cline.profile = True
                cline.in1 = seq_path
                cline.in2 = seq_path
            else:
                cline.input = seq_path

        if '1DiagCheck' in boxes:
            if boxes['1DiagCheck'].GetValue():
                cline.diags = True
                # Read each spinner through GetValue() (like every other
                # widget here) and send it to its own MUSCLE option; the
                # previous code converted the widget itself and wrote all
                # three values to diaglength.
                if "DiagLenSpin" in boxes:
                    cline.diaglength = int(boxes["DiagLenSpin"].GetValue())
                if "DiagMargSpin" in boxes:
                    cline.diagmargin = int(boxes["DiagMargSpin"].GetValue())
                if "DiagBreakSpin" in boxes:
                    cline.diagbreak = int(boxes["DiagBreakSpin"].GetValue())
            elif "GapPenSpin" in boxes:
                cline.gapopen = float(boxes["GapPenSpin"].GetValue())
            else:
                cline.input = seq_path

        # Map the frame's alphabet onto MUSCLE's sequence-type option.
        if self.frame.abet == "AA":
            cline.seqtype = "protein"
        elif self.frame.abet == "DNA" or self.frame.abet == "RNA":
            cline.seqtype = "nucleo"
        else:
            cline.seqtype = "auto"

        if self.frame.options:
            # Advanced options are read from fixed positions in self.boxList.
            cline.objscore = str(self.boxList[9].GetValue())
            cline.weight1 = str(self.boxList[13].GetValue())
            cline.weight2 = str(self.boxList[15].GetValue())
            cline.anchorspacing = int(self.boxList[17].GetValue())
            cline.center = float(self.boxList[19].GetValue())
            cline.hydro = int(self.boxList[21].GetValue())
            cline.hydrofactor = float(self.boxList[23].GetValue())
            cline.maxhours = float(self.boxList[25].GetValue())
            cline.maxiters = int(self.boxList[27].GetValue())
            cline.maxtrees = int(self.boxList[29].GetValue())
            cline.minbestcolscore = float(self.boxList[31].GetValue())
            cline.minsmoothscore = float(self.boxList[33].GetValue())
            cline.smoothscoreceil = float(self.boxList[35].GetValue())
            cline.smoothwindow = int(self.boxList[37].GetValue())
            cline.sueff = float(self.boxList[39].GetValue())

        return str(cline)
Пример #2
0
 def GetExec(self, optList, frame):
     """Assemble the MUSCLE invocation described by the GUI parameter boxes.

     Args:
         optList: Not used by this method; kept for the call signature.
         frame: GUI frame. Its ``paramBoxes``, ``abet`` and ``options``
             attributes are read, and it is stored on ``self.frame``.

     Returns:
         The assembled command line as a string (``str(cline)``).

     Side effects:
         Sets ``self.frame``, ``self.outfile`` and ``self.outtype``.
     """
     # Respond to the "muscle" command.
     self.frame = frame
     plugin_exe = r"C:/Program Files (x86)/py27/Lib/site-packages/Muscle.exe"
     self.outfile = r".\plugins\muscle.txt"
     self.outtype = "fasta"
     cline = MuscleCommandline(plugin_exe, out=self.outfile)
     if '1ProfileCheck' in self.frame.paramBoxes:
         if self.frame.paramBoxes['1ProfileCheck'].GetValue():
             # Profile mode takes two inputs; both point at the same file.
             cline.profile = True
             cline.in1 = r"C:\Users\francis\Documents\Monguis\BioGui\plugins\my_seq.fasta"
             cline.in2 = r"C:\Users\francis\Documents\Monguis\BioGui\plugins\my_seq.fasta"
         else:
             cline.input = r"C:\Users\francis\Documents\Monguis\BioGui\plugins\my_seq.fasta"
     if '1DiagCheck' in self.frame.paramBoxes:
         if self.frame.paramBoxes['1DiagCheck'].GetValue():
             cline.diags = True
             # The spinner widgets must be read with GetValue(), and each one
             # belongs to a different MUSCLE option (length / margin / break)
             # rather than all overwriting diaglength.
             if "DiagLenSpin" in self.frame.paramBoxes:
                 cline.diaglength = int(
                     self.frame.paramBoxes["DiagLenSpin"].GetValue())
             if "DiagMargSpin" in self.frame.paramBoxes:
                 cline.diagmargin = int(
                     self.frame.paramBoxes["DiagMargSpin"].GetValue())
             if "DiagBreakSpin" in self.frame.paramBoxes:
                 cline.diagbreak = int(
                     self.frame.paramBoxes["DiagBreakSpin"].GetValue())
         elif "GapPenSpin" in self.frame.paramBoxes:
             cline.gapopen = float(
                 self.frame.paramBoxes["GapPenSpin"].GetValue())
         else:
             cline.input = r"C:\Users\francis\Documents\Monguis\BioGui\plugins\my_seq.fasta"

     # Map the frame's alphabet onto MUSCLE's sequence-type option.
     if self.frame.abet == "AA":
         cline.seqtype = "protein"
     elif self.frame.abet == "DNA" or self.frame.abet == "RNA":
         cline.seqtype = "nucleo"
     else:
         cline.seqtype = "auto"

     if self.frame.options:
         # Advanced options are read from fixed positions in self.boxList.
         cline.objscore = str(self.boxList[9].GetValue())
         cline.weight1 = str(self.boxList[13].GetValue())
         cline.weight2 = str(self.boxList[15].GetValue())
         cline.anchorspacing = int(self.boxList[17].GetValue())
         cline.center = float(self.boxList[19].GetValue())
         cline.hydro = int(self.boxList[21].GetValue())
         cline.hydrofactor = float(self.boxList[23].GetValue())
         cline.maxhours = float(self.boxList[25].GetValue())
         cline.maxiters = int(self.boxList[27].GetValue())
         cline.maxtrees = int(self.boxList[29].GetValue())
         cline.minbestcolscore = float(self.boxList[31].GetValue())
         cline.minsmoothscore = float(self.boxList[33].GetValue())
         cline.smoothscoreceil = float(self.boxList[35].GetValue())
         cline.smoothwindow = int(self.boxList[37].GetValue())
         cline.sueff = float(self.boxList[39].GetValue())

     return str(cline)
Пример #3
0
def _ungapped(seq):
    """Return *seq* as a plain string with every "-" removed.

    Accepts a string, or any object exposing a ``.seq`` attribute (e.g. a
    SeqRecord); anything else terminates the program via ``sys.exit``.
    """
    try:
        return re.sub("-", "", seq)
    except TypeError:
        # not a string, probably a SeqRecord
        try:
            return re.sub("-", "", str(seq.seq))
        except AttributeError:
            # give up
            sys.exit(
                "quickAlign() requires inputs to be either strings or SeqRecord objects"
            )


def quickAlign(refseq, testseq, maxiters=None, diags=None, gapopen=None):
    """Align two sequences with MUSCLE and return the aligned strings.

    Args:
        refseq: Reference sequence (string or SeqRecord-like object).
        testseq: Sequence to align against the reference (same types).
        maxiters: Optional value for MUSCLE's ``maxiters`` option.
        diags: Optional value for MUSCLE's ``diags`` option.
        gapopen: Optional value for MUSCLE's ``gapopen`` option.

    Returns:
        A dict mapping record id to aligned sequence string; the input
        handed to MUSCLE uses the ids "ref" and "test".

    Exits via ``sys.exit`` when an input is neither a string nor has ``.seq``.
    """
    # sanity check: strip existing gaps and normalise both inputs to str
    refseq = _ungapped(refseq)
    testseq = _ungapped(testseq)

    handle = StringIO()
    handle.write(">ref\n%s\n>test\n%s\n" % (refseq, testseq))
    data = handle.getvalue()

    muscle_cline = MuscleCommandline(cmd=muscle, quiet=True)
    if maxiters is not None:
        muscle_cline.maxiters = maxiters
    if diags is not None:
        # was `diag`, an undefined name -> NameError whenever diags was given
        muscle_cline.diags = diags
    if gapopen is not None:
        muscle_cline.gapopen = gapopen

    # The FASTA text is fed to MUSCLE on stdin; the alignment comes back on stdout.
    stdout, stderr = muscle_cline(stdin=data)

    aligned = dict()
    for p in SeqIO.parse(StringIO(stdout), "fasta"):
        aligned[p.id] = str(p.seq)
    return aligned
Пример #4
0
def quickAlign( refseq, testseq, maxiters=None, diags=None, gapopen=None ):
	"""Align two sequences with MUSCLE and return the aligned strings.

	Both inputs are coerced with str() and stripped of "-" before alignment.
	NOTE(review): str() of a SeqRecord is presumably not the bare sequence;
	callers should pass strings or Seq objects -- confirm.

	maxiters, diags and gapopen are forwarded to the matching MUSCLE options
	when they are not None.

	Returns a dict mapping record id to aligned sequence string; the input
	handed to MUSCLE uses the ids "ref" and "test".
	"""
	#sanity check
	refseq	= re.sub( "-", "", str(refseq) )
	testseq = re.sub( "-", "", str(testseq) )

	handle = StringIO()
	handle.write( ">ref\n%s\n>test\n%s\n"%(refseq,testseq) )
	data = handle.getvalue()

	muscle_cline = MuscleCommandline(cmd=muscle, quiet=True)
	if maxiters is not None:
		muscle_cline.maxiters = maxiters
	if diags is not None:
		# was `diag`, an undefined name -> NameError whenever diags was given
		muscle_cline.diags = diags
	if gapopen is not None:
		muscle_cline.gapopen = gapopen

	# The FASTA text is fed to MUSCLE on stdin; the alignment comes back on stdout.
	stdout, stderr = muscle_cline(stdin=data)

	aligned = dict()
	for p in SeqIO.parse(StringIO(stdout), "fasta"):
		aligned[ p.id ] = str(p.seq)
	return aligned
def primer3cloning(ccdsfile, cdnafile):
    """Align a CCDS against a cDNA with MUSCLE and print where the CCDS ends.

    All records of both FASTA inputs are upper-cased and written to
    ``internal_files/fastaoutput.txt``, which MUSCLE aligns; the alignment is
    saved to ``internal_files/muscleoutput.txt``. For the aligned CCDS row
    (the last record of *ccdsfile*) the function prints the three characters
    ending at offset ``leading gaps + non-gap characters`` and then the two
    counts.

    Args:
        ccdsfile: Path (or handle) of the CCDS FASTA file.
        cdnafile: Path (or handle) of the cDNA FASTA file.

    Raises:
        ValueError: If *ccdsfile* has no records, or the CCDS id is missing
            from the alignment.
    """
    fasta_path = "internal_files/fastaoutput.txt"
    aligned_path = "internal_files/muscleoutput.txt"

    # Write the combined FASTA once instead of reopening the file per record.
    ccdsid = None
    with open(fasta_path, "w") as tempoutput:
        for record in SeqIO.parse(ccdsfile, "fasta"):
            tempoutput.write('>' + record.id + '\n' + str(record.seq.upper()) +
                             '\n')
            ccdsid = record.id
        for recordc in SeqIO.parse(cdnafile, "fasta"):
            # Terminate every record with a newline so that several cDNA
            # records do not run together.
            tempoutput.write('>' + recordc.id + '\n' +
                             str(recordc.seq.upper()) + '\n')
    if ccdsid is None:
        raise ValueError("no FASTA records found in %s" % ccdsfile)

    muscle_cline = MuscleCommandline(input=fasta_path)
    # Options must be set before the command runs; this was previously
    # assigned after the call and therefore ignored.
    muscle_cline.gapopen = -10.0
    stdout, stderr = muscle_cline()
    align = AlignIO.read(StringIO(stdout), "fasta")
    AlignIO.write([align], aligned_path, "fasta")

    # Pick the aligned CCDS row (last match wins).
    aligned_ccds = None
    for recordf in SeqIO.parse(aligned_path, "fasta"):
        if recordf.id == ccdsid:
            aligned_ccds = str(recordf.seq)
    if aligned_ccds is None:
        raise ValueError("%s not found in %s" % (ccdsid, aligned_path))

    ccds = list(aligned_ccds)
    # count: gaps before the first residue; gene: number of non-gap characters.
    count = len(aligned_ccds) - len(aligned_ccds.lstrip("-"))
    gene = len(aligned_ccds) - aligned_ccds.count("-")
    bleh = count + gene
    # Single-argument print calls produce the same output on Python 2 and 3.
    print(ccds[bleh - 3:bleh])
    print("%d %d" % (count, gene))
Пример #6
0
def main():
    """Optionally align with MUSCLE, run DNAML, and restore sequence names.

    Works on module-level state: ``prj_tree``, ``prj_name``, ``force``,
    ``doAlign``, ``germ_seq``, ``natives``, ``DNAML`` and ``revertName`` are
    read; the globals ``inFile`` and ``lookup`` are (re)assigned.

    Side effects: changes the working directory to ``prj_tree.phylo`` and
    leaves it there, writes ``<prj_name>.tree`` and ``<prj_name>.dnaml.out``,
    and removes DNAML's ``infile``/``outfile``/``outtree`` afterwards.

    Exits via ``sys.exit`` when stale DNAML files exist and ``force`` is not
    set, or when a user-supplied alignment cannot be read as PHYLIP.
    """
    global inFile, lookup

    oldFiles = (
        glob.glob("%s/infile" % prj_tree.phylo)
        + glob.glob("%s/outtree" % prj_tree.phylo)
        + glob.glob("%s/outfile" % prj_tree.phylo)
    )
    if len(oldFiles) > 0:
        if force:
            for f in oldFiles:
                os.remove(f)
        else:
            sys.exit("Old files exist! Please use the -f flag to force overwrite.")

    if doAlign:
        to_align = "%s/%s_to_align.fa" % (prj_tree.phylo, prj_name)
        aligned = "%s/%s_aligned.afa" % (prj_tree.phylo, prj_name)

        # first create a working file to align and add the germline and natives
        shutil.copyfile("%s/%s-collected.fa" % (prj_tree.nt, prj_name), to_align)
        # `with` guarantees the file is flushed and closed before MUSCLE reads it
        with open(to_align, "a") as handle:
            handle.write(">%s\n%s\n" % (germ_seq.id, germ_seq.seq))
            for n in natives.values():
                handle.write(">%s\n%s\n" % (n.id, n.seq))

        # now run muscle
        run_muscle = MuscleCommandline(input=to_align, out=aligned)
        run_muscle.maxiters = 2
        run_muscle.diags = True
        run_muscle.gapopen = -5000.0  # code requires a float
        print(run_muscle)
        run_muscle()

        # change inFile variable so that remaining code is the same for both cases
        # It's probably really bad form to handle this in this way
        inFile = aligned

    # open the alignment to rename everything and find germline sequence
    # rename is to avoid possible errors with DNAML from sequence ids that are too long
    germ_pos = 1
    with open(inFile, "rU") as handle:
        if doAlign:
            aln = AlignIO.read(handle, "fasta")
        else:
            try:
                aln = AlignIO.read(handle, "phylip")
            except Exception:
                # narrowed from a bare except so Ctrl-C is not reported as a format error
                sys.exit("Please make sure custom input is aligned and in PHYLIP format")

    # lookup keeps the original ids in order; each id becomes its 1-based,
    # zero-padded position. (revertName presumably maps these back -- confirm.)
    lookup = []
    for seq in aln:
        lookup.append(seq.id)
        if re.search("(IG|VH|VK|VL|HV|KV|LV)", seq.id) is not None:
            germ_pos = len(lookup)
        seq.id = "%010d" % len(lookup)

    with open("%s/infile" % prj_tree.phylo, "w") as output:
        AlignIO.write(aln, output, "phylip")

    # now generate script for DNAML
    # J is "jumble" followed by random seed and number of times to repeat
    # O is outgroup root, followed by position of the germline in the alignment
    # 5 tells DNAML to do the ancestor inference
    # Y starts the run
    with open("%s/dnaml.in" % prj_tree.phylo, "w") as handle:
        # integer bound: randint rejects non-integer floats on newer Pythons
        seed = random.randint(0, 10 ** 10) * 2 + 1  # seed must be odd
        handle.write("J\n%d\n3\nO\n%d\n5\nY\n" % (seed, germ_pos))

    # change to work directory so DNAML finds "infile" and puts the output where we expect
    # NOTE(review): the paths below are still built from prj_tree after the
    # chdir, so this assumes prj_tree holds absolute paths -- confirm.
    os.chdir(prj_tree.phylo)
    with open("%s/dnaml.in" % prj_tree.phylo, "rU") as pipe:
        subprocess.call([DNAML], stdin=pipe)

    # revert names in tree
    with open("%s/outtree" % prj_tree.phylo, "rU") as intree:
        mytree = intree.read()
    fixedtree = re.sub(r"\d{10}", revertName, mytree)
    with open("%s/%s.tree" % (prj_tree.out, prj_name), "w") as outtree:
        outtree.write(fixedtree)

    # revert names in out file
    with open("%s/outfile" % prj_tree.phylo, "rU") as instuff:
        mystuff = instuff.read()
    fixedstuff = re.sub(r"\d{10}", revertName, mystuff)
    with open("%s/%s.dnaml.out" % (prj_tree.logs, prj_name), "w") as outstuff:
        outstuff.write(fixedstuff)

    # clean up (relative names resolve inside prj_tree.phylo after the chdir)
    os.remove("infile")
    os.remove("outfile")
    os.remove("outtree")
Пример #7
0
def main():
    """Build an IgPhyML tree (GY94 then HLP17) and optionally infer ancestors.

    Works on module-level state: ``arguments`` (option dict), ``prj_tree``,
    ``prj_name``, ``germ_seq``, ``natives``, ``muscle``, ``igphyml``,
    ``igphyml_slow``, ``reconstruct``, ``SCRIPT_FOLDER``, ``assignGaps`` and
    ``getFinalSeqs``.

    Steps: clear or refuse stale outputs; optionally align with MUSCLE (when
    ``-v`` is given); trim the alignment to a multiple of three columns;
    pick the root sequence; run IgPhyML twice; unless ``--noAnc`` is set, run
    the ancestor reconstruction script and put alignment gaps back into the
    inferred sequences; finally move the stats and tree files into place.

    Exits via ``sys.exit`` on stale outputs without ``-f``, an unreadable
    alignment, a missing root/germline sequence, or any external program
    that writes to stderr or returns a non-zero exit code.
    """
    oldFiles = glob.glob("%s/infile" % prj_tree.phylo) + glob.glob(
        "%s/%s_igphyml.tree" %
        (prj_tree.out, prj_name)) + glob.glob("%s/%s_igphyml_stats.txt" %
                                              (prj_tree.logs, prj_name))
    if len(oldFiles) > 0:
        if arguments['-f']:
            for f in oldFiles:
                os.remove(f)
        else:
            sys.exit(
                "Old files exist! Please use the -f flag to force overwrite.")

    if arguments['-v'] is not None:

        #do alignment

        #first create a working file to align and add the germline and natives
        shutil.copyfile(arguments['--seqs'],
                        "%s/%s_to_align.fa" % (prj_tree.phylo, prj_name))
        # `with` guarantees the file is flushed and closed before MUSCLE reads it
        with open("%s/%s_to_align.fa" % (prj_tree.phylo, prj_name),
                  "a") as handle:
            # the leading newline guards against a copied file that does not
            # end with one
            handle.write("\n>%s\n%s\n" % (germ_seq.id, germ_seq.seq))
            for n in natives.values():
                handle.write(">%s\n%s\n" % (n.id, n.seq))

        #now run muscle
        run_muscle = MuscleCommandline(
            cmd=muscle,
            input="%s/%s_to_align.fa" % (prj_tree.phylo, prj_name),
            out="%s/%s_aligned.afa" % (prj_tree.phylo, prj_name))
        run_muscle.maxiters = 2
        run_muscle.diags = True
        run_muscle.gapopen = -5000.0  #code requires a float
        print(run_muscle)
        run_muscle()

        #this is probably bad form
        arguments['-i'] = "%s/%s_aligned.afa" % (prj_tree.phylo, prj_name)

    #open the alignment to rename everything and find germline sequence
    with open(arguments['-i'], "r") as handle:
        try:
            aln = AlignIO.read(handle, arguments['--format'])
        except Exception:
            #narrowed from a bare except so Ctrl-C is not reported as a format error
            sys.exit("Couldn't read alignment: is %s the correct format?" %
                     arguments['--format'])

    #drop trailing columns so the alignment length is a multiple of 3
    align_len = aln.get_alignment_length()
    extra = align_len % 3
    if extra > 0:
        print("Trimming alignment to even codon length...", file=sys.stderr)
        aln = aln[:, 0:-extra]
        align_len -= extra

    #kill the fasta def line and any usearch/vsearch annotations to avoid formatting foul-ups
    germ_id = ""
    foundRoot = False
    gaps = defaultdict(list)
    for seq in aln:
        seq.id = re.sub("[;:].*", "", seq.id)
        seq.description = ""
        if re.search("(IG|VH|VK|VL|HV|KV|LV)", seq.id, re.I) is not None:
            germ_id = seq.id

        if arguments['--root'] is not None and seq.id == arguments['--root']:
            foundRoot = True

        for g in re.finditer("-+", str(seq.seq)):
            #save gap. value is a field to help me determine what's real in assignGaps
            gaps[seq.id.upper()].append({
                'start': g.start(),
                'end': g.end(),
                'value': 1
            })

    #an explicit --root overrides any germline id detected by name
    if arguments['--root'] is not None:
        germ_id = arguments['--root']
        if not foundRoot:
            sys.exit("Couldn't find specified root sequence %s in input file" %
                     arguments['--root'])
    elif germ_id == "":
        sys.exit(
            "Couldn't find a germline gene in the alignment, please use the --root option and try again."
        )

    with open("%s/infile" % prj_tree.phylo, "w") as output:
        AlignIO.write(aln, output, "fasta")

    #now call IgPhyML
    #fast initial tree
    opts = ["--threads", arguments['--threads']]
    if not arguments['--quick']:
        opts += ["-s", "SPR"]
    if arguments['--seed'] is not None:
        opts += ["--r_seed", arguments['--seed']]
    #set an environmental variable so that IgPhyML can find its libraries
    os.environ.update(
        {'IGPHYML_PATH': '%s/third-party/src/motifs' % SCRIPT_FOLDER})
    s = subprocess.Popen([
        igphyml, "-i",
        "%s/infile" % prj_tree.phylo, "-m", "GY", "-w", "MO", "-t", "e",
        "--run_id", "gy94"
    ] + opts,
                         universal_newlines=True,
                         stderr=subprocess.PIPE)
    o, e = s.communicate()

    if re.search("error while loading shared libraries", str(e)):
        #Some libraries needed for optimized execution are missing
        #  Try again with a version compiled without optimizations
        s = subprocess.Popen(
            [
                igphyml_slow, "-i",
                "%s/infile" % prj_tree.phylo, "-m", "GY", "-w", "MO", "-t",
                "e", "--run_id", "gy94"
            ] + opts[2:],  #no threading option available
            universal_newlines=True,
            stderr=subprocess.PIPE)
        o, e = s.communicate()

    #any stderr output is treated as a failure, not only a non-zero exit code
    if e != "" or s.returncode != 0:
        sys.exit("Error running '%s':\n%sExit code %d" %
                 (" ".join(s.args), e, s.returncode))

    #Refine tree with AID-specific hotspot motifs
    opts = ["--threads", arguments['--threads']]
    if arguments['--quick']:
        opts += ['-o', 'lr']
    else:
        opts += ['-o', 'tlr']
    if arguments['--seed'] is not None:
        opts += ["--r_seed", arguments['--seed']]

    s = subprocess.Popen([
        igphyml, "-i",
        "%s/infile" % prj_tree.phylo, "-m", "HLP17", "--root", germ_id, "-u",
        "%s/infile_igphyml_tree.txt_gy94" % prj_tree.phylo, "--motifs", "FCH",
        "--run_id", "hlp17", "--ambigfile",
        "%s/ambigfile.txt" % prj_tree.phylo
    ] + opts,
                         universal_newlines=True,
                         stderr=subprocess.PIPE)
    o, e = s.communicate()

    if re.search("error while loading shared libraries", str(e)):
        #same fallback as above: retry with the unoptimized build
        s = subprocess.Popen(
            [
                igphyml_slow, "-i",
                "%s/infile" % prj_tree.phylo, "-m", "HLP17", "--root", germ_id,
                "-u",
                "%s/infile_igphyml_tree.txt_gy94" % prj_tree.phylo, "--motifs",
                "FCH", "--run_id", "hlp17", "--ambigfile",
                "%s/ambigfile.txt" % prj_tree.phylo
            ] + opts[2:],  #no threading option available
            universal_newlines=True,
            stderr=subprocess.PIPE)
        o, e = s.communicate()

    if e != "" or s.returncode != 0:
        sys.exit("Error running '%s':\n%sExit code %d" %
                 (" ".join(s.args), e, s.returncode))

    if not arguments['--noAnc']:
        #now need to set up a config file for ancestor reconstruction
        with open("%s/ar.config" % prj_tree.phylo, "w") as handle:
            #length is in codons; align_len is a multiple of 3 after trimming
            handle.write("length\t%d\n" % (align_len // 3))
            handle.write("rooted\t1\noutdir\t%s\n" % prj_tree.phylo)
            handle.write("seqfile\t%s/infile\n" % prj_tree.phylo)
            handle.write("rootid\t%s\n" % germ_id)
            handle.write("igphyml\t%s/%s\n" % (SCRIPT_FOLDER, "third-party"))
            handle.write("stats\t%s/infile_igphyml_stats.txt_hlp17\n" %
                         prj_tree.phylo)
            handle.write("tree\t%s/infile_igphyml_tree.txt_hlp17\n" %
                         prj_tree.phylo)
            handle.write("ambigfile\t%s/ambigfile.txt\n" % prj_tree.phylo)
            handle.write("stem\t%s\n" % prj_name)

        s = subprocess.Popen([
            "perl", "-I",
            "%s/third-party" % SCRIPT_FOLDER, reconstruct,
            "%s/ar.config" % prj_tree.phylo
        ],
                             universal_newlines=True,
                             stderr=subprocess.PIPE)
        o, e = s.communicate()
        if e != "" or s.returncode != 0:
            sys.exit("Error running '%s':\n%sExit code %d" %
                     (" ".join(s.args), e, s.returncode))

        if len(gaps) > 0:
            #fix ancestor inference by putting gaps back in
            #start by reading in inferred sequences and reconstructing the tree
            tree = dict()
            stack = list()
            seqDict = OrderedDict()
            with open("%s/%s.MLcodons.fa" % (prj_tree.phylo, prj_name),
                      "r") as infer:
                for seq in SeqIO.parse(infer, "fasta"):
                    name = seq.id.split(";")[1]
                    seqDict[name] = seq
                    if "," in name:
                        #a name containing a comma is an internal node: it
                        #adopts the two most recent entries on the stack
                        tree[name] = {'id': name, 'children': stack[-2:]}
                        tree[stack.pop()]['parent'] = name
                        tree[stack.pop()]['parent'] = name
                        stack.append(name)
                    else:
                        tree[name] = {'id': name, 'children': []}
                        stack.append(name)

            #now iterate down tree to propagate gaps
            assignGaps(stack[0], tree, gaps)

            #do output
            with open("%s/%s_inferredAncestors.fa" % (prj_tree.nt, prj_name),
                      "w") as handle:
                SeqIO.write(getFinalSeqs(seqDict, gaps), handle, "fasta")
            with open("%s/%s_inferredAncestors.fa" % (prj_tree.aa, prj_name),
                      "w") as handle:
                SeqIO.write(getFinalSeqs(seqDict, gaps, trans=True), handle,
                            "fasta")

        else:
            #no gaps to restore: the reconstruction output is used as-is
            os.rename("%s/%s.MLcodons.fa" % (prj_tree.phylo, prj_name),
                      "%s/%s_inferredAncestors.fa" % (prj_tree.nt, prj_name))
            os.rename("%s/%s.MLaas.fa" % (prj_tree.phylo, prj_name),
                      "%s/%s_inferredAncestors.fa" % (prj_tree.aa, prj_name))

    #move non-sequence outputs to logical places
    os.rename("%s/infile_igphyml_stats.txt_hlp17" % prj_tree.phylo,
              "%s/%s_igphyml_stats.txt" % (prj_tree.logs, prj_name))
    os.rename("%s/infile_igphyml_tree.txt_hlp17" % prj_tree.phylo,
              "%s/%s_igphyml.tree" % (prj_tree.out, prj_name))
Пример #8
0
def main():
    """Optionally align with MUSCLE, run DNAML in workDir, and restore names.

    Works on module-level state: ``workDir``, ``seqFile``, ``outTreeFile``,
    ``outFile``, ``prj_name``, ``force``, ``doAlign``, ``germ_seq``,
    ``natives``, ``dnaml`` and ``revertName`` are read; the globals
    ``inFile`` and ``lookup`` are (re)assigned.

    Side effects: writes the working files into ``workDir``, runs DNAML with
    ``workDir`` as the current directory (the previous directory is restored
    afterwards), and writes the renamed tree and log to ``outTreeFile`` and
    ``outFile``.

    Exits via ``sys.exit`` when stale DNAML files exist and ``force`` is not
    set, or when a user-supplied alignment cannot be read as PHYLIP.
    """
    global inFile, lookup, workDir, outTreeFile, outFile, seqFile

    oldFiles = glob.glob("%s/infile"%workDir) + glob.glob("%s/outtree"%workDir) + glob.glob("%s/outfile"%workDir)
    if len(oldFiles) > 0:
        if force:
            for f in oldFiles:
                os.remove(f)
        else:
            sys.exit("Old files exist! Please use the -f flag to force overwrite.")

    if doAlign:
        to_align = "%s/%s_to_align.fa" % (workDir, prj_name)
        aligned = "%s/%s_aligned.afa" % (workDir, prj_name)

        #first create a working file to align and add the germline and natives
        shutil.copyfile(seqFile, to_align)
        # `with` guarantees the file is flushed and closed before MUSCLE reads it
        with open(to_align, "a") as handle:
            handle.write(">%s\n%s\n" % (germ_seq.id, germ_seq.seq))
            for n in natives.values():
                handle.write(">%s\n%s\n" % (n.id, n.seq))

        #now run muscle
        # MUSCLE must write where inFile points below; the output used to go
        # to prj_tree.phylo while the alignment was read back from workDir.
        run_muscle = MuscleCommandline(input=to_align, out=aligned)
        run_muscle.maxiters = 2
        run_muscle.diags = True
        run_muscle.gapopen = -5000.0 #code requires a float
        print(run_muscle)
        run_muscle()

        inFile = aligned

    #open the alignment to rename everything and find germline sequence
    #rename is to avoid possible errors with DNAML from sequence ids that are too long
    germ_pos = 1
    with open(inFile, "rU") as handle:
        if doAlign:
            aln = AlignIO.read(handle, "fasta")
        else:
            try:
                aln = AlignIO.read(handle, "phylip-relaxed")
            except Exception:
                # narrowed from a bare except so Ctrl-C is not reported as a format error
                sys.exit("Please make sure custom input is aligned and in PHYLIP format...")

    # lookup keeps the original ids in order; each id becomes its 1-based,
    # zero-padded position. (revertName presumably maps these back -- confirm.)
    lookup = []
    for seq in aln:
        lookup.append( seq.id )
        if re.search("(IG|VH|VK|VL|HV|KV|LV)", seq.id) is not None:
            germ_pos = len( lookup )
        seq.id = "%010d" % len( lookup )

    with open("%s/infile" % workDir, "w") as output:
        AlignIO.write(aln, output, "phylip")

    #now generate script for DNAML
    # J is "jumble" followed by random seed and number of times to repeat
    # O is outgroup root, followed by position of the germline in the alignment
    # 5 tells DNAML to do the ancestor inference
    # Y starts the run
    with open("%s/dnaml.in"%workDir, "w") as handle:
        # integer bound: randint rejects non-integer floats on newer Pythons
        seed = random.randint(0, 10 ** 10) * 2 + 1 #seed must be odd
        handle.write("J\n%d\n5\nG\nO\n%d\n5\nY\n" % (seed, germ_pos))

    # change to work directory so DNAML finds "infile" and puts the output where we expect
    origWD = os.getcwd()
    os.chdir(workDir)
    try:
        with open("dnaml.in", "rU") as pipe:
            subprocess.call([dnaml], stdin=pipe)
    finally:
        # always return to the original directory, even if DNAML cannot be started
        os.chdir(origWD)

    #revert names in tree
    with open("%s/outtree"%workDir, "rU") as intree:
        mytree = intree.read()
    fixedtree = re.sub(r"\d{10}", revertName, mytree)
    with open(outTreeFile, "w") as outtree:
        outtree.write(fixedtree)

    #revert names in out file
    with open("%s/outfile"%workDir, "rU") as instuff:
        mystuff = instuff.read()
    fixedstuff = re.sub(r"\d{10}", revertName, mystuff)
    with open(outFile, "w") as outstuff:
        outstuff.write(fixedstuff)

    print("\nOutput in %s and %s\n" % (outTreeFile, outFile))