Пример #1
0
def concat(mypath, same_taxa):
    '''Combine multiple nexus data matrices into one partitioned matrix.

    Every regular, non-hidden entry directly inside ``mypath`` is parsed as
    a nexus file and all matrices are combined with ``Nexus.combine``.

    Adapted from http://biopython.org/wiki/Concatenate_nexus; hidden files
    (names starting with '.') are skipped.

    Args:
        mypath: directory holding the nexus files.
        same_taxa: when true, ``check_taxa`` is run on the parsed matrices
            before combining; pass a false value to skip that check.

    Returns:
        The combined object from ``Nexus.combine``. When ``same_taxa`` is
        true and ``check_taxa`` returns a truthy value, nothing is combined
        and ``None`` is returned.
    '''
    nexi = []
    for item in os.listdir(mypath):
        full_path = os.path.join(mypath, item)
        if item.startswith('.') or not os.path.isfile(full_path):
            continue
        # Open through the joined path: the bare name only resolves when the
        # current working directory happens to be ``mypath``.
        with open(full_path, 'r') as handle:
            nexi.append((item, Nexus.Nexus(handle)))

    if same_taxa:
        # check_taxa is expected to raise on a mismatch and return a falsy
        # value otherwise, hence the negation.
        if not check_taxa(nexi):
            return Nexus.combine(nexi)
    else:
        return Nexus.combine(nexi)
Пример #2
0
def concatNexusAlignments(processes):
    """Convert pending fasta alignments to nexus and concatenate them.

    Fasta files under ``<base_dir>/fasta`` whose stem (text before the first
    '.') is not yet present in ``<base_dir>/nexus`` are passed to
    ``convertFastaToNexus`` on a thread pool. All files in the nexus
    directory are then combined into ``<base_dir>/WGS.nex`` unless that file
    already exists.

    Args:
        processes: number of worker threads for the conversion step.

    Returns:
        Path of the combined nexus file.
    """
    nexus_dir = '{}/nexus'.format(base_dir)
    fasta_dir = '{}/fasta'.format(base_dir)

    already_done = set(x.split('.')[0] for x in os.listdir(nexus_dir))
    fastas = [
        '{}/{}'.format(fasta_dir, fname)
        for fname in os.listdir(fasta_dir)
        if fname.split('.')[0] not in already_done
    ]

    pool = ThreadPool(processes)
    try:
        # list() drains the lazy imap so every conversion actually runs
        list(
            tqdm(pool.imap(convertFastaToNexus, fastas),
                 total=len(fastas),
                 desc='Fastas to Nexus...'))
    finally:
        # Release the worker threads; the original never closed the pool
        pool.close()
        pool.join()

    combined_nexus = '{}/WGS.nex'.format(base_dir)
    if os.path.isfile(combined_nexus):
        return combined_nexus

    nexus = [('{}/{}'.format(nexus_dir, fname),
              Nexus.Nexus('{}/{}'.format(nexus_dir, fname)))
             for fname in os.listdir(nexus_dir)]
    combined = Nexus.combine(nexus)
    # Context manager guarantees the output is flushed and closed
    with open(combined_nexus, 'w') as out_handle:
        combined.write_nexus_data(filename=out_handle)
    return combined_nexus
def concat_alignment(files, output):
    """Combine the nexus files in ``files`` and write the result to ``output``.

    Each matrix is named after its path with ``alignments_dir`` removed and
    all '.' and '-' characters dropped.

    Args:
        files: iterable of nexus file paths.
        output: path of the combined nexus file to write.
    """
    nexi = []
    for fname in files:
        label = fname.replace(alignments_dir, '')
        label = label.replace(".", "").replace("-", "")
        nexi.append((label, Nexus.Nexus(fname)))
    combined = Nexus.combine(nexi)
    # Close the output deterministically instead of leaking the handle
    with open(output, 'w') as out_handle:
        combined.write_nexus_data(filename=out_handle)
Пример #4
0
def export_nexus(aln, charset_name):
    """Split ``aln`` by charsets, recombine the parts and write ``*_concat.nex``.

    Args:
        aln: path of the source nexus alignment; also used to derive the
            output name (``<aln without .nex>_concat.nex``).
        charset_name: forwarded to ``split_nexus_by_charsets``.
    """
    nexus_list_names = split_nexus_by_charsets(aln, charset_name)
    nexus_tuples = [(name, Nexus.Nexus(name)) for name in nexus_list_names]
    combined = Nexus.combine(nexus_tuples)

    # str.rstrip(".nex") strips any trailing run of the characters
    # '.', 'n', 'e', 'x' (e.g. "matrix.nex" -> "matri"); remove only the
    # literal suffix instead.
    suffix = ".nex"
    base = aln[:-len(suffix)] if aln.endswith(suffix) else aln
    combined.write_nexus_data('%s_concat.nex' % base)
def main():
    """Concatenate all nexus files in ``args.input`` and export as PHYLIP.

    Reads ``input`` (directory searched for ``*.nex*``) and ``output``
    (PHYLIP destination) from the object returned by ``get_args()``.
    """
    args = get_args()
    # Parenthesised single-argument print behaves identically on Python 2
    # and is valid syntax on Python 3.
    print("Reading files...")
    pattern = os.path.join(args.input, '*.nex*')
    data = [(fname, Nexus.Nexus(fname)) for fname in glob.glob(pattern)]
    print("Concatenating files...")
    concatenated = Nexus.combine(data)
    print("Writing to phylip...")
    concatenated.export_phylip(args.output)
Пример #6
0
def concat_nexus_alignment(path: str, output_name: str):
    """Combine every ``*.nex`` file in ``path`` into one nexus file.

    Args:
        path: directory searched for ``*.nex`` files; the result is written
            into the same directory.
        output_name: file name of the combined alignment.
    """
    outname = path + "/" + output_name
    nex_list = [(nex, Nexus.Nexus(nex)) for nex in glob(path + "/*.nex")]

    combined = Nexus.combine(nex_list)
    # Ensure the output handle is closed once writing is finished
    with open(outname, "w") as out_handle:
        combined.write_nexus_data(filename=out_handle)
    print(f"DONE! File is written as {outname}")
def main():
    """Concatenate all nexus files in ``args.input`` and export as PHYLIP.

    Reads ``input`` (directory searched for ``*.nex*``) and ``output``
    (PHYLIP destination) from the object returned by ``get_args()``.
    """
    args = get_args()
    # Parenthesised single-argument print works on both Python 2 and 3
    print("Reading files...")
    nexus_files = glob.glob(os.path.join(args.input, '*.nex*'))
    data = []
    for fname in nexus_files:
        data.append((fname, Nexus.Nexus(fname)))
    print("Concatenating files...")
    concatenated = Nexus.combine(data)
    print("Writing to phylip...")
    concatenated.export_phylip(args.output)
Пример #8
0
def concatNexAlns():
    """Combine the nexus matrices in ``<base_dir>/nexus/`` into one file.

    Every ``*.nex`` file in that directory is parsed and combined; the
    result is written to ``<base_dir>/concat_aln_species_tree.nex``.
    No taxon-consistency check is performed here.

    Returns:
        Path of the combined nexus file.
    """
    nexdir = '{}/nexus/'.format(base_dir)
    nexi = []
    for fname in os.listdir(nexdir):
        if not fname.endswith('.nex'):
            continue
        full_path = os.path.join(nexdir, fname)
        nexi.append((full_path, Nexus.Nexus(full_path)))

    coutname = '{}/concat_aln_species_tree.nex'.format(base_dir)
    combined = Nexus.combine(nexi)
    # Close the output deterministically instead of leaking the handle
    with open(coutname, 'w') as out_handle:
        combined.write_nexus_data(filename=out_handle)
    return coutname
Пример #9
0
def concattophylip(directory, outdir):
    """Concatenate nexus files into a PHYLIP alignment plus a charsets file.

    Creates ``<outdir>/phylip``, changes the working directory to
    ``directory``, combines every ``*.nex*`` file found there and writes
    ``concatdata.phylip`` and ``charsets.charsets`` into the new folder.

    Args:
        directory: folder containing the nexus alignments. The process
            working directory is left pointing here afterwards.
        outdir: parent folder for the ``phylip`` output directory;
            ``os.makedirs`` raises if ``<outdir>/phylip`` already exists.
    """
    print("Making concat and charset files.")
    # Resolve before chdir: a relative outdir would otherwise be created
    # relative to the old cwd but written to relative to ``directory``.
    phylip_dir = os.path.abspath(outdir) + '/phylip'
    os.makedirs(phylip_dir)
    os.chdir(directory)

    nexi = [(fname, Nexus.Nexus(fname)) for fname in glob.glob('*.nex*')]
    combined = Nexus.combine(nexi)
    sets = combined.append_sets()

    combined.export_phylip(phylip_dir + '/concatdata.phylip')
    # The with-block closes the file; no explicit close() is needed
    with open(phylip_dir + '/charsets.charsets', 'w') as outf:
        outf.write(sets)
def fully_partition(metadata, aligns):
    """Combine all loci and record each locus' 1-based column range.

    Args:
        metadata: mapping ``model -> {locus: ...}``. It is updated in place:
            every locus value is replaced by its ``(start, end)`` range
            (inclusive) in the combined alignment.
        aligns: directory containing ``<locus>.nex`` files.

    Returns:
        Tuple ``(combined, metadata)`` where ``combined`` is the result of
        ``Nexus.combine`` and ``metadata`` is the same, mutated mapping.
    """
    to_combine = []
    start = 1
    for model in metadata:
        for locus in metadata[model]:
            nex_path = os.path.join(aligns, "{0}.nex".format(locus))
            # Close each input right after parsing; otherwise one descriptor
            # stays open per locus.
            with open(nex_path) as handle:
                nex = Nexus.Nexus(handle)
            end = start + nex.nchar - 1
            metadata[model][locus] = (start, end)
            to_combine.append((locus, nex))
            start = end + 1
    combined = Nexus.combine(to_combine)
    return combined, metadata
def concatNexAlns(
        nexDir,
        outname,
        same_taxa=True):  #from https://biopython.org/wiki/Concatenate_nexus
    """Combine the ``*.nex`` matrices in ``nexDir`` into one partitioned file.

    Args:
        nexDir: directory searched for files ending in ``.nex``.
        outname: tag inserted into the output name
            ``concat_stree_aln_<outname>.nex`` (written to the current
            working directory).
        same_taxa: accepted for interface compatibility; this function does
            not use it and performs no taxon-consistency check.

    Returns:
        Name of the combined nexus file.
    """
    nexi = []
    for fname in os.listdir(nexDir):
        if fname.endswith('.nex'):
            full_path = os.path.join(nexDir, fname)
            nexi.append((full_path, Nexus.Nexus(full_path)))

    coutname = 'concat_stree_aln_{}.nex'.format(outname)
    combined = Nexus.combine(nexi)
    # Close the output deterministically instead of leaking the handle
    with open(coutname, 'w') as out_handle:
        combined.write_nexus_data(filename=out_handle)
    return coutname
Пример #12
0
def NexusIterator(handle, seq_count=None):
    """Returns SeqRecord objects from a Nexus file.

    Thus uses the Bio.Nexus module to do the hard work.

    You are expected to call this function via Bio.SeqIO or Bio.AlignIO
    (and not use it directly).

    NOTE - We only expect ONE alignment matrix per Nexus file,
    meaning this iterator will only yield one Alignment.

    Raises ValueError if seq_count is given (and non-zero) and differs from
    the number of taxa found in the file.
    """
    n = Nexus.Nexus(handle)
    if not n.matrix:
        # No alignment found. A plain return ends the generator; raising
        # StopIteration here is turned into RuntimeError by PEP 479.
        return
    alignment = Alignment(n.alphabet)

    #Bio.Nexus deals with duplicated names by adding a '.copy' suffix.
    #The original names and the modified names are kept in these two lists:
    assert len(n.unaltered_taxlabels) == len(n.taxlabels)

    if seq_count and seq_count != len(n.unaltered_taxlabels):
        raise ValueError("Found %i sequences, but seq_count=%i" \
               % (len(n.unaltered_taxlabels), seq_count))

    for old_name, new_name in zip(n.unaltered_taxlabels, n.taxlabels):
        assert new_name.startswith(old_name)
        seq = n.matrix[new_name]  #already a Seq object with the alphabet set
        #ToDo - Can we extract any annotation too?
        #ToDo - Avoid abusing the private _records list
        alignment._records.append(
            SeqRecord(seq, id=new_name, name=old_name, description=""))
    #All done
    yield alignment
def main():
    """Write ``args.samples`` random concatenations of nexus loci.

    For every replicate, ``args.sample_size`` files are drawn with
    replacement from the ``*.nex*`` files in ``args.nexus``, combined, and
    written to ``args.output`` together with a META text file listing the
    sampled file names.
    """
    args = get_args()
    # get filenames in directory and convert to array
    files = numpy.array(glob.glob(os.path.join(args.nexus, '*.nex*')))
    # make sure we have enough
    assert len(files) >= args.sample_size, "Sample size must be < number(files)"
    print("Running")
    for i in range(args.samples):
        sys.stdout.write('.')
        sys.stdout.flush()
        # randint's upper bound is exclusive, so this draws from the same
        # 0..len(files)-1 range as the deprecated random_integers call.
        sample = numpy.random.randint(0, len(files), args.sample_size)
        # reindex filenames by random selections
        random_files = sorted(files[sample].tolist())
        # concatenate and output
        files_to_combine = [(f, Nexus.Nexus(f)) for f in random_files]
        combined = Nexus.combine(files_to_combine)
        align_name = "random-sample-{}-{}-loci.nex".format(i, args.sample_size)
        meta_name = 'META-random-sample-{}-{}-loci.txt'.format(i, args.sample_size)
        # record which loci went into this replicate
        with open(os.path.join(args.output, meta_name), 'w') as meta:
            meta.write('{}'.format('\n'.join(random_files)))
        # with-block closes the alignment file (it was leaked before)
        with open(os.path.join(args.output, align_name), 'w') as aln_out:
            combined.write_nexus_data(filename=aln_out)
    sys.stdout.write("Done")
Пример #14
0
def NexusIterator(handle, seq_count=None):
    """Returns SeqRecord objects from a Nexus file.

    Thus uses the Bio.Nexus module to do the hard work.

    You are expected to call this function via Bio.SeqIO or Bio.AlignIO
    (and not use it directly).

    NOTE - We only expect ONE alignment matrix per Nexus file,
    meaning this iterator will only yield one MultipleSeqAlignment.

    Raises ValueError if seq_count is given (and non-zero) and differs from
    the number of taxa found in the file.
    """
    n = Nexus.Nexus(handle)
    if not n.matrix:
        # No alignment found. A plain return ends the generator; raising
        # StopIteration here is turned into RuntimeError by PEP 479.
        return

    # Bio.Nexus deals with duplicated names by adding a '.copy' suffix.
    # The original names and the modified names are kept in these two lists:
    assert len(n.unaltered_taxlabels) == len(n.taxlabels)

    if seq_count and seq_count != len(n.unaltered_taxlabels):
        raise ValueError("Found %i sequences, but seq_count=%i" %
                         (len(n.unaltered_taxlabels), seq_count))

    # TODO - Can we extract any annotation too?
    records = (
        SeqRecord(n.matrix[new_name],
                  id=new_name,
                  name=old_name,
                  description="")
        for old_name, new_name in zip(n.unaltered_taxlabels, n.taxlabels))
    # All done
    yield MultipleSeqAlignment(records, n.alphabet)
Пример #15
0
def parse(handle):
    """Yield a ``Newick.Tree`` for every tree found in a Nexus file.

    The trees are read with the older ``Nexus.Nexus`` parser and each
    ``Nexus.Trees`` node hierarchy is rebuilt as nested ``Newick.Clade``
    objects, so the Nexus module itself needs no changes.
    """
    def build_clade(tree, nx_node):
        # Convert the children first (depth-first), then wrap this node
        children = []
        for child_id in nx_node.succ:
            children.append(build_clade(tree, tree.node(child_id)))
        info = nx_node.data
        return Newick.Clade(branch_length=info.branchlength,
                            name=info.taxon,
                            clades=children,
                            confidence=info.support,
                            comment=info.comment)

    parsed = Nexus.Nexus(handle)
    for nx_tree in parsed.trees:
        root_clade = build_clade(nx_tree, nx_tree.node(nx_tree.root))
        yield Newick.Tree(root=root_clade,
                          rooted=nx_tree.rooted,
                          name=nx_tree.name,
                          weight=nx_tree.weight)
def main():
    """Concatenate NEXUS alignments and write them as PHYLIP or NEXUS.

    Options come from ``get_args()``: ``alignments`` (input directory),
    ``output`` (output directory), ``nexus`` (write NEXUS instead of PHYLIP)
    and ``charsets`` (also emit charset information).
    """
    args = get_args()
    # setup logging
    log, my_name = setup_logging(args)

    # read alignments
    log.info("Reading input alignments in NEXUS format")
    data = []
    for fname in glob.glob(os.path.join(args.alignments, '*.nex*')):
        data.append((os.path.basename(fname), Nexus.Nexus(fname)))

    log.info("Concatenating files")
    concatenated = Nexus.combine(data)

    # all outputs share the basename of the input directory
    stem = os.path.basename(args.alignments)
    if args.nexus:
        concat_file = os.path.join(args.output, stem + ".nexus")
        if args.charsets:
            log.info("Writing concatenated alignment to NEXUS format (with charsets)")
            concatenated.write_nexus_data(concat_file)
        else:
            log.info("Writing concatenated alignment to NEXUS format (without charsets)")
            concatenated.write_nexus_data(concat_file, append_sets=False)
    else:
        concat_file = os.path.join(args.output, stem + ".phylip")
        if args.charsets:
            sets = concatenated.append_sets()
            charset_file = os.path.join(args.output, stem + ".charsets")
            log.info("Writing charsets to {}".format(charset_file))
            with open(charset_file, 'w') as outf:
                outf.write(sets)
        log.info("Writing concatenated PHYLIP alignment to {}".format(concat_file))
        concatenated.export_phylip(concat_file)

    # end
    banner = " Completed {} ".format(my_name)
    log.info(banner.center(65, "="))
Пример #17
0
def model_partition(metadata, aligns):
    """Combine all loci and record one 1-based column range per model.

    Args:
        metadata: mapping ``model -> iterable of locus names``; iterated in
            its own order and not modified.
        aligns: directory containing ``<locus>.nex`` files.

    Returns:
        Tuple ``(combined, new_metadata)``: the ``Nexus.combine`` result and
        an ``OrderedDict`` mapping each model to the inclusive
        ``(start, end)`` range spanned by its loci.
    """
    to_combine = []
    start = 1
    end = 0
    new_metadata = OrderedDict()
    for model in metadata:
        for locus in metadata[model]:
            nex_path = os.path.join(aligns, "{0}.nex".format(locus))
            # Close each input right after parsing to avoid leaking one
            # descriptor per locus.
            with open(nex_path) as handle:
                nex = Nexus.Nexus(handle)
            end += nex.nchar
            to_combine.append((locus, nex))
        new_metadata[model] = (start, end)
        start = end + 1
    combined = Nexus.combine(to_combine)
    return combined, new_metadata
Пример #18
0
 def test_taxa_and_characters_with_many_codings_two_without_state(self):
     """Taxa and chr blocks, over 9 codings, 2 character without states."""
     # TODO: Implement continuous datatype:
     # Bio.Nexus.Nexus.NexusError: Unsupported datatype: continuous
     path = "Nexus/vSysLab_Oreiscelio_discrete+continuous.nex"
     parser = Nexus.Nexus()
     # Reading this file is expected to fail with NexusError
     with self.assertRaises(Nexus.NexusError):
         parser.read(path)
Пример #19
0
def main(nexusfile, reftree, burnin=10):
    """Tally tree topologies in a Nexus tree file after discarding burn-in.

    Args:
        nexusfile: Nexus file containing the sampled trees.
        reftree: reference tree passed to ``represent_clades``.
        burnin: percentage of leading trees to discard.

    NOTE(review): several locals computed here (taxlabels, nb2taxlabels, N,
    MAP_topology, MAP_proba, clades) are not used in the visible body and
    nothing is returned; the function looks truncated, so they are kept.
    """
    # Using the Nexus module
    data = Nexus.Nexus(nexusfile)
    taxlabels = data.structured[1].commandlines[1].options.split()
    nb2taxlabels = data.translate
    # Using the Phylo module
    trees = list(Phylo.parse(nexusfile, 'nexus'))

    N0 = len(trees)

    # Floor division keeps the slice index an int on Python 3 (true
    # division would yield a float and raise TypeError when slicing).
    first_kept = int(N0 * burnin // 100) + 1
    trees = trees[first_kept:]
    N = N0 * (100 - burnin) // 100

    topologies = Counter()
    topo_groups = defaultdict(list)

    for tree in trees:
        # Ensure all equivalent topologies will be represented the same way
        biophylo_leaf_sort(tree, tree.root)
        topo = biophylo_topology(tree, tree.root)
        topologies[topo] += 1
        # Group trees by topology; calling .append on the defaultdict
        # itself raised AttributeError.
        topo_groups[topo].append(tree)

    MAP_topology, MAP_count = topologies.most_common(1)[0]
    MAP_proba = float(MAP_count) / sum(topologies.values())

    clades = represent_clades(reftree, BioPhylo.get_children,
                              BioPhylo.get_label)
Пример #20
0
    def write_alignment(self, alignment, interleave=None):
        """Write a single alignment to ``self.handle`` in Nexus format.

        A minimal, empty Nexus record is created, the sequences are added to
        it, and Nexus produces the output.

        ``interleave`` defaults to None, meaning: interleave only when the
        alignment has more than 1000 columns. Pass True or False to force it.

        Raises ValueError for an alignment without sequences or with
        zero-length sequences.
        """
        if len(alignment) == 0:
            raise ValueError("Must have at least one sequence")
        columns = alignment.get_alignment_length()
        if columns == 0:
            raise ValueError("Non-empty sequences are required")

        datatype = self._classify_alphabet_for_nexus(alignment._alphabet)
        template = "#NEXUS\nbegin data; dimensions ntax=0 nchar=0; format datatype=%s; end;"
        nexus_obj = Nexus.Nexus(template % datatype)
        nexus_obj.alphabet = alignment._alphabet
        for record in alignment:
            nexus_obj.add_sequence(record.id, str(record.seq))

        # Note: MrBayes may choke on large alignments if not interleaved
        use_interleave = columns > 1000 if interleave is None else interleave
        nexus_obj.write_nexus_data(self.handle, interleave=use_interleave)
Пример #21
0
def combine(arg):
    """Combine the nexus files in ``arg.input`` and write ``arg.output``.

    When ``get_format(arg)`` reports 'fasta', ``arg`` is first replaced by
    the result of ``convert(arg)``.
    """
    if get_format(arg) == 'fasta':
        arg = convert(arg)

    name_data = []
    for name in arg.input:
        name_data.append((clean_name(name), Nexus.Nexus(name)))

    merged = Nexus.combine(name_data)
    merged.write_nexus_data(filename=arg.output)
Пример #22
0
    def write_alignment(self, alignment, interleave=None):
        """Write a single alignment to ``self.handle`` in Nexus format.

        A minimal, empty Nexus record is created, the sequences are added to
        it, and Nexus produces the output.

        ``interleave`` defaults to None, meaning: interleave only when the
        alignment has more than 1000 columns. Pass True or False to force it.

        Raises ValueError for an alignment without sequences, with
        zero-length sequences, or whose letters contradict the datatype
        (U in DNA, T in RNA).
        """
        if len(alignment) == 0:
            raise ValueError("Must have at least one sequence")
        columns = alignment.get_alignment_length()
        if columns == 0:
            raise ValueError("Non-empty sequences are required")

        datatype = self._classify_mol_type_for_nexus(alignment)
        template = "#NEXUS\nbegin data; dimensions ntax=0 nchar=0; format datatype=%s; end;"
        nexus_obj = Nexus.Nexus(template % datatype)

        is_dna = datatype == "dna"
        is_rna = datatype == "rna"
        for record in alignment:
            # Sanity test sequences (should this be even stricter?)
            if is_dna and "U" in record.seq:
                raise ValueError(f"{record.id} contains U, but DNA alignment")
            elif is_rna and "T" in record.seq:
                raise ValueError(f"{record.id} contains T, but RNA alignment")
            nexus_obj.add_sequence(record.id, str(record.seq))

        # Note: MrBayes may choke on large alignments if not interleaved
        use_interleave = columns > 1000 if interleave is None else interleave
        nexus_obj.write_nexus_data(self.handle, interleave=use_interleave)
Пример #23
0
def process_dataset_metrics(dataset_path, metrics, minimum_window_size,
                            outfilename):
    '''
    Run the sitewise metric analysis for every charset of a nexus alignment.

    Input:
        dataset_path: path to a nexus alignment with UCE charsets
        metrics: a list of 'gc', 'entropy' or 'multi'
        minimum_window_size: forwarded unchanged to process_uce
        outfilename: name for the csv file
    Output:
        csv files written to disk, plus one
        '<dataset>_<metric>_partition_finder.cfg' per metric in the
        current working directory
    '''

    print("Sitewise metrics analysis")

    # str.rstrip(".nex") strips any trailing '.', 'n', 'e', 'x' characters
    # (e.g. "matrix.nex" -> "matri"); drop only the literal suffix.
    dataset_name = os.path.basename(dataset_path)
    if dataset_name.endswith(".nex"):
        dataset_name = dataset_name[:-len(".nex")]

    # start the csv with its header row
    with open(outfilename, 'w') as outfile:
        outfile.write(
            "name,uce_site,aln_site,window_start,window_stop,type,value,plot_mtx\n"
        )

    # write the start blocks of the partitionfinder files
    for m in metrics:
        with open('%s_%s_partition_finder.cfg' % (dataset_name, m),
                  'w') as pfinder_config_file:
            pfinder_config_file.write(p_finder_start_block(dataset_name))

    dat = Nexus.Nexus()
    dat.read(dataset_path)
    # close the input handle once AlignIO has parsed it
    with open(dataset_path) as aln_handle:
        aln = AlignIO.read(aln_handle, "nexus")

    for name in tqdm(dat.charsets):
        sites = dat.charsets[name]
        start = min(sites)
        stop = max(sites) + 1
        # slice the alignment to get the UCE
        uce_aln = aln[:, start:stop]

        best_windows, metric_array = process_uce(uce_aln, metrics,
                                                 minimum_window_size)

        for i, best_window in enumerate(best_windows):
            # with-block flushes and closes the appended config; the handle
            # was previously left open.
            with open('%s_%s_partition_finder.cfg' % (dataset_name,
                                                      metrics[i]),
                      'a') as pfinder_config_file:
                pfinder_config_file.write(
                    blocks_pfinder_config(best_window, name, start, stop,
                                          uce_aln))
            # NOTE(review): only the first window/metric is written because
            # of this break; kept as in the original - confirm intent.
            break

        write_csvs(best_windows, metric_array, sites, name, outfilename)

    # write the end blocks of the partitionfinder files
    for m in metrics:
        with open('%s_%s_partition_finder.cfg' % (dataset_name, m),
                  'a') as pfinder_config_file:
            pfinder_config_file.write(p_finder_end_block(dataset_name))
Пример #24
0
 def test_WriteToFileName(self):
     """Test writing to a given filename."""
     filename = "Nexus/test_temp.nex"
     # start from a clean slate so the existence check below is meaningful
     if os.path.isfile(filename):
         os.remove(filename)
     n = Nexus.Nexus(self.handle)
     try:
         n.write_nexus_data(filename)
         self.assertTrue(os.path.isfile(filename))
     finally:
         # Remove the temp file even when the write or the assertion fails
         if os.path.isfile(filename):
             os.remove(filename)
def main():
    """Concatenate all nexus files in ``args.input`` and export as PHYLIP.

    Reads ``input`` (directory searched for ``*.nex*``) and ``output``
    (PHYLIP destination) from the object returned by ``get_args()``.
    """
    args = get_args()
    # Parenthesised single-argument print works on both Python 2 and 3
    print("Reading files...")
    data = [(fname, Nexus.Nexus(fname))
            for fname in glob.glob(os.path.join(args.input, '*.nex*'))]
    print("Concatenating files...")
    concatenated = Nexus.combine(data)
    print("Writing to phylip...")
    concatenated.export_phylip(args.output)
Пример #26
0
def _write_alignment_window(window, label, output_handle, outformat):
	"""Write one alignment slice to ``output_handle`` in ``outformat``."""
	if outformat.lower() == 'nexus':
		# Round-trip through Nexus.combine so the slice is written as a
		# single named partition.
		nexus_text = window.format('nexus')
		combined = Nexus.combine([(label, Nexus.Nexus(nexus_text))])
		combined.write_nexus_data(output_handle)
	else:
		AlignIO.write(window, output_handle, outformat)


def alignment_slicer(input, informat, outformat, SNPs, slide):
	"""Cut an alignment into sliding windows and write one file per window.

	Files are named ``<input>_site<start>to<end>.<outformat>``. After the
	last full window a trailing file covering ``start`` to the end of the
	alignment is always written.

	Args:
		input: path of the alignment to read (also the output name prefix).
		informat: format name passed to ``AlignIO.read``.
		outformat: output format; 'nexus' (any case) is written via Bio.Nexus.
		SNPs: window width in columns.
		slide: number of columns the window advances each step.

	Raises:
		ValueError: if ``slide`` is not positive (the loop would never end).
	"""
	# The window geometry now comes from the SNPs/slide parameters; the
	# original ignored them and read a module-level ``args`` instead.
	if slide <= 0:
		raise ValueError("slide must be a positive number of columns")

	alignment = AlignIO.read(input, informat, alphabet = generic_dna)
	length_alignment = len(alignment[0].seq)

	start = 0
	end = start + SNPs
	while end <= length_alignment:
		out_name = input+'_site'+str(start)+'to'+str(end)+'.'+outformat
		with open(out_name, 'w') as output_handle:
			window = MultipleSeqAlignment(alignment[:, start:end], alphabet=generic_dna)
			_write_alignment_window(window, 'site'+str(start)+'to'+str(end),
				output_handle, outformat)
		start += slide
		end += slide

	# Trailing window: whatever is left between start and the alignment end
	out_name = input+'_site'+str(start)+'to'+str(length_alignment)+'.'+outformat
	with open(out_name, 'w') as output_handle:
		window = MultipleSeqAlignment(alignment[:, start:length_alignment], alphabet=generic_dna)
		# NOTE(review): the partition label uses ``end`` while the file name
		# uses the alignment length; kept as in the original - confirm.
		_write_alignment_window(window, 'site'+str(start)+'to'+str(end),
			output_handle, outformat)
	print("\ndone\n")
Пример #27
0
def write_nexus_non_interleaved(alignment, fh_out):
    """Write ``alignment`` to ``fh_out`` as a non-interleaved Nexus matrix.

    The record is always declared as ``datatype=dna missing=N``.

    Args:
        alignment: iterable of records exposing ``id`` and ``seq``; its
            ``_alphabet`` is copied onto the Nexus object.
        fh_out: destination passed to ``write_nexus_data``.
    """
    # The unused ntax/nchar locals of the original were dropped.
    minimal_record = "#NEXUS\nbegin data; dimensions ntax=0 nchar=0; " \
                     + "format datatype=dna missing=N; end;"
    n = Nexus.Nexus(minimal_record)
    n.alphabet = alignment._alphabet
    for record in alignment:
        n.add_sequence(record.id, str(record.seq))
    n.write_nexus_data(fh_out, interleave=False)
def model_partition(metadata, aligns):
    """Combine all loci and record one 1-based column range per model.

    Args:
        metadata: mapping ``model -> iterable of locus names``; iterated in
            its own order and not modified.
        aligns: directory containing ``<locus>.nex`` files.

    Returns:
        Tuple ``(combined, new_metadata)``: the ``Nexus.combine`` result and
        an ``OrderedDict`` mapping each model to the inclusive
        ``(start, end)`` range spanned by its loci.
    """
    to_combine = []
    new_metadata = OrderedDict()
    start = 1
    end = 0
    for model in metadata:
        for locus in metadata[model]:
            nex_path = os.path.join(aligns, "{0}.nex".format(locus))
            # Parse inside a with-block so the file is closed immediately
            with open(nex_path) as handle:
                nex = Nexus.Nexus(handle)
            end += nex.nchar
            to_combine.append((locus, nex))
        new_metadata[model] = (start, end)
        start = end + 1
    combined = Nexus.combine(to_combine)
    return combined, new_metadata
def check_alignment(alignment_file):
    """Read a nexus alignment and validate its sets and charpartitions.

    Checks, in order: the file parses; the charpartitions are exactly
    'loci' and 'genomes'; the only taxset is 'outgroups'; neither
    charpartition lists more sites than the alignment has; and both
    charpartitions cover every site.

    Args:
        alignment_file: nexus input passed to ``Nexus.Nexus.read``.

    Returns:
        The parsed ``Nexus.Nexus`` object.

    Raises:
        ValueError: if any check fails (details are also logged).
    """
    aln = Nexus.Nexus()
    try:
        aln.read(alignment_file)
    except Exception as e:
        logging.error("Couldn't read nexus file, please check and try again.")
        logging.error("Here's the error from the BioPython Nexus.Nexus module")
        logging.error(e)
        raise ValueError("Couldn't read nexus file: %s" % e)

    # Check that there are just two charpartitions: 'loci' and 'genomes'.
    # Compare as sets: a keys() view never equals a list on Python 3, and
    # on Python 2 the list comparison depended on dict ordering.
    logging.info("        checking correct charpartitions exist")
    if set(aln.charpartitions.keys()) != {'loci', 'genomes'}:
        msg = "There should be exactly two CHARPARTITIONS: 'loci' and 'genomes'. Check and try again."
        logging.error(msg)
        raise ValueError(msg)

    # Check for an 'outgroup' taxset
    logging.info("        checking outgroup taxset exists")
    if set(aln.taxsets.keys()) != {'outgroups'}:
        msg = "There should be exactly one TAXSET: 'outgroups'. Check and try again."
        logging.error(msg)
        raise ValueError(msg)

    # Check that no sites are duplicated in either charpartition
    logging.info("        checking for duplicates sites in charpartitions")
    all_sites = set(range(aln.nchar))

    loci_sites = list(itertools.chain.from_iterable(
        aln.charpartitions['loci'].values()))
    if len(loci_sites) > len(all_sites):
        msg = "The loci charpartition has %d more site(s) than the number of sites in the alignment" % (len(loci_sites) - len(all_sites))
        logging.error(msg)
        raise ValueError(msg)

    geno_sites = list(itertools.chain.from_iterable(
        aln.charpartitions['genomes'].values()))
    if len(geno_sites) > len(all_sites):
        msg = "The genomes charpartition has %d more site(s) than the number of sites in the alignment" % (len(geno_sites) - len(all_sites))
        logging.error(msg)
        raise ValueError(msg)

    # Check that all sites are covered by 'loci' charpartition
    logging.info("        checking that all sites are covered by charpartitions")
    if len(set(loci_sites)) < len(all_sites):
        msg = "The loci charpartition does not cover the following sites, please fix: %s" % (all_sites.difference(set(loci_sites)))
        logging.error(msg)
        raise ValueError(msg)

    # Check that all sites are covered by 'genomes' charpartition
    if len(set(geno_sites)) < len(all_sites):
        msg = "The genomes charpartition does not cover the following sites, please fix: %s" % (all_sites.difference(set(geno_sites)))
        logging.error(msg)
        raise ValueError(msg)

    return(aln)
Пример #30
0
 def parse_nexus_file(self, path_to_nex):
     '''Parse a NEXUS file and return its charsets and matrix.

     Returns a tuple ``(charsets, matrix)`` taken from the parsed
     ``Nexus.Nexus`` object. Raises ``ME.MyException`` if parsing fails.
     '''
     from Bio.Nexus import Nexus
     try:
         aln = Nexus.Nexus()
         aln.read(path_to_nex)
         charsets = aln.charsets
         matrix = aln.matrix
     except Exception:
         # ``except Exception`` instead of a bare except, so that
         # KeyboardInterrupt / SystemExit are no longer swallowed.
         raise ME.MyException('Parsing of .nex-file unsuccessful.')
     return (charsets, matrix)
Пример #31
0
    def test_TreeTest1(self):
        """Test Tree module."""
        n = Nexus.Nexus(self.handle)
        # NOTE: t3 and t2 are both n.trees[2], i.e. the same list element
        t3 = n.trees[2]
        t2 = n.trees[2]
        t3.root_with_outgroup(['t1', 't5'])
        self.assertEqual(
            str(t3),
            "tree tree1 = (((((('one should be punished, for (that)!','isn''that [a] strange name?'),'t2 the name'),t8,t9),t6),t7),(t5,t1));"
        )
        self.assertEqual(t3.is_monophyletic(['t8', 't9', 't6', 't7']), -1)
        self.assertEqual(t3.is_monophyletic(['t1', 't5']), 13)
        t3.split(parent_id=t3.search_taxon('t9'))
        # Temporarily swap sys.stdout for an in-memory buffer so the text
        # printed by t3.display() can be compared against ``expected``.
        stdout = sys.stdout
        try:
            sys.stdout = cStringIO.StringIO()
            t3.display()
            if sys.version_info[0] == 3:
                output = sys.stdout.getvalue()
            else:
                # non-Python-3 branch: rewind the buffer, then read it back
                sys.stdout.reset()
                output = sys.stdout.read()
        finally:
            # always restore the real stdout, even if display() fails
            sys.stdout = stdout
        expected = """\
  #                            taxon            prev            succ    brlen blen (sum)  support              comment
  1    'isn''that [a] strange name?'               2              []   100.00     119.84    10.00                    -
  2                                -               4          [3, 1]     0.40      19.84     0.30                    -
  3 'one should be punished, for (that)!'               2              []     0.50      20.34        -                    -
  4                                -               6          [2, 5]     4.00      19.44     3.00                    -
  5                    't2 the name'               4              []     0.30      19.74        -                    -
  6                                -               9       [4, 7, 8]     2.00      15.44     1.00                    -
  7                               t8               6              []     1.20      16.64        -                    -
  8                               t9               6        [17, 18]     3.40      18.84        -                    -
  9                                -              11         [6, 10]     0.44      13.44    33.00                    -
 10                               t6               9              []     1.00      14.44        -                    -
 11                                -              16         [9, 12]    13.00      13.00    12.00                    -
 12                               t7              11              []    99.90     112.90        -                    -
 13                                -              16        [14, 15]     0.00       0.00     0.00                    -
 14                               t5              13              []    99.00      99.00        -                    -
 15                               t1              13              []     0.98       0.98        -                    -
 16                                -            None        [11, 13]     0.00       0.00        -                    -
 17                              t90               8              []     1.00      19.84        -                    -
 18                              t91               8              []     1.00      19.84        -                    -

Root:  16
"""
        # Compare line count and individual lines first so a failure points
        # at the offending row before the full-text comparison.
        self.assertEqual(len(output.split("\n")), len(expected.split("\n")))
        for l1, l2 in zip(output.split("\n"), expected.split("\n")):
            self.assertEqual(l1, l2)
        self.assertEqual(output, expected)
        self.assertEqual(t3.is_compatible(t2, threshold=0.3), [])
def fully_partition(metadata, aligns):
    """Combine all loci and record each locus' 1-based column range.

    Args:
        metadata: mapping ``model -> {locus: ...}``. It is updated in place:
            every locus value is replaced by its inclusive ``(start, end)``
            range in the combined alignment.
        aligns: directory containing ``<locus>.nex`` files.

    Returns:
        Tuple ``(combined, metadata)`` where ``combined`` is the result of
        ``Nexus.combine`` and ``metadata`` is the same, mutated mapping.
    """
    to_combine = []
    next_start = 1
    for model in metadata:
        loci = metadata[model]
        for locus in loci:
            nex_path = os.path.join(aligns, "{0}.nex".format(locus))
            # Parse inside a with-block so the file is closed immediately
            with open(nex_path) as handle:
                nex = Nexus.Nexus(handle)
            last = next_start + nex.nchar - 1
            loci[locus] = (next_start, last)
            to_combine.append((locus, nex))
            next_start = last + 1
    combined = Nexus.combine(to_combine)
    return combined, metadata
def write_alignment(alignment_trans, outformat):
    """
    Write the translated alignment to file in the requested format.

    The output path is ``<ARGS.filename without extension>_nametrans.<outformat>``.
    For "nexus" the alignment is converted to a Nexus object and written
    non-interleaved; every other format goes through ``AlignIO.write``.
    """
    out_name = os.path.splitext(ARGS.filename)[0]+"_nametrans."+outformat
    with open(out_name, "w") as output_handle:
        if outformat == "nexus":
            nexus_aln = Nexus.Nexus(alignment_trans.format("nexus"))
            nexus_aln.write_nexus_data(output_handle, interleave=False)
        else:
            AlignIO.write(alignment_trans, output_handle, outformat)
        # Parenthesised single-argument print works on Python 2 and 3
        print('\nAlignment with translated strain names written to "'+
              output_handle.name+'".')
Пример #34
0
def check_taxa(matrices):
    '''Check that all nexus instances share the same taxa.

    From: http://biopython.org/wiki/Concatenate_nexus

    Args:
        matrices: list of ``(name, instance)`` pairs; each instance must
            expose a ``taxlabels`` sequence.

    Returns:
        None when every matrix has the same taxa as the first one (also for
        an empty or single-element list).

    Raises:
        Nexus.NexusError: naming the taxa that are missing from, or extra
            in, the first mismatching matrix.
    '''
    if not matrices:
        # nothing to compare (indexing matrices[0] used to raise IndexError)
        return None

    first_name = matrices[0][0]
    first_taxa = matrices[0][1].taxlabels
    first_set = set(first_taxa)  # O(1) membership instead of list scans

    for name, matrix in matrices[1:]:
        current_set = set(matrix.taxlabels)
        # list comprehensions keep the original label order in the message
        first_only = [t for t in first_taxa if t not in current_set]
        new_only = [t for t in matrix.taxlabels if t not in first_set]

        if first_only:
            missing = ', '.join(first_only)
            msg = '%s taxa %s not in matrix %s' % (first_name, missing, name)
            raise Nexus.NexusError(msg)

        elif new_only:
            missing = ', '.join(new_only)
            msg = '%s taxa %s not in all matrices' % (name, missing)
            raise Nexus.NexusError(msg)

    return None  # will only get here if it hasn't thrown an exception
Пример #35
0
    def verify_nexus_topology(self, treeseq):
        """Check that the nexus export of ``treeseq`` matches its trees.

        The export is re-parsed with ``Nexus.Nexus``. Each nexus tree name
        must split on '_' into two parts holding the tree interval: the
        first part (after its 4-character prefix) is the start, the second
        is the end. Each pair of trees is then passed to ``verify_tree``.
        """
        parsed = Nexus.Nexus(treeseq.nexus(precision=16))
        self.assertEqual(treeseq.num_trees, len(parsed.trees))

        pairs = itertools.zip_longest(treeseq.trees(), parsed.trees)
        for tree, nexus_tree in pairs:
            parts = nexus_tree.name.split("_")
            self.assertEqual(len(parts), 2)
            left = float(parts[0][4:])
            right = float(parts[1])
            self.assertAlmostEqual(tree.interval[0], left)
            self.assertAlmostEqual(tree.interval[1], right)

            self.verify_tree(nexus_tree, tree)
def model_partition(metadata, aligns):
    """Concatenate per-locus nexus alignments and record each model's span.

    Args:
        metadata: mapping of model name -> iterable of locus names. The
            iteration order decides the order of loci in the concatenation.
        aligns: directory containing one ``<locus>.nex`` file per locus.

    Returns:
        A tuple ``(combined, new_metadata)``: the result of
        ``Nexus.combine`` over every locus, and an OrderedDict mapping each
        model to the 1-based inclusive ``(start, end)`` character range its
        loci occupy in the concatenation.
    """
    to_combine = []
    start = 1
    end = 0
    new_metadata = OrderedDict()
    for model in metadata:
        for locus in metadata[model]:
            locus_path = os.path.join(aligns, "{0}.nex".format(locus))
            # Parse inside `with` so the handle is closed right away instead
            # of leaking one open file per locus.
            with open(locus_path) as handle:
                nex = Nexus.Nexus(handle)
            end += nex.nchar
            to_combine.append((locus, nex))
        new_metadata[model] = (start, end)
        start = end + 1
    combined = Nexus.combine(to_combine)
    return combined, new_metadata
def model_partition(metadata, aligns):
    """Build one combined nexus matrix and the character range of each model.

    For every model in ``metadata`` (model name -> iterable of locus names)
    the file ``<aligns>/<locus>.nex`` of each locus is parsed and appended
    to the list handed to ``Nexus.combine``.

    Returns ``(combined, new_metadata)`` where ``new_metadata`` is an
    OrderedDict mapping each model to ``(start, end)``: the 1-based,
    inclusive positions covered by that model's loci, accumulated from each
    locus' ``nchar``.
    """
    to_combine = []
    start = 1
    end = 0
    new_metadata = OrderedDict()
    for model in metadata:
        for locus in metadata[model]:
            nexus_file = os.path.join(aligns, "{0}.nex".format(locus))
            # The context manager closes the file once it has been parsed;
            # passing a bare open() call left every handle open.
            with open(nexus_file) as nexus_handle:
                nex = Nexus.Nexus(nexus_handle)
            end += nex.nchar
            to_combine.append((locus, nex))
        new_metadata[model] = (start, end)
        start = end + 1
    combined = Nexus.combine(to_combine)
    return combined, new_metadata
def write_variant_sites(alignment, var_sites, outfile):
    """Write the columns listed in ``var_sites`` to ``outfile`` as nexus.

    ``ranges(var_sites)`` groups the positions into (first, last) tuples.
    Each tuple is sliced out of ``alignment`` (last column included),
    converted to a Nexus object, and all pieces are combined into one
    matrix that is written to ``outfile``. A one-line summary is printed.
    """
    blocks = list(ranges(var_sites))  # tuples of positions

    nex_aligns = []  # (label, Bio.Nexus.Nexus.Nexus) pairs
    for span in blocks:
        first, last = span[0], span[1]
        columns = alignment[:, first:last + 1]
        nexus_text = MultipleSeqAlignment(
            columns, alphabet=generic_dna).format('nexus')
        label = 'site {} to {}'.format(str(first), str(last + 1))
        nex_aligns.append((label, Nexus.Nexus(nexus_text)))

    combined = Nexus.combine(nex_aligns)
    with open(outfile, 'w') as out:
        combined.write_nexus_data(out)

    summary = 'Converted {} informative sites without gaps into nexus alignment'
    print(summary.format(str(len(blocks))))
Пример #39
0
def _write_BEST(dataset, filestem):
    """Write a concatenated nexus file and print taxset lines for BEST.

    Used by write_multispecies(). Every gene of ``dataset`` becomes one
    partition of ``<filestem>.nex``. The sequences are then numbered from 1
    and grouped by species, and one ``taxset`` line per species is printed
    to screen for the user to paste into the MrBayes block.

    NOTE(review): the printed header reads "begin MyBayes;" -- possibly a
    typo for "MrBayes"; kept as-is here.
    """
    fname = filestem + ".nex"

    # One (gene, nexus) partition per gene, concatenated into a single file.
    partitions = [(gene, _nexify(dataset.get_sequences(gene)))
                  for gene in dataset.get_genes()]
    Nexus.combine(partitions).write_nexus_data(filename=fname)

    # Group the 1-based sequence numbers (as strings) by species.
    species_names = dataset.get_species()
    otu_numbers = [str(number) for number in xrange(1, len(dataset) + 1)]
    otus_by_species = defaultdict(list)
    for species, number in zip(species_names, otu_numbers):
        otus_by_species[species].append(number)

    contents = ["begin MyBayes;"]
    contents.extend(
        "taxset %s = % s" % (species, " ".join(otus))
        for species, otus in otus_by_species.items())

    print("Add the following to the MrBayes block in %s" % fname)
    for line in contents:
        print(line)
Пример #40
0
def GettingInfoFromInput(NexusInput):
    """Get partition and model info from the mrbayes block(s) of a nexus file.

    The function assumes the datatype is uniform: all DNA or all protein,
    no mix. The command lines of every block titled "mrbayes" are merged
    into one list and scanned for charset, partition, mcmc, set, lset and
    prset commands.

    Args:
        NexusInput: nexus input handed straight to ``Nexus.read``.

    Returns:
        A tuple ``(Model, partitionPlan, nruns)``:

        * ``Model`` maps each partition name to a dict with the keys
          ``ntaxa``, ``type``, ``partitionSize``, ``partitionRange`` and
          ``matrix`` (which receives ``shape`` / ``size`` when the block
          defines them).
        * ``partitionPlan`` is the list of partition names chosen by
          ``set partition=...`` (``["dummy"]`` if no such command exists).
        * ``nruns`` is the ``nruns`` option of the mcmc command as an int.

    Raises:
        ValueError: if no mcmc command in the block supplies ``nruns``.
        KeyError: if a partition name has no matching charset, or an lset
            command lacks ``nst`` / uses an unknown ``nst`` or ``nucmodel``.
    """
    from Bio.Nexus import Nexus

    shape_dict = {"1": "JC", "2": "HKY", "6": "GTR"}
    size_dict = {"4by4": "4X4", "doublet": "16X16", "codon": "64X64"}

    N = Nexus.Nexus()
    N.read(NexusInput)
    # Merge all mrbayes blocks present in the file into one long list of
    # commands (open question from the original author: only use the first?).
    cmdblock = sum([sum(x.commandlines, []) for x in N.unknown_blocks
                    if x.title.lower() == "mrbayes"], [])
    HyppartitionPlan = {}
    partitionPlan = ["dummy"]
    nruns = None

    for cmdline in cmdblock:
        CMDline = Nexus.Commandline(cmdline, "mrbayes")
        if CMDline.command == "charset":
            N._charset(CMDline.options)
        elif CMDline.command == "partition":
            # "partition <name> = <n>: set1, set2, ..."
            nameplan = cmdline.split("=")[0].split()[1]
            HyppartitionPlan[nameplan] = [
                x.strip() for x in cmdline.split(":")[-1].split(",")]
        elif "mcmc" in CMDline.command:
            if "nruns" in CMDline.options:
                nruns = CMDline.options["nruns"]
        elif CMDline.command == "set":
            if "partition" in CMDline.options:
                partitionPlan = HyppartitionPlan[CMDline.options["partition"]]

    Model = {}
    datatype = N.datatype.lower()
    # `counter` is the 1-based partition index that "applyto" refers to.
    for counter, partition in enumerate(partitionPlan, 1):
        Model[partition] = {"ntaxa": N.ntax,
                            "type": datatype.title(),
                            "matrix": {}}
        Model[partition]["partitionSize"] = len(N.charsets[partition])
        Model[partition]["partitionRange"] = Nexus._compact4nexus(
            N.charsets[partition])
        matrix = Model[partition]["matrix"]
        for cmdline in cmdblock:
            CMDline = Nexus.Commandline(cmdline, "mrbayes")
            if datatype == "dna":
                if CMDline.command == "lset":
                    # An lset without "applyto" applies to every partition;
                    # otherwise it must name "all" or this partition's index.
                    applies = True
                    if "applyto" in CMDline.options:
                        APP = CMDline.options["applyto"][1:-1].lower()
                        applies = (
                            APP.strip() == "all"
                            or counter in [int(p) for p in APP.split(",")])
                    if applies:
                        matrix["shape"] = shape_dict[CMDline.options["nst"]]
                        if "nucmodel" in CMDline.options:
                            # Fixed: size_dict is a dict and must be indexed;
                            # the original called it, raising TypeError.
                            matrix["size"] = size_dict[
                                CMDline.options["nucmodel"]]
                        else:
                            matrix["size"] = "4X4"
            if datatype == "protein":
                matrix["size"] = "20X20"
                if CMDline.command == "prset":
                    mod = CMDline.options["aamodelpr"]
                    if mod.find("fix") > -1:
                        # NOTE(review): the end bound find(")") - 1 drops the
                        # last character inside the parentheses, e.g.
                        # "fixed(wag)" yields "wa" -- confirm whether the
                        # "- 1" is intended.
                        matrix["shape"] = mod[
                            (mod.find("(") + 1):(mod.find(")") - 1)]
                    elif mod.find("(") == -1:
                        matrix["shape"] = mod

    if nruns is None:
        # Previously this surfaced as an unbound-variable error at the return.
        raise ValueError(
            "no mcmc command with an 'nruns' option found in the mrbayes block")
    return Model, partitionPlan, int(nruns)
def read_collapse(file, informat, gapchar):
    """Drop invariant columns from a DNA alignment and write the rest as nexus.

    A column is kept when more than one of the bases A, C, G, T occurs in
    it; every other column is reported as excluded. Runs of consecutive
    kept columns are cut out of the alignment, combined with
    ``Nexus.combine`` and written to ``<file>_collapsed.nexus``.

    Args:
        file: path of the input alignment.
        informat: alignment format name passed to ``AlignIO.read``.
        gapchar: gap character, or None. Gap counts never contribute to the
            variability test (the original discarded them as well), so the
            argument is kept only for interface compatibility.

    Fixes relative to the previous version: with ``gapchar=None`` the count
    of A was skipped, so columns varying only between A and one other base
    were wrongly treated as invariant. The per-column debug prints were
    removed, and a base that is absent from the whole alignment is counted
    as 0.
    """
    with open(file, 'r') as input_handle:
        alignment = AlignIO.read(input_handle, informat, alphabet=generic_dna)

    summary_align = AlignInfo.SummaryInfo(alignment)
    first_seq = alignment[0].seq
    my_pssm = summary_align.pos_specific_score_matrix(first_seq)

    # Each pssm entry is indexed as (residue, {letter: count}).
    invariant_position_index = []
    for position, column in enumerate(my_pssm.pssm):
        counts = column[1]
        bases_present = sum(1 for base in 'ACGT' if counts.get(base, 0) > 0)
        if bases_present <= 1:
            invariant_position_index.append(position)

    invariant_lookup = set(invariant_position_index)
    alignment_indices_to_write = [
        position for position in range(len(first_seq))
        if position not in invariant_lookup]

    def ranges(indices):
        # Consecutive indices share the same (value - rank) difference, so
        # grouping on it yields one group per unbroken run.
        for _, run in itertools.groupby(enumerate(indices),
                                        lambda pair: pair[1] - pair[0]):
            run = list(run)
            yield run[0][1], run[-1][1]

    blocks = list(ranges(alignment_indices_to_write))
    print(' '.join(['\nExcluding', str(len(invariant_position_index)),
                    'sites at positions:', str(invariant_position_index),
                    '\n']))
    print(' '.join(['Including sites at positions:', str(blocks), '\n']))

    n_alignments = []
    for first, last in blocks:
        nexus_text = MultipleSeqAlignment(
            alignment[:, first:last + 1], alphabet=generic_dna).format('nexus')
        label = 'site' + str(first) + 'to' + str(last + 1)
        n_alignments.append((label, Nexus.Nexus(nexus_text)))

    # combine the alignments in n_alignments
    combined = Nexus.combine(n_alignments)
    out_path = file + '_collapsed.nexus'
    with open(out_path, 'w') as output_handle:
        print('Writing collapsed alignment to: ' + out_path + '\n')
        combined.write_nexus_data(output_handle)
Пример #42
0
# For every "<gene>_sims" folder in the working directory, concatenate the
# three phyloSeq nexus files of each posterior predictive simulation into
# "<gene>_<simNum>/<gene>_<simNum>.nex" next to the sims folders.
mainDir = os.getcwd()

for g in glob.glob('*_sims'):
    # pull out gene name
    gene = g.split("_")[0]
    # create path to gene folder
    geneDirPath = os.path.join(mainDir, g)
    # move into gene folder
    os.chdir(geneDirPath)
    for p in glob.glob('posterior_predictive_sim_*'):
        simNum = p.split("_")[3]
        # make name for concat nexus file
        concatNex = gene + "_" + simNum + ".nex"
        # make folder for sim seq
        mbRunDirPath = os.path.join(mainDir, gene + "_" + simNum)
        nexOutPath = os.path.join(mbRunDirPath, concatNex)
        if not os.path.exists(mbRunDirPath):
            os.mkdir(mbRunDirPath)
        # debug
        print(' '.join([simNum, concatNex, mbRunDirPath, nexOutPath]))

        # move into sim seq folder
        os.chdir(p)
        seqList = ["phyloSeq[1].nex", "phyloSeq[2].nex", "phyloSeq[3].nex"]
        nexConvert = [(f, Nexus.Nexus(f)) for f in seqList]
        combine = Nexus.combine(nexConvert)
        # Hand over the path rather than an open handle: the inline open()
        # used before was never closed.
        combine.write_nexus_data(filename=nexOutPath)
        os.chdir(geneDirPath)
    os.chdir(mainDir)

Пример #43
0
__author__ = 'anastasiiakorosteleva'
from Bio.Nexus import Nexus

# Nexus.combine takes a list of tuples [(name, nexus instance)...]. Starting
# from a list of file names, a list comprehension builds those tuples, using
# each file name as the name of its partition.
file_list = ['apoa1.nex', 'apoe.nex', 'cyt450.nex', 'ace.nex', 'ABO.nex', "apoa5.nex", 'apod.nex', 'cdk6.nex', 'CETP.nex',
             'ETV6.nex', 'Gckr.nex', 'gdf5.nex', 'LDLR.nex', 'lpl.nex', 'NAT2.nex', 'park2.nex', 'SLC22A5.nex', 'UGT1A9.nex',
             'HMGA2.nex', 'apoc1.nex']
nexuses = [(fname, Nexus.Nexus(fname)) for fname in file_list]

combined = Nexus.combine(nexuses)
# Pass the output path itself: the handle previously opened inline was
# never closed.
combined.write_nexus_data(filename='combo.nex')
Пример #44
0
def Concatenate(prefix):
    """Concatenate every ``*.nex`` file in the working directory.

    The combined matrix is written to ``btCOMBINED.nex`` and exported in
    phylip format to ``<prefix>.phy``.
    """
    output_name = 'btCOMBINED.nex'
    # Skip the output of a previous run: it matches '*.nex' too and would
    # otherwise be merged back into the new concatenation.
    file_list = [fname for fname in glob('*.nex') if fname != output_name]
    nexi = [(fname, Nexus.Nexus(fname)) for fname in file_list]
    combined = Nexus.combine(nexi)
    # Pass the path so no open handle is left behind.
    combined.write_nexus_data(filename=output_name)
    combined.export_phylip(prefix + '.phy')
Пример #45
0
# a little script to concatenate lots of nexus files in a folder
# and write a new one.
from Bio.Nexus import Nexus
import os

infile = "/Users/robertlanfear/Desktop/turtles-individual-nexus-files-for-loci"
outfile = os.path.join(infile, "alignment.nex")

# os.walk yields (dirpath, dirnames, filenames); the first triple describes
# `infile` itself. The output of an earlier run lives in the same folder and
# must not be concatenated into the new alignment.
file_list = [fname for fname in next(os.walk(infile))[2]
             if fname != os.path.basename(outfile)]
# The names from os.walk are bare file names, so join them with `infile`
# before opening; the bare name is kept as the partition name.
nexi = [(fname, Nexus.Nexus(os.path.join(infile, fname))) for fname in file_list]
combined = Nexus.combine(nexi)
# Pass the path so no open handle is left behind.
combined.write_nexus_data(filename=outfile)
def concat_alignment(files, output):
    """Concatenate the nexus files in ``files`` and write them to ``output``.

    Each partition is named after its file path with ``alignments_dir``,
    dots and dashes removed.

    Args:
        files: iterable of paths to nexus alignments.
        output: path of the combined nexus file to write.
    """
    nexi = []
    for fname in files:
        partition_name = (fname.replace(alignments_dir, '')
                          .replace(".", "")
                          .replace("-", ""))
        nexi.append((partition_name, Nexus.Nexus(fname)))
    combined = Nexus.combine(nexi)
    # Pass the path rather than an open handle, which was never closed before.
    combined.write_nexus_data(filename=output)
Пример #47
0
#!/usr/bin/env python

# Author: Gregory S Mendez

# This script will create a super matrix alignment file in nexus format from input alignments in nexus format

# Named variables. Every run needs the following defined:
# 1) --in_dir - The directory containing the nexus alignments that need to be merged.
# 2) --out - The full filepath and name you want for the output file.

from Bio.Nexus import Nexus
import argparse, glob

# Argument Parser
parser = argparse.ArgumentParser(description='This script will create a super matrix alignment file from input alignments')
parser.add_argument('--in_dir', required=True, help='The input directory containing alignment files.')
parser.add_argument('--out', required=True, help='The filepath and filename of the output file.')
args = parser.parse_args()

IN_DIR = args.in_dir
OUT = args.out
FILE_LIST = glob.glob('%s/*.nex' % IN_DIR)
if not FILE_LIST:
    # Fail with a usage error instead of an AttributeError further down
    # when there is nothing to combine.
    parser.error('no .nex files found in %s' % IN_DIR)
NEXI = [(FNAME, Nexus.Nexus(FNAME)) for FNAME in FILE_LIST]
COMBINED = Nexus.combine(NEXI)
# Pass the path so no open handle is left behind.
COMBINED.write_nexus_data(filename=OUT)