示例#1
0
def get_response_content(fs):
    """
    Lay out the submitted newick tree and render it as an image.
    @param fs: form data providing tree, imageformat,
    and the daylight, curved and arc layout flags
    @return: the result of DrawTreeImage.get_tree_image for a 640x480 image
    @raise HandlingError: if the tree has negative branch lengths
    or if CairoUtil reports an error while drawing
    """
    # parse the newick string as a SpatialTree and validate it
    spatial_tree = Newick.parse(fs.tree, SpatialTree.SpatialTree)
    spatial_tree.assert_valid()
    if spatial_tree.has_negative_branch_lengths():
        raise HandlingError(
            'drawing a tree with negative branch lengths is not implemented')
    spatial_tree.add_branch_lengths()
    # Apply the first requested layout.
    # A RuntimeError raised by a daylight based layout is ignored,
    # so the tree is still drawn in whatever state the layout left it.
    if fs.daylight:
        try:
            straight_layout = FastDaylightLayout.StraightBranchLayout()
            straight_layout.do_layout(spatial_tree)
        except RuntimeError:
            pass
    elif fs.curved:
        try:
            curved_layout = FastDaylightLayout.CurvedBranchLayout()
            curved_layout.set_min_segment_count(400)
            curved_layout.do_layout(spatial_tree)
        except RuntimeError:
            pass
    elif fs.arc:
        EqualArcLayout.do_layout(spatial_tree)
    # render the image, translating drawing errors for the caller
    try:
        image_ext = Form.g_imageformat_to_ext[fs.imageformat]
        image_size = (640, 480)
        return DrawTreeImage.get_tree_image(
            spatial_tree, image_size, image_ext)
    except CairoUtil.CairoUtilError as err:
        raise HandlingError(err)
示例#2
0
def get_response_content(fs):
    """
    Simulate an ancestral alignment using a Jukes-Cantor rate matrix.
    @param fs: form data providing the newick tree and the fasta alignment
    @return: one fasta sequence per tree node in preorder,
    joined by newlines and terminated by a newline
    @raise HandlingError: if the alignment is rejected
    or if the simulation fails
    """
    # parse and validate the tree
    tree = Newick.parse(fs.tree, Newick.NewickTree)
    tree.assert_valid()
    # parse the observed nucleotide alignment
    try:
        observed = Fasta.Alignment(fs.fasta.splitlines())
        observed.force_nucleotide()
    except Fasta.AlignmentError as err:
        raise HandlingError(err)
    # build the jukes cantor rate matrix object over the nucleotides
    jc_dict = RateMatrix.get_jukes_cantor_rate_matrix()
    nt_states = list('ACGT')
    jc_rows = MatrixUtil.dict_to_row_major(jc_dict, nt_states, nt_states)
    jc_model = RateMatrix.RateMatrix(jc_rows, nt_states)
    # simulate the alignment that includes the ancestral sequences
    try:
        simulated = PhyLikelihood.simulate_ancestral_alignment(
            tree, observed, jc_model)
    except PhyLikelihood.SimulationError as err:
        raise HandlingError(err)
    # order the output sequences by a preorder traversal of the tree
    fasta_chunks = [
        simulated.get_fasta_sequence(node.name) for node in tree.preorder()]
    return '\n'.join(fasta_chunks) + '\n'
示例#3
0
def get_response_content(fs):
    """
    Draw a graph using coordinates derived from its Laplacian.
    The drawn coordinates come from Euclid.dccov_to_points applied to
    the pseudoinverse of the Laplacian of the (optionally weighted) graph.
    @param fs: form data providing graph_data, weighted, flip, black,
    total_width, total_height, border and imageformat
    @return: the result of get_image_string
    @raise HandlingError: if the drawable area is too small
    or if CairoUtil reports an error while drawing
    """
    # read the input coordinates and the edges as index pairs
    input_points, edges = read_points_and_edges(fs.graph_data)
    # a weighted edge gets the reciprocal of its euclidean length
    if fs.weighted:
        coords = [np.array(p) for p in input_points]
        lengths = [np.linalg.norm(coords[j] - coords[i]) for i, j in edges]
        weights = [1.0 / length for length in lengths]
    else:
        weights = [1.0 for _ in edges]
    # make sure that the border leaves something to draw on
    drawable_width = fs.total_width - 2 * fs.border
    drawable_height = fs.total_height - 2 * fs.border
    if drawable_width < 1 or drawable_height < 1:
        raise HandlingError(
            'the image dimensions do not allow for enough drawable area')
    # embed the vertices and optionally mirror the first coordinate
    laplacian = edges_to_laplacian(edges, weights)
    embedded = Euclid.dccov_to_points(np.linalg.pinv(laplacian))
    points = []
    for p in embedded:
        x = -p[0] if fs.flip else p[0]
        points.append((x, p[1]))
    # the colors are defined by the first coordinate of each drawn point
    x_coords, _ = zip(*points)
    colors = valuations_to_colors(x_coords)
    # draw the image
    ext = Form.g_imageformat_to_ext[fs.imageformat]
    info = ImageInfo(fs.total_width, fs.total_height, fs.border, ext)
    try:
        return get_image_string(points, edges, colors, fs.black, info)
    except CairoUtil.CairoUtilError as err:
        raise HandlingError(err)
示例#4
0
def parse_module_lines(lines):
    """
    Parse the module assignment rows of the MMC csv output.
    The first line is skipped.
    Each remaining line must consist of five comma separated values,
    of which only the first three are used.
    One is subtracted from the module index and from the gene index.
    @param lines: lines of the MMC csv output
    @return: (gene labels, module indices, gene indices)
    @raise HandlingError: if there are too few lines or a line is malformed
    """
    def to_index(raw_value, complaint):
        # convert the raw text to an integer and subtract one
        try:
            return int(raw_value) - 1
        except ValueError:
            raise HandlingError(complaint + raw_value)

    # do some basic validation
    min_nlines = 3
    if len(lines) < min_nlines:
        raise HandlingError('expected at least %d module lines' % min_nlines)
    # collect a [label, module index, gene index] triple for each data line
    triples = []
    for line in lines[1:]:
        fields = parse_comma_separated_line(line)
        if len(fields) != 5:
            raise HandlingError(
                'expected five comma separated values on each module line')
        gene_label, raw_module_index, raw_gene_index = fields[:3]
        module_index = to_index(
            raw_module_index,
            'expected the module index to be an integer: ')
        gene_index = to_index(
            raw_gene_index,
            'expected the gene index to be an integer: ')
        triples.append([gene_label, module_index, gene_index])
    # transpose the triples into the three parallel sequences
    return zip(*triples)
示例#5
0
def get_response_content(fs):
    """
    Repeatedly break long branches until each branch is short enough.
    The maximum allowed branch length is the total tree length
    divided by the requested minimum number of segments.
    @param fs: form data providing the newick tree and the segment count
    @return: the newick string of the segmented tree followed by a newline
    @raise HandlingError: if the root has a branch length
    or if a non-root node lacks a branch length
    """
    # get the tree
    tree = Newick.parse(fs.tree, Newick.NewickTree)
    tree.assert_valid()
    # determine the longest branch that will be left unbroken
    segment_count = fs.segments
    length_limit = tree.get_total_length() / float(segment_count)
    # keep making passes over the tree until a pass adds no node
    grew = True
    while grew:
        snapshot = list(tree.preorder())
        for node in snapshot:
            if node is tree.root:
                if node.blen is not None:
                    raise HandlingError(
                        'the root node should not have a branch length')
                continue
            if node.blen is None:
                raise HandlingError(
                    'each non-root node should have a branch length')
            if node.blen > length_limit:
                # insert a node that shares the name of the node below it
                inserted = Newick.NewickNode()
                inserted.name = node.name
                tree.insert_node(inserted, node.parent, node, .5)
        grew = len(snapshot) != len(list(tree.preorder()))
    # return the response
    return tree.get_newick_string() + '\n'
示例#6
0
def get_response_content(fs):
    """
    Sample sequences on a tree and return them as a fasta alignment.
    The sampling is delegated to JC69.sample_sequences.
    @param fs: form data providing the newick tree, an optional list of
    ordered tip names (one per line) and the sequence length
    @return: the fasta string of the sampled alignment plus a newline
    @raise HandlingError: if the ordered names are provided
    but do not match the names of the tips of the tree
    """
    # get the tree
    tree = Newick.parse(fs.tree, Newick.NewickTree)
    tree.assert_valid()
    # get the sequence order if it exists
    ordered_names = Util.get_stripped_lines(fs.order.splitlines())
    if ordered_names:
        # the provided names must be exactly the names of the tips
        observed_name_set = set(ordered_names)
        expected_name_set = set(tip.get_name() for tip in tree.gen_tips())
        extra_names = observed_name_set - expected_name_set
        missing_names = expected_name_set - observed_name_set
        if extra_names:
            msg_a = 'the list of ordered names includes these names '
            msg_b = 'not found in the tree: %s' % str(tuple(extra_names))
            raise HandlingError(msg_a + msg_b)
        if missing_names:
            msg_a = 'the tree includes these names not found in the list '
            msg_b = 'of ordered names: %s' % str(tuple(missing_names))
            raise HandlingError(msg_a + msg_b)
    else:
        # Default to the order in which the tree generates its tips.
        # The loop variable must be the one whose name is requested;
        # the earlier code referenced an undefined name here.
        ordered_names = [tip.get_name() for tip in tree.gen_tips()]
    # do the sampling
    sampled_sequences = JC69.sample_sequences(tree, ordered_names, fs.length)
    alignment = Fasta.create_alignment(ordered_names, sampled_sequences)
    # return the response
    return alignment.to_fasta_string() + '\n'
示例#7
0
def get_response_content(fs):
    """
    Validate the matrix, labels and iteration index, then draw the image.
    @param fs: form data providing matrix, labels (one per line)
    and the iteration index
    @return: the result of get_image_string
    @raise HandlingError: if the matrix is too small, if the labels are
    missing, miscounted or repeated, or if the iteration is out of range
    """
    # the distance matrix needs at least three rows
    D = fs.matrix
    nrows = len(D)
    if nrows < 3:
        raise HandlingError(
            'the distance matrix should have at least three rows')
    # there must be exactly one distinct label per row
    ordered_labels = Util.get_stripped_lines(StringIO(fs.labels))
    if not ordered_labels:
        raise HandlingError('no ordered labels were provided')
    if len(ordered_labels) != nrows:
        raise HandlingError(
            'the number of ordered labels '
            'should be the same as the number of rows in the matrix')
    if len(set(ordered_labels)) != len(ordered_labels):
        raise HandlingError('the ordered labels must be unique')
    # the visualized iteration index is limited by the number of rows
    first_iteration = 1
    last_iteration = nrows - 2
    iteration = fs.iteration
    if not (first_iteration <= iteration <= last_iteration):
        bounds = (first_iteration, last_iteration)
        raise HandlingError(
            'the iteration index ' + 'should be in [%d, %d]' % bounds)
    # return the image string
    return get_image_string(D, ordered_labels, iteration)
示例#8
0
def process(args, raw_hud_lines, nseconds=2):
    """
    Select rows and columns of the decoded data and report the selection.
    @param args: an object providing the nwords and nchars sample sizes
    @param raw_hud_lines: lines that are passed to hud.decode
    @param nseconds: passed through to get_selections
    @return: a multi-line report without trailing whitespace
    @raise HandlingError: if the data has fewer rows than nwords
    or if its first row has fewer entries than nchars
    """
    names, data = hud.decode(raw_hud_lines)
    # the data must be large enough for the requested sample
    if len(data) < args.nwords:
        raise HandlingError(
            'the number of OTUs is smaller than the desired sample')
    if len(data[0]) < args.nchars:
        raise HandlingError(
            'the number of characters is smaller than the desired sample')
    # create the matrix and select row and column indices
    M = np.array(data)
    row_indices, col_indices = get_selections(
        M, args.nwords, args.nchars, nseconds)
    shown_rows = sorted(row_indices)
    shown_cols = sorted(col_indices)
    # report the separation followed by the index selections
    separation = get_separation(M, row_indices, col_indices)
    report = [
        'best separation: ' + str(separation),
        'selected row indices: ' + str(shown_rows),
        'selected column indices: ' + str(shown_cols),
    ]
    # report the selected values, one named row per line
    for i in shown_rows:
        values = ' '.join(str(M[i, j]) for j in shown_cols)
        report.append(names[i] + '\t' + values)
    return '\n'.join(report).rstrip()
示例#9
0
def get_response_content(fs):
    """
    Analyze an alignment on a tree under a three component mixture model.
    @param fs: form data providing tree, alignment, the three weights
    (weight_a, weight_b, weight_c) and the three rate matrices
    (matrix_a, matrix_b, matrix_c)
    @return: the string produced by do_analysis followed by a newline
    @raise HandlingError: if a rate matrix is not 4x4
    or if the alignment is rejected
    """
    # get the tree
    tree = Newick.parse(fs.tree, Newick.NewickTree)
    tree.assert_valid()
    # get the mixture components, each of which must be a 4x4 matrix
    weights = [fs.weight_a, fs.weight_b, fs.weight_c]
    matrices = [fs.matrix_a, fs.matrix_b, fs.matrix_c]
    if any(R.shape != (4, 4) for R in matrices):
        raise HandlingError('expected each nucleotide rate matrix to be 4x4')
    # get the nucleotide alignment
    try:
        alignment = Fasta.Alignment(fs.alignment.splitlines())
        alignment.force_nucleotide()
    except Fasta.AlignmentError as err:
        raise HandlingError(err)
    # scale the weights by their sum to get the mixture proportions
    weight_sum = sum(weights)
    mixture_proportions = [weight / weight_sum for weight in weights]
    # wrap each matrix in a rate matrix object over the nucleotides
    ordered_states = list('ACGT')
    rate_matrix_objects = [
        RateMatrix.RateMatrix(R.tolist(), ordered_states) for R in matrices]
    # create and normalize the mixture model
    mixture_model = SubModel.MixtureModel(
        mixture_proportions, rate_matrix_objects)
    mixture_model.normalize()
    # return the html string
    return do_analysis(mixture_model, alignment, tree) + '\n'
示例#10
0
def get_response_content(fs):
    """
    Render a codon rate matrix as an html heat map.
    @param fs: form data providing the matrix and maxcategories
    @return: the html from HeatMap.PreHeatMap followed by a newline
    @raise HandlingError: if the matrix does not have one row and one
    column per entry of Codon.g_sorted_non_stop_codons
    """
    # read the matrix from the form data
    R = fs.matrix
    nrows, ncols = R.shape
    # the matrix must be square with one row per non-stop codon
    codons = Codon.g_sorted_non_stop_codons
    ncodons = len(codons)
    if nrows != ncodons:
        raise HandlingError('expected %d rows but got %d' % (ncodons, nrows))
    if ncols != ncodons:
        raise HandlingError(
            'expected %d columns but got %d' % (ncodons, ncols))
    # each label shows the amino acid letter followed by the codon
    labels = [
        '%s.%s.' % (Codon.g_codon_to_aa_letter[codon], codon)
        for codon in codons]
    # the same labels are used for the rows and for the columns
    heatmap = HeatMap.LabeledHeatMap(
        R.tolist(), fs.maxcategories, labels, labels)
    html_string = HeatMap.PreHeatMap(heatmap).get_example_html()
    # return the response
    return html_string + '\n'
示例#11
0
def parse_lines(lines):
    """
    Parse city coordinates from lines that follow a header line.
    Empty lines are ignored and the first nonempty line is a header.
    Each subsequent line has five whitespace separated values:
    the city name followed by four numbers.
    @param lines: stripped input lines
    @return: a list of (city, lat_deg, lat_min, lon_deg, lon_min) tuples
    @raise HandlingError: if there is no header line and data line,
    if a data line does not have five values,
    or if one of the four numbers cannot be read
    """
    nonempty = [line for line in lines if line]
    if not nonempty:
        raise HandlingError('no input was found')
    if len(nonempty) < 2:
        raise HandlingError('expected at least one header and data line')
    rows = []
    # skip the header line
    for line in nonempty[1:]:
        fields = line.split()
        if len(fields) != 5:
            raise HandlingError('expected five values per data line')
        city = fields[0]
        try:
            latd, latm, lond, lonm = [float(field) for field in fields[1:]]
        except ValueError:
            raise HandlingError('error reading a value as a number')
        rows.append((city, latd, latm, lond, lonm))
    return rows
示例#12
0
def get_response_content(fs):
    """
    Build a tree from a distance matrix and return its newick string.
    @param fs: form data providing matrix, labels (one per line)
    and the njrecourse and halvingrecourse fallback flags
    @return: the newick string of the built tree followed by a newline
    @raise HandlingError: if the matrix has fewer than three rows,
    if the number of labels differs from the number of rows,
    or if the tree builder reports too high a complexity
    """
    # read the matrix
    D = fs.matrix
    if len(D) < 3:
        raise HandlingError('the matrix should have at least three rows')
    # read the ordered labels
    ordered_labels = Util.get_stripped_lines(fs.labels.splitlines())
    if len(ordered_labels) != len(D):
        msg_a = 'the number of ordered labels should be the same '
        msg_b = 'as the number of rows in the matrix'
        raise HandlingError(msg_a + msg_b)
    # create the tree building object
    splitter = Clustering.StoneExactDMS()
    tree_builder = NeighborhoodJoining.TreeBuilder(D.tolist(), ordered_labels,
                                                   splitter)
    # Set the fallback method from the form flags.
    # No fallback name is set when neither flag is selected.
    # The flags are read as attributes like every other field here,
    # so the stale and unused fs.getfirst('recourse') lookup was removed.
    if fs.njrecourse:
        tree_builder.set_fallback_name('nj')
    elif fs.halvingrecourse:
        tree_builder.set_fallback_name('halving')
    # assert that the computation will not take too long
    if tree_builder.get_complexity() > 1000000:
        raise HandlingError('this computation would take too long')
    # build the tree
    tree = tree_builder.build()
    # return the response
    return NewickIO.get_newick_string(tree) + '\n'
示例#13
0
def get_response_content(fs):
    """
    Estimate the distance between a pair of aligned sequences.
    The estimate is the result of a golden section search
    applied to an Objective built from the sequences and the rate matrix.
    @param fs: form data providing fasta, matrix and states (one per line)
    @return: a one line report of the estimated distance
    @raise HandlingError: if the alignment is rejected, if it does not
    have exactly two sequences, or if the states are miscounted or repeated
    """
    # read the alignment, which must be a sequence pair
    try:
        alignment = Fasta.Alignment(fs.fasta.splitlines())
    except Fasta.AlignmentError as err:
        raise HandlingError('fasta alignment error: ' + str(err))
    if alignment.get_sequence_count() != 2:
        raise HandlingError('expected a sequence pair')
    # read the rate matrix and its distinct ordered states
    R = fs.matrix
    ordered_states = Util.get_stripped_lines(fs.states.splitlines())
    if len(ordered_states) != len(R):
        raise HandlingError(
            'the number of ordered states must be the same '
            'as the number of rows in the rate matrix')
    if len(set(ordered_states)) != len(ordered_states):
        raise HandlingError('the ordered states must be unique')
    # create the objective function from the rate matrix object
    rate_matrix_object = RateMatrix.RateMatrix(R.tolist(), ordered_states)
    objective = Objective(alignment.sequences, rate_matrix_object)
    # Use golden section search to find the mle distance.
    # The bracket is just a suggestion.
    mle_distance = optimize.golden(objective, brack=(0.51, 2.01))
    # write the response
    report = ' '.join(['maximum likelihood distance:', str(mle_distance)])
    return report + '\n'
示例#14
0
def get_supplementary_object(fs):
    """
    Create the supplementary object from the module level newick data.
    @param fs: a FieldStorage object containing the cgi arguments
    @return: a SupplementaryObject built from the map from taxon name
    to domain, the full tree data, and the like_nj flag
    @raise HandlingError: if the taxa of the three subtrees
    do not partition the taxa of the full tree
    """
    # extract the name sets from the newick tree strings
    archaea_names = newick_string_to_tip_names(g_archaea_data)
    bacteria_names = newick_string_to_tip_names(g_bacteria_data)
    eukaryota_names = newick_string_to_tip_names(g_eukaryota_data)
    all_names = newick_string_to_tip_names(g_full_data)
    domain_name_pairs = (
        ('archaea', archaea_names),
        ('bacteria', bacteria_names),
        ('eukaryota', eukaryota_names),
    )
    # the subtree name counts must add up to the full name count
    nfull = len(all_names)
    ndisjoint = sum(len(names) for domain, names in domain_name_pairs)
    if ndisjoint != nfull:
        raise HandlingError('there are %d taxa in the full tree '
                            'but %d taxa in its subtrees' % (nfull, ndisjoint))
    # the subtree names must jointly be the names of the full tree
    if archaea_names | bacteria_names | eukaryota_names != all_names:
        raise HandlingError('the set of taxa in the full tree '
                            'is not the union of taxa in its subtrees')
    # create the map from taxon name to taxonomic category
    taxon_to_domain = {}
    for domain, names in domain_name_pairs:
        for name in names:
            taxon_to_domain[name] = domain
    taxon_to_domain['all-bacteria'] = 'bacteria'
    # create and return the supplementary object
    return SupplementaryObject(taxon_to_domain, g_full_data, fs.like_nj)
示例#15
0
def get_response_content(fs):
    """
    Simulate a pair of sequences using a rate matrix from the F84 module.
    @param fs: form data providing the nucleotide weights A, C, G and T,
    together with kappa, distance and length
    @return: the simulated pair as a fasta string followed by a newline
    @raise HandlingError: if the purine or pyrimidine frequency is not
    positive or if kappa is too small for the nucleotide frequencies
    """
    # scale the nucleotide weights so that they sum to one
    nt_weights = [fs.A, fs.C, fs.G, fs.T]
    total_weight = float(sum(nt_weights))
    nt_probs = [weight / total_weight for weight in nt_weights]
    # Assert that the kappa value and the nucleotide
    # probabilities are compatible.
    A, C, G, T = nt_probs
    R = float(A + G)
    Y = float(C + T)
    if R <= 0:
        raise HandlingError('the frequency of a purine must be positive')
    if Y <= 0:
        raise HandlingError('the frequency of a pyrimidine must be positive')
    if fs.kappa <= max(-Y, -R):
        raise HandlingError(
            'kappa must be greater than max(-R, -Y) '
            'where R and Y are the purine and pyrimidine frequencies')
    # create the rate matrix object and simulate a pair of sequences
    model = F84.create_rate_matrix(fs.kappa, nt_probs)
    first, second = PairLikelihood.simulate_sequence_pair(
        fs.distance, model, fs.length)[:2]
    # round trip the pair through an alignment object
    fasta_lines = ['>first', ''.join(first), '>second', ''.join(second)]
    fasta_text = '\n'.join(fasta_lines) + '\n'
    return Fasta.Alignment(StringIO(fasta_text)).to_fasta_string() + '\n'
示例#16
0
def get_response_content(fs):
    """
    Prune the tree to the selected tips and report the leaf weights.
    @param fs: form data providing tree, selection (one name per line)
    and the stone and thompson method flags
    @return: one 'name: weight' line per reported pair
    @raise HandlingError: if the tree has negative branch lengths,
    if a selected name is not a tip, or if a node search fails
    """
    # get the tree
    tree = Newick.parse(fs.tree, Newick.NewickTree)
    tree.assert_valid()
    tree.add_branch_lengths()
    if tree.has_negative_branch_lengths():
        raise HandlingError(
            'calculating weights for a tree '
            'with negative branch lengths is not implemented')
    # every selected name must be the name of a tip
    selection = Util.get_stripped_lines(fs.selection.splitlines())
    chosen = set(selection)
    available = set(tip.get_name() for tip in tree.gen_tips())
    invalid_names = chosen - available
    if invalid_names:
        raise HandlingError(
            'the following selected names are not valid tips: ' +
            str(tuple(invalid_names)))
    # prune each tip that was not selected
    unselected = set(tip.name for tip in tree.gen_tips()) - set(selection)
    for name in unselected:
        try:
            doomed = tree.get_unique_node(name)
        except NewickSearchError as err:
            raise HandlingError(err)
        tree.prune(doomed)
    # get the weights using the requested method
    if fs.stone:
        name_weight_pairs = LeafWeights.get_stone_weights(tree)
    elif fs.thompson:
        name_weight_pairs = LeafWeights.get_thompson_weights(tree)
    # report the weights
    report = '\n'.join('%s: %f' % pair for pair in name_weight_pairs)
    return report + '\n'
示例#17
0
def get_response_content(fs):
    """
    Evaluate a split of the tips of a tree as a quadratic form.
    The selection is encoded as a vector Y of +1 and -1 entries
    and the reported value is Y R Y' where R is Clustering.get_R_balaji
    applied to the distance matrix of the tips.
    @param fs: form data providing tree and selection (one name per line)
    @return: the value as a string followed by a newline
    @raise HandlingError: if a selected name is not a tip
    or if the selection or its complement is empty
    """
    # get the tree
    tree = NewickIO.parse(fs.tree, FelTree.NewickTree)
    # every selected name must be the name of a tip
    selection = Util.get_stripped_lines(fs.selection.splitlines())
    chosen = set(selection)
    available = set(tip.get_name() for tip in tree.gen_tips())
    invalid_names = chosen - available
    if invalid_names:
        raise HandlingError(
            'the following selected names ' +
            'are not valid tips: %s' % str(tuple(invalid_names)))
    # both sides of the split must be nonempty
    complement = available - chosen
    if not (chosen and complement):
        raise HandlingError('the selection is degenerate')
    # encode the split using the order in which the tips are generated
    ordered_names = [tip.get_name() for tip in tree.gen_tips()]
    Y = np.array([1 if name in chosen else -1 for name in ordered_names])
    # evaluate the quadratic form
    D = tree.get_distance_matrix(ordered_names)
    R = Clustering.get_R_balaji(D)
    value = np.dot(np.dot(Y, R), Y.T)
    # return the taxon split evaluation
    return str(value) + '\n'
示例#18
0
def get_response_content(fs):
    """
    Analyze each of the submitted newick trees and report the results.
    @param fs: form data providing trees (one newick string per line),
    epsilon and the selected options
    @return: the response lines of each analysis,
    with an empty line after each tree
    @raise HandlingError: if a tree has fewer than four tips,
    an unlabeled tip or a repeated tip label,
    or if epsilon is outside of [0, 1)
    """
    # parse every tree and check its tips before analyzing anything
    trees = []
    for tree_string in iterutils.stripped_lines(StringIO(fs.trees)):
        tree = NewickIO.parse(tree_string, FelTree.NewickTree)
        tip_names = [tip.get_name() for tip in tree.gen_tips()]
        ntips = len(tip_names)
        if ntips < 4:
            raise HandlingError(
                'expected at least 4 tips but found ' + str(ntips))
        if any(name is None for name in tip_names):
            raise HandlingError('each terminal node must be labeled')
        if len(set(tip_names)) != ntips:
            raise HandlingError('each terminal node label must be unique')
        trees.append(tree)
    # get the threshold for negligibility of an eigenvector loading
    epsilon = fs.epsilon
    if not (0 <= epsilon < 1):
        raise HandlingError('invalid threshold for negligibility')
    # analyze each tree
    selected_options = fs.options
    results = [AnalysisResult(tree, epsilon) for tree in trees]
    # write the lines of each result followed by an empty line
    out = StringIO()
    for result in results:
        for line in result.get_response_lines(selected_options):
            out.write('%s\n' % (line,))
        out.write('\n')
    # return the response
    return out.getvalue()
示例#19
0
def get_response_content(fs):
    """
    Run a tree building simulation and report the partition errors.
    @param fs: form data providing the exact, sign, nj and random
    criterion flags together with tree, iterations and length
    @return: the response text
    @raise HandlingError: if the requested computation
    is predicted to take too many steps
    """
    # read the criterion string, creating the splitter object
    if fs.exact:
        splitter = Clustering.StoneExactDMS()
    elif fs.sign:
        splitter = Clustering.StoneSpectralSignDMS()
    elif fs.nj:
        splitter = Clustering.NeighborJoiningDMS()
    elif fs.random:
        splitter = Clustering.RandomDMS()
    # read the original tree
    tree = NewickIO.parse(fs.tree, FelTree.NewickTree)
    # define the maximum number of steps we want
    max_steps = 1000000
    # Make sure that the splitter object is appropriate
    # for the number of taxa and the number of tree reconstructions.
    ntaxa = len(list(tree.gen_tips()))
    if splitter.get_complexity(ntaxa) * fs.iterations > max_steps:
        msg_a = 'use a faster bipartition function, '
        msg_b = 'fewer taxa, or fewer tree reconstructions'
        raise HandlingError(msg_a + msg_b)
    # define the simulation parameters
    sim = Simulation(splitter, 'nj', 'cgi tree building simulation')
    sim.set_original_tree(tree)
    sim.set_step_limit(max_steps)
    # define an arbitrary but consistent ordering of the taxa
    ordered_names = [node.name for node in tree.gen_tips()]
    # attempt to simulate a bunch of distance matrices
    sampler = DMSampler.DMSampler(tree, ordered_names, fs.length)
    distance_matrices = []
    for result in sampler.gen_samples_or_none():
        # if a proposal was accepted then add it to the list
        if result:
            sequence_list, distance_matrix = result
            distance_matrices.append(distance_matrix)
        # if enough accepted samples have been generated then stop sampling
        remaining_acceptances = fs.iterations - len(distance_matrices)
        if not remaining_acceptances:
            break
        # If the remaining number of computrons is predicted
        # to be too much then stop.
        if sampler.get_remaining_computrons(remaining_acceptances) > max_steps:
            msg_a = 'this combination of parameters '
            msg_b = 'is predicted to take too long'
            # the message is the concatenation of both parts;
            # raising with an undefined name would mask the real error
            raise HandlingError(msg_a + msg_b)
    sim.run(distance_matrices, ordered_names)
    # define the response
    out = StringIO()
    print >> out, 'partition error count frequencies:'
    print >> out, sim.get_histogram_string()
    print >> out, ''
    print >> out, 'weighted partition errors:', sim.get_deep_loss()
    # return the response
    return out.getvalue()
示例#20
0
def get_response_content(fs):
    """
    Compare sign partitions from two distance transformations per tree.
    For each tree the tips are split by the sign of a principal
    coordinate computed from the distance matrix D (a failure is counted
    as an error) and from the elementwise square of D
    (a failure is counted as a counterexample).
    @param fs: form data providing trees (one newick string per line)
    @return: a report of each invalid partition followed by the counts
    @raise HandlingError: if a tree has fewer than four tips,
    an unlabeled tip or a repeated tip label
    """
    def sign_partition(names, loadings):
        # split the names according to the signs of their loadings
        nonneg = frozenset(
            name for name, v in zip(names, loadings) if v >= 0)
        neg = frozenset(
            name for name, v in zip(names, loadings) if v < 0)
        return frozenset([nonneg, neg])

    # get the newick trees.
    trees = []
    for tree_string in iterutils.stripped_lines(StringIO(fs.trees)):
        # parse each tree and make sure that it conforms to the requirements
        tree = NewickIO.parse(tree_string, FelTree.NewickTree)
        tip_names = [tip.get_name() for tip in tree.gen_tips()]
        ntips = len(tip_names)
        if ntips < 4:
            raise HandlingError('expected at least four tips but found ' +
                                str(ntips))
        if any(name is None for name in tip_names):
            raise HandlingError('each terminal node must be labeled')
        if len(set(tip_names)) != ntips:
            raise HandlingError('each terminal node label must be unique')
        trees.append(tree)
    # begin the response
    out = StringIO()
    nerrors = 0
    ncounterexamples = 0
    for tree in trees:
        # get the set of valid partitions implied by the tree
        valid_parts = TreeComparison.get_partitions(tree)
        names = [tip.get_name() for tip in tree.gen_tips()]
        D = np.array(tree.get_distance_matrix(names))
        # the partition implied by the correct formula should be valid
        part = sign_partition(names, get_principal_coordinate(D))
        if part not in valid_parts:
            nerrors += 1
            print >> out, 'error: a partition that was supposed to be valid was found to be invalid'
            print >> out, 'tree:', NewickIO.get_newick_string(tree)
            print >> out, 'invalid partition:', partition_to_string(part)
            print >> out
        # the partition implied by the incorrect formula might be invalid
        part = sign_partition(names, get_principal_coordinate(D * D))
        if part not in valid_parts:
            ncounterexamples += 1
            print >> out, 'found a counterexample!'
            print >> out, 'tree:', NewickIO.get_newick_string(tree)
            print >> out, 'invalid partition:', partition_to_string(part)
            print >> out
    # finish the response with the counts
    print >> out, 'errors found:', nerrors
    print >> out, 'counterexamples found:', ncounterexamples
    return out.getvalue()
示例#21
0
 def get_running_time(self):
     """
     Report how long the simulation took to run.
     @return: the stop time minus the start time
     @raise HandlingError: if the start time or the stop time is None
     """
     started = self.start_time
     if started is None:
         raise HandlingError('the simulation has not been started')
     stopped = self.stop_time
     if stopped is None:
         raise HandlingError('the simulation was not successfully completed')
     return stopped - started
示例#22
0
def get_response_content(fs):
    """
    Draw an image from a matrix with at least three rows and columns.
    @param fs: form data providing matrix, imageformat
    and the axes, connections and vertices flags
    @return: the result of get_image for a 640x480 image
    @raise HandlingError: if the matrix is too small
    or if CairoUtil reports an error while drawing
    """
    M = fs.matrix
    too_small = M.shape[0] < 3 or M.shape[1] < 3
    if too_small:
        raise HandlingError('expected at least a 3x3 matrix')
    # draw the image, translating drawing errors for the caller
    try:
        image_ext = Form.g_imageformat_to_ext[fs.imageformat]
        image_size = (640, 480)
        return get_image(
            M.tolist(), image_size, image_ext,
            fs.axes, fs.connections, fs.vertices)
    except CairoUtil.CairoUtilError as err:
        raise HandlingError(err)
示例#23
0
def get_response_content(fs):
    """
    Draw sampled points inside the africa polygon colored by an MDS axis.
    Points are sampled inside g_africa_poly, connected by the tree that
    get_mst returns for the non-crossing triangulation edges, and colored
    using the entry of EigUtil.eigh selected by the requested axis.
    @param fs: form data providing seed, npoints, axis, total_width,
    total_height, border and imageformat
    @return: the image string produced by ImgHelper
    @raise ValueError: if the axis is negative or too large
    @raise HandlingError: if the drawable area is too small
    or if CairoUtil reports an error while drawing
    """
    # use a fixed seed if requested
    if fs.seed:
        random.seed(fs.seed)
    # define the max number of rejection iterations
    rejection_limit = fs.npoints * 100
    # validate input
    if fs.axis < 0:
        raise ValueError('the mds axis must be nonnegative')
    # the boundary edges join consecutive polygon vertices cyclically
    nboundary = len(g_africa_poly)
    boundary_edges = [(i, (i + 1) % nboundary) for i in range(nboundary)]
    # sample interior points and triangulate them
    points = sample_with_rejection(fs.npoints, g_africa_poly, rejection_limit)
    x_list, y_list = zip(*points)
    tri = Triangulation(x_list, y_list)
    # interior point indices are offset by the number of boundary points
    interior_edges = [
        (i + nboundary, j + nboundary) for i, j in tri.edge_db.tolist()]
    # get the whole list of points
    allpoints = g_africa_poly + points
    # refine the list of interior edges
    interior_edges = list(
        gen_noncrossing_edges(interior_edges, boundary_edges, allpoints))
    interior_edges = get_mst(interior_edges, allpoints)
    alledges = boundary_edges + interior_edges
    # make the adjacency matrix weighted by reciprocal edge length
    npoints = len(points)
    A = np.zeros((npoints, npoints))
    for ia, ib in interior_edges:
        xa, ya = allpoints[ia]
        xb, yb = allpoints[ib]
        weight = 1 / math.hypot(xb - xa, yb - ya)
        row, col = ia - nboundary, ib - nboundary
        A[row, col] = weight
        A[col, row] = weight
    # decompose the pseudoinverse of the graph laplacian
    L = Euclid.adjacency_to_laplacian(A)
    ws, vs = EigUtil.eigh(np.linalg.pinv(L))
    if fs.axis >= len(ws):
        raise ValueError('choose a smaller mds axis')
    v = vs[fs.axis]
    # scale the loadings in place so that the largest magnitude is one
    v /= max(np.abs(v))
    # boundary points are small and black; interior points are colored
    colors = [(0, 0, 0)] * nboundary + [get_color(x) for x in v]
    radii = [2] * nboundary + [5] * npoints
    # make sure that the border leaves something to draw on
    drawable_width = fs.total_width - 2 * fs.border
    drawable_height = fs.total_height - 2 * fs.border
    if drawable_width < 1 or drawable_height < 1:
        raise HandlingError(
            'the image dimensions do not allow for enough drawable area')
    # draw the image
    ext = Form.g_imageformat_to_ext[fs.imageformat]
    try:
        helper = ImgHelper(
            allpoints, alledges,
            fs.total_width, fs.total_height, fs.border)
        return helper.get_image_string(colors, radii, ext)
    except CairoUtil.CairoUtilError as err:
        raise HandlingError(err)
示例#24
0
def get_response_content(fs):
    """
    Report the leaf-to-leaf distance matrix of a newick tree as text.
    Depending on the request, the response contains a randomly perturbed
    copy of the matrix, the original matrix, and the ordered leaf labels.
    @param fs: a field storage object with the attributes
    tree, inlabels, strength, perturbed, distance and outlabels
    @return: the requested paragraphs separated by blank lines
    @raise HandlingError: if the tree has fewer than two leaves
    or if the provided labels do not match the leaves of the tree
    """
    # get the tree
    tree = NewickIO.parse(fs.tree, FelTree.NewickTree)
    alphabetically_ordered_states = list(
        sorted(node.name for node in tree.gen_tips()))
    n = len(alphabetically_ordered_states)
    if n < 2:
        raise HandlingError('the newick tree should have at least two leaves')
    # read the ordered labels
    states = Util.get_stripped_lines(StringIO(fs.inlabels))
    if len(states) > 1:
        # Compare sorted lists instead of sets so that a repeated label
        # is rejected too; the perturbation loop below assumes that
        # there are exactly n labels.
        if sorted(states) != alphabetically_ordered_states:
            msg_a = 'if ordered labels are provided, '
            msg_b = 'each should correspond to a leaf of the newick tree'
            raise HandlingError(msg_a + msg_b)
    else:
        # with at most one label provided, fall back to alphabetical order
        states = alphabetically_ordered_states
    # create the distance matrix
    D = tree.get_distance_matrix(states)
    # create the perturbed distance matrix if necessary
    if fs.strength:
        # copy each row so that the original matrix is left untouched
        P = [row[:] for row in D]
        for i in range(n):
            for j in range(i):
                # scale the distance by a log-normally distributed factor
                # and keep the perturbed matrix symmetric
                x = random.normalvariate(0, fs.strength)
                new_distance = D[i][j] * math.exp(x)
                P[i][j] = new_distance
                P[j][i] = new_distance
    else:
        P = D
    # start collecting the paragraphs
    paragraphs = []
    # show the perturbed distance matrix if requested
    if fs.perturbed:
        paragraph = StringIO()
        print >> paragraph, 'a perturbed distance matrix:'
        print >> paragraph, MatrixUtil.m_to_string(P)
        paragraphs.append(paragraph.getvalue().strip())
    # show the original distance matrix if requested
    if fs.distance:
        paragraph = StringIO()
        print >> paragraph, 'the original distance matrix:'
        print >> paragraph, MatrixUtil.m_to_string(D)
        paragraphs.append(paragraph.getvalue().strip())
    # show the ordered labels if requested
    if fs.outlabels:
        paragraph = StringIO()
        print >> paragraph, 'ordered labels:'
        print >> paragraph, '\n'.join(states)
        paragraphs.append(paragraph.getvalue().strip())
    # return the response
    return '\n\n'.join(paragraphs) + '\n'
示例#25
0
 def run(self, distance_matrices, ordered_names):
     """
     This function stores the losses for each reconstruction.
     A tree is rebuilt from each distance matrix and compared
     against the original tree of this object.
     @param distance_matrices: a sequence of distance matrices
     @param ordered_names: order of taxa in the distance matrix
     @raise HandlingError: if this object has already been run,
     if no distance matrices were provided,
     if the names do not match the tips of the original tree,
     or if a tree reconstruction fails
     """
     if self.start_time is not None:
         msg = 'each simulation object should be run only once'
         raise HandlingError(msg)
     if not distance_matrices:
         raise HandlingError('no distance matrices were provided')
     tip_name_set = set(node.name for node in self.original_tree.gen_tips())
     if tip_name_set != set(ordered_names):
         raise HandlingError('leaf name mismatch')
     # a start time that is not None also marks this object as used
     self.start_time = time.time()
     # Define the reference tree and its maximum cost
     # under different loss functions.
     reference_tree = self.original_tree
     max_error_count = TreeComparison.get_nontrivial_split_count(
         reference_tree)
     max_loss_value = TreeComparison.get_weighted_split_count(
         reference_tree)
     for distance_matrix in distance_matrices:
         # create the tree builder
         tree_builder = NeighborhoodJoining.TreeBuilder(
             distance_matrix, ordered_names, self.splitter)
         # set parameters of the validating tree builder
         tree_builder.set_fallback_name(self.fallback_name)
         # build the tree
         try:
             query_tree = tree_builder.build()
         except NeighborhoodJoining.NeighborhoodJoiningError as e:
             raise HandlingError(e)
         # Note the number and weight of partition errors
         # during the reconstruction.
         error_count = TreeComparison.get_split_distance(
             query_tree, reference_tree)
         loss_value = TreeComparison.get_weighted_split_distance(
             query_tree, reference_tree)
         # make sure that the summary is internally consistent
         assert error_count <= max_error_count, (error_count,
                                                 max_error_count)
         assert loss_value <= max_loss_value, (loss_value, max_loss_value)
         # save the reconstruction characteristics to use later;
         # the maxima are repeated so that all four lists stay parallel
         self.error_counts.append(error_count)
         self.loss_values.append(loss_value)
         self.max_error_counts.append(max_error_count)
         self.max_loss_values.append(max_loss_value)
     self.stop_time = time.time()
示例#26
0
def get_response_content(fs):
    """
    Remove one node from a newick tree and return the resulting tree.
    @param fs: a field storage object with the attributes tree and node
    @return: the newick string of the modified tree followed by a newline
    @raise HandlingError: if the node cannot be found or is the root
    """
    # parse and validate the tree
    newick_tree = Newick.parse(fs.tree, Newick.NewickTree)
    newick_tree.assert_valid()
    # look up the node that should be removed
    try:
        target = newick_tree.get_unique_node(fs.node)
    except Newick.NewickSearchError as e:
        raise HandlingError(e)
    # the root is the one node that may not be removed
    if target is newick_tree.root:
        raise HandlingError('the root cannot be removed')
    newick_tree.remove_node(target)
    # serialize the modified tree
    newick_string = newick_tree.get_newick_string()
    return newick_string + '\n'
示例#27
0
def get_response_content(fs):
    """
    Build a tree from a distance matrix and return it as a newick string.
    @param fs: a field storage object with the attributes matrix and labels
    @return: the newick string of the tree followed by a newline
    @raise HandlingError: if the matrix has fewer than three rows
    or if the number of labels differs from the number of rows
    """
    distance_matrix = fs.matrix
    nrows = len(distance_matrix)
    if nrows < 3:
        raise HandlingError('the matrix should have at least three rows')
    # one label is expected per row of the matrix
    labels = Util.get_stripped_lines(fs.labels.splitlines())
    if len(labels) != nrows:
        raise HandlingError(
            'the number of ordered labels should be the same '
            'as the number of rows in the matrix')
    # build the tree and serialize it
    nj_tree = NeighborJoining.make_tree(distance_matrix.tolist(), labels)
    newick_string = NewickIO.get_newick_string(nj_tree)
    return newick_string + '\n'
示例#28
0
def get_response_content(fs):
    """
    Collect the inputs for create_image from the form data and draw the image.
    @param fs: a field storage object with the attributes
    tree_string, imageformat, hticks, border, slow, fast and denom
    @return: the value returned by create_image
    @raise HandlingError: if fewer than two horizontal ticks are requested
    or if neither eigendecomposition function is selected
    """
    # define the requested physical size of the images (in pixels)
    physical_size = (640, 480)
    # build the newick tree from the string
    tree = NewickIO.parse(fs.tree_string, FelTree.NewickTree)
    nleaves = len(list(tree.gen_tips()))
    # Get ordered ids with the leaves first,
    # and get the corresponding distance matrix.
    ordered_ids = get_ordered_ids(tree)
    D = np.array(tree.get_partial_distance_matrix(ordered_ids))
    # get the image extension
    ext = Form.g_imageformat_to_ext[fs.imageformat]
    # get the scaling factors and offsets
    if fs.hticks < 2:
        msg = 'expected at least two ticks on the horizontal axis'
        raise HandlingError(msg)
    width, height = physical_size
    xoffset = fs.border
    yoffset = fs.border
    yscale = float(height - 2 * fs.border)
    # hticks ticks divide the drawable width into hticks - 1 intervals
    xscale = (width - 2 * fs.border) / float(fs.hticks - 1)
    # define the eigendecomposition function
    if fs.slow:
        fn = get_augmented_spectrum
    elif fs.fast:
        fn = get_augmented_spectrum_fast
    else:
        # Fail with an explicit message instead of leaving fn unbound,
        # which would surface later as an UnboundLocalError.
        msg = 'expected either the slow or the fast eigendecomposition'
        raise HandlingError(msg)
    # define the target eigenvalues
    tip_ids = [id(node) for node in tree.gen_tips()]
    D_tips = np.array(tree.get_partial_distance_matrix(tip_ids))
    G_tips = Euclid.edm_to_dccov(D_tips)
    target_ws = scipy.linalg.eigh(G_tips, eigvals_only=True) * fs.denom
    # draw the image
    return create_image(ext, physical_size, xscale, yscale, xoffset, yoffset,
                        D, nleaves, fs.hticks, fs.denom, fn, target_ws)
示例#29
0
def get_response_content(fs):
    """
    Plot eigenvectors derived from the laplacian matrix of a graph.
    @param fs: a field storage object with the attributes nvertices, naxes,
    eigenvalue_scaling, imageformat and xaxis_length
    @return: the value returned by create_image_string
    @raise ValueError: if too many axes are requested for the vertex count
    @raise HandlingError: if drawing raises a CairoUtilError
    """
    # check input compatibility
    if fs.naxes + 1 > fs.nvertices:
        raise ValueError(
            'attempting to plot too many eigenvectors '
            'for the given number of vertices')
    # get the eigendecomposition of the doubly centered matrix
    laplacian = create_laplacian_matrix(fs.nvertices)
    edm = Euclid.laplacian_to_edm(laplacian)
    centered = Euclid.edm_to_dccov(edm)
    eigenvalues, eigenvector_columns = np.linalg.eigh(centered)
    eigenvector_rows = eigenvector_columns.T.tolist()
    # Pair each eigenvalue square root with its eigenvector,
    # order the pairs from largest to smallest, and drop the last pair.
    pairs = sorted(zip(np.sqrt(eigenvalues), eigenvector_rows))
    pairs.reverse()
    pairs = pairs[:-1]
    if fs.eigenvalue_scaling:
        vectors = [np.array(vec) * root for root, vec in pairs]
    else:
        vectors = [np.array(vec) for root, vec in pairs]
    points = np.array(zip(*vectors))
    # transform the points to eigenfunctions such that the first point is positive
    eigenfunctions = points.T[:fs.naxes]
    for axis in range(fs.naxes):
        if eigenfunctions[axis][0] < 0:
            eigenfunctions[axis] *= -1
    # define the requested physical size of the images (in pixels)
    image_size = (640, 480)
    # draw the image
    try:
        ext = Form.g_imageformat_to_ext[fs.imageformat]
        return create_image_string(
            ext, image_size, eigenfunctions, fs.xaxis_length)
    except CairoUtil.CairoUtilError as e:
        raise HandlingError(e)
示例#30
0
def get_response_content(fs):
    """
    Show the eigendecomposition of a centered tree distance matrix as html.
    The leaf-to-leaf distance matrix is doubly centered and scaled by -0.5,
    and its eigenvalues and eigenvectors are shown in a labeled table
    with the eigenvalues in decreasing order.
    @param fs: a field storage object with the attribute tree
    @return: an html document as a string
    @raise HandlingError: if the tree has fewer than two leaves
    """
    # get the tree
    tree = NewickIO.parse(fs.tree, FelTree.NewickTree)
    ordered_names = list(sorted(node.name for node in tree.gen_tips()))
    n = len(ordered_names)
    if n < 2:
        raise HandlingError('the newick tree should have at least two leaves')
    # get the eigendecomposition
    D = np.array(tree.get_distance_matrix(ordered_names))
    G = (-0.5) * MatrixUtil.double_centered(D)
    eigenvalues, eigenvector_columns = np.linalg.eigh(G)
    # Order the eigenpairs by decreasing eigenvalue with an index permutation.
    # Sorting (eigenvalue, eigenvector) tuples directly would fall back to
    # comparing the numpy eigenvectors whenever two eigenvalues tie,
    # and that comparison raises a ValueError.
    order = np.argsort(eigenvalues)[::-1]
    sorted_eigenvalues = tuple(eigenvalues[order])
    # Row i corresponds to ordered_names[i],
    # with one column per eigenvector in sorted order.
    M = [tuple(row) for row in eigenvector_columns[:, order]]
    # write the html
    out = StringIO()
    print >> out, '<html>'
    print >> out, '<body>'
    print >> out, HtmlTable.get_labeled_table_string(
            sorted_eigenvalues, ordered_names, M)
    print >> out, '</body>'
    print >> out, '</html>'
    # write the response
    return out.getvalue()