Exemplo n.º 1
0
def cross_distances_by_class(tests, cross_distances, membership):
    """Groups reference-to-test distances by the cluster of each reference.

    The references are the keys of `cross_distances`; `membership` is first
    restricted to them with `p.filter_membership` and then turned into
    clusters with `p.membership_to_clusters`.

    Returns a dict mapping every element of `tests` to a dict that maps each
    cluster to the list of `cross_distances[ref][test]` values taken over the
    members `ref` of that cluster.
    """
    known_refs = cross_distances.keys()
    clusters = p.membership_to_clusters(
        p.filter_membership(membership, known_refs))

    def by_cluster(test):
        # One list of distances per cluster, for a single test instance.
        return dict(
            (label, [cross_distances[ref][test] for ref in refs])
            for label, refs in clusters.items())

    return dict((test, by_cluster(test)) for test in tests)
Exemplo n.º 2
0
def self_distances_by_class(references, self_distances, membership):
    """Groups reference-to-reference distances by cluster.

    `membership` is restricted to `references` with `p.filter_membership`
    and converted to clusters with `p.membership_to_clusters`.

    Returns a dict mapping every reference to a dict that maps each cluster
    to the list of `self_distances[ref][other]` values taken over the
    cluster members `other`, leaving out the reference itself.
    """
    clusters = p.membership_to_clusters(
        p.filter_membership(membership, references))

    def by_cluster(ref):
        # The distance from a reference to itself is excluded.
        return dict(
            (label, [self_distances[ref][other]
                     for other in group if ref != other])
            for label, group in clusters.items())

    return dict((ref, by_cluster(ref)) for ref in references)
Exemplo n.º 3
0
def quartet_test(references,
                 test,
                 membership,
                 distances,
                 use_counterproof=False,
                 dist_args=None):
    """Tests whether an instance belongs to each cluster.

    For every cluster obtained from `membership`, `references` is split into
    the ones inside the cluster (U, the "proof" side) and the ones outside it
    (V, the "counter" side).  A distance matrix is built with
    `make_distance_matrix(test, U, V, distances, **dist_args)`, joined into a
    tree with `nj.neighbor_joining`, and the split returned by `get_split` is
    translated into a conclusion.

    Args:
        references: iterable of reference instances.
        test: the instance being classified.
        membership: passed to `p.membership_to_clusters` to obtain the
            clusters.
        distances: forwarded to `make_distance_matrix`.
        use_counterproof: when true, a second quartet with the roles of U and
            V swapped is also evaluated, and both outcomes are combined
            through `conclusion_matrix`.
        dist_args: optional dict of extra keyword arguments for
            `make_distance_matrix`.

    Returns:
        A dict mapping each cluster to its conclusion.  Clusters for which
        either side has at most one reference get 'neutral' (or
        'inconclusive' when `use_counterproof` is set) without running the
        quartet test.
    """
    if dist_args is None:
        dist_args = {}

    classification = {}
    clusters = p.membership_to_clusters(membership)
    for cluster, members in clusters.items():
        # U: references inside the cluster; V: references outside it.
        U, V = [], []
        for ref in references:
            (U if ref in members else V).append(ref)

        if len(U) <= 1 or len(V) <= 1:
            conclusion = 'neutral' if not use_counterproof else 'inconclusive'
            classification[cluster] = conclusion
            continue

        proof_matrix = make_distance_matrix(test, U, V, distances, **dist_args)
        proof_tree = nj.neighbor_joining(proof_matrix, ['U', 'V', 'u', 'x'])

        if not use_counterproof:
            conclusion = single_split[get_split(proof_tree)]
        else:
            counter_matrix = make_distance_matrix(test, V, U, distances,
                                                  **dist_args)
            # The counterproof tree must be built from the swapped-role
            # matrix; reusing proof_matrix here would only re-test the proof
            # quartet under different labels.
            counter_tree = nj.neighbor_joining(counter_matrix,
                                               ['V', 'U', 'v', 'x'])

            proof = proof_case[get_split(proof_tree)]
            counter = counter_case[get_split(counter_tree)]
            conclusion = conclusion_matrix[(proof, counter)]

        classification[cluster] = conclusion
    return classification
Exemplo n.º 4
0
def partition(names, membership, num_parts, prng=Random()):
  """Splits `names` into `num_parts` lists, spreading each cluster evenly.

  `membership` is restricted to `names` with `p.filter_membership` and
  converted to clusters with `p.membership_to_clusters`.  The members of each
  cluster are shuffled and every part receives the same number of them
  (the floor of cluster size / `num_parts`).  Members left over from all
  clusters are then dealt to the parts one at a time in round-robin order,
  and finally the order of the parts themselves is shuffled.

  Args:
    names: the elements to be partitioned.
    membership: cluster membership of the elements.
    num_parts: number of parts to produce.
    prng: random number generator providing `shuffle`.  Note that the default
      instance is created once, when the function is defined, and is shared
      by every call that does not pass its own generator.

  Returns:
    A list with `num_parts` lists of elements.
  """
  membership = p.filter_membership(membership, names)
  clusters = p.membership_to_clusters(membership)

  part = [[] for _ in range(num_parts)]
  remaining = []
  for cluster, members in clusters.items():
    members = list(members)
    prng.shuffle(members)

    # divmod always floors, so the slice bounds below stay integers even
    # when true division is in effect for the module.
    per_part, rem = divmod(len(members), num_parts)
    if per_part:
      for i in range(num_parts):
        part[i].extend(members[i * per_part:(i + 1) * per_part])

    # The last `rem` shuffled members did not fit in an even split.
    if rem:
      remaining.extend(members[-rem:])

  for i, element in enumerate(remaining):
    part[i % num_parts].append(element)

  prng.shuffle(part)
  return part
Exemplo n.º 5
0
def select(names, membership, fraction, force_fraction=True, prng=Random()):
  """Randomly selects roughly `fraction` of the members of each cluster.

  `membership` is restricted to `names` with `p.filter_membership` and
  converted to clusters with `p.membership_to_clusters`.  For each cluster
  the target count `fraction * len(members)` is rounded up with probability
  equal to its fractional part and down otherwise; that many members are
  then taken from a shuffled copy of the cluster.

  Args:
    names: the elements to select from.
    membership: cluster membership of the elements.
    fraction: fraction of each cluster to select.
    force_fraction: when `fraction` is smaller than 1 / (size of the smallest
      cluster), keep it and emit a warning if true; otherwise raise
      `fraction` to that minimum.
    prng: random number generator providing `random` and `shuffle`.  Note
      that the default instance is created once, when the function is
      defined, and is shared by every call that does not pass its own.

  Returns:
    A list with the selected elements; empty when there are no clusters.
  """
  membership = p.filter_membership(membership, names)
  clusters = p.membership_to_clusters(membership)

  sizes = [len(members) for cluster, members in clusters.items()]
  if not sizes:
    # No clusters to draw from; min() below would raise ValueError.
    return []

  min_fraction = 1.0 / min(sizes)
  if fraction < min_fraction:
    if force_fraction:
      warnings.warn("Some clusters will not be represented")
    else:
      fraction = min_fraction

  selected = []
  for cluster, members in clusters.items():
    members = list(members)
    # Choose which way to round randomly, favoring the closest integer.
    frac, floor = math.modf(fraction * len(members))
    num_selected = int(floor + 1) if prng.random() < frac else int(floor)

    prng.shuffle(members)
    selected.extend(members[:num_selected])

  return selected
Exemplo n.º 6
0
      f.write(newick_format(phylo_tree))

  # Outputs graph image
  if a.graph_image:
    tree_style = graph_style(tree_graph, leaf_ids)
    igraph.plot(clustering, target=a.graph_image, **tree_style)

  # Outputs clustering result
  with open_outfile(output) as f:
    f.write(p.membership_csv_format(membership))

  # Outputs index, if comparison reference was provided
  if a.compare_to:
    with open_outfile(a.partition_output) as f:
      reference_cluster = p.membership_parse(a.compare_to, as_clusters=True)
      obtained_cluster = p.membership_to_clusters(membership)
      index = p.compare_clusters(reference_cluster, obtained_cluster,
          index_name=a.partition_index)
      f.write('%s\n' % pformat(index))

  # Output everything to directory
  if a.results_dir:
    # Create dir if it does not exist
    if not os.path.exists(a.results_dir):
      os.mkdir(a.results_dir)

    # Creates subdirectory containing results for this run
    subdirname = dir_basename(a.input[0])
    subpath = os.path.join(a.results_dir, subdirname)
    if not os.path.exists(subpath):
      os.mkdir(subpath)
Exemplo n.º 7
0
    distances, self_results, cross_results, classes = (result['distances'],
                                                       result['self_results'],
                                                       result['cross_results'],
                                                       result['classes'])

    with open_outfile(output) as f:
        writer = csv.DictWriter(f, fieldnames=['name', 'class', 'trust'])
        writer.writeheader()
        writer.writerows(
            dict(classification, name=name)
            for name, classification in classes.items())

    if a.partition_index:
        missing = set(name for name, classification in classes.items()
                      if not classification['class'])
        conflicts = set(name for name, classification in classes.items()
                        if isinstance(classification['class'], list))
        obtained_membership = dict(
            (name, classification['class'])
            for name, classification in classes.items()
            if name not in missing and name not in conflicts)

        print '\n%d missing class, %d conflicts, %d classified' % (
            len(missing), len(conflicts), len(obtained_membership))
        test_membership = p.filter_membership(membership,
                                              obtained_membership.keys())
        pprint(
            p.compare_clusters(p.membership_to_clusters(test_membership),
                               p.membership_to_clusters(obtained_membership),
                               index_name=a.partition_index))
Exemplo n.º 8
0
 def test_membership_to_clusters(self):
     # Each membership fixture must convert to its matching clusters fixture.
     for expected, membership in ((c1, m1), (c2, m2)):
         self.assertEqual(expected, p.membership_to_clusters(membership))