Пример #1
0
 def test_invalid_distance_measures(self):
     """
     Pass several values that are expected to be rejected as distance
     measures to ``ConvergenceIterator.get_distance``, and require a
     ``ValueError`` for each one
     """
     first = np.array([1, 0.5, 0.4])
     second = np.array([0.9, 0.8, 0.7])
     # A plain string, bare integers and None: each must be refused
     for bad_measure in ("L1", 1, 2, None):
         with pytest.raises(ValueError):
             ConvergenceIterator.get_distance(bad_measure, first, second)
Пример #2
0
 def test_did_not_converge(self):
     """
     Drive a convergence iterator with the same pair of inputs every time
     and require that ``ConvergenceError`` is raised, after exactly as
     many loop iterations as the configured limit
     """
     max_its = 200
     it = ConvergenceIterator(DistanceMeasures.L_INF, 0.5, limit=max_its)
     # The compared values are the same on every round
     old, new = np.array([1]), np.array([2])
     rounds = 0
     with pytest.raises(ConvergenceError):
         while not it.finished():
             rounds += 1
             it.compare(old, new)
     assert rounds == max_its
Пример #3
0
 def test_basic(self):
     """
     Feed a convergence iterator a gap that shrinks by 0.02 per round
     (starting from 1) and check that it reports finished after 25 rounds
     with a threshold of 0.51
     """
     it = ConvergenceIterator(DistanceMeasures.L1, 0.51)
     gap = 1
     rounds = 0
     while not it.finished():
         rounds += 1
         gap -= 0.02
         # Compare a fixed value against one that is ``gap`` away from it
         it.compare(np.array([1]), np.array([1 + gap]))
     assert rounds == 25
Пример #4
0
    def test_truthfinder(self, data):
        """
        Run TruthFinder with a cosine-distance convergence iterator on a
        matrix dataset that uses a custom implication function, and compare
        the outcome against ``truthfinder_results.json``
        """
        def imp(var, val1, val2):
            # Depends only on the difference between the two values;
            # ``var`` is accepted but not used
            return np.exp(-0.5 * (val1 - val2)**2)

        matrix_data = MatrixDataset(data.sv, implication_function=imp)
        it = ConvergenceIterator(DistanceMeasures.COSINE, 0.001)
        truthfinder = TruthFinder(iterator=it)
        self.check_results(
            truthfinder, matrix_data, "truthfinder_results.json"
        )
Пример #5
0
 def test_converge_to_zero(self):
     """
     Run Investment until convergence on input that is known to cause
     problems as trust drains to zero, which causes division by zero.
     """
     claims = [
         ("s1", "x", "one"),
         ("s2", "x", "zero"),
         ("s3", "x", "one"),
         ("s1", "y", "zero"),
         ("s3", "y", "one"),
         ("s4", "y", "one"),
         ("s2", "z", "zero"),
         ("s3", "z", "one"),
     ]
     data = Dataset(claims)
     # Extremely small threshold for the L2 convergence check
     it = ConvergenceIterator(DistanceMeasures.L2, 0.1e-100)
     investment = Investment(iterator=it)
     res = investment.run(data)
     assert res.iterations == 41
Пример #6
0
def main(csv_file):
    """
    Run every algorithm in ``ALGORITHMS`` on the supervised dataset loaded
    from ``csv_file`` and plot, on a log scale, the distances each one
    reports per iteration.

    The distances are obtained by capturing what each run prints to standard
    output (the iterator is created with ``debug=True``). Every non-empty
    captured line must consist of exactly two comma-separated fields, the
    second of which is parsed as the distance.

    :param csv_file: CSV source passed to ``SupervisedData.from_csv``
    """
    # Imported locally so this function does not depend on a module-level
    # import being present
    from contextlib import redirect_stdout

    print("Loading data...")
    sup = SupervisedData.from_csv(csv_file)
    fig, ax = plt.subplots()
    fig.suptitle(
        "Convergence experiment\n"
        "(synthetic data with {d.num_sources} sources, {d.num_variables} "
        "variables)".format(d=sup.data)
    )
    ax.set_xlabel("Iteration number")
    ax.set_ylabel(r"$\ell_2$ distance between old and new trust (log scale)")

    # map algorithm names to list of distances over time
    distances = {}
    # NOTE(review): the same iterator object is shared by all algorithms;
    # this presumably relies on each run resetting it -- confirm
    iterator = ConvergenceIterator(MEASURE, 0, limit=100, debug=True)
    for cls in ALGORITHMS:
        name = cls.__name__
        print("running {} using {} measure".format(name, MEASURE))
        alg = cls(iterator=iterator)
        captured = StringIO()
        # redirect_stdout restores whichever stream was active before (not
        # unconditionally sys.__stdout__), so an outer redirection such as
        # test capture or a notebook is left intact, even on error
        with redirect_stdout(captured):
            try:
                alg.run(sup.data)
            except ConvergenceError:
                # Failing to converge is tolerated: whatever was printed
                # before the error is still plotted
                pass

        distances[name] = []
        for line in captured.getvalue().split("\n"):
            if not line:
                continue
            _, dist = line.split(",")
            distances[name].append(float(dist))

    max_its = max(len(dists) for dists in distances.values())
    x = range(1, max_its + 1)

    for name, dists in distances.items():
        # Pad shorter runs with None so every series has as many entries
        # as there are x values
        dists.extend([None] * (max_its - len(dists)))
        ax.semilogy(x, dists, label=name, linewidth=3)
    ax.legend()
    plt.show()
Пример #7
0
    def get_iterator(self, it_string, max_limit=200):
        """
        Parse an :any:`Iterator` object from a string representation

        Two formats are recognised:

        * ``fixed-<N>``: gives ``FixedIterator(limit=N)``
        * ``<measure>-convergence-<threshold>`` with an optional
          ``-limit-<N>`` suffix: gives a ``ConvergenceIterator`` built from
          the measure, threshold and limit. Without the suffix the limit is
          ``max_limit``.

        :param it_string: string specification of the iterator
        :param max_limit: largest iteration limit that may be requested
        :return: the iterator described by ``it_string``
        :raises ValueError: if ``it_string`` matches neither format, the
                            limit exceeds ``max_limit``, the measure is not
                            accepted by ``DistanceMeasures``, or the
                            threshold is not a valid float
        """
        fixed_regex = re.compile(r"fixed-(?P<limit>\d+)$")
        # NOTE: neither the measure nor the threshold may contain '-', so a
        # threshold written like '1e-5' does not match this pattern
        convergence_regex = re.compile(
            r"(?P<measure>[^-]+)-convergence-(?P<threshold>[^-]+)"
            r"(-limit-(?P<limit>\d+))?$"  # optional limit
        )
        fixed_match = fixed_regex.match(it_string)
        if fixed_match:
            limit = int(fixed_match.group("limit"))
            if limit > max_limit:
                raise ValueError(
                    "Cannot perform more than {} iterations".format(max_limit)
                )
            return FixedIterator(limit=limit)

        convergence_match = convergence_regex.match(it_string)
        if convergence_match:
            measure_str = convergence_match.group("measure")
            try:
                measure = DistanceMeasures(measure_str)
            except ValueError as err:
                # Chain explicitly so the original failure is kept as the
                # cause of the friendlier message
                raise ValueError(
                    "invalid distance measure '{}'".format(measure_str)
                ) from err

            threshold_str = convergence_match.group("threshold")
            try:
                threshold = float(threshold_str)
            except ValueError as err:
                # Report a bad threshold in the same style as the other
                # errors, rather than the raw float() message
                raise ValueError(
                    "invalid threshold '{}'".format(threshold_str)
                ) from err

            limit = max_limit
            if convergence_match.group("limit") is not None:
                limit = int(convergence_match.group("limit"))
                if limit > max_limit:
                    raise ValueError(
                        "Upper iteration limit cannot exceed {}"
                        .format(max_limit)
                    )
            return ConvergenceIterator(measure, threshold, limit)

        raise ValueError(
            "invalid iterator specification '{}'".format(it_string)
        )
Пример #8
0
    def test_basic(self, data):
        """
        Run Sums on a small graph and compare trust and belief scores with
        reference values. The reference values were obtained by finding
        eigenvectors of suitable matrices (using numpy "by hand"), as per
        the Kleinberg paper for Hubs and Authorities
        """
        it = ConvergenceIterator(DistanceMeasures.L1, 0.00001)
        results = Sums(iterator=it).run(data)

        expected_trust = {"s1": 1, "s2": 0.53208889, "s3": 0.34729636}
        for source, trust in expected_trust.items():
            assert np.isclose(results.trust[source], trust)

        # For each variable: exactly these values, with these beliefs
        expected_belief = {
            "x": {"one": 1},
            "y": {"nine": 0.65270364, "eight": 0.34729636},
            "z": {"seven": 0.87938524},
        }
        for var, beliefs in expected_belief.items():
            assert set(results.belief[var].keys()) == set(beliefs)
            for val, belief in beliefs.items():
                assert np.isclose(results.belief[var][val], belief)
Пример #9
0
    def test_reset(self):
        """
        Check that ``reset()`` makes a finished iterator usable again, for
        both a fixed iterator and a convergence iterator: after the reset it
        must not be finished, and running it down again must take ``limit``
        iterations
        """
        limit = 25

        def run_down(iterator):
            # Feed the iterator a gap shrinking by 0.02 per round (starting
            # from 1) until it finishes; return the number of rounds taken
            gap = 1
            rounds = 0
            while not iterator.finished():
                gap -= 0.02
                rounds += 1
                iterator.compare(np.array([1]), np.array([1 + gap]))
            return rounds

        fixed_it = FixedIterator(limit)
        # Exhaust the iterator once
        while not fixed_it.finished():
            fixed_it.compare(1, 2)
        # After a reset it must report unfinished again
        fixed_it.reset()
        assert not fixed_it.finished()
        # A second full run must take the whole limit
        rounds = 0
        while not fixed_it.finished():
            rounds += 1
            fixed_it.compare(1, 2)
        assert rounds == limit
        assert fixed_it.it_count == limit

        # Same procedure for a convergence iterator
        conv_it = ConvergenceIterator(DistanceMeasures.L1, 0.501)
        run_down(conv_it)
        conv_it.reset()
        assert not conv_it.finished()
        assert run_down(conv_it) == limit
        assert conv_it.it_count == limit
Пример #10
0
 def check(self, measure, obj1, obj2, exp_distance):
     """
     Assert that ``ConvergenceIterator.get_distance`` for ``measure``,
     applied to ``obj1`` and ``obj2`` converted to numpy arrays, is exactly
     equal to ``exp_distance``
     """
     first = np.array(obj1)
     second = np.array(obj2)
     actual = ConvergenceIterator.get_distance(measure, first, second)
     assert actual == exp_distance
Пример #11
0
        out_path = sys.argv[1]
    except IndexError:
        print("usage: {} DEST".format(sys.argv[0]), file=sys.stderr)
        sys.exit(1)

#     tuples = [
#         ("source 1", "x", 4),
#         ("source 1", "y", 7),
#         ("source 2", "y", 7),
#         ("source 2", "z", 5),
#         ("source 3", "x", 3),
#         ("source 3", "z", 5),
#         ("source 4", "x", 3),
#         ("source 4", "y", 6),
#         ("source 4", "z", 8)
#     ]
#     mydata = Dataset(tuples)

    mydata = MatrixDataset(
        ma.masked_values(
            [[1, 9, 3, 4], [2, 2, 9, 9], [9, 9, 7, 9], [1, 2, 5, 9]], 9))

    it = ConvergenceIterator(DistanceMeasures.L2, 0.001)
    algorithm = Investment(iterator=it)

    cs = ResultsGradientColourScheme(algorithm.run(mydata))
    rend = MatrixDatasetGraphRenderer(zero_indexed=False, colours=cs)
    animator = GifAnimator(renderer=rend, frame_duration=0.2)
    with open(out_path, "wb") as outfile:
        animator.animate(outfile, algorithm, mydata)