def test_invalid_distance_measures(self):
    """
    ``ConvergenceIterator.get_distance`` is expected to raise ``ValueError``
    when the measure argument is a plain string, an integer or ``None``
    """
    vec_a = np.array([1, 0.5, 0.4])
    vec_b = np.array([0.9, 0.8, 0.7])
    for bad_measure in ("L1", 1, 2, None):
        with pytest.raises(ValueError):
            ConvergenceIterator.get_distance(bad_measure, vec_a, vec_b)
def test_did_not_converge(self):
    """
    Drive a convergence iterator (L-infinity measure, threshold 0.5,
    limit 200) with the fixed arrays ``[1]`` and ``[2]``: a
    ``ConvergenceError`` is expected, by which point exactly 200
    comparisons have been made
    """
    iterator = ConvergenceIterator(DistanceMeasures.L_INF, 0.5, limit=200)
    comparisons = 0
    with pytest.raises(ConvergenceError):
        while not iterator.finished():
            comparisons += 1
            iterator.compare(np.array([1]), np.array([2]))
    # Checked outside the ``with`` so that it runs after the error is raised
    assert comparisons == 200
def test_basic(self):
    """
    Feed an L1 convergence iterator a gap that shrinks by 0.02 per step
    (starting from 1) and check that it finishes after 25 comparisons with
    a threshold of 0.51
    """
    threshold = 0.51
    iterator = ConvergenceIterator(DistanceMeasures.L1, threshold)
    steps = 0
    gap = 1
    while not iterator.finished():
        # Shrink by repeated subtraction so the floats match step for step
        gap -= 0.02
        steps += 1
        iterator.compare(np.array([1]), np.array([1 + gap]))
    assert steps == 25
def test_truthfinder(self, data):
    """
    Run TruthFinder with a cosine convergence iterator (threshold 0.001) on
    a matrix dataset that uses a custom implication function, and compare
    against ``truthfinder_results.json`` via ``self.check_results``
    """
    def implication(var, val1, val2):
        # exp(-(val1 - val2)^2 / 2); ``var`` is not used
        return np.exp(-0.5 * (val1 - val2) ** 2)

    iterator = ConvergenceIterator(DistanceMeasures.COSINE, 0.001)
    truthfinder = TruthFinder(iterator=iterator)
    matrix_data = MatrixDataset(data.sv, implication_function=implication)
    self.check_results(truthfinder, matrix_data, "truthfinder_results.json")
def test_converge_to_zero(self):
    """
    Run Investment until convergence on input that is known to cause
    problems: trust drains to zero, which causes division by zero.
    """
    claims = [
        ("s1", "x", "one"),
        ("s2", "x", "zero"),
        ("s3", "x", "one"),
        ("s1", "y", "zero"),
        ("s3", "y", "one"),
        ("s4", "y", "one"),
        ("s2", "z", "zero"),
        ("s3", "z", "one"),
    ]
    dataset = Dataset(claims)
    iterator = ConvergenceIterator(DistanceMeasures.L2, 0.1e-100)
    algorithm = Investment(iterator=iterator)
    results = algorithm.run(dataset)
    assert results.iterations == 41
def main(csv_file):
    """
    Perform the test

    Load supervised data from ``csv_file``, run every class in
    ``ALGORITHMS`` on it with a shared convergence iterator (threshold 0,
    limit 100, ``debug=True``), and plot the distances seen at each
    iteration on a log-scale y axis.

    Distances are recovered from whatever is written to stdout while an
    algorithm runs: every non-empty line must consist of exactly two
    comma-separated fields, the second of which is parsed as a float.

    :param csv_file: passed straight to ``SupervisedData.from_csv``
    """
    # Local import: keeps this function self-contained regardless of what
    # the top of the file imports
    from contextlib import redirect_stdout

    print("Loading data...")
    sup = SupervisedData.from_csv(csv_file)

    fig, ax = plt.subplots()
    fig.suptitle(
        "Convergence experiment\n"
        "(synthetic data with {d.num_sources} sources, {d.num_variables} "
        "variables)".format(d=sup.data)
    )
    ax.set_xlabel("Iteration number")
    ax.set_ylabel(r"$\ell_2$ distance between old and new trust (log scale)")

    # map algorithm names to list of distances over time
    distances = {}
    iterator = ConvergenceIterator(MEASURE, 0, limit=100, debug=True)
    for cls in ALGORITHMS:
        name = cls.__name__
        print("running {} using {} measure".format(name, MEASURE))
        alg = cls(iterator=iterator)

        captured = StringIO()
        # redirect_stdout puts back whatever stream was active before, rather
        # than unconditionally resetting to sys.__stdout__, so an outer
        # redirection (test capture, notebook, ...) survives this call
        with redirect_stdout(captured):
            try:
                alg.run(sup.data)
            except ConvergenceError:
                # A run that hits the iteration limit is still useful: the
                # output captured up to that point is parsed below
                pass

        distances[name] = []
        for line in captured.getvalue().split("\n"):
            if not line:
                continue
            _, dist = line.split(",")
            distances[name].append(float(dist))

    max_its = max(len(dists) for dists in distances.values())
    x = range(1, max_its + 1)
    for name, dists in distances.items():
        # Pad shorter runs with None so every series has one entry per x
        dists.extend([None] * (max_its - len(dists)))
        ax.semilogy(x, dists, label=name, linewidth=3)
    ax.legend()
    plt.show()
def get_iterator(self, it_string, max_limit=200):
    """
    Parse an :any:`Iterator` object from a string representation

    Two forms are recognised:

    * ``fixed-<limit>``
    * ``<measure>-convergence-<threshold>``, optionally followed by
      ``-limit-<limit>``

    Neither ``<measure>`` nor ``<threshold>`` may contain a ``-`` character.

    :param it_string: the string to parse
    :param max_limit: largest iteration limit that may be requested; also
                      used as the limit for a convergence iterator when the
                      string does not give one
    :return: a ``FixedIterator`` or ``ConvergenceIterator``
    :raises ValueError: if the string matches neither form, the limit
                        exceeds ``max_limit``, the measure is not a valid
                        ``DistanceMeasures`` value, or the threshold is not
                        a valid float
    """
    fixed_regex = re.compile(r"fixed-(?P<limit>\d+)$")
    convergence_regex = re.compile(
        r"(?P<measure>[^-]+)-convergence-(?P<threshold>[^-]+)"
        r"(-limit-(?P<limit>\d+))?$"  # optional limit
    )

    fixed_match = fixed_regex.match(it_string)
    if fixed_match:
        limit = int(fixed_match.group("limit"))
        if limit > max_limit:
            raise ValueError(
                "Cannot perform more than {} iterations".format(max_limit)
            )
        return FixedIterator(limit=limit)

    convergence_match = convergence_regex.match(it_string)
    if convergence_match is None:
        raise ValueError(
            "invalid iterator specification '{}'".format(it_string)
        )

    measure_str = convergence_match.group("measure")
    try:
        measure = DistanceMeasures(measure_str)
    except ValueError as err:
        # Chain the original error so the traceback shows the real cause
        raise ValueError(
            "invalid distance measure '{}'".format(measure_str)
        ) from err

    threshold_str = convergence_match.group("threshold")
    try:
        threshold = float(threshold_str)
    except ValueError as err:
        # Name the offending field instead of leaking float()'s message
        raise ValueError(
            "invalid threshold '{}'".format(threshold_str)
        ) from err

    limit_str = convergence_match.group("limit")
    limit = max_limit if limit_str is None else int(limit_str)
    if limit > max_limit:
        raise ValueError(
            "Upper iteration limit cannot exceed {}".format(max_limit)
        )
    return ConvergenceIterator(measure, threshold, limit)
def test_basic(self, data):
    """
    Perform Sums on a small graph.

    The expected results were obtained by finding eigenvectors of suitable
    matrices (using numpy "by hand"), as per Kleinberg paper for Hubs and
    Authorities
    """
    iterator = ConvergenceIterator(DistanceMeasures.L1, 0.00001)
    results = Sums(iterator=iterator).run(data)

    expected_trust = {
        "s1": 1,
        "s2": 0.53208889,
        "s3": 0.34729636,
    }
    for source, trust in expected_trust.items():
        assert np.isclose(results.trust[source], trust)

    # For each variable: exactly these values, with these belief scores
    expected_belief = {
        "x": {"one": 1},
        "y": {"nine": 0.65270364, "eight": 0.34729636},
        "z": {"seven": 0.87938524},
    }
    for var, beliefs in expected_belief.items():
        assert set(results.belief[var].keys()) == set(beliefs)
        for val, score in beliefs.items():
            assert np.isclose(results.belief[var][val], score)
def test_reset(self):
    """
    After being run until finished and then ``reset()``, an iterator must
    report not finished and take the same number of comparisons to finish
    again. Checked for a fixed iterator and a convergence iterator.
    """
    limit = 25

    fixed_it = FixedIterator(limit)
    # Run the iterator down
    while not fixed_it.finished():
        fixed_it.compare(1, 2)
    # Reset: should no longer be finished
    fixed_it.reset()
    assert not fixed_it.finished()
    # Check can run it down again
    fixed_runs = 0
    while not fixed_it.finished():
        fixed_runs += 1
        fixed_it.compare(1, 2)
    assert fixed_runs == limit
    assert fixed_it.it_count == limit

    def run_down(iterator):
        # Feed a gap shrinking by 0.02 per step (from 1) until the iterator
        # finishes; return the number of comparisons made
        gap = 1
        made = 0
        while not iterator.finished():
            gap -= 0.02
            made += 1
            iterator.compare(np.array([1]), np.array([1 + gap]))
        return made

    # Perform the same test for a convergence iterator
    conv_it = ConvergenceIterator(DistanceMeasures.L1, 0.501)
    run_down(conv_it)
    conv_it.reset()
    assert not conv_it.finished()
    conv_runs = run_down(conv_it)
    assert conv_runs == limit
    assert conv_it.it_count == limit
def check(self, measure, obj1, obj2, exp_distance):
    """
    Assert that ``ConvergenceIterator.get_distance`` returns exactly
    ``exp_distance`` for ``obj1`` and ``obj2`` (each converted to a numpy
    array first) under the given measure
    """
    first = np.array(obj1)
    second = np.array(obj2)
    actual = ConvergenceIterator.get_distance(measure, first, second)
    assert actual == exp_distance
out_path = sys.argv[1] except IndexError: print("usage: {} DEST".format(sys.argv[0]), file=sys.stderr) sys.exit(1) # tuples = [ # ("source 1", "x", 4), # ("source 1", "y", 7), # ("source 2", "y", 7), # ("source 2", "z", 5), # ("source 3", "x", 3), # ("source 3", "z", 5), # ("source 4", "x", 3), # ("source 4", "y", 6), # ("source 4", "z", 8) # ] # mydata = Dataset(tuples) mydata = MatrixDataset( ma.masked_values( [[1, 9, 3, 4], [2, 2, 9, 9], [9, 9, 7, 9], [1, 2, 5, 9]], 9)) it = ConvergenceIterator(DistanceMeasures.L2, 0.001) algorithm = Investment(iterator=it) cs = ResultsGradientColourScheme(algorithm.run(mydata)) rend = MatrixDatasetGraphRenderer(zero_indexed=False, colours=cs) animator = GifAnimator(renderer=rend, frame_duration=0.2) with open(out_path, "wb") as outfile: animator.animate(outfile, algorithm, mydata)