Example #1
    def test_create(self):
        # From a regular numpy array
        normal_arr = np.ones((2, 3))
        MatrixDataset(normal_arr)

        # From masked array
        masked_arr = ma.array(normal_arr,
                              mask=[[True, False, True], [False, True, False]])
        MatrixDataset(masked_arr)
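In a numpy masked array, mask=True hides an entry; judging by the tests below, MatrixDataset treats a masked cell as "this source makes no claim about this variable". A minimal standalone sketch using only numpy:

import numpy as np
import numpy.ma as ma

arr = ma.array(np.ones((2, 3)),
               mask=[[True, False, True], [False, True, False]])
print(arr)             # masked entries render as "--"
print(arr.mask.sum())  # 3 of the 6 entries are hidden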
Example #2
    def test_export_to_csv(self):
        data = MatrixDataset(
            ma.masked_values(
                [
                    # All full row
                    [1, 2, 3, 4, 5, 6, 7, 8],
                    # Mixed row
                    [1, 2, 0, -123, 4, -2.3, 99.123, -123],
                    # All empty row
                    [-123, -123, -123, -123, -123, -123, -123, -123]
                ],
                -123))
        expected = "\n".join(("1.0,2.0,3.0,4.0,5.0,6.0,7.0,8.0",
                              "1.0,2.0,0.0,,4.0,-2.3,99.123,", ",,,,,,,"))
        assert data.to_csv() == expected
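Note how ma.masked_values builds the mask here: every entry equal to the given value (-123 above) is masked, and masked cells serialize to empty CSV fields. A quick standalone check:

import numpy.ma as ma

arr = ma.masked_values([[1, -123], [2, 3]], -123)
print(arr.mask.tolist())  # [[False, True], [False, False]]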
Example #3
    def generate_graph(self, args, parser):
        try:
            renderer = self.get_graph_renderer(args)
            dataset = MatrixDataset.from_csv(args.dataset)
        except ValueError as ex:  # pragma: no cover
            parser.error(ex)
        renderer.render(dataset, args.outfile)
Example #4
    def test_mutual_exclusion_matrix(self):
        data = MatrixDataset(
            ma.masked_values(
                [[7, 4, 7], [5, 1, -1], [-1, 2, 4], [7, -1, 2], [-1, 1, 2]],
                -1))
        # Claims are:
        # 0: x=7
        # 1: y=4
        # 2: z=7
        # 3: x=5
        # 4: y=1
        # 5: y=2
        # 6: z=4
        # 7: z=2
        # Mutual exclusion groups are {0, 3}, {1, 4, 5}, {2, 6, 7}
        expected_mut_ex_mat = np.array([
            [1, 0, 0, 1, 0, 0, 0, 0],
            [0, 1, 0, 0, 1, 1, 0, 0],
            [0, 0, 1, 0, 0, 0, 1, 1],
            [1, 0, 0, 1, 0, 0, 0, 0],
            [0, 1, 0, 0, 1, 1, 0, 0],
            [0, 1, 0, 0, 1, 1, 0, 0],
            [0, 0, 1, 0, 0, 0, 1, 1],
            [0, 0, 1, 0, 0, 0, 1, 1],
        ])
        assert data.mut_ex.shape == expected_mut_ex_mat.shape
        assert np.array_equal(data.mut_ex.toarray(), expected_mut_ex_mat)
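The structure of the expected matrix follows directly from the claim comments: entry (i, j) is 1 exactly when claims i and j concern the same variable. A hypothetical reconstruction (claim_var is my own name for the variable index behind each claim):

import numpy as np

claim_var = np.array([0, 1, 2, 0, 1, 1, 2, 2])  # x, y, z, x, y, y, z, z
mut_ex = (claim_var[:, None] == claim_var[None, :]).astype(int)
# mut_ex now equals expected_mut_ex_mat above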
Example #5
def perform_test(master_sv, data_shapes):
    """
    Perform a number of timing tests

    :param master_sv:   source-variables matrix to extract smaller datasets
                        from
    :param data_shapes: an iterable of (num_sources, num_variables) for the
                        sizes of data to test with
    :return: an OrderedDict {alg_label: list_of_timings, ...}
    """
    timings = OrderedDict()
    for num_sources, num_vars in data_shapes:
        print("getting reduced dataset: {} sources, {} variables...".format(
            num_sources, num_vars),
              file=sys.stderr)
        data = MatrixDataset(master_sv[0:num_sources, 0:num_vars])

        for alg_label, alg in ALGORITHMS.items():
            print("  running {}...".format(alg_label), end="", file=sys.stderr)
            res = alg.run(data)
            print(" {:.3f} seconds".format(res.time_taken), file=sys.stderr)

            if alg_label not in timings:
                timings[alg_label] = []
            timings[alg_label].append(res.time_taken)
    return timings
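A hedged usage sketch, assuming ALGORITHMS is the OrderedDict of algorithm instances this script defines elsewhere; the random master matrix and shapes are illustrative only:

import numpy as np
import numpy.ma as ma

master_sv = ma.masked_values(np.random.randint(0, 5, size=(500, 500)), 0)
timings = perform_test(master_sv, [(10, 10), (50, 50), (500, 500)])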
Example #6
    def run_algorithm(self, args, parser):
        alg_objs = []
        all_params = dict(args.alg_params or [])
        for cls in args.alg_classes:
            params, ignored = self.get_algorithm_params(cls, all_params)
            alg_objs.append(self.get_algorithm_object(cls, params))

            if ignored:
                msg = self.get_ignored_parameters_message(cls, ignored)
                print("WARNING: {}".format(msg), file=sys.stderr)

        sup_data = None
        dataset = None
        if args.supervised:
            sup_data = SupervisedData.from_csv(args.dataset)
            dataset = sup_data.data
        else:
            # Catch error early if accuracy requested in output but dataset is
            # not supervised
            if OutputFields.ACCURACY in args.output_fields:
                parser.error("cannot calculate accuracy without --supervised")
            dataset = MatrixDataset.from_csv(args.dataset)

        output_obj = {}
        for alg in alg_objs:
            results = alg.run(dataset).filter(sources=args.sources,
                                              variables=args.variables)

            # Get results to display
            label = self.ALG_LABEL_MAPPING.inverse[alg.__class__]
            output_obj[label] = self.get_output_obj(
                results, output_fields=args.output_fields, sup_data=sup_data)
        print(yaml.dump(output_obj, indent=2, default_flow_style=False))
Example #7
    def test_from_csv_empty_rows(self, tmpdir):
        filepath = tmpdir.join("data.csv")
        filepath.write("\n".join(["1,2,", ",,", " ,\t,", "3,4,5"]))
        data = MatrixDataset.from_csv(filepath.open())
        expected_matrix = ma.masked_values(
            [[1, 2, 0], [0, 0, 0], [0, 0, 0], [3, 4, 5]], 0)
        assert np.array_equal(data.sv.mask, expected_matrix.mask)
        assert (data.sv == expected_matrix).all()
Example #8
    def test_truthfinder(self, data):
        it = ConvergenceIterator(DistanceMeasures.COSINE, 0.001)
        truthfinder = TruthFinder(iterator=it)

        def imp(var, val1, val2):
            diff = val1 - val2
            return np.exp(-0.5 * diff**2)

        data = MatrixDataset(data.sv, implication_function=imp)
        self.check_results(truthfinder, data, "truthfinder_results.json")
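For intuition, imp above is a Gaussian similarity: equal values imply each other fully, distant values barely at all. A quick check with plain numpy:

import numpy as np

def imp(var, val1, val2):
    return np.exp(-0.5 * (val1 - val2) ** 2)

print(imp("x", 1.0, 1.0))  # 1.0
print(imp("x", 1.0, 3.0))  # exp(-2), roughly 0.135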
Example #9
    def test_results(self, csv_dataset, csv_fileobj, capsys):
        self.run("run", "-a", "average_log", "-f", csv_dataset)
        got_results = yaml.safe_load(capsys.readouterr().out)
        assert "average_log" in got_results
        alg_results = got_results["average_log"]
        assert isinstance(alg_results, dict)
        exp_results = AverageLog().run(MatrixDataset.from_csv(csv_fileobj))
        assert alg_results["trust"] == exp_results.trust
        assert alg_results["belief"] == exp_results.belief
        assert alg_results["iterations"] == exp_results.iterations
Example #10
    def test_from_csv_single_row_or_column(self, tmpdir):
        filepath1 = tmpdir.join("data1.csv")
        filepath1.write("1,,3,2,6")
        data1 = MatrixDataset.from_csv(filepath1.open())
        exp_sv1 = ma.masked_values([[1, 0, 3, 2, 6]], 0)
        assert data1.num_sources == 1
        assert data1.num_variables == 4
        assert data1.num_claims == 4
        assert np.array_equal(data1.sv.mask, exp_sv1.mask)
        assert (data1.sv == exp_sv1).all()

        filepath2 = tmpdir.join("data2.csv")
        filepath2.write("1\n\n3\n2\n6")
        data2 = MatrixDataset.from_csv(filepath2.open())
        exp_sv2 = exp_sv1.T
        assert data2.num_sources == 4
        assert data2.num_variables == 1
        assert data2.num_claims == 4
        assert np.array_equal(data2.sv.mask, exp_sv2.mask)
        assert (data2.sv == exp_sv2).all()
Example #11
    def test_claims_matrix(self):
        data = MatrixDataset(
            ma.masked_values(
                [[7, 4, 7], [5, 1, -1], [-1, 2, 4], [7, -1, 2], [-1, 1, 2]],
                -1))
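        # Same source-variable matrix as in the mutual-exclusion example
        # above, so the claims are:
        # 0: x=7, 1: y=4, 2: z=7, 3: x=5, 4: y=1, 5: y=2, 6: z=4, 7: z=2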
        expected_claim_mat = np.array([[1, 1, 1, 0, 0, 0, 0, 0],
                                       [0, 0, 0, 1, 1, 0, 0, 0],
                                       [0, 0, 0, 0, 0, 1, 1, 0],
                                       [1, 0, 0, 0, 0, 0, 0, 1],
                                       [0, 0, 0, 0, 1, 0, 0, 1]])
        assert data.sc.shape == expected_claim_mat.shape
        assert np.array_equal(data.sc.toarray(), expected_claim_mat)
Example #12
    def test_belief_stats(self, csv_dataset, csv_fileobj, capsys):
        self.run("run", "-a", "sums", "-f", csv_dataset, "-o", "belief_stats")
        results = yaml.safe_load(capsys.readouterr().out)["sums"]
        assert set(results.keys()) == {"belief_stats"}
        exp_belief_stats = (Sums().run(
            MatrixDataset.from_csv(csv_fileobj)).get_belief_stats())
        assert results["belief_stats"] == {
            var: {
                "mean": mean,
                "stddev": stddev
            }
            for var, (mean, stddev) in exp_belief_stats.items()
        }
Example #13
    def test_trust_invalid(self):
        """
        In theory trust scores cannot be 1 for any source. In practice scores
        get so close to 1 that they are rounded to 1, which causes problems
        when we do log(1 - trust).

        This test checks that iteration stops in this case.
        """
        data = MatrixDataset(
            np.array([[1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2, 3]]))
        it = FixedIterator(100)
        alg = TruthFinder(iterator=it)
        res = alg.run(data)
        # Iteration should stop after only 7 iterations, instead of 100
        assert it.it_count == 7
        assert res.iterations == 7
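The failure mode the docstring describes is easy to reproduce with plain numpy:

import numpy as np

with np.errstate(divide="ignore"):
    print(np.log(1 - 1.0))  # -inf, so further trust updates are meaningless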
Example #14
    def test_custom_output(self, csv_fileobj, csv_dataset, capsys):
        self.run("run", "-a", "sums", "-f", csv_dataset, "-o", "time")
        results = yaml.safe_load(capsys.readouterr().out)["sums"]
        assert set(results.keys()) == {"time"}

        self.run("run", "-a", "sums", "-f", csv_dataset, "-o", "time",
                 "iterations")
        results = yaml.safe_load(capsys.readouterr().out)["sums"]
        assert set(results.keys()) == {"time", "iterations"}

        self.run("run", "-a", "sums", "-f", csv_dataset, "-o", "trust",
                 "trust_stats")
        results = yaml.safe_load(capsys.readouterr().out)["sums"]
        assert set(results.keys()) == {"trust", "trust_stats"}
        exp_mean, exp_stddev = (Sums().run(
            MatrixDataset.from_csv(csv_fileobj)).get_trust_stats())
        assert results["trust_stats"] == {
            "mean": exp_mean,
            "stddev": exp_stddev
        }
Example #15
    def test_from_csv(self, tmpdir):
        filepath = tmpdir.join("data.csv")
        filepath.write("\n".join([
            "1,,3, 2,6  ",  # extra whitespace should not matter
            ", 9,0,2,5",
            "3,9,  ,,1",
            "1,9  , 5.7,3,4",
            "5,1,3,1,1",
            "\n"  # new lines at the end of file should not matter
        ]))

        data = MatrixDataset.from_csv(filepath.open())
        expected_matrix = ma.masked_values(
            [[1, 999, 3, 2, 6], [999, 9, 0, 2, 5], [3, 9, 999, 999, 1],
             [1, 9, 5.7, 3, 4], [5, 1, 3, 1, 1]], 999)
        assert data.num_sources == 5
        assert data.num_variables == 5
        assert data.num_claims == 15
        assert np.array_equal(data.sv.mask, expected_matrix.mask)
        assert (data.sv == expected_matrix).all()
Example #16
def main():
    """
    Run an algorithm and print results
    """
    data = MatrixDataset(
        ma.masked_values(
            [[1, 4, 0, 5, 7, 0], [0, 2, 0, 4, 9, 0], [1, 0, 2, 3, 8, 4],
             [4, 0, 2, 5, 0, 0], [4, 0, 3, 3, 4, 4]], 0))

    alg = AverageLog()

    results = alg.run(data)
    print("trust scores:")
    for source, trust_val in results.trust.items():
        print("  source {}: {:.3f}".format(source, trust_val))
    print("")
    print("belief scores:")
    for var in sorted(results.belief):
        print("  variable {}:".format(var))
        beliefs = results.belief[var]
        for val in sorted(beliefs):
            print("    {}: {:.3f}".format(val, beliefs[val]))
Example #17
    def test_matrix_renderer(self):
        buf = StringIO()
        buf.write(",5,7\n,,\n1,2,3")
        buf.seek(0)
        dataset = MatrixDataset.from_csv(buf)
        rend1 = MatrixDatasetGraphRenderer()
        rend2 = MatrixDatasetGraphRenderer(zero_indexed=False)

        rend1.render(dataset, BytesIO())
        rend2.render(dataset, BytesIO())

        assert rend1.get_source_label(0) == "s0"
        assert rend2.get_source_label(0) == "s1"
        assert rend1.get_var_label(0) == "v1"
        assert rend2.get_var_label(0) == "v2"

        # Note that source 1 (in 0-index terms) makes no claims: ID 1 should
        # therefore be source 2 (in 0-index terms)
        assert rend1.get_source_label(1) == "s2"
        assert rend2.get_source_label(1) == "s3"

        assert rend1.get_claim_label(0, 1) == "v1=7"
        assert rend2.get_claim_label(0, 1) == "v2=7"
Example #18
    def test_get_output_obj(self, csv_fileobj):
        dataset = MatrixDataset.from_csv(csv_fileobj)
        alg = Sums(iterator=FixedIterator(5))
        # Default should be all fields if none are given, but not accuracy
        # unless supervised data given
        results = alg.run(dataset)
        out1 = BaseClient().get_output_obj(results)
        exp_keys = {
            f.value
            for f in OutputFields if f != OutputFields.ACCURACY
        }
        assert set(out1.keys()) == exp_keys

        sup_data = SupervisedData.from_csv(csv_fileobj)
        sup_results = alg.run(sup_data.data)
        out2 = BaseClient().get_output_obj(sup_results, sup_data=sup_data)
        assert set(out2.keys()) == {f.value for f in OutputFields}
        assert out2["trust"] == sup_results.trust
        assert out2["belief"] == sup_results.belief

        out3 = BaseClient().get_output_obj(results,
                                           output_fields=[OutputFields.TRUST])
        assert set(out3.keys()) == {"trust"}
Example #19
    def test_results_diff(self, test_client):
        dataset1 = MatrixDataset(ma.masked_values([[1, 0], [0, 2]], 0))
        dataset2 = MatrixDataset(ma.masked_values([[1, 2], [0, 2]], 0))

        request_data1 = {"algorithm": "voting", "matrix": dataset1.to_csv()}
        resp1 = test_client.get("/run/", query_string=request_data1)
        assert resp1.status_code == 200

        request_data2 = {
            # Test diffs with multiple algorithms
            "algorithm": ["voting", "investment"],
            "matrix": dataset2.to_csv(),
            "previous_results": json.dumps(resp1.json["data"]["voting"])
        }
        resp2 = test_client.get("/run/", query_string=request_data2)
        assert resp2.status_code == 200
        vot_out = resp2.json["data"]["voting"]
        assert "diff" in vot_out
        assert vot_out["diff"]["trust"] == {"0": 0, "1": 0}  # no trust changes
        assert vot_out["diff"]["belief"] == {
            "0": {
                "1.0": -0.5
            },
            "1": {
                "2.0": 0
            }
        }
        inv_out = resp2.json["data"]["investment"]
        assert "diff" in inv_out
        assert inv_out["diff"]["trust"] == {"0": -0.7731216864273125, "1": 0.0}
        assert inv_out["diff"]["belief"] == {
            "0": {
                "1.0": -0.9354768146506873
            },
            "1": {
                "2.0": 0.0
            }
        }
Example #20
    def run(self):
        """
        Run an algorithm on a user-supplied dataset. Required HTTP parameters:
        * 'algorithm'
        * 'matrix'

        Optional parameters:
        * 'parameters'
        * 'previous_results'

        Responses are JSON objects of the form
        ``{"ok": True, "data": ...}``
        or
        ``{"ok": False, "error": ...}``
        """
        alg_labels = request.args.getlist("algorithm")
        matrix_csv = request.args.get("matrix")

        if not alg_labels or not matrix_csv:
            err_msg = "'algorithm' and 'matrix' parameters are required"
            return jsonify(ok=False, error=err_msg), 400

        matrix_csv = matrix_csv.replace("_", "")
        params_str = request.args.get("parameters")
        try:
            all_params = self.get_param_dict(params_str)
            dataset = MatrixDataset.from_csv(StringIO(matrix_csv))
        except ValueError as ex:
            return jsonify(ok=False, error=str(ex)), 400

        messages = []
        all_output = {}
        for alg_label in alg_labels:
            try:
                alg_cls = self.algorithm_cls(alg_label)
                params, ignored = self.get_algorithm_params(
                    alg_cls, all_params)
                alg = self.get_algorithm_object(alg_cls, params)
            except ValueError as ex:
                return jsonify(ok=False, error=str(ex)), 400

            # Show a message for each of the ignored parameters
            if ignored:
                msg = self.get_ignored_parameters_message(alg_cls, ignored)
                messages.append(msg)

            try:
                results = alg.run(dataset)
            except ConvergenceError as ex:
                return jsonify(ok=False, error=str(ex)), 500
            except EmptyDatasetError as ex:
                return jsonify(ok=False, error=str(ex)), 400

            output = self.get_output_obj(results)

            # Construct a graph and/or animation
            output["imagery"] = {}
            cs = ResultsGradientColourScheme(results)
            renderer = self.get_graph_renderer(colours=cs)
            json_buffer = StringIO()
            renderer.render(dataset, json_buffer)
            output["imagery"]["graph"] = json_buffer.getvalue()
            # Note: can only produce animation for iterative algorithms
            if isinstance(alg, BaseIterativeAlgorithm):
                animator = JsonAnimator(renderer=self.get_graph_renderer())
                json_buffer = StringIO()
                # Note: empty data and convergence error would already have
                # been caught above, so no need to check here
                animator.animate(json_buffer,
                                 alg,
                                 dataset,
                                 show_progress=False)
                output["imagery"]["animation"] = json_buffer.getvalue()

            # Include diff between previous results if available
            prev_results = request.args.get("previous_results")
            if prev_results is not None:
                try:
                    obj = self.get_results_object(prev_results)
                except ValueError as ex:
                    err_msg = "'previous_results' is invalid: {}".format(ex)
                    return jsonify(ok=False, error=err_msg), 400

                # Previous results have been converted to JSON, which may have
                # changed numeric keys to strings: to ensure results can be
                # compared, convert the current results to and from JSON
                current_results = self.get_results_object(json.dumps(output))
                diff = ResultDiff(obj, current_results)
                output["diff"] = {
                    "trust": diff.trust,
                    "belief": diff.belief,
                    "time_taken": diff.time_taken,
                    "iterations": diff.iterations
                }

            all_output[alg_label] = output

        return jsonify({"ok": True, "data": all_output, "messages": messages})
Example #21
    def dataset(self):
        return MatrixDataset(
            ma.masked_values(
                [[1, 2, 3, 2], [3, 0, 1, 2], [2, 2, 0, 0], [0, 1, 0, 3]], 0))
Example #22
    def data(self):
        data_path = self.get_filepath("data.csv")
        with open(data_path) as csv_file:
            return MatrixDataset.from_csv(csv_file)
Example #23
    try:
        out_path = sys.argv[1]
    except IndexError:
        print("usage: {} DEST".format(sys.argv[0]), file=sys.stderr)
        sys.exit(1)

#     tuples = [
#         ("source 1", "x", 4),
#         ("source 1", "y", 7),
#         ("source 2", "y", 7),
#         ("source 2", "z", 5),
#         ("source 3", "x", 3),
#         ("source 3", "z", 5),
#         ("source 4", "x", 3),
#         ("source 4", "y", 6),
#         ("source 4", "z", 8)
#     ]
#     mydata = Dataset(tuples)

    mydata = MatrixDataset(
        ma.masked_values(
            [[1, 9, 3, 4], [2, 2, 9, 9], [9, 9, 7, 9], [1, 2, 5, 9]], 9))

    it = ConvergenceIterator(DistanceMeasures.L2, 0.001)
    algorithm = Investment(iterator=it)

    cs = ResultsGradientColourScheme(algorithm.run(mydata))
    rend = MatrixDatasetGraphRenderer(zero_indexed=False, colours=cs)
    animator = GifAnimator(renderer=rend, frame_duration=0.2)
    with open(out_path, "wb") as outfile:
        animator.animate(outfile, algorithm, mydata)
Example #24
    def test_num_sources_variables_claims(self):
        mat = MatrixDataset(
            np.array([[1, 4, 5], [2, 0, 5], [1, 1, 5], [3, 2, 5]]))
        assert mat.num_sources == 4
        assert mat.num_variables == 3
        assert mat.num_claims == 8
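The count of 8 follows if a claim is a distinct (variable, value) pair, which these tests suggest. A standalone check in plain Python:

mat = [[1, 4, 5], [2, 0, 5], [1, 1, 5], [3, 2, 5]]
claims = {(var, val) for row in mat for var, val in enumerate(row)}
print(len(claims))  # 8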
Example #25
    def test_dimension(self):
        arr = np.zeros((3, 3, 3))
        with pytest.raises(ValueError):
            MatrixDataset(arr)
Example #26
    def test_invalid_csv_shape(self, tmpdir):
        filepath = tmpdir.join("data.csv")
        filepath.write("\n".join(["1,2,", "1,2"]))
        with pytest.raises(ValueError) as excinfo:
            MatrixDataset.from_csv(filepath.open())
        assert "Expected 3 entries in row 2, got 2" in str(excinfo.value)