示例#1
0
    def it_saves_and_loads_array_results():
        """Round-trip a result that holds an ArrayResult through save/load.

        Checks that the pickle written by save() stays small (the array data
        lives in its own file) and that load_from_folder() restores both the
        scalar property and the array contents from outside the folder.
        """
        n_rows, n_cols = 100, 87
        dims = (n_rows, n_cols)
        float64_n_bytes = 8

        with tmp.tmp_folder() as folder:
            with local.cwd(folder):
                backing = ArrayResult(
                    "arr.arr", dtype=np.float64, shape=dims, mode="w+"
                )
                r = np.random.uniform(size=dims)
                backing[:] = r

                saved = ComplexPropertyResult(foo=3, arr=backing)
                saved.save()

                # The important part is that the pickle doesn't include
                # the array!
                pickle_n_bytes = (
                    local.path(ComplexPropertyResult.filename).stat().st_size
                )
                assert pickle_n_bytes < 200

                # The array file holds exactly one float64 per element.
                arr_n_bytes = local.path("arr.arr").stat().st_size
                assert arr_n_bytes == n_rows * n_cols * float64_n_bytes

            # Leaving the inner block should restore a different working
            # directory; load_from_folder() must cope with being called
            # from outside the folder it reads.
            assert local.cwd != folder

            loaded = ComplexPropertyResult.load_from_folder(folder)
            assert loaded.foo == 3
            assert np.all(loaded.arr == r)
示例#2
0
 def field_chcy_ims(self, field_i):
     """Return the image array for field `field_i`, creating its
     ArrayResult on first access.

     The ArrayResult is built from this object's filename for the field,
     its dtype, and a (n_channels, n_cycles, dim, dim) shape, then kept in
     `self._cache_field_chcy_ims` so later calls reuse the same instance.
     """
     cache = self._cache_field_chcy_ims

     # Fast path: this field's ArrayResult was already created.
     if field_i in cache:
         return cache[field_i].arr()

     ims_shape = (self.n_channels, self.n_cycles, self.dim, self.dim)
     cache[field_i] = ArrayResult(
         self._field_ims_filename(field_i),
         dtype=np.dtype(self.dtype),
         shape=ims_shape,
     )
     return cache[field_i].arr()
示例#3
0
 def _make_arrays(name, n_peps, n_samples):
     """Create the writable dyemat, radmat and recall ArrayResults for `name`.

     dyemat (uint8) and radmat (float32) share the shape
     (n_peps, n_samples, n_channels, n_cycles), with the last two taken from
     `sim_params`; recall (float32) has one entry per peptide.

     Returns:
         The tuple (dyemat, radmat, recall).
     """
     per_sample_shape = (
         n_peps,
         n_samples,
         sim_params.n_channels,
         sim_params.n_cycles,
     )

     def _writable(filename, shape, dtype):
         # Every array here is opened with mode="w+".
         return ArrayResult(filename, shape=shape, dtype=dtype, mode="w+")

     dyemat = _writable(f"{name}_dyemat", per_sample_shape, np.uint8)
     radmat = _writable(f"{name}_radmat", per_sample_shape, np.float32)
     recall = _writable(f"{name}_recall", (n_peps, ), np.float32)
     return dyemat, radmat, recall
示例#4
0
 def it_returns_an_open_array_without_overwrite():
     """Check that repeated arr() calls return the same open array object
     and that the flushed file has the expected size."""
     n_rows, n_cols = 10, 5
     n_elems = n_rows * n_cols

     with tmp.tmp_folder(chdir=True):
         ar = ArrayResult(
             "test1", shape=(n_rows, n_cols), dtype=np.uint8, mode="w+"
         )
         first_view = ar.arr()

         values = np.arange(n_elems).astype(np.uint8)
         ar[:] = values.reshape((n_rows, n_cols))

         # Asking again after the write must hand back the very same object.
         second_view = ar.arr()
         assert second_view is first_view

         ar.flush()

         # One byte per uint8 element.
         assert local.path("test1").stat().st_size == n_elems
示例#5
0
def test_nn(test_nn_params,
            prep_result,
            sim_result,
            progress=None,
            pipeline=None):
    """Classify the simulated test set with nn() and score it per peptide.

    The 4-D test radmat/dyemat from `sim_result` are flattened over their
    first two axes and handed to nn(). True peptide labels are then written
    to an ArrayResult, and a CallBag is used to compute the per-peptide PR
    curve and its abundance-adjusted variant. When
    `test_nn_params.include_training_set` is set, the same scoring is run on
    the training data to permit checking for over-fitting.

    Args:
        test_nn_params: Parameters forwarded to nn(); `include_training_set`
            is read here.
        prep_result: Passed to CallBag; its `peps__no_decoys()` selects the
            training rows to keep.
        sim_result: Supplies `test_radmat` / `test_dyemat` and, for the
            training pass, `train_radmat` / `train_dyemat` /
            `train_true_pep_iz`.
        progress: Optional progress callback forwarded to nn() and the
            PR-curve computations.
        pipeline: Optional object whose `set_phase(i, n_phases)` is called
            as each phase starts.

    Returns:
        A TestNNResult built from `params` plus every entry of the test
        result prefixed with "test_" and every entry of the train result
        prefixed with "train_". The train entries are all None when the
        training set is not included.
    """
    n_phases = 6 if test_nn_params.include_training_set else 3

    def set_phase(phase_i):
        # Phase reporting is optional; callers may run without a pipeline.
        if pipeline is not None:
            pipeline.set_phase(phase_i, n_phases)

    set_phase(0)

    shape = sim_result.test_radmat.shape
    assert len(shape) == 4
    n_rows = shape[0] * shape[1]
    flat_shape = (n_rows, shape[2], shape[3])

    def label_and_score(result, true_pep_iz_filename, pr_phase):
        # Attach true labels and both PR curves to one nn() result.
        result.true_pep_iz = ArrayResult(
            filename=true_pep_iz_filename,
            shape=(n_rows, ),
            dtype=IndexType,
            mode="w+",
        )
        # Each index along the first axis is repeated once per entry of the
        # second axis, matching the row order of the flattened matrices.
        result.true_pep_iz[:] = np.repeat(
            np.arange(shape[0]).astype(IndexType), shape[1])
        check.t(result.true_pep_iz, ArrayResult)
        check.t(result.pred_pep_iz, ArrayResult)

        call_bag = CallBag(
            true_pep_iz=result.true_pep_iz.arr(),
            pred_pep_iz=result.pred_pep_iz.arr(),
            scores=result.scores.arr(),
            prep_result=prep_result,
            sim_result=sim_result,
        )

        set_phase(pr_phase)
        result.peps_pr = call_bag.pr_curve_by_pep(progress=progress)

        # If there is abundance information, compute the abundance-adjusted
        # PR. This call returns None if there is no abundance info avail.
        set_phase(pr_phase + 1)
        result.peps_pr_abund = call_bag.pr_curve_by_pep_with_abundance(
            progress=progress)

    test_result = nn(
        test_nn_params,
        sim_result,
        radmat=sim_result.test_radmat.reshape(flat_shape),
        true_dyemat=sim_result.test_dyemat.reshape(flat_shape),
        progress=progress,
    )
    label_and_score(test_result, "test_true_pep_iz", pr_phase=1)

    if test_nn_params.include_training_set:
        # Permit testing for over-fitting by classifying on the train data
        set_phase(3)

        real_pep_iz = prep_result.peps__no_decoys().pep_i.values
        keep_rows = np.isin(sim_result.train_true_pep_iz, real_pep_iz)
        train_radmat = sim_result.train_radmat[keep_rows]
        train_dyemat = sim_result.train_dyemat[keep_rows]

        # The labels written by label_and_score() are derived from the test
        # shape, so the filtered training data must have that same shape.
        assert train_radmat.shape == shape

        # NOTE(review): unlike the test pass, the training matrices are
        # handed to nn() without being flattened -- confirm this is intended.
        train_result = nn(
            # Pass the full params object, exactly as the test-set call
            # does; previously only `test_nn_params.use_gmm` was passed here.
            test_nn_params,
            sim_result,
            radmat=train_radmat,
            true_dyemat=train_dyemat,
            progress=progress,
        )
        label_and_score(train_result, "train_true_pep_iz", pr_phase=4)
    else:
        train_result = {k: None for k in test_result.keys()}

    def rename(d, prefix):
        return {f"{prefix}{k}": v for k, v in d.items()}

    return TestNNResult(
        params=test_nn_params,
        **rename(test_result, "test_"),
        **rename(train_result, "train_"),
    )
示例#6
0
 def allocate_field(self, field_i, shape, dtype):
     """Create and return an ArrayResult for field `field_i`, opened with
     mode="w+" on this object's filename for that field.

     Note that ArrayResult receives `dtype` before `shape` positionally,
     the reverse of this method's own parameter order.
     """
     return ArrayResult(
         self._field_ims_filename(field_i),
         dtype,
         shape,
         mode="w+",
     )