Example #1
def main():
    print("\n", "/" * 120, "\n")
    print(d.now())
    print("\nOUTPUTS DIRECTORY:\n{}\n".format(activity_dir))

    print("\nTST H5:\n{}\n".format(tst_h5))
    tst_ip = ip.for_callids(
        tst_h5,
        callids=tst_callids,
        data_context=dctx,
        add_channel_at_end=add_channel_dim,
        label_subcontext=lctx,
        label_from_subcontext_fn=lctx_fn,
        steps_per_chunk=steps_per_chunk,
        classkeyfn=np.argmax,  # for categorical labels
        class_subsample_to_ratios=tst_class_subsampling,
        shuffle_seed=None,  # never shuffled
        npasses=1,
        mean_it=mean_it,
        std_it=std_it,
    )

    print(
        "{}: max-totlen: {:,}; nchunks: {:,}; steps_per_pass: {:,}; npasses: {:,}"
        .format("TST", tst_ip.totlen, tst_ip.nchunks, tst_ip.steps_per_pass,
                tst_ip.npasses))
    print("data shape: {}; label shape: {}".format(tst_ip.inputdatashape,
                                                   tst_ip.inputlabelshape))

    with hFile(priors_fp, 'r') as pf:
        init = np.array(pf['init'])
        tran = np.array(pf['tran'])
        priors = np.array(pf['priors'])

    # make probability distributions: init and priors are scaled to sum to 1,
    # and tran is row-normalized -- normalize_confusion_matrix returns a
    # (precision, recall) pair (see Example 2), and [1] picks the row-wise one
    init = init / init.sum()
    tran = nu.normalize_confusion_matrix(tran)[1]
    priors = priors / priors.sum()

    print("\n", "/" * 120, "\n")
    print("DONE SETTING PRIORS - NOW, MODEL SUMMARY")
    kmodel.summary()

    print("\n", "/" * 120, "\n")
    print("PREDICTING ON TEST")
    predict_on_inputs_provider(
        model=kmodel,
        inputs_provider=tst_ip,
        subsampling=sub,
        export_to_dir=os.path.join(activity_dir, activity_name),
        init=init,
        tran=tran,
        priors=priors,
    )

    print("\nDONE")
    print(d.now())
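
Example 1 relies on several module-level names defined elsewhere in the script (d, hFile, ip, nu, np, activity_dir, tst_h5, kmodel, and the various tst_* settings). Below is a minimal sketch of the setup it appears to assume; the aliases and module names are guesses for illustration, not confirmed by the source:

import os
from datetime import datetime as d  # assumed alias, so d.now() gives a timestamp
from h5py import File as hFile      # assumed alias, so hFile(path, 'r') opens HDF5
import numpy as np

# Project-specific helpers, named after how they are used above (hypothetical):
# import myproject.np_utils as nu          # nu.normalize_confusion_matrix(...)
# import myproject.input_providers as ip   # ip.for_callids(...)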
Example #2
def test_predP_batB_seqlQ_normconfmat(predP_batB_seqlQ_cls3_preds_confmat):
    provider = predP_batB_seqlQ_cls3_preds_confmat
    confmat, confrecall, confprecision = [
        provider[k] for k in ['confmat', 'confrecall', 'confprecision']
    ]

    print(confmat.shape)

    confprecp, confrecp = nu.normalize_confusion_matrix(confmat)
    assert_almost_equal(confprecp, confprecision)
    assert_almost_equal(confrecp, confrecall)

    confmatg, confrecallg, confprecisiong = [
        provider[k] for k in ['confmatg', 'confrecallg', 'confprecisiong']
    ]

    print("G:", confmatg.shape)

    confprecp, confrecp = nu.normalize_confusion_matrix(confmatg)
    assert_almost_equal(confprecp, confprecisiong)
    assert_almost_equal(confrecp, confrecallg)
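
The test above pins down the contract of nu.normalize_confusion_matrix: it returns a (precision, recall) pair of normalized matrices, and it also accepts the grouped matrices (confmatg). A minimal NumPy sketch consistent with that contract, assuming rows are true classes and columns are predictions:

import numpy as np

def normalize_confusion_matrix(confmat):
    """Return (precision, recall) normalizations of confmat, shape (..., C, C).

    Dividing by column sums puts per-class precision on the diagonal;
    dividing by row sums puts per-class recall on the diagonal.
    """
    confmat = np.asarray(confmat, dtype=float)
    conf_precision = confmat / confmat.sum(axis=-2, keepdims=True)
    conf_recall = confmat / confmat.sum(axis=-1, keepdims=True)
    return conf_precision, conf_recall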
Example #3
def predict_on_inputs_provider(  # pylint: disable=too-many-locals,too-many-statements
        model, inputs_provider, export_to, init, tran):
    def _save(paths, datas):
        with hFile(export_to, 'a') as f:
            for path, data in zip(paths, datas):
                if path not in f.keys():
                    f.create_dataset(path,
                                     data=data,
                                     compression='lzf',
                                     fletcher32=True)

            f.flush()

    currn = None
    ctrue = []
    cpred = []

    tot_conf = None
    tot_conf_vp = None
    for xy, (_, chunking) in inputs_provider.flow(
            indefinitely=False,
            only_labels=False,
            with_chunking=True,
    ):

        ctrue.append(xy[1])
        cpred.append(model.predict_on_batch(xy[0]))

        if currn is None:
            currn = chunking.labelpath
            continue

        if chunking.labelpath != currn:
            t = np.concatenate(ctrue[:-1])
            p = np.concatenate(cpred[:-1])

            if sub != 'keepzero':  # from activity_name above
                # force frames whose true label is class 0 to be predicted as class 0
                z = t[:, 0].astype(bool)
                p[z, 0] = 1.
                p[z, 1:] = 0.

            # raw confusion
            conf = nu.confusion_matrix_forcategorical(
                t, nu.to_categorical(p.argmax(axis=-1), nclasses=t.shape[-1]))

            # viterbi decoded - no scaling
            vp = lu.viterbi_smoothing(p, init, tran)
            conf_vp = nu.confusion_matrix_forcategorical(
                t, nu.to_categorical(vp, nclasses=t.shape[-1]))

            _save(
                paths=["{}/{}".format(_p, currn) for _p in ('raw', 'viterbi')],
                datas=[conf, conf_vp],
            )

            print(currn, end=' ')
            nu.print_prec_rec(*nu.normalize_confusion_matrix(conf),
                              onlydiag=True)

            if tot_conf is None:
                tot_conf = conf
                tot_conf_vp = conf_vp
            else:
                tot_conf += conf
                tot_conf_vp += conf_vp

            currn = chunking.labelpath
            ctrue = ctrue[-1:]
            cpred = cpred[-1:]

    # last chunking
    t = np.concatenate(ctrue)
    p = np.concatenate(cpred)

    if sub != 'keepzero':  # from activity_name above
        z = t[:, 0].astype(bool)
        p[z, 0] = 1.
        p[z, 1:] = 0.

    conf = nu.confusion_matrix_forcategorical(
        t, nu.to_categorical(p.argmax(axis=-1), nclasses=t.shape[-1]))

    vp = lu.viterbi_smoothing(p, init, tran)
    conf_vp = nu.confusion_matrix_forcategorical(
        t, nu.to_categorical(vp, nclasses=t.shape[-1]))

    _save(
        paths=["{}/{}".format(_p, currn) for _p in ('raw', 'viterbi')],
        datas=[conf, conf_vp],
    )

    print(currn, end=' ')
    nu.print_prec_rec(*nu.normalize_confusion_matrix(conf), onlydiag=True)

    if tot_conf is None:  # single label-path: the totals were never initialized
        tot_conf = conf
        tot_conf_vp = conf_vp
    else:
        tot_conf += conf
        tot_conf_vp += conf_vp

    # print out total-statistics
    _save(
        paths=["{}/{}".format(_p, 'final') for _p in ('raw', 'viterbi')],
        datas=[tot_conf, tot_conf_vp],
    )

    print("\nFINAL - RAW", end=' ')
    nu.print_prec_rec(*nu.normalize_confusion_matrix(tot_conf), onlydiag=False)

    print("\nFINAL - VITERBI", end=' ')
    nu.print_prec_rec(*nu.normalize_confusion_matrix(tot_conf_vp),
                      onlydiag=False)
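
Examples 3 and 4 smooth the framewise posteriors with Viterbi decoding over an initial distribution init and a transition matrix tran; Example 4 adds a "scaled" pass that first divides the posteriors by the class priors. The following is a minimal log-space sketch of such a decoder, an illustration of the technique rather than the project's actual lu.viterbi_smoothing / viterbi:

import numpy as np

def viterbi(posteriors, init, tran, priors=None):
    """Most likely state path (length T) for posteriors of shape (T, C)."""
    eps = np.finfo(float).tiny
    obs = np.asarray(posteriors, dtype=float)
    if priors is not None:
        # scale posteriors to pseudo-likelihoods by dividing out class priors
        obs = obs / (np.asarray(priors, dtype=float) + eps)
    logobs = np.log(obs + eps)
    logtran = np.log(np.asarray(tran, dtype=float) + eps)

    T, C = logobs.shape
    delta = np.log(np.asarray(init, dtype=float) + eps) + logobs[0]
    back = np.zeros((T, C), dtype=int)
    for t in range(1, T):
        scores = delta[:, None] + logtran  # scores[i, j]: best path ending i -> j
        back[t] = scores.argmax(axis=0)
        delta = scores.max(axis=0) + logobs[t]

    path = np.empty(T, dtype=int)
    path[-1] = int(delta.argmax())
    for t in range(T - 2, -1, -1):  # backtrack along stored predecessors
        path[t] = back[t + 1, path[t + 1]]
    return path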
Example #4
def predict_on_inputs_provider(model, inputs_provider, subsampling,
                               export_to_dir, init, tran, priors):
    export_to = os.path.join(export_to_dir, "confs.h5")

    def _save(paths, datas):
        with hFile(export_to, 'a') as f:
            for path, data in zip(paths, datas):
                if path not in f.keys():
                    f.create_dataset(path,
                                     data=data,
                                     compression='lzf',
                                     fletcher32=True)

            f.flush()

    currn = None
    ctrue = []
    cpred = []

    tot_conf = None
    tot_conf_vp = None
    tot_conf_svp = None
    for xy, (_, chunking) in inputs_provider.flow(
            indefinitely=False,
            only_labels=False,
            with_chunking=True,
    ):

        ctrue.append(xy[1])
        cpred.append(model.predict_on_batch(xy[0]))

        if currn is None:
            currn = chunking.labelpath
            continue

        if chunking.labelpath != currn:
            t = np.concatenate(ctrue[:-1])
            p = np.concatenate(cpred[:-1])

            if subsampling != 'nosub':
                # force frames whose true label is class 0 to be predicted as class 0
                z = t[:, 0].astype(bool)
                p[z, 0] = 1.
                p[z, 1:] = 0.

            # raw confusion
            conf = nu.confusion_matrix_forcategorical(
                t, nu.to_categorical(p.argmax(axis=-1), nclasses=t.shape[-1]))

            # viterbi decoded - no scaling
            vp = viterbi(p, init, tran, priors=None)
            conf_vp = nu.confusion_matrix_forcategorical(
                t, nu.to_categorical(vp, nclasses=t.shape[-1]))

            # viterbi decoded - scaling
            vp = viterbi(p, init, tran, priors=priors)
            conf_svp = nu.confusion_matrix_forcategorical(
                t, nu.to_categorical(vp, nclasses=t.shape[-1]))

            _save(
                paths=[
                    "{}/{}".format(_p, currn)
                    for _p in ('raw', 'viterbi', 'sviterbi')
                ],
                datas=[conf, conf_vp, conf_svp],
            )

            print(currn, end=' ')
            nu.print_prec_rec(*nu.normalize_confusion_matrix(conf),
                              onlydiag=True)

            if tot_conf is None:
                tot_conf = conf
                tot_conf_vp = conf_vp
                tot_conf_svp = conf_svp
            else:
                tot_conf += conf
                tot_conf_vp += conf_vp
                tot_conf_svp += conf_svp

            currn = chunking.labelpath
            ctrue = ctrue[-1:]
            cpred = cpred[-1:]

    # last chunking: flush the remaining collected predictions (mirrors
    # Example 3; without this, the final label-path would be dropped and
    # tot_conf could still be None below)
    t = np.concatenate(ctrue)
    p = np.concatenate(cpred)

    if subsampling != 'nosub':
        z = t[:, 0].astype(bool)
        p[z, 0] = 1.
        p[z, 1:] = 0.

    conf = nu.confusion_matrix_forcategorical(
        t, nu.to_categorical(p.argmax(axis=-1), nclasses=t.shape[-1]))

    vp = viterbi(p, init, tran, priors=None)
    conf_vp = nu.confusion_matrix_forcategorical(
        t, nu.to_categorical(vp, nclasses=t.shape[-1]))

    vp = viterbi(p, init, tran, priors=priors)
    conf_svp = nu.confusion_matrix_forcategorical(
        t, nu.to_categorical(vp, nclasses=t.shape[-1]))

    _save(
        paths=[
            "{}/{}".format(_p, currn)
            for _p in ('raw', 'viterbi', 'sviterbi')
        ],
        datas=[conf, conf_vp, conf_svp],
    )

    print(currn, end=' ')
    nu.print_prec_rec(*nu.normalize_confusion_matrix(conf), onlydiag=True)

    if tot_conf is None:
        tot_conf = conf
        tot_conf_vp = conf_vp
        tot_conf_svp = conf_svp
    else:
        tot_conf += conf
        tot_conf_vp += conf_vp
        tot_conf_svp += conf_svp

    _save(
        paths=[
            "{}/{}".format(_p, 'final')
            for _p in ('raw', 'viterbi', 'sviterbi')
        ],
        datas=[tot_conf, tot_conf_vp, tot_conf_svp],
    )

    print("\nFINAL - RAW", end=' ')
    nu.print_prec_rec(*nu.normalize_confusion_matrix(tot_conf), onlydiag=False)

    print("\nFINAL - VITERBI", end=' ')
    nu.print_prec_rec(*nu.normalize_confusion_matrix(tot_conf_vp),
                      onlydiag=False)

    print("\nFINAL - VITERBI - SCALED", end=' ')
    nu.print_prec_rec(*nu.normalize_confusion_matrix(tot_conf_svp),
                      onlydiag=False)
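
For completeness, a hypothetical call site mirroring how Example 1 wires this function up; every argument value here is an assumption for illustration:

predict_on_inputs_provider(
    model=kmodel,                # a trained Keras model (from Example 1's scope)
    inputs_provider=tst_ip,      # built with ip.for_callids(...) in Example 1
    subsampling='nosub',         # 'nosub' skips the zero-class forcing branch
    export_to_dir=activity_dir,  # confusion matrices are written to confs.h5 here
    init=init,
    tran=tran,
    priors=priors,
)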