Example #1
def significance_c1_vs_c2(config_path1: str, config_path2: str):
    config1 = loadDBReader(config_path1).config
    config2 = loadDBReader(config_path2).config

    eval_conf1 = get_best_eval_config(config_path1)
    eval_conf2 = get_best_eval_config(config_path2)

    conversations = read_conversations(config1)

    eval_conversations = sorted(conversations['eval'])

    r1 = []
    r2 = []
    for conv in tqdm([*eval_conversations]):  # optionally also *sorted(conversations['validate'])
        for channel in ["A", "B"]:
            convid = f"{conv}-{channel}"
            _, res1 = evaluate_conv(config_path1, convid, {
                **eval_conf1, 'min_talk_len': 5
            })
            _, res2 = evaluate_conv(config_path2, convid, {
                **eval_conf2, 'min_talk_len': 5
            })
            r1.append(precision_recall(res1)['f1_score'])
            r2.append(precision_recall(res2)['f1_score'])

    print(f"r1: f1 = {meanpm(r1)}")
    print(f"r2: f1 = {meanpm(r2)}")
    print(f"differ: p = {stats.ttest_ind(r1, r2, equal_var=False).pvalue}")
Example #2
def get_word2vec(adc_path: str, sample_window_ms: int, convid: str, feat_dim: int, T="") -> Feature:
    from extract import readDB
    cp = "trainNN/out/v050-finunified-16-g1be124b-dirty:lstm-best-features-power,pitch,ffv,word2vec_dim30-slowbatch/config.json"
    model = readDB.word_to_vec(
        cp,
        dimension=feat_dim,
        T=T)
    # the power feature is only needed here for its frame count and frame timing
    pow = pure_get_power(adc_path, sample_window_ms, convid)
    frames, _ = pow.shape
    w2v = np.zeros((frames, feat_dim), dtype=np.float32)
    reader = readDB.loadDBReader(
        "configs/finunified/vary-features/lstm-best-features-power,ffv.json")  # exact config file is unimportant
    words = [(float(word['to']), reader.noise_filter(word['text'])) for word in
             readDB.get_all_nonsilent_words(reader, convid) if reader.noise_filter(word['text']) in model]
    inx = 0

    def inxtotime(sample_index):
        return (sample_window_ms / 2 + sample_index * pow.frame_shift_ms) / 1000

    for frame in range(frames):
        time = inxtotime(frame)
        if inx < len(words) - 1 and words[inx + 1][0] <= time:
            inx += 1
        w2v[frame] = model[words[inx][1]]
    return Feature(w2v, infofrom=pow)
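The loop above assigns to every power frame the most recent word whose end time ('to') lies at or before the frame's centre time. A self-contained sketch of that alignment, with hypothetical frame parameters, is:

import numpy as np

def align_words_to_frames(words, frames, sample_window_ms=32, frame_shift_ms=10):
    """words: list of (end_time_s, text) sorted by end time; returns per-frame word index."""
    assignment = np.zeros(frames, dtype=np.int32)
    inx = 0
    for frame in range(frames):
        # centre time of this frame in seconds, as in inxtotime() above
        time = (sample_window_ms / 2 + frame * frame_shift_ms) / 1000
        # advance to the next word once its end time has been passed
        if inx < len(words) - 1 and words[inx + 1][0] <= time:
            inx += 1
        assignment[frame] = inx
    return assignment

# e.g. two words ending at 0.5 s and 1.2 s, over 150 frames of 10 ms
print(align_words_to_frames([(0.5, "yeah"), (1.2, "right")], frames=150))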
Example #3
def stat(config_path: str):
    reader = loadDBReader(config_path)
    convs = read_conversations(reader.config)
    convids = [
        "{}-{}".format(conv, channel) for conv in convs['eval']
        for channel in ["A", "B"]
    ]
    total_s = 0
    mono_s = 0
    bc_count = 0
    utt_count = 0
    mono_bc_count = 0
    for convid in convids:
        utts = list(reader.get_utterances(convid))
        utt_count += len(utts)
        start = float(utts[0][1]['from'])
        end = float(utts[-1][1]['to'])
        total_s += end - start
        monosegs = list(
            get_monologuing_segments(reader, convid, min_talk_len=5))
        mono_s += sum(end - start for (start, end) in monosegs)
        bcs = list(reader.get_backchannels(utts))
        bcs = [reader.getBcRealStartTime(utt) for utt, uttInfo in bcs]
        bc_count += len(bcs)
        bcs = list(filter_ranges(bcs, monosegs))
        mono_bc_count += len(bcs)
    print(
        dict(total_s=total_s,
             mono_s=mono_s,
             bc_count=bc_count,
             utt_count=utt_count,
             mono_bc_count=mono_bc_count))
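filter_ranges is not part of this listing; a plausible minimal sketch, assuming it yields only the timestamps that fall inside one of the given (start, end) segments, would be:

def filter_ranges(times, ranges):
    # sketch of the assumed behaviour, not the original helper
    for time in times:
        if any(start <= time <= end for start, end in ranges):
            yield time

# e.g. keep only backchannel times that lie inside monologuing segments
print(list(filter_ranges([1.0, 4.2, 7.5], [(3.0, 5.0), (7.0, 9.0)])))  # -> [4.2, 7.5]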
Example #4
def get_predictions(config_path: str, convid: str, eval_config: dict):
    reader = loadDBReader(config_path)
    smoothed = cached_smoothed_netout(config_path, convid,
                                      eval_config['epoch'],
                                      Hashabledict(eval_config['smoother']))
    return list(
        predict_bcs(reader, 1 - smoothed[:, [0]], eval_config['threshold'],
                    eval_config['at_start']))
def get_boring_bcs(config_path: str, convid: str):
    reader = loadDBReader(config_path)
    bcs = reader.get_backchannels(list(reader.get_utterances(convid)))
    result = []
    for (bc, bcInfo) in bcs:
        text = bcInfo['text']  # type: str
        if "[laughter" in text or "[noise" in text:
            continue
        filtered = reader.noise_filter(text).lower()
        if reader.bc_to_category[filtered] != 'neutral':
            continue
        result.append((bc, bcInfo))
    return result
Example #6
def significance_better_than_random():
    config_path = sys.argv[1]
    config = loadDBReader(config_path).config
    eval_conf = get_best_eval_config(config_path, margin=(0, 1))

    conversations = read_conversations(config)

    eval_conversations = sorted(conversations['eval'])

    nn = []
    rand = []
    for conv in tqdm(eval_conversations):
        for channel in ["A", "B"]:
            convid = f"{conv}-{channel}"
            _, res = evaluate_conv(config_path, convid, eval_conf)
            _, randres = evaluate_conv(config_path, convid, {
                **eval_conf, 'random_baseline': {}
            })
            nn.append(precision_recall(res)['f1_score'])
            rand.append(precision_recall(randres)['f1_score'])

    print(f"nn: f1 = {meanpm(nn)}")
    print(f"rand: f1 = {meanpm(rand)}")
    print(f"differ: p = {stats.ttest_ind(nn, rand, equal_var=False).pvalue}")
Example #7
def significance_better_than_mmueller():
    config_path = sys.argv[1]
    config = loadDBReader(config_path).config

    conversations = read_conversations(config)

    eval_conversations = sorted(conversations['eval'])

    def filt(res):
        [l, r] = res['config']['margin_of_error']
        return res['config']['min_talk_len'] is None and r - l < 0.41

    eval_conf = get_best_eval_config(config_path, filter=filt)
    nn = []
    for conv in tqdm(eval_conversations):
        for channel in ["A", "B"]:
            convid = f"{conv}-{channel}"
            _, res = evaluate_conv(config_path, convid, eval_conf)
            nn.append(precision_recall(res)['f1_score'])

    mmueller = 0.109
    print(
        f"ours ({np.mean(nn)}) is better than mmueller ({mmueller}) with p={stats.ttest_1samp(nn, mmueller).pvalue}"
    )
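Here the per-track F1 scores are compared against a fixed scalar baseline with a one-sample t-test; a quick standalone check with invented scores:

from scipy import stats

our_f1_scores = [0.15, 0.18, 0.14, 0.17, 0.16]  # hypothetical per-track F1 values
baseline_f1 = 0.109                             # fixed scalar baseline

# tests whether the mean of our_f1_scores differs from baseline_f1
print(stats.ttest_1samp(our_f1_scores, baseline_f1).pvalue)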
Example #8
async def sendFeature(ws, id: str, conv: str, featFull: str, micro):
    if featFull[0] != '/':
        raise Exception("featname must start with /")
    channel, category, *path = featFull.split("/")[1:]
    convid = conv + "-" + channel
    if category == "transcript":
        (featname,) = path
        reader = origReader
        if featname == "bc":
            await sendOtherFeature(ws, id,
                                   {"typ": "highlights", "data": getHighlights(reader, conv, channel)})
        elif featname == "is_talking":
            await sendOtherFeature(ws, id, dict(typ="highlights", data=list(get_talking_feature(reader, convid))))
        elif featname == "is_silent":
            await sendOtherFeature(ws, id, dict(typ="highlights", data=list(get_silent_feature(reader, convid))))
        elif featname == "is_monologuing":
            await sendOtherFeature(ws, id, dict(typ="highlights", data=list(get_monologuing_feature(reader, convid))))
        elif featname == "text":
            await sendOtherFeature(ws, id, segsToJSON(reader, convid, featFull))
        elif featname == "words":
            await sendOtherFeature(ws, id, segsToJSON(origReader, convid, featFull, words=True))
        else:
            raise Exception("unknown trans feature: " + featname)
    elif category == "NN outputs":
        if path[-1].endswith(".thres"):
            path[-1] = path[-1][:-len(".thres")]
            config_path, eval_config, feature = get_net_output(convid, path)
            feature = maybe_onedim(feature)
            onedim = feature if feature.shape[1] == 1 else 1 - feature[:, [0]]
            await sendOtherFeature(ws, id, get_larger_threshold_feature(onedim, origReader, featFull,
                                                                        threshold=eval_config['threshold']))
        elif path[-1].endswith(".bc"):
            path[-1] = path[-1][:-len(".bc")]
            config_path, eval_config, feature = get_net_output(convid, path)
            feature = maybe_onedim(feature)
            onedim = feature if feature.shape[1] == 1 else 1 - feature[:, [0]]
            predictions = evaluate.get_predictions(config_path, convid, eval_config)
            _orig_audio = origReader.features.get_adc(convid)
            import random
            st = random.choice(write_wavs.good_bc_sample_tracks)
            print(f"bcs from {st}")
            bc_audio = write_wavs.get_bc_audio(origReader, _orig_audio.size, list(
                write_wavs.bcs_to_samples(
                    readDB.loadDBReader(config_path),
                    write_wavs.get_boring_bcs(config_path, st))),
                                               predictions)
            await sendNumFeature(ws, id, conv, featFull, bc_audio)
        else:
            _, _, feature = get_net_output(convid, path)
            feature = maybe_onedim(feature)
            await sendNumFeature(ws, id, conv, featFull, feature)
    elif category == "extracted":
        featname, = path
        if channel == "microphone":
            feats = micro.features
            if featname in feats:
                return await sendNumFeature(ws, id, conv, featFull, feats[featname])
        else:
            feats = get_extracted_features(origReader)
            if featname in feats:
                featout = feats[featname](convid)
                # if featname == "pitch":
                #    featout = -featout
                return await sendNumFeature(ws, id, conv, featFull, featout)

        raise Exception("feature not found: {}".format(featFull))
    else:
        raise Exception("unknown category " + category)
Example #9
def train():
    from . import network_model, evaluate
    global reader
    global backchannels
    global config_path

    config_path = sys.argv[1]
    config = load_config(config_path)
    version = subprocess.check_output("git describe --dirty",
                                      shell=True).decode('ascii').strip()

    if config_path.startswith("trainNN/out"):
        out_dir = os.path.dirname(config_path)
        print("Continuing training from folder " + out_dir)
        load_stats = config['train_output']['stats']
        load_epoch = max([int(epoch) for epoch in load_stats.keys()])
        load_params = os.path.join(
            out_dir,
            config['train_output']['stats'][str(load_epoch)]['weights'])
        print(
            f"Continuing training from folder {out_dir}, epoch={load_epoch}, params={load_params}"
        )
        config.setdefault('train_output_old',
                          {})[load_epoch] = config['train_output']
    else:
        load_epoch = -1
        load_stats = {}
        load_params = None
        out_dir = os.path.join("trainNN", "out",
                               version + ":" + config['name'])
        if os.path.isdir(out_dir):
            print(
                "Output directory {} already exists, aborting".format(out_dir))
            sys.exit(1)
        os.makedirs(out_dir, exist_ok=True)
    LOGFILE = os.path.join(out_dir, "train.log")
    logging.root.handlers.clear()
    logging.basicConfig(
        level=logging.DEBUG,
        format='%(asctime)s %(levelname)-8s %(message)s',
        handlers=[logging.FileHandler(LOGFILE),
                  logging.StreamHandler()])

    logging.debug("version={}:{}".format(version, config['name']))
    reader = readDB.loadDBReader(config_path)
    train_config = config['train_config']
    context_stride = train_config['context_stride']
    context_frames = int(train_config['context_ms'] / 10 / context_stride)
    train_config['context_frames'] = context_frames

    gaussian = train_config['gaussian']
    out_all = {'all': True, 'single': False}[train_config['output_type']]
    if gaussian:
        raise Exception("not implemented")
        # train_data = load_numpy_file(os.path.join(dir, train_config['files']['train']))
        # validate_data = load_numpy_file(os.path.join(dir, train_config['files']['validate']))
        # train_inputs, train_outputs = train_data[:, :input_dim], train_data[:, input_dim]
        # validate_inputs, validate_outputs = validate_data[:, :input_dim], validate_data[:, input_dim]
    else:
        batchers = {}
        for t in 'train', 'validate':
            # with open(os.path.join(dir, train_config['files'][t]['ids'])) as f:
            #    meta = json.load(f)
            # groups = [slice(begin, end) for begin, end in meta['ranges']]
            # inputs = load_numpy_file(os.path.join(dir, train_config['files'][t]['input']))
            # outputs = load_numpy_file(os.path.join(dir, train_config['files'][t]['output']))
            convos = readDB.read_conversations(config)
            balance_method = train_config.get('balance_method', None)
            uttids = [
                bc for bc in readDB.all_uttids(config_path, convos[t])
                if extract(bc) is not None
            ]
            if balance_method is None:
                backchannels = list(readDB.balance_data(config_path, uttids))
            elif balance_method == "weighted":
                backchannels = list(
                    readDB.get_balanced_weights(config_path, uttids))
            else:
                raise Exception(f"unknown balance method {balance_method}")
            input_dim = extract(backchannels[0])[0].shape[1]
            logging.debug(f"set input dim to {input_dim}")
            inxtoname = {
                **{v: k
                   for k, v in reader.category_to_index.items()}, 0: None
            }

            train_config['input_dim'] = input_dim
            if config['extract_config'].get('categories', None) is not None:
                category_names = [
                    inxtoname[inx]
                    for inx in range(len(reader.categories) + 1)
                ]
                train_config['category_names'] = category_names
                train_config['num_labels'] = len(category_names)
            else:
                train_config['num_labels'] = 2
            logging.debug(f"input dim = {input_dim}")
            context_stride = train_config['context_stride']
            context_length = int(train_config['context_ms'] / 10 /
                                 context_stride)
            sequence_length = int(
                (reader.method['nbc'][1] - reader.method['nbc'][0]) * 1000 /
                10)
            inner_indices = windowed_indices(sequence_length, context_length,
                                             context_stride)
            all_elements = list(itertools.product(backchannels, inner_indices))
            batchers[t] = partial(iterate_minibatches, train_config,
                                  all_elements, out_all)
            # batchers[t] =  iterate_faster_minibatches(train_config, all_elements, out_all)
            before = time.perf_counter()
            before_cpu = time.process_time()
            logging.debug("loading data into ram")
            for backchannel in tqdm(backchannels):
                extract(backchannel)  # extract each backchannel now so its features are cached in RAM
            logging.debug(
                f"loading data took {time.perf_counter () - before:.3f}s (cpu: {time.process_time()-before_cpu:.3f}s)"
            )

    create_network = getattr(network_model, train_config['model_function'])
    model = create_network(train_config)
    out_layer = model['output_layer']

    resume_parameters = train_config.get('resume_parameters', None)
    finetune_config = train_config.get("finetune", None)
    if finetune_config is not None:
        import lasagne
        if load_params is not None or resume_parameters is not None:
            raise Exception("cant finetune and load")
        ft_config_path = finetune_config['config']
        epoch = finetune_config['epoch']
        which_layers = finetune_config['layers']
        ft_layers, _ = evaluate.get_network_outputter(ft_config_path,
                                                      epoch,
                                                      batch_size=250)
        layers = lasagne.layers.get_all_layers(out_layer)
        for inx, (layer_config, layer,
                  ft_layer) in enumerate(zip(which_layers, layers, ft_layers)):
            do_load = layer_config['load']
            do_freeze = layer_config['freeze']
            if do_load:
                for param, ft_param in zip(layer.get_params(),
                                           ft_layer.get_params()):
                    param.set_value(ft_param.get_value())
                logging.info(
                    f"loaded layer {inx} ({ {repr(p): p.get_value().shape for p in layer.get_params()} })"
                )
            if do_freeze:
                logging.info(f"freezing layer {inx}")
                train_func.freeze(layer)

    stats_generator = train_func.train_network(
        network=out_layer,
        twodimensional_output=False,
        scheduling_method=None,
        start_epoch=load_epoch + 1,
        resume=load_params if load_params is not None else resume_parameters,
        l2_regularization=train_config.get("l2_regularization", None),
        # scheduling_params=(0.8, 0.000001),
        update_method=train_config['update_method'],
        num_epochs=train_config['epochs'],
        learning_rate_num=train_config['learning_rate'],
        iterate_minibatches_train=batchers['train'],
        iterate_minibatches_validate=batchers['validate'],
        categorical_output=not gaussian,
        output_prefix=os.path.join(out_dir, "epoch"))
    config_out = os.path.join(out_dir, "config.json")
    for stats in stats_generator:
        for k, v in stats.items():
            v['weights'] = os.path.basename(v['weights'])
        with open(config_out, "w") as f:
            json.dump(
                {
                    **config, 'train_output': {
                        'stats': {
                            **load_stats,
                            **stats
                        },
                        'source': config_path,
                        'environment': dict(os.environ)
                    }
                },
                f,
                indent='\t')
        logging.info("Wrote output to " + config_out)
    latest_path = os.path.join("trainNN", "out", "latest")
    with contextlib.suppress(FileNotFoundError):
        os.remove(latest_path)
    os.symlink(version, latest_path)
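windowed_indices is not shown in this listing; a sketch under assumed semantics (enumerating every strided context window that fits into a sequence of the given length) could look like:

def windowed_indices(sequence_length, context_length, context_stride):
    # a sketch of the assumed behaviour, not the original implementation
    span = context_length * context_stride  # frames covered by one window
    for start in range(sequence_length - span + 1):
        yield range(start, start + span, context_stride)

# e.g. a 150-frame sequence, 7 context frames sampled every 2nd frame
windows = list(windowed_indices(150, 7, 2))
print(len(windows), list(windows[0]))  # -> 137 [0, 2, 4, 6, 8, 10, 12]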
Example #10
    "sw3715", "sw2027", "sw2849", "sw2787", "sw3357", "sw2389"
]
# assume problems are symmetric
bad_eval_convos = [track.split("-")[0] for track in bad_eval_tracks]

good_eval_tracks = []

if __name__ == '__main__':
    config_path = sys.argv[1]
    args = config_path.split("/")
    version = "None"
    if len(args) == 4:
        _, _, version, _ = args

    config = load_config(config_path)
    reader = loadDBReader(config_path)
    conversations = read_conversations(config)
    eval_conversations = sorted(conversations['eval'])
    eval_conversations = [
        convo for convo in eval_conversations if convo not in bad_eval_convos
    ]
    # valid_conversations = sorted(conversations['validate'])
    write_wavs(reader,
               eval_conversations,
               1e10,
               version,
               good_bc_sample_tracks,
               write_mono=True,
               write_nn=True,
               write_orig=False,
               write_truthrandom=True,
Example #11
def evaluate_conv_multiclass(config_path: str, convid: str, config: dict):
    # ooh boy this code be stupid
    reader = loadDBReader(config_path)
    bc_convid = swap_speaker(convid)
    _correct_bcs = [(reader.getBcRealStartTime(utt),
                     bc_to_category(reader, uttInfo))
                    for utt, uttInfo in reader.get_backchannels(
                        list(reader.get_utterances(bc_convid)))]
    correct_bcs = [time for time, _ in _correct_bcs]
    correct_categories = [cat for _, cat in _correct_bcs]
    if 'random_baseline' in config:
        raise Exception("cant baseline multiclass")
    if 'sigma_ms' in config:
        if 'smoother' in config:
            raise Exception('conflicting options: smoother and sigma')
        smoother = {'type': 'gauss', 'sigma_ms': config['sigma_ms']}
    else:
        smoother = config['smoother']
    net_output = cached_smoothed_netout(config_path, convid, config["epoch"],
                                        Hashabledict(smoother))
    any_predictor = 1 - net_output[:, [0]]
    predicted_bcs = list(
        predict_bcs(reader,
                    any_predictor,
                    threshold=config['threshold'],
                    at_start=config['at_start']))
    predicted_count = len(predicted_bcs)
    predicted_inx = 0
    predicted_categories = [
        np.argmax(net_output[net_output.time_to_sample_index(time)][1:]) + 1
        for time in predicted_bcs
    ]
    if predicted_count > 0:
        for correct_bc in correct_bcs:
            while predicted_inx < predicted_count - 1 and nearer(
                    predicted_bcs[predicted_inx + 1],
                    predicted_bcs[predicted_inx], correct_bc):
                predicted_inx += 1
            if bc_is_within_margin_of_error(predicted_bcs[predicted_inx],
                                            correct_bc,
                                            config['margin_of_error']):
                predicted_bcs[predicted_inx] = correct_bc

    correct = {
        time: category
        for time, category in zip(correct_bcs, correct_categories)
    }
    predicted = {
        time: category
        for time, category in zip(predicted_bcs, predicted_categories)
    }

    if config['min_talk_len'] is not None:
        segs = list(
            get_monologuing_segments(reader,
                                     convid,
                                     min_talk_len=config['min_talk_len']))
        predicted_bcs = list(filter_ranges(predicted_bcs, segs))
        correct_bcs = list(filter_ranges(correct_bcs, segs))

    category_count = len(reader.categories.keys()) + 1
    confusion = np.zeros((category_count, category_count), dtype=np.int32)

    for time in correct_bcs:
        confusion[correct[time]][predicted.get(time, 0)] += 1
    for time in predicted_bcs:
        if time not in correct:
            confusion[0][predicted[time]] += 1
    # https://www.wikiwand.com/en/Precision_and_recall
    selected = set(predicted_bcs)
    relevant = set(correct_bcs)
    true_positives = selected & relevant
    false_positives = selected - relevant
    false_negatives = relevant - selected

    return convid, dict(selected=len(selected),
                        relevant=len(relevant),
                        true_positives=len(true_positives),
                        false_positives=len(false_positives),
                        false_negatives=len(false_negatives),
                        confusion_matrix=confusion)
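In the confusion matrix built above, rows hold the correct category (row 0 meaning no backchannel) and columns the predicted one, so per-class precision and recall can be read off the diagonal; a short numpy sketch with a hypothetical 3x3 matrix:

import numpy as np

confusion = np.array([[0, 3, 1],
                      [2, 10, 1],
                      [1, 2, 8]])  # rows = correct, columns = predicted

diag = np.diag(confusion).astype(float)
precision = diag / confusion.sum(axis=0)  # per predicted class (column sums)
recall = diag / confusion.sum(axis=1)     # per correct class (row sums)
print(precision, recall)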
Example #12
def evaluate_conv(config_path: str, convid: str, config: dict):
    reader = loadDBReader(config_path)
    if reader.config['extract_config'].get('categories', None) is not None:
        return evaluate_conv_multiclass(config_path, convid, config)
    bc_convid = swap_speaker(convid)
    correct_bcs = [
        reader.getBcRealStartTime(utt)
        for utt, uttInfo in reader.get_backchannels(
            list(reader.get_utterances(bc_convid)))
    ]

    if 'random_baseline' in config:
        predicted_bcs = random_predictor(reader, convid, config)
    else:
        if 'sigma_ms' in config:
            if 'smoother' in config:
                raise Exception('conflicting options: smoother and sigma')
            smoother = {
                'type': 'gauss',
                'sigma_ms': config['sigma_ms'],
                'cutoff_sigma': 2
            }
        else:
            smoother = config['smoother']
        net_output = cached_smoothed_netout(config_path, convid,
                                            config["epoch"],
                                            Hashabledict(smoother))
        net_output = 1 - net_output[:, [0]]
        predicted_bcs = list(
            predict_bcs(reader,
                        net_output,
                        threshold=config['threshold'],
                        at_start=config['at_start']))
    predicted_count = len(predicted_bcs)
    predicted_inx = 0
    if predicted_count > 0:
        for correct_bc in correct_bcs:
            while predicted_inx < predicted_count - 1 and nearer(
                    predicted_bcs[predicted_inx + 1],
                    predicted_bcs[predicted_inx], correct_bc):
                predicted_inx += 1
            if bc_is_within_margin_of_error(predicted_bcs[predicted_inx],
                                            correct_bc,
                                            config['margin_of_error']):
                predicted_bcs[predicted_inx] = correct_bc

    if config['min_talk_len'] is not None:
        segs = list(
            get_monologuing_segments(reader,
                                     convid,
                                     min_talk_len=config['min_talk_len']))
        predicted_bcs = filter_ranges(predicted_bcs, segs)
        correct_bcs = filter_ranges(correct_bcs, segs)
    # https://www.wikiwand.com/en/Precision_and_recall
    selected = set(predicted_bcs)
    relevant = set(correct_bcs)
    true_positives = selected & relevant
    false_positives = selected - relevant
    false_negatives = relevant - selected

    return convid, dict(selected=len(selected),
                        relevant=len(relevant),
                        true_positives=len(true_positives),
                        false_positives=len(false_positives),
                        false_negatives=len(false_negatives))
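precision_recall itself is not included in this listing; based on the count dictionary returned here, it presumably computes something along these lines:

def precision_recall(counts: dict) -> dict:
    # sketch of the assumed metric computation, not the original function
    tp = counts['true_positives']
    precision = tp / counts['selected'] if counts['selected'] else 0.0
    recall = tp / counts['relevant'] if counts['relevant'] else 0.0
    f1 = 2 * precision * recall / (precision + recall) if precision + recall else 0.0
    return dict(precision=precision, recall=recall, f1_score=f1)

print(precision_recall(dict(selected=40, relevant=50, true_positives=30,
                            false_positives=10, false_negatives=20)))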
Example #13
def cached_smoothed_netout(config_path, convid, epoch, smoother):
    return loadDBReader(config_path).features.smooth(convid, epoch, smoother)