Example #1
0
def main():
    """
    Summarize text read from stdin with a pre-trained model.

    Creates a temporary file for the given input which is
    used to create a dataset, that is then evaluated on the given model.
    The generated summary is printed to standard out.
    """
    args, unknown_args = prepare_arg_parser().parse_known_args()
    model_file = args.model_file

    with suppress_stdout_stderr():
        model, _optimizer, vocab, _stats, cfg = train.load_model(
            model_file, unknown_args
        )

    # mkstemp() returns an OPEN OS-level file descriptor alongside the path;
    # close it immediately so it is not leaked (the path is re-opened below
    # with a regular file object).
    fd, filename = tempfile.mkstemp()
    os.close(fd)
    try:
        with open(filename, "a") as f:
            input_ = sys.stdin.read()
            article = preprocess.parse(input_)
            # Dataset expects "<article>\t<summary>" lines; the stub summary
            # only satisfies the file format and is never used.
            print(f"{article}\tSUMMARY_STUB", file=f)

        with suppress_stdout_stderr():
            dataset = Dataset(filename, vocab, cfg)

        batch = next(dataset.generator(1, cfg.pointer))

        # don't enforce any min lengths (useful for short cmdline summaries)
        setattr(cfg, "min_summary_length", 1)
        bs = BeamSearch(model, cfg=cfg)
        summary = evaluate.batch_to_text(bs, batch)[0]
        print(f"SUMMARY:\n{summary}")
    finally:
        os.remove(filename)
def main():
    """
    Run evaluation for a model on a test file of given set size.
    The model will by default run 50 samples.

    Prints rouge variance of rouge scores and validation score to console,
    as well as the avg. set validation time.
    """
    args, unknown_args = prepare_arg_parser().parse_known_args()
    model_file = args.model_file
    test_file = args.test_file
    set_size = args.set_size
    samples = args.samples

    with suppress_stdout_stderr():
        model, _optimizer, vocab, _stats, cfg = train.load_model(
            model_file, unknown_args)

    model.eval()
    dataset = Dataset(test_file, vocab, cfg)

    # One entry per evaluation run for each metric.
    r1_scores, r2_scores, rl_scores = [], [], []
    validation_scores = []
    times = []

    for step in range(samples):
        print("Step:", step)
        start = time.time()
        with suppress_stdout_stderr():
            scores = evaluate.evaluate(
                model, dataset, cfg, limit=set_size, shuffle=True)
        r1_scores.append(scores["rouge-1"]["f"] * 100)
        r2_scores.append(scores["rouge-2"]["f"] * 100)
        rl_scores.append(scores["rouge-l"]["f"] * 100)
        validation_scores.append(calc_validation_score(scores))
        times.append(time.time() - start)

    print(f"Test File: {test_file}, Set Size: {set_size}, Samples: {samples}")

    # Population variance across the sampled runs, mean of wall-clock times.
    r1_v = pvariance(r1_scores)
    r2_v = pvariance(r2_scores)
    rl_v = pvariance(rl_scores)
    valid_v = pvariance(validation_scores)
    avg_time = mean(times)

    print(
        "r1_variance,r2_variance,rl_variance,validation_variance,avg_time_for_set"
    )
    print("%.2f,%.2f,%.2f,%.2f,%d" % (r1_v, r2_v, rl_v, valid_v, avg_time))
Example #3
0
def continous():
    MODELDIR = "pocketsphinx/model"
    DATADIR = "pocketsphinx/test/data"

    # Create a decoder with certain model
    config = Decoder.default_config()
    config.set_string('-hmm', os.path.join(MODELDIR, 'en-us/en-us'))
    config.set_string('-lm', os.path.join(MODELDIR, 'en-us/en-us.lm.bin'))
    config.set_string('-dict', os.path.join(MODELDIR, 'en-us/cmudict-en-us.dict'))
     # Decode streaming data.
    try:
        with suppress_stdout_stderr():
            decoder = Decoder(config)
    except RuntimeError:
        time.sleep(1) # try waiting and trying again
        decoder = Decoder(config)

    looper = loop_decode(decoder=decoder)
    x = looper.next()
    assert(x == None)
    for y in xrange(0,10):
        x = looper.next()

        print "Guess:", x.hypothesis.hypstr
        print "Elapsed (milliseconds): ", x.elasped_time / 1000.0

        print "="*50
Example #4
0
def loop_decode(seconds = 3, decoder = None):
    CHUNK = 1024
    FORMAT = pyaudio.paInt16
    CHANNELS = 1
    RATE = 16000
    RECORD_SECONDS = seconds
    cu = None
    try:
        while True:
            yield cu
            with suppress_stdout_stderr():
                p = pyaudio.PyAudio()
                stream = p.open(format=FORMAT, channels=CHANNELS, rate=RATE, input=True, frames_per_buffer=CHUNK)
            print("* recording")
            frames = []
            n_frames = int(RATE / CHUNK * RECORD_SECONDS)
            n1 = datetime.datetime.now()
            decoder.start_utt()
            for i in range(0, n_frames):
                data = stream.read(CHUNK)
                with suppress_stdout_stderr():
                    if data:
                        decoder.process_raw(data, False, False)
                frames.append(data)
                if not i%10:
                    print i
            print("* done recording")
            decoder.end_utt()
            
            stream.stop_stream()
            stream.close()
            p.terminate()
            with suppress_stdout_stderr():
                cu = CandiateUtterance()
                cu.hypothesis = decoder.hyp()
                cu.hypothesis_segments = [seg.word for seg in decoder.seg()]
                cu.nbest = zip(range(10), decoder.nbest())
            n2 = datetime.datetime.now()
            cu.elasped_time = (n2-n1).microseconds
            
    except GeneratorExit:
        #finished
        pass
Example #5
0
def fb_fit_predict(station_measure, df, scale, changepoints, future, verbose_level: int=500):
    """
    Fit a Prophet model for one (station, measure) pair and forecast it.

    :param station_measure: (stationId, measure-column-name) tuple
    :param df: dataframe holding 'stationId', 'utc_time' and measure columns
    :param scale: Prophet changepoint_prior_scale
    :param changepoints: Prophet n_changepoints
    :param future: dataframe of future timestamps to predict for
    :param verbose_level: progress is printed when >= 2
    :returns: [station, measure, array of predicted 'yhat' values]
    """
    station, measure = station_measure
    if verbose_level >= 2:
        print('fitting model on {} {}'.format(station, measure))

    prophet = Prophet(changepoint_prior_scale=scale,
                      n_changepoints=changepoints)

    # Select this station's series and rename columns to Prophet's expected
    # 'ds' (timestamp) / 'y' (value) schema.
    series = df.loc[df.stationId == station][['utc_time', measure]].copy()
    series.columns = ['ds', 'y']

    # Prophet logs heavily during fitting; keep the console quiet.
    with suppress_stdout_stderr():
        prophet.fit(series)

    prediction = prophet.predict(future)
    return [station, measure, prediction['yhat'].values]
    def get_scores(self, hypothesis, references):
        """
        Compute ROUGE scores for a batch of summaries.

        Returns a dict of the form::

            {"rouge-1": {"p": 0.5, "r": 0.3, "f": 0.4},
             "rouge-2": {...},
             "rouge-l": {...}}

        :param hypothesis: A list of generated summaries
        :param references: A list of corresponding reference summaries
        :returns: A dictionary with the rouge scores
        """
        # Both scorer backends operate on sentence-split text.
        hyps_split = [self.split_sentences(h) for h in hypothesis]
        refs_split = [self.split_sentences(r) for r in references]

        if self.use_python:
            return self._get_scores_python(hyps_split, refs_split)

        # The perl scorer writes noise to stdout/stderr; silence it.
        with suppress_stdout_stderr():
            return self._get_scores_perl(hyps_split, refs_split)