示例#1
0
            # Decoding
            if parameters['crf']:
                y_preds = np.array(f_eval(*input))[1:-1]
            else:
                y_preds = f_eval(*input).argmax(axis=1)
            y_preds = [model.id_to_tag[y_pred] for y_pred in y_preds]
            # Output tags in the IOB2 format
            if parameters['tag_scheme'] == 'iobes':
                y_preds = iobes_iob(y_preds)
            # Write tags
            assert len(y_preds) == len(words)

            if opts.outputFormat == 'json':
                f_output.write(
                    json.dumps({
                        "text": ' '.join(words),
                        "ranges": iob_ranges(y_preds)
                    }))
            else:
                f_output.write('%s\n' %
                               ' '.join('%s%s%s' % (w, opts.delimiter, y)
                                        for w, y in zip(words_ini, y_preds)))
        else:
            f_output.write('\n')
        count += 1
        if count % 100 == 0:
            print count

print '---- %i lines tagged in %.4fs ----' % (count, time.time() - start)
f_output.close()
示例#2
0
            words = line.rstrip().split()
            # Prepare input
            sentence = prepare_sentence(words, word_to_id, char_to_id,
                                        lower=parameters['lower'])
            input = create_input(sentence, parameters, False)
            # Decoding
            if parameters['crf']:
                y_preds = np.array(f_eval(*input))[1:-1]
            else:
                y_preds = f_eval(*input).argmax(axis=1)
            y_preds = [model.id_to_tag[y_pred] for y_pred in y_preds]
            # Output tags in the IOB2 format
            #if parameters['tag_scheme'] == 'iobes':
                #y_preds = iobes_iob(y_preds)
            # Write tags
            assert len(y_preds) == len(words)
            
            if opts.outputFormat == 'json':
                f_output.write(json.dumps({ "text": ' '.join(words), "ranges": iob_ranges(y_preds) }))
            else:
                f_output.write('%s\n' % ' '.join('%s%s%s' % (w, opts.delimiter, y)
                                                 for w, y in zip(words_ini, y_preds)))
        else:
            f_output.write('\n')
        count += 1
        if count % 100 == 0:
            print count

print '---- %i lines tagged in %.4fs ----' % (count, time.time() - start)
f_output.close()
def run_tagging(model,
                f_eval,
                parameters,
                word_to_id,
                char_to_id,
                tag_to_id,
                opts_input="",
                opts_output="",
                opts_delimiter="__",
                opts_outputFormat=""):
    # Check parameters validity
    assert opts_delimiter
    assert os.path.isfile(opts_input)

    #set environment to use gpu

    f_output = codecs.open(opts_output, 'w', 'utf-8')
    start = time.time()
    logger.info('Tagging...')
    with codecs.open(opts_input, 'r', 'utf-8') as f_input:
        count = 0
        for line in f_input:
            words_ini = line.rstrip().split()
            if line:
                # Lowercase sentence
                if parameters['lower']:
                    line = line.lower()
                # Replace all digits with zeros
                if parameters['zeros']:
                    line = zero_digits(line)
                words = line.rstrip().split()
                # Prepare input
                sentence = prepare_sentence(words,
                                            word_to_id,
                                            char_to_id,
                                            lower=parameters['lower'])
                input = create_input(sentence, parameters, False)
                # Decoding
                if parameters['crf']:
                    y_preds = np.array(f_eval(*input))[1:-1]
                else:
                    y_preds = f_eval(*input).argmax(axis=1)
                y_preds = [model.id_to_tag[y_pred] for y_pred in y_preds]
                # Output tags in the IOB2 format
                if parameters['tag_scheme'] == 'iobes':
                    y_preds = iobes_iob(y_preds)
                # Write tags
                assert len(y_preds) == len(words)

                if opts_outputFormat == 'json':
                    f_output.write(
                        json.dumps({
                            "text": ' '.join(words),
                            "ranges": iob_ranges(y_preds)
                        }))
                else:
                    #logger.info( "write out tags..."
                    f_output.write(
                        '%s\n' % ' '.join('%s%s%s' % (w, opts_delimiter, y)
                                          for w, y in zip(words_ini, y_preds)))
            else:
                f_output.write('\n')
            count += 1
            # if count % 100 == 0:
            #     logger.info( count

    logger.info('---- %i lines tagged in %.4fs ----' %
                (count, time.time() - start))
    f_output.close()
    logger.info(opts_output)
    logger.info("")
    return opts_output + " has been tagged!"


# def main():
#     logger.info( "executed"

# if __name__ == '__main__':
#     main()
示例#4
0
文件: tagger.py 项目: glample/tagger
            words = line.rstrip().split()
            # Prepare input
            sentence = prepare_sentence(words, word_to_id, char_to_id,
                                        lower=parameters['lower'])
            input = create_input(sentence, parameters, False)
            # Decoding
            if parameters['crf']:
                y_preds = np.array(f_eval(*input))[1:-1]
            else:
                y_preds = f_eval(*input).argmax(axis=1)
            y_preds = [model.id_to_tag[y_pred] for y_pred in y_preds]
            # Output tags in the IOB2 format
            if parameters['tag_scheme'] == 'iobes':
                y_preds = iobes_iob(y_preds)
            # Write tags
            assert len(y_preds) == len(words)
            
            if opts.outputFormat == 'json':
                f_output.write(json.dumps({ "text": ' '.join(words), "ranges": iob_ranges(y_preds) }))
            else:
                f_output.write('%s\n' % ' '.join('%s%s%s' % (w, opts.delimiter, y)
                                                 for w, y in zip(words_ini, y_preds)))
        else:
            f_output.write('\n')
        count += 1
        if count % 100 == 0:
            print count

print '---- %i lines tagged in %.4fs ----' % (count, time.time() - start)
f_output.close()