Пример #1
0
def phrase2vec(txt, topn):
    """Return the phrase model's nearest neighbours for ``txt`` as JSON text.

    ``txt`` is split on whitespace and only the tokens present in the phrase
    model's vocabulary are used for the query.

    The model is loaded on first use and cached in the module-level global
    ``phrase2vector``.

    :param txt: whitespace-separated query text.
    :param topn: forwarded to ``most_similar`` as its ``topn`` keyword.
    :return: ``json.dumps`` of the ``most_similar`` result (with
        ``ensure_ascii=False``), or ``"[]"`` when no token of ``txt`` is in
        the vocabulary.
    """
    global phrase2vector
    if phrase2vector is None:
        # Imported lazily, only when the model actually has to be loaded.
        from pynlpini.word2vec.word2vector import Word2Vector
        phrase2vector = Word2Vector.get_phrase_model()
    # Test membership against the vocab mapping itself.  The previous code
    # compared against the *uncalled* ``vocab.keys`` method, which raises
    # TypeError as soon as the filter is evaluated.
    vocab = phrase2vector.vocab
    words = [word for word in txt.split() if word in vocab]
    if not words:
        # Nothing in-vocabulary to look up: answer with an empty JSON list
        # instead of sending an empty query to the model.
        return json.dumps([], ensure_ascii=False)
    return json.dumps(phrase2vector.most_similar(words, topn=topn), ensure_ascii=False)
Пример #2
0
def word2vec(txt, topn):
    """Return the word model's nearest neighbours for ``txt`` as JSON text.

    ``txt`` is split on whitespace and every resulting token is handed to the
    model's ``most_similar`` unfiltered.  The model is loaded on first use and
    cached in the module-level global ``word2vector``.

    :param txt: whitespace-separated query text.
    :param topn: forwarded to ``most_similar`` as its ``topn`` keyword.
    :return: ``json.dumps`` of the ``most_similar`` result, encoded with
        ``ensure_ascii=False``.
    """
    global word2vector
    if word2vector is None:
        # Imported lazily, only when the model actually has to be loaded.
        from pynlpini.word2vec.word2vector import Word2Vector
        word2vector = Word2Vector.get_word_model()
    tokens = txt.split()
    neighbours = word2vector.most_similar(tokens, topn=topn)
    return json.dumps(neighbours, ensure_ascii=False)
Пример #3
0
def phrase2vec(txt, topn):
    """Look up phrases similar to the in-vocabulary tokens of ``txt``.

    The text is split on whitespace; tokens missing from the phrase model's
    vocabulary are dropped before the query.  The model is created on the
    first call and kept in the module-level global ``phrase2vector``.

    :param txt: whitespace-separated query text.
    :param topn: passed through to ``most_similar`` as ``topn``.
    :return: JSON text (``ensure_ascii=False``) of the ``most_similar``
        result; ``"[]"`` if none of the tokens is in the vocabulary.
    """
    global phrase2vector
    if phrase2vector is None:
        # Deferred import: only needed when the model must be loaded.
        from pynlpini.word2vec.word2vector import Word2Vector
        phrase2vector = Word2Vector.get_phrase_model()
    # ``vocab.keys`` (without the call) is a method object, so the old
    # ``word in ...vocab.keys`` check raised TypeError; check the mapping.
    known = phrase2vector.vocab
    words = [word for word in txt.split() if word in known]
    if not words:
        # No usable token: return an empty JSON list rather than querying
        # the model with nothing.
        return json.dumps([], ensure_ascii=False)
    return json.dumps(phrase2vector.most_similar(words, topn=topn),
                      ensure_ascii=False)
Пример #4
0
def word2vec(txt, topn):
    """Query the word model with the whitespace-separated tokens of ``txt``.

    All tokens are forwarded to ``most_similar`` as-is (no vocabulary
    filtering is done here).  The model is created on the first call and kept
    in the module-level global ``word2vector``.

    :param txt: whitespace-separated query text.
    :param topn: passed through to ``most_similar`` as ``topn``.
    :return: JSON text of the ``most_similar`` result, produced with
        ``ensure_ascii=False``.
    """
    global word2vector
    if word2vector is None:
        # Deferred import: only needed when the model must be loaded.
        from pynlpini.word2vec.word2vector import Word2Vector
        word2vector = Word2Vector.get_word_model()
    query = txt.split()
    similar = word2vector.most_similar(query, topn=topn)
    encoded = json.dumps(similar, ensure_ascii=False)
    return encoded
Пример #5
0
import json
import os

from flask import Flask, request, abort
from pynlpini.word2vec.word2vector import Word2Vector
from pynlpini.keyword.keyword_extractor import KeywordExtractor
from pynlpini.seg.seg_tagger import SegTagger
from pipe import *
from collections import defaultdict


def _load_tag_pairs(path):
    """Read ``path`` and map the first field of each line to its second.

    Lines are decoded as UTF-8 and stripped; blank lines are skipped.  Fields
    are separated by whitespace.  When the same first field occurs on several
    lines, the last occurrence wins.
    """
    pairs = {}
    with open(path) as tag_file:
        for raw_line in tag_file:
            line = raw_line.decode("utf-8").strip()
            if not line:
                continue
            # Split once and reuse the fields.
            fields = line.split()
            pairs[fields[0]] = fields[1]
    return pairs


app = Flask(__name__)
# Resolve to an absolute directory: ``os.path.dirname(__file__)`` is "" when
# the script is started from its own directory, which used to turn
# ``base_dir + "/tag.csv"`` into the filesystem-root path "/tag.csv".
base_dir = os.path.dirname(os.path.abspath(__file__))

phrase2vector = Word2Vector.get_phrase_model()
phrase_vocabs = phrase2vector.vocab.keys()
keyword_extractor = KeywordExtractor(SegTagger())
# Maps the second field of a tag.csv line to the set of first fields that
# were paired with it.
tag_dict = defaultdict(set)

tmp_dict = _load_tag_pairs(os.path.join(base_dir, "tag.csv"))
for k, v in tmp_dict.iteritems():
    # Keep a pair only when both sides are known to the phrase model.
    # Membership is checked on the vocab mapping rather than on the
    # ``phrase_vocabs`` key collection to avoid a linear scan per lookup
    # (assumes ``vocab`` is dict-like, as its ``keys()`` call suggests).
    if k in phrase2vector.vocab and v in phrase2vector.vocab:
        tag_dict[v].add(k)


@app.route('/tag', methods=['GET', 'POST'])
def tag():