Пример #1
0
 def _constructTrie(self):
     """Build a trie holding the lower-cased SQL vocabulary.

     For each of the keyword, function, data-type and operator enums in
     ``lh``, the string form of every member is lower-cased and handed to
     ``insertList`` in a single call, together with the matching entry of
     ``self.terminate_map``.

     :return: the populated ``Trie``.
     """
     vocabulary = Trie()

     def register(enum_cls, category):
         # One tuple of lower-cased member strings per enum class.
         words = tuple(
             str(member).lower() for member in enum_cls.__members__.values())
         vocabulary.insertList(words, self.terminate_map[category])

     # Categories are spelled out one call at a time, in a fixed order.
     register(lh.SQLKeyword, 'Keyword')
     register(lh.SQLFunction, 'Function')
     register(lh.SQLDataType, 'DataType')
     register(lh.SQLOperator, 'Operator')
     return vocabulary
Пример #2
0
    def __init__(self, lower=True):
        """Create an empty trie together with its two lookup tables.

        :param lower: stored unchanged as ``self.lower``.
        """
        self.lower = lower

        # Fresh trie, the "word list -> type" table and the
        # "word list -> id" table; '<UNK>' is pre-registered with id 0.
        self.trie, self.ent2type, self.ent2id = Trie(), {}, {'<UNK>': 0}
        self.space = ''
class LexiconBuilder(object):
    """Wrap a ``Trie`` that stores a set of tokens under one shared value."""

    def __init__(self, tokens, value):
        """Create the trie and load ``tokens`` into it right away.

        :param tokens: tokens forwarded to :meth:`build`.
        :param value: value forwarded to :meth:`build`.
        """
        self.lexicon = Trie()
        # Original note: the pkl is initialised every time.
        self.build(tokens, value)

    def build(self, tokens, value):
        """Add every token to the trie, all with the same value.

        :param tokens: word list (the original example lists the words for
            "television set" and "television").
        :param value: ontology label for the words, e.g. 'tool' or
            'show_name'.
        :return: None
        """
        for entry in tokens:
            self.lexicon.add(entry, value)

    def show(self, num):
        """Delegate to ``self.lexicon.show(num)`` and return its result."""
        listing = self.lexicon.show(num)
        return listing

    def search(self, word):
        """Look ``word`` up in the trie with values on and verbosity off."""
        options = {'value_flag': True, 'verbose': False}
        return self.lexicon.search(word, **options)
Пример #4
0
class Gazetteer:
    """Entity dictionary made of a trie plus type and id lookup tables.

    Both tables are keyed by the entity's words joined with ``self.space``.
    """

    def __init__(self, lower):  # original note: lower = False
        """Create an empty gazetteer.

        :param lower: when truthy, every word is lower-cased before it is
            inserted or looked up.
        """
        self.trie = Trie()
        self.ent2type = {}  # joined word list -> type
        self.ent2id = {"<UNK>": 0}  # joined word list -> id
        self.lower = lower
        self.space = ""

    def _normalize(self, word_list):
        """Return the words lower-cased if ``self.lower``, else unchanged."""
        if self.lower:
            return [word.lower() for word in word_list]
        return word_list

    def enumerateMatchList(self, word_list):
        """Return ``self.trie.enumerateMatch`` for the normalised words."""
        return self.trie.enumerateMatch(
            self._normalize(word_list), self.space)

    def insert(self, word_list, source):
        """Add an entity to the trie and register its type and id.

        An entity that is already registered keeps its first type and id.

        :param word_list: the words of the entity.
        :param source: value recorded in ``ent2type`` for a new entity.
        """
        words = self._normalize(word_list)
        self.trie.insert(words)
        key = self.space.join(words)
        self.ent2type.setdefault(key, source)
        # New ids are handed out sequentially from the current table size.
        self.ent2id.setdefault(key, len(self.ent2id))

    def searchId(self, word_list):
        """Return the id of the entity, or the ``"<UNK>"`` id if unknown."""
        key = self.space.join(self._normalize(word_list))
        if key in self.ent2id:
            return self.ent2id[key]
        return self.ent2id["<UNK>"]

    def searchType(self, word_list):
        """Return the type recorded for the entity.

        :raises SystemExit: with status 1 when the entity is unknown, after
            printing an error message.
        """
        key = self.space.join(self._normalize(word_list))
        if key in self.ent2type:
            return self.ent2type[key]
        print(
            "Error in finding entity type at gazetteer.py, exit program! String:",
            key)
        # A failed lookup is fatal: exit with a non-zero status so callers
        # and shells can tell it apart from a normal termination.
        raise SystemExit(1)

    def size(self):
        """Return the number of registered entities."""
        return len(self.ent2type)

    def clean(self):
        """Reset the gazetteer to the empty state created by ``__init__``."""
        self.trie = Trie()
        self.ent2type = {}
        self.ent2id = {"<UNK>": 0}
        self.space = ""
Пример #5
0
class Lexicon(object):
    """Entity lexicon made of a trie plus type and id lookup tables.

    Both tables are keyed by the entity's words joined with ``self.space``.
    """

    def __init__(self, lower=True):
        """Create an empty lexicon.

        :param lower: when truthy, every word is lower-cased before it is
            inserted or looked up.
        """
        self.lower = lower

        self.trie = Trie()
        self.ent2type = {}  # word list to type
        self.ent2id = {'<UNK>': 0}  # word list to id
        self.space = ''

    def _normalize(self, word_list):
        """Return the words lower-cased if ``self.lower``, else unchanged."""
        if self.lower:
            return [word.lower() for word in word_list]
        return word_list

    def enumerate_match_list(self, word_list):
        """Return ``self.trie.enumerate_match`` for the normalised words."""
        return self.trie.enumerate_match(
            self._normalize(word_list), self.space)

    def insert(self, word_list, source):
        """Add an entity to the trie and register its type and id.

        An entity that is already registered keeps its first type and id.

        :param word_list: the words of the entity.
        :param source: value recorded in ``ent2type`` for a new entity.
        """
        words = self._normalize(word_list)
        self.trie.insert(words)
        key = self.space.join(words)
        self.ent2type.setdefault(key, source)
        # New ids are handed out sequentially from the current table size.
        self.ent2id.setdefault(key, len(self.ent2id))

    def search_id(self, word_list):
        """Return the id of the entity, or the ``'<UNK>'`` id if unknown."""
        key = self.space.join(self._normalize(word_list))
        if key in self.ent2id:
            return self.ent2id[key]
        return self.ent2id['<UNK>']

    def search_type(self, word_list):
        """Return the type recorded for the entity.

        :raises SystemExit: with status 1 when the entity is unknown, after
            printing an error message.
        """
        key = self.space.join(self._normalize(word_list))
        if key in self.ent2type:
            return self.ent2type[key]
        print('Error in finding entity type at lexicon.py, exit programming')
        # A failed lookup is fatal: exit with a non-zero status so callers
        # and shells can tell it apart from a normal termination.
        raise SystemExit(1)

    def size(self):
        """Return the number of registered entities."""
        return len(self.ent2type)

    def clean(self):
        """Reset the lexicon to the empty state created by ``__init__``."""
        self.trie = Trie()
        self.ent2type = {}
        # Keep the '<UNK>' entry: search_id falls back to it for unknown
        # entities, so dropping it would make that fallback raise KeyError.
        self.ent2id = {'<UNK>': 0}
        self.space = ''
Пример #6
0
class Gazetteer:
    """Entity dictionary made of a trie plus type and id lookup tables.

    Both tables are keyed by the entity's words joined with ``self.space``.
    """

    def __init__(self, lower):
        """Create an empty gazetteer.

        :param lower: when truthy, every word is lower-cased before it is
            inserted or looked up.
        """
        self.trie = Trie()
        self.ent2type = {}  # joined word list -> type
        self.ent2id = {"<UNK>": 0}  # joined word list -> id
        self.lower = lower
        self.space = ""

    def _normalize(self, word_list):
        """Return the words lower-cased if ``self.lower``, else unchanged."""
        if self.lower:
            return [word.lower() for word in word_list]
        return word_list

    def enumerateMatchList(self, word_list):
        """Return ``self.trie.enumerateMatch`` for the normalised words."""
        return self.trie.enumerateMatch(
            self._normalize(word_list), self.space)

    def insert(self, word_list, source):
        """Add an entity to the trie and register its type and id.

        An entity that is already registered keeps its first type and id.

        :param word_list: the words of the entity.
        :param source: value recorded in ``ent2type`` for a new entity.
        """
        words = self._normalize(word_list)
        self.trie.insert(words)
        key = self.space.join(words)
        self.ent2type.setdefault(key, source)
        # New ids are handed out sequentially from the current table size.
        self.ent2id.setdefault(key, len(self.ent2id))

    def searchId(self, word_list):
        """Return the id of the entity, or the ``"<UNK>"`` id if unknown."""
        key = self.space.join(self._normalize(word_list))
        if key in self.ent2id:
            return self.ent2id[key]
        return self.ent2id["<UNK>"]

    def searchType(self, word_list):
        """Return the type recorded for the entity.

        :raises SystemExit: with status 1 when the entity is unknown, after
            printing an error message.
        """
        key = self.space.join(self._normalize(word_list))
        if key in self.ent2type:
            return self.ent2type[key]
        print(
            "Error in finding entity type at gazetteer.py, exit program! String:",
            key)
        # A failed lookup is fatal: exit with a non-zero status so callers
        # and shells can tell it apart from a normal termination.
        raise SystemExit(1)

    def size(self):
        """Return the number of registered entities."""
        return len(self.ent2type)
Пример #7
0
 def __init__(self, lower):
     """Create an empty trie together with its two lookup tables.

     :param lower: stored unchanged as ``self.lower``.
     """
     # Fresh trie, the "word list -> type" table and the
     # "word list -> id" table; "<UNK>" is pre-registered with id 0.
     self.trie, self.ent2type, self.ent2id = Trie(), {}, {"<UNK>": 0}
     self.lower, self.space = lower, ""
 def __init__(self, tokens, value):
     """Create an empty ``Trie`` and populate it through ``self.build``.

     :param tokens: passed unchanged to ``self.build``.
     :param value: passed unchanged to ``self.build``.
     """
     self.lexicon = Trie()
     self.build(tokens, value)  # original note: the pkl is initialised every time
Пример #9
0
 def clean(self):
     """Discard all stored state and start again from an empty trie.

     Both lookup tables are replaced; the id table keeps its "<UNK>"
     entry with id 0.
     """
     self.trie, self.ent2type = Trie(), {}
     self.ent2id, self.space = {"<UNK>": 0}, ""
Пример #10
0
import os
from flask_restful import Resource
from flask import request
from flask import jsonify
from utils.utils import json_reader, get_movie_by_index, build_movie_hash_table
from utils.trie import Trie
from utils.search import MovieNameStrategy, GenreStrategy, ShowingTimeStrategy, SearchMovie

# Parent of the directory that contains this file.
parent_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))

# The mock movie data is read once, when this module is imported.
json_path = f'{parent_dir}/mockData/movies.json'
movies = json_reader(json_path)

# Module-level trie, filled from the loaded movies just below.
trie = Trie()

Trie.build_movie_trie(trie, movies)

# NOTE(review): presumably a lookup table keyed per movie — confirm against
# build_movie_hash_table in utils.utils.
movie_hash_table = build_movie_hash_table(movies)


class MovieListResource(Resource):
    """Flask-RESTful resource whose GET handler reads movie filter arguments."""

    def get(self):
        """Read the ``genre``, ``name`` and ``showing_time`` query arguments.

        NOTE(review): in this excerpt the method ends right after the
        no-filter branch; no return statement and no use of ``data_obj``
        is visible here. Confirm against the full source.
        """
        genre = request.args.get('genre')
        name = request.args.get('name')
        showing_time = request.args.get('showing_time')

        # The three filters collected into one mapping.
        data_obj = {'name': name, 'genre': genre, 'showing_time': showing_time}

        # No filter supplied: the result is the full module-level movie list.
        if name is None and genre is None and showing_time is None:
            result = movies
Пример #11
0
 def __init__(self, lower):
     """Initialise the trie, both lookup tables and the case flag.

     :param lower: stored unchanged as ``self.lower``.
     """
     self.trie = Trie()
     # "word list -> type" starts empty; "word list -> id" starts with
     # the "<UNK>" entry at id 0.
     self.ent2type, self.ent2id = {}, {"<UNK>": 0}
     self.lower, self.space = lower, ""
Пример #12
0
 def clean(self):
     """Discard all stored state and start again from an empty trie.

     The id table is reset to hold only the '<UNK>' entry with id 0, so
     code that reads ``self.ent2id['<UNK>']`` keeps working after a reset.
     """
     self.trie = Trie()
     self.ent2type = {}
     # An empty dict here would drop the '<UNK>' entry and let the first
     # newly inserted entity take id 0.
     self.ent2id = {'<UNK>': 0}
     self.space = ''