示例#1
0
 def __init__(self, test_item_table=None):
     """Create ``self.items``, a string trie of item entries.

     When ``test_item_table`` is truthy it is unpacked as keyword arguments
     straight into the trie. Otherwise every ``(item_id, item_name)`` pair
     returned by ``db.get_all_items()`` is loaded, keyed by the lower-cased
     name (a later duplicate name overwrites an earlier one).
     """
     if test_item_table:
         self.items = pytrie.StringTrie(**test_item_table)
         return

     ids_by_name = {
         name.lower(): item_id for item_id, name in db.get_all_items()
     }
     self.items = pytrie.StringTrie(**ids_by_name)
示例#2
0
 def setUp(self):
     """Fill self._trie and self._store with 111 entries.

     Ten top-level tokens, ten child tokens under each of them, and one
     blessed version are written to both the trie and the store.
     """
     self._trie = pytrie.StringTrie()
     self._store = EphemeralStore()
     blessed_version = BlessedVersion(MasterHandler._BLESSED_VERSION,
                                      MasterHandler._MASTER_OWNER)

     def register(token):
         # Keep the trie and the store in step for every token created.
         self._trie[token.name] = token
         self._store.commit_tokens(updates=[token])

     for parent in range(10):
         register(Token(blessed_version.advance_version(),
                        '/some_dir/some_token_%d' % parent,
                        priority=parent,
                        data='some_data_%d' % parent))
         for child in range(10):
             register(Token(
                 blessed_version.advance_version(),
                 '/some_dir/some_token_%d/some_other_token_%d' % (parent,
                                                                  child),
                 priority=child,
                 data='some_data_%d_%d' % (parent, child)))

     # Advance once more before storing the blessed version itself.
     blessed_version.advance_version()
     self._trie[MasterHandler._BLESSED_VERSION] = blessed_version
     self._store.commit_tokens(updates=[blessed_version])
     self._check_version_uniqueness()
示例#3
0
    def __init__(self):
        """Create empty lookup structures and load the bundled dictionaries.

        ``buckets`` and ``words`` start as empty dicts and ``wilds`` as an
        empty string trie; both ``.dic`` files under ``../dictionaries``
        (relative to this module) are then passed to ``_load_dict`` in order.
        """
        self.buckets = {}
        self.words = {}
        self.wilds = pytrie.StringTrie()

        dictDir = abspath(join(dirname(__file__), "../dictionaries"))
        for path_pattern in ("%s/LIWC2007_English100131.dic",
                             "%s/tiptap.dic"):
            self._load_dict(path_pattern % dictDir)
示例#4
0
def getStopWords():
    """Load the stop-word file into the module-level ``stop_list`` trie.

    Reads the file ``finalstoplist`` from the current working directory,
    splits its contents on every single whitespace character or ``+`` sign,
    and stores each resulting piece as a key (value ``0``) in a fresh
    ``pytrie.StringTrie`` bound to the global ``stop_list``.

    Raises:
        OSError: if ``finalstoplist`` cannot be opened or read.
    """
    global stop_list
    # The context manager closes the file even if reading fails.
    with open("finalstoplist", "r") as f:
        contents = f.read()

    # NOTE(review): ``[\s+]`` is a character class (one whitespace char OR a
    # literal '+'), so runs of whitespace yield empty-string keys. If the
    # intent was "one or more whitespace", the pattern should be ``\s+`` --
    # left unchanged here to preserve the existing stop list contents.
    stop_list = pytrie.StringTrie()
    for word in re.split(r'[\s+]', contents):
        stop_list[word] = 0
    def init_trie(self):
        """Initialize trie with current data in geodata table.

        Every row of the ``geodata`` table is indexed in ``self.d`` (which
        must already exist) under a whitespace-free key derived from its
        ``type``:

        * ``hotel``  -> name with every "hotel" removed + city, plus a second
          key built as city + name
        * ``zip``    -> name + city
        * ``city``   -> name + country
        * ``street`` -> name

        Rows of any other type are skipped. The first row seen for a given
        key wins. Bookkeeping fields (``id``, ``timeStampAdded``,
        ``index_country`` and, except for zip rows, ``country``) are removed
        from the stored row. Finally ``self.trie`` is built from ``self.d``
        and ``self.d`` is cleared and deleted.
        """
        rethink_conn = rethinkdb.connect(db='hotel_cosmos', host=os.environ['RETHINK_IP'], port=28015, user="******",
                                         password=os.environ['RETHINK_PASS'])
        try:
            # Materialise the result so the connection can be released
            # before the (purely in-memory) indexing below.
            geodata = list(rethinkdb.table('geodata').run(rethink_conn))
        finally:
            # close connection to rethinkdb, even if the query failed
            rethink_conn.close()

        whitespace = re.compile(r'\s+')

        for row in geodata:
            row_type = row['type']
            reverse_hotel_key = ''
            if row_type == 'hotel':
                # The name is already lower-cased, so a plain replace removes
                # every occurrence. (Previously re.IGNORECASE was passed as
                # re.sub's positional ``count`` argument, capping the
                # substitution at two occurrences.)
                name = row['name'].lower().replace('hotel', '')
                city = row['city'].lower()
                key = translate(name + ' ' + city)
                reverse_hotel_key = translate(city + ' ' + name)
            elif row_type == 'zip':
                key = translate(row['name'].lower() + ' ' + row['city'].lower())
            elif row_type == 'city':
                key = translate(row['name'].lower() + ' ' + row['country'].lower())
            elif row_type == 'street':
                key = translate(row['name'].lower())
            else:
                # Unknown type: there is no key to index this row under.
                continue

            # remove unneeded data (after the keys above have been derived,
            # since city rows need 'country' to build their key)
            row.pop('id', None)
            row.pop('timeStampAdded', None)
            row.pop('index_country', None)
            if row_type != 'zip':
                row.pop('country', None)

            # Normalise BEFORE the membership test so the duplicate check
            # compares against the keys that are actually stored.
            self.d.setdefault(whitespace.sub('', key), row)
            if reverse_hotel_key:
                self.d.setdefault(whitespace.sub('', reverse_hotel_key), row)

        self.trie = pytrie.StringTrie(self.d)

        # delete dictionary
        self.d.clear()
        del self.d
示例#6
0
def native_load_data(path_to_data):
    """
    Build the bigram trie from an n-gram corpus and pickle it to disk.

    Each corpus row is read as ``freq``, ``first``, ``second``; the trie key
    is ``"<first> <second>"`` and the value is the 1-tuple ``(freq,)``.
    The trie is also written to ``pytrie.pkl`` in the working directory.

    :param path_to_data: path to the n-gram corpus
    :return: the trie, which also gets stored on the drive
    """
    with codecs.open(path_to_data, "r", encoding='utf-8',
                     errors='ignore') as corpus:
        grams = pd.read_table(corpus, names=["freq", "first", "second"])

    # Store every frequency wrapped in a single-element tuple.
    grams['freq'] = grams['freq'].apply(lambda count: (count, ))
    bigrams = grams['first'] + " " + grams['second']
    freq_by_bigram = dict(zip(bigrams, grams['freq'].values))

    bigram_trie = pytrie.StringTrie(freq_by_bigram)
    with open('pytrie.pkl', 'wb') as pickle_file:
        pickle.dump(bigram_trie, pickle_file, pickle.HIGHEST_PROTOCOL)
    return bigram_trie
示例#7
0
 def __init__(self):
     """Start with an empty ``points_by_id`` dict and an empty string trie."""
     self.points_by_id = dict()
     self.storage = pytrie.StringTrie()
示例#8
0
 def __init__(self, store):
     """Keep a reference to ``store`` and load its tokens.

     An empty trie and a lock are created first, then ``_load_tokens`` is
     called once all attributes are in place.
     """
     self._lock = threading.Lock()
     self._trie = pytrie.StringTrie()
     self._store = store
     self._load_tokens()
示例#9
0
 def trie(self):
     """Build a StringTrie mapping each entry of ``self.list`` to its index.

     Prints a start marker and the elapsed build time, then returns the trie.
     """
     began = time.time()
     print('pytrie ST start')
     indexed_entries = zip(self.list, range(len(self.list)))
     built = pytrie.StringTrie(indexed_entries)
     elapsed = time.time() - began
     print(f'pytrie ST time elapsed: {elapsed:.2f}s')
     return built
class SearchBox:
    """Free-text search over ``Employee_View`` driven by a trie of known terms.

    On construction the trie ``tr`` is filled from the database:

    * each distinct ``Employee.Location`` value -> ``['Location', value]``
    * column 1 of every ``profile`` row, and each comma-separated entry of
      column 2 -> ``['ProfileId', <column 0>]``
    * column 1 of every ``skillset`` row -> ``['SkillId', <column 0>]``

    ``search`` looks the words of a query up in that trie and turns the hits
    into a WHERE clause.
    """

    # Class-level attribute: the trie is shared by every SearchBox instance.
    tr = pytrie.StringTrie()

    def __init__(self):
        """Connect to the database and load the lookup trie.

        Raises:
            Exception: whatever ``pyodbc.connect`` raised, re-raised after a
                short message is printed. (Previously the failure was
                swallowed and surfaced as an ``AttributeError`` on
                ``self.conn`` one line later.)
        """
        # NOTE(review): connection settings are hard-coded; consider moving
        # them to configuration. The port looks like it may be a typo for
        # SQL Server's usual 1433 -- confirm before changing.
        server = 'fopo2ibguo.database.windows.net'
        database = 'testingdacpac'
        username = '******'
        password = '******'
        driver = 'ODBC Driver 13 for SQL Server'
        try:
            self.conn = pyodbc.connect('DRIVER=' + driver + ';SERVER=' +
                                       server + ';PORT=1443;DATABASE=' +
                                       database + ';UID=' + username +
                                       ';PWD=' + password)
        except Exception:
            print("I am unable to connect to the database")
            raise

        self.cursor = self.conn.cursor()

        self.cursor.execute("SELECT  distinct Location from Employee")
        for row in self.cursor.fetchall():
            self.tr[row[0]] = ['Location', row[0]]

        self.cursor.execute("SELECT  * from profile")
        for row in self.cursor.fetchall():
            self.tr[row[1]] = ['ProfileId', row[0]]
            for alias in row[2].split(','):
                self.tr[alias] = ['ProfileId', row[0]]

        self.cursor.execute("SELECT  * from skillset")
        for row in self.cursor.fetchall():
            try:
                self.tr[row[1]] = ['SkillId', row[0]]
            except Exception:
                # Best effort: skip skill rows whose name cannot be used as
                # a trie key instead of aborting the whole load.
                continue

    def contains(self, string):
        """Return the ``[field, value]`` entry stored for ``string``.

        Returns an empty list when ``string`` is not an exact key of the trie
        (or is not usable as a key at all).
        """
        try:
            return self.tr[string]
        except (KeyError, TypeError):
            return []

    def listToString(self, fieldName, listOfString):
        """Render ``" ( field = a or field = b )"`` for the given values.

        Values are inserted with ``str()`` and are NOT quoted or escaped, so
        the result is only safe for trusted, numeric values. An empty list
        yields ``" )"``, as it always has.
        """
        print("LENGTH OF ", len(listOfString))
        if not listOfString:
            return " )"
        comparisons = " or ".join(
            fieldName + " = " + str(value) for value in listOfString)
        return " ( " + comparisons + " )"

    def rowToList(self, rowList):
        """Render the first column of each row as ``" in (a,b,c)"``.

        An empty ``rowList`` yields ``" in ()"`` (previously the opening
        parenthesis was chopped off, giving ``" in)"``).
        """
        return " in (" + ",".join(str(row[0]) for row in rowList) + ")"

    @staticmethod
    def _nearest_int(tokens, center):
        """Return the integer token closest to ``tokens[center]``, or None.

        At equal distance the token to the right is preferred. Only real
        positions are inspected, so the scan never wraps around to the other
        end of the list and position 0 is included.
        """
        for distance in range(1, len(tokens)):
            for idx in (center + distance, center - distance):
                if 0 <= idx < len(tokens):
                    try:
                        return int(tokens[idx])
                    except ValueError:
                        pass
        return None

    def search(self, searchString):
        """Look up the words of ``searchString`` and print matching employees.

        Each space-separated token is looked up in the trie; if it is unknown,
        the two- and then three-word phrase starting at that token is tried.
        Hits are grouped by field, values of the same field are OR-ed and the
        fields are AND-ed into one parameterised query against
        ``Employee_View``. Matching rows are printed, pausing for Enter after
        every 40 rows. Nothing is executed when no token matches.
        """
        tokens = searchString.split(' ')
        token_count = len(tokens)
        fieldStorage = {}
        expr = 0

        for i in range(token_count):
            match = self.contains(tokens[i])
            if len(match) == 0 and i < token_count - 1:
                match = self.contains(tokens[i] + " " + tokens[i + 1])
                if len(match) == 0 and i < token_count - 2:
                    match = self.contains(tokens[i] + " " + tokens[i + 1] +
                                          " " + tokens[i + 2])
            if len(match) == 0:
                continue

            field, value = match[0], match[1]
            print(tokens[i], field, "'" + field + "'", value)

            if field == "experience":
                # Use the nearest number in the query; if there is none, fall
                # back to the last number found (initially 0).
                found = self._nearest_int(tokens, i)
                if found is not None:
                    expr = found
                print(expr)
                # NOTE(review): this field is stored under a quoted key, so
                # the generated clause compares the literal 'experience' --
                # kept as in the original; confirm the intended column.
                fieldStorage.setdefault("'" + field + "'", []).append(expr)
            else:
                fieldStorage.setdefault(field, []).append(value)

        if not fieldStorage:
            # Without any clause the query would be an empty string, which
            # the driver rejects.
            print("No known search terms found")
            return

        # Values are bound as parameters so that string values (for example
        # locations) are quoted correctly and cannot alter the statement.
        clauses = []
        params = []
        for field, values in fieldStorage.items():
            placeholders = " or ".join(field + " = ?" for _ in values)
            clauses.append(" ( " + placeholders + " )")
            params.extend(values)
        queryString = (" select * from Employee_View where " +
                       " and ".join(clauses))

        print(queryString, params)

        self.cursor.execute(queryString, params)
        rows = self.cursor.fetchall()

        # Page the output using its own counter rather than the token index.
        for shown, row in enumerate(rows, 1):
            print(row)
            if shown % 40 == 0:
                input()
示例#11
0
#!/usr/bin/env python3

import re

import dateutil.parser
import pytrie

import db

# Keyword -> flag: True for the selling keywords, False for the buying ones.
IS_SELLING_TRIE = pytrie.StringTrie({
    'wts': True,
    'selling': True,
    'wtb': False,
    'buying': False,
})
# TODO: support things like WTS CoS 10 k (space between number and k)
# An optional decimal number followed by an optional k / p / pp suffix.
PRICE_REGEX = re.compile(r'^(\d*\.?\d*)(k|p|pp)?$')
# Captures whatever follows a leading run of non-word characters.
USELESS_PUNCTUATION_REGEX = re.compile(r'^[^\d\w]*(.*?)$')
# Splits "[<word> <rest of bracket>] <name> auctions, '<message>'" into
# the bracket remainder, the name and the message.
SPLIT_REGEX = re.compile(r"^\[[^ ]+ ([^]]+)] ([^ ]+) auctions, '(.+)'$")
# Matches any single digit.
DIGIT_REGEX = re.compile(r'\d')

# Set to True to enable debug_print output.
DEBUG = False


def debug_print(message):
    """Print ``message`` only while the module-level ``DEBUG`` flag is set."""
    if not DEBUG:
        return
    print(message)


def split_line(line):
    """Parses text and returns a timestamp, character, and message."""
    # Lines like: [Sun Jan 01 13:45:35 2017] Toon auctions, 'WTS Ale'
    match = SPLIT_REGEX.match(line)