Пример #1
0
def cal_tf(uid: str, text: str):
    """Store the term frequency of every noun found in ``text`` for ``uid``.

    ``text`` is tokenised with the module-level ``tagger``. For each token
    tagged as a noun, the value in feature field 6 is collected unless it is
    the placeholder ``'*'``. The frequency of a word is its occurrence count
    divided by the number of collected nouns; one ``(uid, word, tf)`` row per
    distinct word is inserted into the ``tf`` table.
    """
    # Walk the token chain and keep the field-6 form of every noun.
    base_forms = []
    node = tagger.parseToNode(text)
    while node:
        fields = node.feature.split(',')
        base, pos = fields[6], fields[0]
        if pos == '名詞' and base != '*':
            base_forms.append(base)
        node = node.next

    # Relative frequency of each distinct noun.
    total = len(base_forms)
    rows = [(uid, term, hits / total)
            for term, hits in Counter(base_forms).items()]

    with GetCursor() as sub_cur:
        sub_cur.executemany(
            'INSERT INTO tf (uid, word, tf) VALUES (%s, %s, %s)', rows)
Пример #2
0
    def run(self):
        """Average the per-process item features until every process converges.

        Loads the distinct ``gameid`` values from ``traindata_yep`` once, then
        polls once per second. Whenever every entry of ``self.svdNowStep``
        equals ``self.avgNowStep.value + 1``, the feature values in
        ``self.svdItemFeature[t][item][j]`` are averaged over all
        ``self.processNum`` slots for ``j`` in ``1..self.feature`` and
        published through ``self.avgItemFeature``; ``self.avgNowStep`` is then
        advanced. The method returns once every value in ``self.descentRate``
        is below ``self.stopAt``.
        """
        with GetCursor() as cur:
            query = 'SELECT DISTINCT gameid FROM traindata_yep'
            cur.execute(query)
            # Read the single column row by row so an empty table yields an
            # empty list instead of an IndexError.
            itemList = [str(row[0]) for row in cur.fetchall()]

        while True:
            if self.lock.acquire():
                try:
                    if list(self.svdNowStep) == [self.avgNowStep.value + 1] * self.processNum:
                        # Fetch each slot's features once before the nested loops.
                        lSvdItemFeature = [self.svdItemFeature[t]
                                           for t in range(self.processNum)]
                        lAvgItemFeature = {}
                        for i in itemList:
                            averaged = {}
                            for j in range(1, self.feature + 1):
                                value = 0.0
                                for t in range(self.processNum):
                                    value += lSvdItemFeature[t][i][j]
                                averaged[j] = value / float(self.processNum)
                            lAvgItemFeature[i] = averaged
                        self.avgItemFeature.update(lAvgItemFeature)
                        self.avgNowStep.value += 1
                        print('Process-avg: next step')
                    # On Python 3 a map object never equals a list, so the
                    # original comparison could not become true; all() states
                    # the intended "every rate is below the threshold" test.
                    if all(rate < self.stopAt for rate in self.descentRate):
                        return
                finally:
                    # Release on every path, including exceptions and return.
                    self.lock.release()
            time.sleep(1)
Пример #3
0
def erf(uname: str, k: int):
    """Re-bucket a user's ``d1..d6`` values into ``k`` equal-total groups.

    Reads ``gameid, d1..d6`` from ``raw_train_data`` for ``uname``. The six
    columns are consumed in order and their running total is cut into groups
    that each hold ``total / k``. For every completed group, each game's share
    of that group's total is recorded. One row per game is then inserted into
    ``ts_train_data`` with the shares rounded to three decimals and joined by
    spaces.

    ``listadd`` and ``listpro`` are defined elsewhere; presumably element-wise
    list addition and multiplication of a list by a scalar -- TODO confirm.

    Nothing is written when the user has no rows in ``raw_train_data``.
    """
    with GetCursor() as cur:
        # Bind the user id as a parameter (same placeholder style as the
        # INSERT below) instead of splicing it into the SQL text.
        query = 'SELECT gameid, d1, d2, d3, d4, d5, d6 FROM raw_train_data WHERE userid = %s'
        cur.execute(query, (uname,))
        rows = cur.fetchall()
        if not rows:
            # No data for this user: the transpose below would be empty and
            # table[0] would raise IndexError.
            return

        # Transpose: table[0] holds the game ids, table[1:] the d1..d6 columns.
        table = list(map(list, zip(*rows)))
        lent = len(table[0])

        all_ = 0
        for line in table[1:]:
            all_ += sum(line)
        group_time = all_ / k

        # remind: per-game amounts carried over into the group being filled.
        remind = [0] * lent
        aim = []
        for line in table[1:]:
            new = line
            # The 0.999999 factor tolerates float rounding when a group is
            # filled exactly.
            while sum(remind) + sum(new) > group_time * 0.999999:
                # Fraction of `new` needed to top the current group up.
                proportion = (group_time - sum(remind)) / sum(new)
                res_list = listadd(remind, listpro(new, proportion))
                rate_list = listpro(res_list, 1 / group_time)
                aim.append(rate_list)
                remind = [0] * lent
                new = listpro(new, 1 - proportion)
            remind = listadd(remind, new)
        # Transpose again: one list of group shares per game.
        rating_list = list(map(list, zip(*aim)))

        insert = 'INSERT INTO ts_train_data VALUES (%s, %s, %s)'
        for i in range(lent):
            ratings = ' '.join(map(lambda x: str(round(x, 3)), rating_list[i]))
            cur.execute(insert, (uname, table[0][i], ratings))
Пример #4
0
def get_tweets(user_id: str):
    """Fetch up to 10 timeline tweets of ``user_id`` and store them.

    Each returned tweet contributes one ``(id_str, user name, text)`` row to
    the ``tweets`` table.
    """
    # https://developer.twitter.com/en/docs/tweets/timelines/api-reference/get-statuses-user_timeline
    endpoint = 'https://api.twitter.com/1.1/statuses/user_timeline.json'
    timeline = TwitterConnector().get_json_res(
        endpoint, {'user_id': user_id, 'count': 10})

    rows = [(status['id_str'], status['user']['name'], status['text'])
            for status in timeline]

    with GetCursor() as cur_1:
        cur_1.executemany(
            'INSERT INTO tweets (id, user, text) VALUES (%s, %s, %s)', rows)
Пример #5
0
def get_tweets(screen_name: str, count: int = 100):
    """Store the recent tweets of ``screen_name`` as one document.

    Requests up to ``count`` tweets from the user timeline endpoint, joins
    their texts with single spaces and inserts one ``(uid, tweets)`` row into
    ``docs``. ``uid`` is the ``id_str`` of the author of the first tweet.

    Nothing is inserted when the timeline comes back empty.
    """
    url = 'https://api.twitter.com/1.1/statuses/user_timeline.json'
    params = {'screen_name': screen_name, 'count': count}

    tweets = TwitterConnector().get_json_res(url, params)
    if not tweets:
        # An empty timeline has no first tweet to read the user id from;
        # tweets[0] would raise IndexError.
        return

    uid = tweets[0]['user']['id_str']
    document = ' '.join(tweet['text'] for tweet in tweets)

    insert = 'INSERT INTO docs (uid, tweets) VALUES (%s, %s)'
    with GetCursor() as cur:
        cur.execute(insert, (uid, document))
Пример #6
0
    def load_data(self):
        """Load user ids, item ids and training ratings from ``traindata_yep``.

        Sets ``self.userList`` and ``self.itemList`` to the distinct user and
        game ids as strings. Fills the existing ``self.train`` mapping as
        ``train[user][game] = rating`` (float) from the rows whose ``tg``
        equals ``self.tg`` and whose rating is non-zero.
        """
        with GetCursor() as cur:
            query = 'SELECT DISTINCT userid FROM traindata_yep'
            cur.execute(query)
            # Read the single column row by row so an empty table produces an
            # empty list rather than an IndexError.
            self.userList = [str(row[0]) for row in cur.fetchall()]

            query = 'SELECT DISTINCT gameid FROM traindata_yep'
            cur.execute(query)
            self.itemList = [str(row[0]) for row in cur.fetchall()]

            # %d forces self.tg to be rendered as an integer literal.
            query = 'SELECT userid, gameid, rating FROM traindata_yep WHERE tg = %d AND rating != 0' % self.tg
            cur.execute(query)
            for item in cur:
                user, game = str(item[0]), str(item[1])
                self.train.setdefault(user, {})
                self.train[user][game] = float(item[2])
Пример #7
0
    def correct_data(self) -> (list, list):
        """Collect, per user, the test games and the already-known games.

        Returns:
            ``(test_y, known)``, both ordered like ``self.__user_tpl``.
            ``test_y[n]`` is the list of game ids the user has in
            ``date170709`` that are also keys of ``self.__game_dict``.
            ``known[n]`` is the tuple of game ids the user has in
            ``ts_train_data``; it is empty when the user has no rows there.
        """
        test_y = []
        known = []

        with GetCursor() as cur:
            for user in self.__user_tpl:
                query = 'SELECT gameid FROM date170709 WHERE userid = \'%s\'' % user
                cur.execute(query)
                # Keep only games that have a column index.
                test_y.append(
                    [row[0] for row in cur if row[0] in self.__game_dict])

                query = 'SELECT gameid FROM ts_train_data WHERE userid = \'%s\'' % user
                cur.execute(query)
                # Build the tuple row by row: transposing an empty result and
                # taking [0] would raise IndexError.
                known.append(tuple(row[0] for row in cur.fetchall()))

        return test_y, known
Пример #8
0
    def __init__(self, user_limit: int, time_step: int = None):
        """Load the game index and the ``user_limit`` users with most rows.

        Args:
            user_limit: maximum number of users to load; they are taken from
                ``raw_train_data`` ordered by descending row count.
            time_step: stored unchanged in ``self.__time_step``.
        """
        with GetCursor() as cur:
            # Get game list
            query = 'SELECT DISTINCT gameid FROM ts_train_data ORDER BY gameid'
            cur.execute(query)
            # Build the tuple row by row so an empty table gives an empty
            # tuple instead of an IndexError.
            self.game_tpl = tuple(row[0] for row in cur.fetchall())

            # Map each game id to its position in game_tpl.
            self.__game_dict = {
                game: index for index, game in enumerate(self.game_tpl)}
            self.__game_count = len(self.game_tpl)

            # Get user list
            query = 'SELECT userid, COUNT(*) AS num FROM raw_train_data GROUP BY userid ORDER BY num DESC LIMIT %d' \
                    % user_limit
            cur.execute(query)
            self.__user_tpl = tuple(row[0] for row in cur.fetchall())
            # Set to None here; __require_usm (if present on this class)
            # appears to be what fills it in.
            self.__user_sparse_matrix = None

            self.__time_step = time_step
Пример #9
0
    def global_data(self) -> (list, list):
        """Build dense rating rows and presence flags for every user.

        Returns:
            ``(data, sign)``, both ordered like ``self.__user_tpl``. Each row
            has ``self.__game_count`` entries indexed through
            ``self.__game_dict``. ``data`` holds the rating as a float (0.0
            where the user has no row in ``global_train_data``) and ``sign``
            holds 1 where a row exists and 0 otherwise.
        """
        data, sign = [], []

        with GetCursor() as cur:
            for user in self.__user_tpl:
                # Get global rating
                cur.execute(
                    "SELECT gameid, rating FROM global_train_data WHERE userid = '%s'" % user)

                dense_row = [0.0] * self.__game_count
                flag_row = [0] * self.__game_count
                for game_id, raw_rating in cur:
                    score = float(raw_rating)
                    column = self.__game_dict[game_id]
                    dense_row[column] = score
                    flag_row[column] = 1

                data.append(dense_row)
                sign.append(flag_row)

        return data, sign
Пример #10
0
    def __do_parse(self):
        """Tokenise this object's page file and insert the accepted words.

        Reads ``../_2_parse_page/<file_num>.txt`` line by line and runs each
        line through the module-level ``tagger``. For every token, feature
        field 6 (``genkei``) and field 0 (``hinshi``) are read; presumably
        the base form and part of speech -- TODO confirm against the
        dictionary in use. Tokens whose ``genkei`` is in ``ng_genkei`` or
        whose ``hinshi`` is in ``ng_hinshi`` are dropped. The remaining
        ``(file_num, genkei, hinshi)`` rows are written with the module-level
        ``insert`` statement.
        """
        rows = []
        page_path = '../_2_parse_page/%d.txt' % self.__file_num

        with open(page_path, 'r', encoding='utf-8') as fl:
            for line in fl:
                # Follow the linked list of tokens produced for this line.
                node = tagger.parseToNode(line)
                while node:
                    fields = node.feature.split(',')
                    genkei, hinshi = fields[6], fields[0]
                    rejected = genkei in ng_genkei or hinshi in ng_hinshi
                    if not rejected:
                        rows.append((self.__file_num, genkei, hinshi))
                    node = node.next

        # Write everything collected from the file in one batch.
        with GetCursor() as cur:
            cur.executemany(insert, rows)
Пример #11
0
    def __require_usm(self):
        """Build ``self.__user_sparse_matrix`` unless it is already filled.

        For every user in ``self.__user_tpl`` the ``ratings`` strings from
        ``ts_train_data`` are split on whitespace. Each value greater than
        zero becomes one entry of a ``[rows, cols, vals]`` triple: the row is
        the position of the value inside the string, the column is the game's
        index from ``self.__game_dict`` and the value is the rating as a
        float. ``self.__ts_count`` is set to the number of values in the most
        recently processed string.
        """
        if self.__user_sparse_matrix:
            return

        self.__user_sparse_matrix = dict()

        with GetCursor() as cur:
            for user in self.__user_tpl:
                # Get ratings
                cur.execute(
                    "SELECT gameid, ratings FROM ts_train_data WHERE userid = '%s'" % user)

                # Coordinate lists kept in parallel: row, col, val.
                step_idx, game_idx, scores = [], [], []
                for rating in cur:
                    ratings = rating[1].split()  # str
                    self.__ts_count = len(ratings)
                    for step, raw in enumerate(ratings):
                        score = float(raw)
                        if score > 0:
                            step_idx.append(step)
                            game_idx.append(self.__game_dict[rating[0]])
                            scores.append(score)

                self.__user_sparse_matrix.setdefault(
                    user, [step_idx, game_idx, scores])
Пример #12
0
from twconnector import TwitterConnector
from dbconnector import GetCursor

# Script: record every account returned by the friends/ids endpoint as a
# followee of `my_id`.
# https://developer.twitter.com/en/docs/accounts-and-users/follow-search-get-users/api-reference/get-friends-ids
url = 'https://api.twitter.com/1.1/friends/ids.json'
insert = 'INSERT INTO followee (from_id, follow_id) VALUES (%s, %s)'

# Id stored in the from_id column of every inserted row.
my_id = '736421314366312448'

# One (from_id, follow_id) pair per id in the response's 'ids' list.
values = [(my_id, friend_id)
          for friend_id in TwitterConnector().get_json_res(url)['ids']]

with GetCursor() as cur:
    cur.executemany(insert, values)
Пример #13
0
 def write(games: list, db: str):
     """Insert the occurrence count of every distinct game into table ``db``.

     Args:
         games: game ids, with repeats; each distinct id yields one
             ``(gameid, cnt)`` row.
         db: name of the target table. It is concatenated into the SQL text,
             so it must come from trusted code, never from user input.
     """
     # executemany is specified to take a sequence of parameter rows, and a
     # dict_items view is not a sequence, so materialise it as a list.
     rows = list(Counter(games).items())
     query = 'INSERT INTO ' + db + ' (gameid, cnt) VALUES (%s, %s)'
     with GetCursor() as cur:
         cur.executemany(query, rows)