def cal_tf(uid: str, text: str):
    """Compute term frequencies of nouns in *text* and store them for *uid*.

    Parses *text* with MeCab, keeps the base form (prototype) of every
    noun ('名詞'), computes tf = count / total-nouns for each distinct
    noun, and bulk-inserts (uid, word, tf) rows into the ``tf`` table.
    Does nothing when no nouns are found.
    """
    word = tagger.parseToNode(text)  # Parse words
    nouns = []
    while word:
        feature_list = word.feature.split(',')
        prototype, vocabulary = feature_list[6], feature_list[0]
        # Keep only nouns whose base form is known ('*' means unknown).
        if vocabulary == '名詞' and prototype != '*':
            nouns.append(prototype)
        word = word.next
    # Calculate tf
    num = len(nouns)
    # BUG FIX: the original divided by num unconditionally, raising
    # ZeroDivisionError for texts with no extractable nouns.
    if num == 0:
        return
    cnt = Counter(nouns)
    insert = 'INSERT INTO tf (uid, word, tf) VALUES (%s, %s, %s)'
    values = [(uid, key, val / num) for key, val in cnt.items()]
    with GetCursor() as sub_cur:
        sub_cur.executemany(insert, values)
def run(self):
    """Averaging worker loop for the multi-process SVD trainer.

    Each time every SVD worker has advanced exactly one step past the
    current averaged step, this merges their per-item feature vectors
    (element-wise mean over ``processNum`` workers) into
    ``self.avgItemFeature`` and advances ``self.avgNowStep``.  The loop
    returns once every worker's descent rate has dropped below
    ``self.stopAt``.
    """
    with GetCursor() as cur:
        query = 'SELECT DISTINCT gameid FROM traindata_yep'
        cur.execute(query)
        res = list(zip(*cur.fetchall()))[0]
    itemList = list(map(str, res))
    while True:
        if self.lock.acquire():
            # All workers must be one step ahead of the averaged state.
            if list(self.svdNowStep) == [self.avgNowStep.value + 1] * self.processNum:
                # Snapshot shared feature dicts into locals first.
                lSvdItemFeature = [self.svdItemFeature[t] for t in range(self.processNum)]
                lAvgItemFeature = {}
                for i in itemList:
                    lAvgItemFeature.setdefault(i, {})
                    for j in range(1, self.feature + 1):
                        value = 0.0
                        for t in range(self.processNum):
                            value += lSvdItemFeature[t][i][j]
                        lAvgItemFeature[i].setdefault(j, value / float(self.processNum))
                self.avgItemFeature.update(lAvgItemFeature)
                self.avgNowStep.value += 1
                print('Process-avg: next step')
                # BUG FIX: the original compared a map object against a
                # list ([True] * processNum), which is always False in
                # Python 3, so the worker could never terminate.
                if all(rate < self.stopAt for rate in self.descentRate):
                    self.lock.release()
                    return
            self.lock.release()
        time.sleep(1)
def erf(uname: str, k: int):
    """Re-slice a user's per-game play-time series into *k* equal-time groups.

    Reads the d1..d6 time columns for *uname* from ``raw_train_data``,
    partitions total play time into *k* groups of equal duration
    (splitting columns proportionally at group boundaries), normalizes
    each group to rates, and inserts one (user, game, ratings) row per
    game into ``ts_train_data``.
    """
    with GetCursor() as cur:
        # SECURITY FIX: the original interpolated uname into the SQL
        # string ('%s' % uname) — use a parameterized query instead.
        query = ('SELECT gameid, d1, d2, d3, d4, d5, d6 '
                 'FROM raw_train_data WHERE userid = %s')
        cur.execute(query, (uname,))
        table = list(map(list, zip(*cur.fetchall())))
        lent = len(table[0])
        all_ = 0
        for line in table[1:]:
            all_ += sum(line)
        group_time = all_ / k
        remind = [0] * lent          # carry-over not yet assigned to a group
        aim = []                     # finished, normalized groups
        for line in table[1:]:
            new = line
            # Carve off complete groups while the carry-over plus this
            # column still exceeds one group (small tolerance for floats).
            while sum(remind) + sum(new) > group_time * 0.999999:
                proportion = (group_time - sum(remind)) / sum(new)
                res_list = listadd(remind, listpro(new, proportion))
                rate_list = listpro(res_list, 1 / group_time)
                aim.append(rate_list)
                remind = [0] * lent
                new = listpro(new, 1 - proportion)
            remind = listadd(remind, new)
        rating_list = list(map(list, zip(*aim)))
        insert = 'INSERT INTO ts_train_data VALUES (%s, %s, %s)'
        for i in range(lent):
            ratings = ' '.join(str(round(x, 3)) for x in rating_list[i])
            cur.execute(insert, (uname, table[0][i], ratings))
def get_tweets(user_id: str):
    """Fetch the 10 most recent tweets of *user_id* and store them.

    Hits the Twitter ``statuses/user_timeline`` endpoint and bulk-inserts
    (id, user name, text) rows into the ``tweets`` table.
    """
    # https://developer.twitter.com/en/docs/tweets/timelines/api-reference/get-statuses-user_timeline
    url = 'https://api.twitter.com/1.1/statuses/user_timeline.json'
    params = {'user_id': user_id, 'count': 10}
    rows = [
        (tweet['id_str'], tweet['user']['name'], tweet['text'])
        for tweet in TwitterConnector().get_json_res(url, params)
    ]
    insert = 'INSERT INTO tweets (id, user, text) VALUES (%s, %s, %s)'
    with GetCursor() as cur_1:
        cur_1.executemany(insert, rows)
def get_tweets(screen_name: str, count: int = 100):
    """Fetch up to *count* tweets for *screen_name* and store them as one document.

    Joins the tweet texts with spaces and inserts a single (uid, tweets)
    row into the ``docs`` table.  Does nothing if the timeline is empty.
    """
    url = 'https://api.twitter.com/1.1/statuses/user_timeline.json'
    params = {'screen_name': screen_name, 'count': count}
    tweets = TwitterConnector().get_json_res(url, params)
    # BUG FIX: the original indexed tweets[0] unconditionally, raising
    # IndexError for users with an empty timeline.
    if not tweets:
        return
    uid = tweets[0]['user']['id_str']
    tweet_list = [tweet['text'] for tweet in tweets]
    insert = 'INSERT INTO docs (uid, tweets) VALUES (%s, %s)'
    with GetCursor() as cur:
        cur.execute(insert, (uid, ' '.join(tweet_list)))
def load_data(self):
    """Load user/item lists and the non-zero training ratings for fold ``self.tg``.

    Populates ``self.userList`` and ``self.itemList`` with all distinct
    user/game ids from ``traindata_yep``, and fills ``self.train`` as
    ``{userid: {gameid: rating}}`` for rows matching ``tg = self.tg``.
    """
    with GetCursor() as cur:
        query = 'SELECT DISTINCT userid FROM traindata_yep'
        cur.execute(query)
        res = list(zip(*cur.fetchall()))[0]
        self.userList = list(map(str, res))
        query = 'SELECT DISTINCT gameid FROM traindata_yep'
        cur.execute(query)
        res = list(zip(*cur.fetchall()))[0]
        self.itemList = list(map(str, res))
        # FIX: parameterize instead of '%d' string formatting, matching
        # the driver's placeholder style used elsewhere in the project.
        query = 'SELECT userid, gameid, rating FROM traindata_yep WHERE tg = %s AND rating != 0'
        cur.execute(query, (self.tg,))
        for item in cur:
            self.train.setdefault(str(item[0]), {})
            self.train[str(item[0])][str(item[1])] = float(item[2])
def correct_data(self) -> (list, list):
    """Collect ground-truth and already-known games for every test user.

    Returns:
        test_y: per-user list of game ids from ``date170709`` that appear
            in the model's game dictionary (the items to predict).
        known: per-user tuple of game ids already present in
            ``ts_train_data`` (items the model trained on).
    """
    test_y = []
    known = []
    with GetCursor() as cur:
        # SECURITY FIX: both queries originally interpolated the user id
        # into the SQL string — use parameterized queries instead.
        truth_query = 'SELECT gameid FROM date170709 WHERE userid = %s'
        known_query = 'SELECT gameid FROM ts_train_data WHERE userid = %s'
        for user in self.__user_tpl:
            cur.execute(truth_query, (user,))
            # Only keep games the model actually knows about.
            test_list = [row[0] for row in cur if row[0] in self.__game_dict]
            test_y.append(test_list)
            cur.execute(known_query, (user,))
            known.append(list(zip(*cur.fetchall()))[0])
    return test_y, known
def __init__(self, user_limit: int, time_step: int = None):
    """Load the game vocabulary and the *user_limit* most active users.

    Args:
        user_limit: number of users to keep, ordered by row count in
            ``raw_train_data`` (most active first).
        time_step: optional number of time steps; stored for later use.
    """
    with GetCursor() as cur:
        # Get game list
        query = 'SELECT DISTINCT gameid FROM ts_train_data ORDER BY gameid'
        cur.execute(query)
        self.game_tpl = list(zip(*cur.fetchall()))[0]
        # Map gameid -> column index.
        self.__game_dict = dict((k, v) for v, k in enumerate(self.game_tpl))
        self.__game_count = len(self.game_tpl)
        # Get user list
        # FIX: parameterize LIMIT instead of '%d' string formatting.
        query = ('SELECT userid, COUNT(*) AS num FROM raw_train_data '
                 'GROUP BY userid ORDER BY num DESC LIMIT %s')
        cur.execute(query, (user_limit,))
        self.__user_tpl = list(zip(*cur.fetchall()))[0]
    self.__user_sparse_matrix = None
    self.__time_step = time_step
def global_data(self) -> (list, list):
    """Build dense rating and mask matrices for all tracked users.

    Returns:
        data: per-user list of ratings, one float per game column
            (0.0 where unrated).
        sign: parallel per-user 0/1 mask marking which games were rated.
    """
    data = []
    sign = []
    with GetCursor() as cur:
        # SECURITY FIX: parameterized query instead of interpolating the
        # user id into the SQL string.
        query = 'SELECT gameid, rating FROM global_train_data WHERE userid = %s'
        for user in self.__user_tpl:
            # Get global rating
            cur.execute(query, (user,))
            rating_list = [0.0] * self.__game_count
            sign_list = [0] * self.__game_count
            for row in cur:
                col = self.__game_dict[row[0]]
                rating_list[col] = float(row[1])
                sign_list[col] = 1
            data.append(rating_list)
            sign.append(sign_list)
    return data, sign
def __do_parse(self):
    """Parse one crawled page file and store its surviving words.

    Reads ``../_2_parse_page/<file_num>.txt``, runs each line through
    MeCab, skips words whose base form or part of speech is blacklisted
    (``ng_genkei`` / ``ng_hinshi``), and bulk-inserts
    (file_num, base form, part of speech) rows using the module-level
    ``insert`` statement.
    """
    values = []
    path = '../_2_parse_page/%d.txt' % self.__file_num
    with open(path, 'r', encoding='utf-8') as fl:
        for line in fl:
            # Walk MeCab's node chain for this line.
            node = tagger.parseToNode(line)
            while node:
                features = node.feature.split(',')
                genkei, hinshi = features[6], features[0]
                # Keep the word only if neither its base form nor its
                # part of speech is blacklisted.
                if genkei not in ng_genkei and hinshi not in ng_hinshi:
                    values.append((self.__file_num, genkei, hinshi))
                node = node.next
    # Execute query
    with GetCursor() as cur:
        cur.executemany(insert, values)
def __require_usm(self):
    """Lazily build each user's sparse (time-step x game) rating matrix.

    For every tracked user, reads the space-separated per-time-step
    ratings string for each game from ``ts_train_data`` and stores a
    COO-style ``[rows, cols, vals]`` triple in
    ``self.__user_sparse_matrix`` keyed by user.  Also records the number
    of time steps in ``self.__ts_count``.  No-op if already built.
    """
    if not self.__user_sparse_matrix:
        self.__user_sparse_matrix = dict()
        with GetCursor() as cur:
            # SECURITY FIX: parameterized query instead of interpolating
            # the user id into the SQL string; hoisted out of the loop.
            query = 'SELECT gameid, ratings FROM ts_train_data WHERE userid = %s'
            for user in self.__user_tpl:
                cur.execute(query, (user,))
                # [row, col, val]
                sparse_matrix = [[], [], []]
                for rating in cur:
                    ratings = rating[1].split()  # str
                    self.__ts_count = len(ratings)
                    for i in range(self.__ts_count):
                        if float(ratings[i]) > 0:
                            sparse_matrix[0].append(i)
                            sparse_matrix[1].append(self.__game_dict[rating[0]])
                            sparse_matrix[2].append(float(ratings[i]))
                self.__user_sparse_matrix.setdefault(user, sparse_matrix)
from twconnector import TwitterConnector
from dbconnector import GetCursor

# https://developer.twitter.com/en/docs/accounts-and-users/follow-search-get-users/api-reference/get-friends-ids
url = 'https://api.twitter.com/1.1/friends/ids.json'
my_id = '736421314366312448'

# One (my_id, friend_id) row per account this user follows.
values = [(my_id, friend_id)
          for friend_id in TwitterConnector().get_json_res(url)['ids']]

insert = 'INSERT INTO followee (from_id, follow_id) VALUES (%s, %s)'
with GetCursor() as cur:
    cur.executemany(insert, values)
def write(games: list, db: str):
    """Count occurrences of each game id and insert the counts into table *db*.

    NOTE(review): the table name *db* is spliced directly into the SQL
    string — identifiers cannot be parameterized, so *db* must only ever
    come from trusted, hard-coded call sites; verify callers.
    """
    counts = Counter(games)
    query = 'INSERT INTO ' + db + ' (gameid, cnt) VALUES (%s, %s)'
    with GetCursor() as cur:
        cur.executemany(query, list(counts.items()))