import numpy as np

from Feynman.serialize import Pickle_serializer


def test():
    _ps = Pickle_serializer()
    data = dict()
    data['item_count'] = 100000
    data['user_count'] = 1000000
    data['cluster'] = 8
    data['traffic'] = 200000
    data['timing'] = 15
    data['item_idx'] = list(range(data['item_count']))
    data['user_idx'] = list(range(data['user_count']))
    # each item gets a Dirichlet draw over clusters; transposed to shape
    # (cluster, item_count) so row k holds per-item weights for cluster k
    data['p_item_cluster'] = np.random.dirichlet(
        [1 for _ in range(data['cluster'])], data['item_count']).transpose()
    # P(cluster | user): shape (user_count, cluster), one row per user
    data['p_cluster_user'] = np.random.dirichlet(
        [1 for _ in range(data['cluster'])], data['user_count'])
    # P(user): a single probability vector over all users
    data['p_user'] = np.random.dirichlet(
        [1 for _ in range(data['user_count'])], 1)
    _ps.dump(data, 'test.ps')
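
# A quick sanity check for the synthetic distributions above (a sketch added
# for illustration, not part of the original module): every Dirichlet draw is
# a probability vector, so the per-item columns of p_item_cluster and the
# per-user rows of p_cluster_user should each sum to 1.
import numpy as np

p_item_cluster = np.random.dirichlet([1] * 8, 100).transpose()  # (8, 100)
p_cluster_user = np.random.dirichlet([1] * 8, 50)               # (50, 8)
assert np.allclose(p_item_cluster.sum(axis=0), 1.0)  # per-item cluster dist
assert np.allclose(p_cluster_user.sum(axis=1), 1.0)  # per-user cluster dist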
def make_init_reco():
    _ps = Pickle_serializer()

    # cluster-membership side: P(cluster | user) for a random 1000-user sample
    data = dict()
    item_count = 100000
    user_count = 1000000
    data['user_count'] = 1000  # sampled users, not the full population above
    data['cluster'] = 8
    data['reco_p_cluster_user'] = np.random.dirichlet(
        [1 for _ in range(data['cluster'])], data['user_count'])
    # map random raw user ids onto row indices (id collisions simply drop a row)
    data['reco_user_id_dic'] = {
        v: idx for idx, v in enumerate(
            map(int, np.random.rand(data['user_count']) * user_count))}
    _ps.dump(data, 'reco_cluster_user.ps')

    # item side: per cluster, a Dirichlet over 1000 randomly sampled item ids
    data = dict()
    demo_count = 1000
    data['cluster'] = 8

    def sample():
        return dict(zip(map(int, np.random.rand(demo_count) * item_count),
                        np.random.dirichlet([1 for _ in range(demo_count)])))

    data['reco_p_item_cluster'] = {k: sample() for k in range(data['cluster'])}
    _ps.dump(data, 'reco_item_cluster.ps')

    _gd = Google_drive()
    _gd.upload(folder='demo_reco',
               files={'reco_cluster_user.ps': 'reco_cluster_user.ps',
                      'reco_item_cluster.ps': 'reco_item_cluster.ps'},
               max_data=1)
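
# For reference, the two pickles written above look like this (tiny
# illustrative values; Demo_app._data_load below reads exactly these keys):
import numpy as np

reco_cluster_user = {
    'user_count': 2,
    'cluster': 8,
    'reco_p_cluster_user': np.random.dirichlet([1] * 8, 2),  # rows: P(cluster | user)
    'reco_user_id_dic': {737511: 0, 12045: 1},               # raw user id -> row index
}
reco_item_cluster = {
    'cluster': 8,
    'reco_p_item_cluster': {k: {4242: 0.6, 90911: 0.4}      # cluster -> {item id: weight}
                            for k in range(8)},
}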
def make_demo_user():
    _ps = Pickle_serializer()
    data = dict()
    data['item_count'] = 100000
    data['user_count'] = 1000000
    data['cluster'] = 8
    data['traffic'] = 200000  # simulated visits per day (see Demo_user._make_user_list)
    data['timing'] = 15
    data['item_idx'] = list(range(data['item_count']))
    data['user_idx'] = list(range(data['user_count']))
    data['p_item_cluster'] = np.random.dirichlet(
        [1 for _ in range(data['cluster'])], data['item_count']).transpose()
    data['p_cluster_user'] = np.random.dirichlet(
        [1 for _ in range(data['cluster'])], data['user_count'])
    data['p_user'] = np.random.dirichlet(
        [1 for _ in range(data['user_count'])], 1)
    _ps.dump(data, 'demo_user.ps')

    _gd = Google_drive()
    _gd.upload(folder='demo_user_data',
               files={'demo_user.ps': 'demo_user.ps'},
               max_data=1)
import asyncio
import json
import sys
import time
from collections import defaultdict

import numpy as np

from Feynman.serialize import Pickle_serializer
# get_logger, Config, Google_drive, Kafka_queue_producer and the module-level
# Sanic `app` come from this repo's other modules (imports elided in the
# original snippet).


class Demo_app():
    def __init__(self):
        self.logger = get_logger()
        self._opt = Config(open('config/demo.json').read())
        self._gd = Google_drive('token.pickle')
        self._ps = Pickle_serializer()
        self._kp = Kafka_queue_producer(self._opt.demo_app.kafka.data_center)
        self._reco_p_item_cluster = {}
        self._reco_user_id_dic = {}
        self._cluster = {}
        self._reco_p_cluster_user = {}

    def _data_load(self):
        # re-read the config so bucket definitions can change without a restart
        self._opt = Config(open('config/demo.json').read())
        for bucket in self._opt.demo_app.bucket.values():
            self._gd.download(folder=bucket.google_drive.folder,
                              path=bucket.google_drive.root_path)
            reco_item_cluster = self._ps.load(
                bucket.google_drive.reco_item_cluster_path)
            reco_cluster_user = self._ps.load(
                bucket.google_drive.reco_cluster_user_path)
            self._reco_p_item_cluster[
                bucket.name] = reco_item_cluster['reco_p_item_cluster']
            self._reco_user_id_dic[
                bucket.name] = reco_cluster_user['reco_user_id_dic']
            self._cluster[bucket.name] = reco_cluster_user['cluster']
            self._reco_p_cluster_user[
                bucket.name] = reco_cluster_user['reco_p_cluster_user']
            self.logger.info('Update [{}-{}] bucket...'.format(
                bucket.name, bucket.version))

    async def _task(self):
        self.logger.info('Start task...')
        while True:
            begin_t = time.time()
            # to do
            try:
                self._data_load()
            except Exception as e:
                self.logger.warning('Something is wrong : {}'.format(e))
                sys.exit(1)
            # finishing
            sleep_t = max(0, 60 - int(time.time() - begin_t))
            self.logger.info('Sleep {} secs before next start'.format(sleep_t))
            await asyncio.sleep(sleep_t)

    def __make_reco(self, user_id, bucket):
        # known users get their learned P(cluster | user); unknown users fall
        # back to a uniform distribution over the bucket's clusters
        if user_id in self._reco_user_id_dic[bucket]:
            cluster = self._reco_p_cluster_user[bucket][
                self._reco_user_id_dic[bucket][user_id]]
        else:
            cluster = [
                1. / self._cluster[bucket]
                for _ in range(self._cluster[bucket])
            ]
        # score(item) = sum_k P(item | cluster k) * P(cluster k | user)
        dic = defaultdict(float)
        for k in range(self._cluster[bucket]):
            for item, prob in self._reco_p_item_cluster[bucket][k].items():
                dic[item] += prob * cluster[k]
        return sorted(dic.items(), key=lambda x: -x[1])[:10]

    def sample_bucket(self):
        # A/B bucket assignment; the configured ratios are expected to sum to 1
        name, prob = [], []
        for bucket in self._opt.demo_app.bucket.values():
            name.append(bucket.name)
            prob.append(bucket.ratio)
        return np.random.choice(name, p=prob)

    def _make_reco(self, message):
        dic = defaultdict(float)
        user_list = message['value']['user_id']
        for user_id in user_list:
            bucket = self.sample_bucket()
            dic[user_id] = {
                'list': self.__make_reco(user_id, bucket),
                'bucket': bucket
            }
        self.logger.info('Make {} users reco list...'.format(len(dic.keys())))
        return dic

    def _pack_dic_msg(self, val, msg_type):
        dic_msg = {}
        dic_msg['type'] = msg_type
        dic_msg['value'] = val
        dic_msg['timestamp'] = time.time()
        dic_msg['service'] = 'demo_personal_reco_system'
        return dic_msg

    async def _feed(self, request, ws):
        self.logger.info('Start feed for {}:{}'.format(
            request.ip, request.port))
        while True:
            message = json.loads(await ws.recv())
            # to do
            try:
                if message['type'] == 'user_list':
                    reco_user_list = self._make_reco(message)
                    dic_msg = self._pack_dic_msg(val=reco_user_list,
                                                 msg_type='reco_user_list')
                    await ws.send(json.dumps(dic_msg))
                    self._kp.push(dic_msg)
                elif message['type'] == 'user_feedback':
                    self._kp.push(message)
            except Exception as e:
                self.logger.warning('Something is wrong : {}'.format(e))
                sys.exit(1)
            # finishing

    def run(self):
        app.add_task(self._task)
        app.add_websocket_route(self._feed,
                                self._opt.demo_app.sanic.websocket_route)
        app.run(host=self._opt.demo_app.sanic.host,
                port=self._opt.demo_app.sanic.port)
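
# A worked example of the mixture scoring inside Demo_app.__make_reco, with
# made-up item ids and probabilities:
# score(item) = sum_k P(item | cluster k) * P(cluster k | user).
from collections import defaultdict

p_item_cluster = {0: {'a': 0.7, 'b': 0.3}, 1: {'b': 0.9, 'c': 0.1}}
cluster = [0.25, 0.75]  # P(cluster | user)
dic = defaultdict(float)
for k, items in p_item_cluster.items():
    for item, prob in items.items():
        dic[item] += prob * cluster[k]
print(sorted(dic.items(), key=lambda x: -x[1]))
# [('b', 0.75), ('a', 0.175), ('c', 0.075)]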
from googleapiclient.discovery import build

from Feynman.serialize import Pickle_serializer

_ps = Pickle_serializer()


def main():
    creds = _ps.load('token.pickle')
    service = build('drive', 'v3', credentials=creds)

    # Call the Drive v3 API
    results = service.files().list(
        pageSize=10, fields="nextPageToken, files(id, name)").execute()
    items = results.get('files', [])

    if not items:
        print('No files found.')
    else:
        print('Files:')
        for item in items:
            print(u'{0} ({1})'.format(item['name'], item['id']))


if __name__ == '__main__':
    main()
import io
import os
import socket
import time

from googleapiclient.discovery import build
from googleapiclient.http import MediaFileUpload, MediaIoBaseDownload

from Feynman.serialize import Pickle_serializer
# get_logger comes from this repo's logging helper (import elided in the
# original snippet).


class Google_drive():
    def __init__(self, token_path='token.pickle'):
        self.logger = get_logger()
        self._ps = Pickle_serializer()
        self.creds = self._ps.load(token_path)
        while True:
            try:
                self.service = build('drive', 'v3', credentials=self.creds)
                break
            except socket.timeout:
                self.logger.info('Timed out... retrying...')

    def _get_list(self):
        result = self.service.files().list(fields='*').execute()['files']
        time.sleep(.1)  # stay under the Drive API rate limit
        return result

    def _get_folder_id(self, folder, rlist):
        r = {dic['name']: dic for dic in rlist}
        if folder not in r:
            parents_id = '1quTKA43JyrULAZ2kZxS9sQsjfnRNiG-z'
            body = {'name': folder,
                    'parents': [parents_id],
                    'mimeType': 'application/vnd.google-apps.folder'}
            created = self.service.files().create(body=body, fields='*').execute()
            self.logger.info('Create new folder : {}({})'.format(folder, created['id']))
            time.sleep(.1)
            return created['id']
        else:
            return r[folder]['id']

    def _upload(self, folder, files, max_data=3):
        rlist = self._get_list()
        folder_id = self._get_folder_id(folder, rlist)
        # some Drive entries carry no 'parents' field, so use .get()
        rlist = [dic for dic in rlist if folder_id in dic.get('parents', [])]
        for name, path in files.items():
            body = {'name': name, 'parents': [folder_id]}
            media_body = MediaFileUpload(path, resumable=True)
            r = self.service.files().create(body=body, media_body=media_body).execute()
            time.sleep(.1)
            self.logger.info('Upload new file : {}({})/{}({})'.format(
                folder, folder_id, name, r['id']))
            # keep at most max_data copies: the one just uploaded plus the
            # newest (max_data - 1) older ones
            name_list = [dic for dic in rlist if name in dic['name']]
            if len(name_list) > max_data - 1:
                del_list = sorted(name_list, key=lambda x: x['createdTime'])[
                    :len(name_list) - (max_data - 1)]
                for dic in del_list:
                    self.service.files().delete(fileId=dic['id']).execute()
                    self.logger.info('Delete old file : {}({})/{}({})'.format(
                        folder, folder_id, name, dic['id']))
                    time.sleep(.1)

    def upload(self, folder, files, max_data=3):
        while True:
            try:
                self._upload(folder, files, max_data)
                break
            except socket.timeout:
                self.logger.info('Timed out... retrying...')

    def _download(self, folder, path):
        if not os.path.exists(path):
            os.makedirs(path)
        path = os.path.join(path, folder)
        if not os.path.exists(path):
            os.makedirs(path)
        rlist = self._get_list()
        folder_id = self._get_folder_id(folder, rlist)
        rlist = [dic for dic in rlist if folder_id in dic.get('parents', [])]
        fname = {dic['name'] for dic in rlist}
        for name in fname:
            # when several versions exist, fetch only the newest one
            name_list = [dic for dic in rlist if name in dic['name']]
            file_id = max(name_list, key=lambda x: x['createdTime'])['id']
            request = self.service.files().get_media(fileId=file_id)
            fh = io.FileIO(os.path.join(path, name), 'wb')
            downloader = MediaIoBaseDownload(fh, request)
            done = False
            while not done:
                status, done = downloader.next_chunk()
            self.logger.info('Download file : {}({})'.format(
                os.path.join(path, name), file_id))

    def download(self, folder, path):
        while True:
            try:
                self._download(folder, path)
                break
            except socket.timeout:
                self.logger.info('Timed out... retrying...')
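
# Minimal usage sketch for Google_drive (assumes a valid token.pickle with
# Drive scopes already exists; the folder and file names are hypothetical):
gd = Google_drive('token.pickle')
gd.upload(folder='demo_models', files={'model.ps': 'cache/model.ps'}, max_data=3)
gd.download(folder='demo_models', path='cache')  # fetches the newest copy of each file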
import asyncio
import json
import time

import numpy as np
import websockets

from Feynman.serialize import Pickle_serializer
# get_logger, Config and Google_drive come from this repo's other modules
# (imports elided in the original snippet).


class Demo_user():
    def __init__(self):
        self.logger = get_logger()
        self._opt = Config(open('config/demo.json').read())
        self._url = self._opt.demo_user.url
        self._gd = Google_drive('token.pickle')
        self._ps = Pickle_serializer()

    def _make_user_list(self):
        # scale the daily traffic figure down to one sleep_t window and draw
        # the visitor count from a Poisson distribution
        user_num = self._traffic / 24 / 60 / 60 * self._opt.demo_user.sleep_t
        user_num = np.random.poisson(user_num)
        u_idxs = np.random.choice(range(self._user_count), user_num,
                                  p=self._p_user[0])
        return {'user_id': list(map(int, u_idxs))}

    def _make_user_choice(self, u_list):
        # each user samples a cluster, then an item from that cluster
        u_idxs = u_list['user_id']
        u_ks = [
            np.random.choice(range(self._cluster),
                             p=self._p_cluster_user[u_idx])
            for u_idx in u_idxs
        ]
        i_idxs = [
            np.random.choice(range(self._item_count),
                             p=self._p_item_cluster[u_k] /
                             sum(self._p_item_cluster[u_k]))
            for u_k in u_ks
        ]
        return dict(zip(u_idxs, i_idxs))

    def _make_user_interest(self, u_list):
        # top max_interest items by expected affinity
        # P(cluster | user) . P(item | cluster)
        u_idxs = u_list['user_id']
        u_max_interest = self._opt.demo_user.max_interest
        return {
            u_idx: list(
                np.argsort(-np.dot(self._p_cluster_user[u_idx],
                                   self._p_item_cluster))[:u_max_interest])
            for u_idx in u_idxs
        }

    def _pack_dic_msg(self, val, msg_type):
        dic_msg = {}
        dic_msg['type'] = msg_type
        dic_msg['value'] = val
        dic_msg['timestamp'] = time.time()
        dic_msg['service'] = 'demo_personal_reco_system'
        return dic_msg

    async def _producer(self):
        self.logger.info('Start producer...')
        while True:
            begin_t = time.time()
            # to do
            try:
                self._opt = Config(open('config/demo.json').read())
                u_list = self._make_user_list()
                self._u_choice = self._make_user_choice(u_list)
                self._u_interest = self._make_user_interest(u_list)
                self.logger.info('Generate {} demo users... '.format(
                    len(u_list['user_id'])))
                dic_msg = self._pack_dic_msg(val=u_list, msg_type='user_list')
                await self.ws.send(json.dumps(dic_msg))
            except Exception as e:
                self.logger.warning('Something is wrong : {}'.format(e))
                break
            # finishing
            sleep_t = max(
                0, self._opt.demo_user.sleep_t - int(time.time() - begin_t))
            self.logger.info('Sleep {} secs before next start'.format(sleep_t))
            await asyncio.sleep(sleep_t)

    def _make_user_react(self, message):
        result = []
        reco_user_list = message['value']
        pss, choice, click, unclick = 0, 0, 0, 0
        for user_id in reco_user_list.keys():
            # each user passes, picks their own item, or clicks a reco item
            stat = np.random.choice(['pass', 'choice', 'click'],
                                    p=[0.4, 0.3, 0.3])
            if stat == 'pass':
                pss += 1
                continue
            elif stat == 'choice' and int(user_id) in self._u_choice:
                tmp = {
                    'user_id': user_id,
                    'item_id': str(self._u_choice[int(user_id)]),
                    'bucket': reco_user_list[user_id]['bucket'],
                    'stat': 'choice'
                }
                choice += 1
                result.append(tmp)
            elif stat == 'click' and int(user_id) in self._u_interest:
                # click only when the reco list overlaps the user's interests
                reco_item = set(list(zip(*reco_user_list[user_id]['list']))[0])
                interest_item = set(self._u_interest[int(user_id)])
                candidate_item = list(reco_item.intersection(interest_item))
                if candidate_item:
                    tmp = {
                        'user_id': user_id,
                        'item_id': str(np.random.choice(candidate_item)),
                        'bucket': reco_user_list[user_id]['bucket'],
                        'stat': 'click'
                    }
                    click += 1
                    result.append(tmp)
                else:
                    unclick += 1
        self.logger.info(
            'Make user feedback -> pass: {}, choice: {}, click: {}, unclick: {}'
            .format(pss, choice, click, unclick))
        return result

    async def _consumer(self):
        self.logger.info('Start consumer...')
        while True:
            message = json.loads(await self.ws.recv())
            # to do
            try:
                if message['type'] == 'reco_user_list':
                    u_feedback = self._make_user_react(message)
                    dic_msg = self._pack_dic_msg(val=u_feedback,
                                                 msg_type='user_feedback')
                    await self.ws.send(json.dumps(dic_msg))
            except Exception as e:
                self.logger.warning('Something is wrong : {}'.format(e))
                break
            # finishing

    def _data_load(self):
        self._gd.download(folder=self._opt.demo_user.google_drive.folder,
                          path=self._opt.demo_user.google_drive.root_path)
        demo_user = self._ps.load(self._opt.demo_user.google_drive.data_path)
        self._traffic = demo_user['traffic']
        self._user_count = demo_user['user_count']
        self._item_count = demo_user['item_count']
        self._cluster = demo_user['cluster']
        self._p_user = demo_user['p_user']
        self._p_cluster_user = demo_user['p_cluster_user']
        self._p_item_cluster = demo_user['p_item_cluster']
        self._user_idx = demo_user['user_idx']
        self._item_idx = demo_user['item_idx']
        self._u_choice = {}
        self._u_interest = {}

    async def _main(self):
        self.logger.info('Start...')
        while True:
            try:
                self._data_load()
                self.ws = await websockets.connect(self._url)
                await asyncio.gather(self._producer(), self._consumer())
            except Exception as e:
                self.logger.warning('Restart... after {} secs -> {}'.format(
                    60, e))
                await asyncio.sleep(60)
                continue

    def run(self):
        asyncio.run(self._main())
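
# Hypothetical entry point (not in the original snippet): Demo_user dials the
# Demo_app websocket at demo_user.url from config/demo.json, sends 'user_list'
# messages, and answers each 'reco_user_list' with 'user_feedback'.
if __name__ == '__main__':
    Demo_user().run()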
import os
import time
from collections import defaultdict

from Feynman.serialize import Pickle_serializer
# get_logger, Config, Google_drive and Mongodb come from this repo's other
# modules (imports elided in the original snippet).


class Base_gc():
    def __init__(self):
        self.logger = get_logger()
        self._ps = Pickle_serializer()
        self._gd = Google_drive()
        self._opt = Config(open('config/demo_dist.json').read())
        self._mg_data_click = Mongodb(self._opt.mongodb.data_click)
        self._mg_data_choice = Mongodb(self._opt.mongodb.data_choice)

    def _init(self):
        # a degenerate one-cluster model: no real user id is in
        # reco_user_id_dic, so Demo_app.__make_reco always takes its uniform
        # fallback and items are ranked purely by the counts built in _run
        self.data = dict()
        self.data['cluster'] = 1
        self.data['reco_p_cluster_user'] = [[1.]]
        self.data['reco_user_id_dic'] = {-1: -1}
        self.data['reco_p_item_cluster'] = {
            k: {
                -1: 1.
            }
            for k in range(self.data['cluster'])
        }
        if not os.path.exists('cache'):
            os.makedirs('cache')
        if not os.path.exists('cache/demo_gc'):
            os.makedirs('cache/demo_gc')

    def _run(self):
        # global counter: sum clicks and choices per item across all users
        dic = defaultdict(int)
        for data in self._mg_data_click.find():
            dic[int(data['item_id'])] += 1
        for data in self._mg_data_choice.find():
            dic[int(data['item_id'])] += 1
        self.data['reco_p_item_cluster'] = {
            k: dic
            for k in range(self.data['cluster'])
        }
        # both pickles carry the full dict; Demo_app reads different keys
        # from each file
        self._ps.dump(self.data, 'cache/demo_gc/reco_cluster_user.ps')
        self._ps.dump(self.data, 'cache/demo_gc/reco_item_cluster.ps')
        self._gd.upload(folder='demo_gc',
                        files={
                            'reco_cluster_user.ps':
                            'cache/demo_gc/reco_cluster_user.ps',
                            'reco_item_cluster.ps':
                            'cache/demo_gc/reco_item_cluster.ps'
                        },
                        max_data=1)

    def run(self):
        self.logger.info('Start...')
        self._init()
        while True:
            begin_t = time.time()
            # to do
            try:
                self._run()
            except KeyboardInterrupt:
                self.logger.warning('KeyboardInterrupt detect...')
                break
            # finishing
            sleep_t = max(0, 3600 - int(time.time() - begin_t))
            self.logger.info('Sleep {} secs before next start'.format(sleep_t))
            time.sleep(sleep_t)
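
# With cluster=1 and the uniform fallback in Demo_app.__make_reco, the raw
# counts Base_gc publishes rank items by global popularity. A small worked
# example with made-up counts:
from collections import defaultdict

counts = {101: 42, 202: 17, 303: 88}  # reco_p_item_cluster[0]
cluster = [1.0]                       # uniform fallback for a single cluster
dic = defaultdict(float)
for item, c in counts.items():
    dic[item] += c * cluster[0]
print(sorted(dic.items(), key=lambda x: -x[1])[:10])
# [(303, 88.0), (101, 42.0), (202, 17.0)]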