def __init__(self):
     """Wire up logging, pickle serialization, Google Drive and MongoDB handles."""
     self.logger = get_logger()
     self._ps = Pickle_serializer()
     self._gd = Google_drive()
     # Config must be loaded before the Mongodb handles below, which read it.
     # NOTE(review): the config file handle is opened but never closed.
     self._opt = Config(open('config/demo_dist.json').read())
     self._mg_data_click = Mongodb(self._opt.mongodb.data_click)
     self._mg_data_choice = Mongodb(self._opt.mongodb.data_choice)
# ---- Example 2 ----
    def __init__(self):
        """Set up logging, config, Drive access, pickle I/O and a Kafka producer."""
        self.logger = get_logger()
        self._opt = Config(open('config/demo.json').read())
        self._gd = Google_drive('token.pickle')
        self._ps = Pickle_serializer()
        self._kp = Kafka_queue_producer(self._opt.demo_app.kafka.data_center)

        # Per-bucket recommendation state, keyed by bucket name; populated
        # later by a data-load step.
        self._reco_p_item_cluster = {}
        self._reco_user_id_dic = {}
        self._cluster = {}
        self._reco_p_cluster_user = {}
# ---- Example 3 ----
    def __init__(self):
        """Load cached credentials and build the Drive v3 client, retrying on timeout."""
        self.logger = get_logger()
        self._ps = Pickle_serializer()
        # 'token.pickle' holds previously stored OAuth credentials.
        self.creds = self._ps.load('token.pickle')

        # Retry forever on socket timeouts; only a successful build() breaks out.
        while True:
            try:
                self.service = build('drive', 'v3', credentials=self.creds)
                break
            except socket.timeout:
                self.logger.info('Time-out... try restart...')
def test():
    """Build a synthetic user/item dataset and dump it to 'test.ps'.

    Draws per-item cluster distributions, per-user cluster mixtures and a
    global user popularity vector from symmetric Dirichlet(1) priors (i.e.
    uniform over the probability simplex), then serializes the whole dict
    with Pickle_serializer.
    """
    _ps = Pickle_serializer()
    data = {}
    data['item_count'] = 100000
    data['user_count'] = 1000000
    data['cluster'] = 8
    data['traffic'] = 200000
    data['timing'] = 15
    # list(range(n)) is the idiomatic form of [i for i in range(n)].
    data['item_idx'] = list(range(data['item_count']))
    data['user_idx'] = list(range(data['user_count']))

    # np.ones(k) is the symmetric Dirichlet(1) concentration vector; it is
    # numerically identical to the old [1 for _ in range(k)] alpha list.
    data['p_item_cluster'] = np.random.dirichlet(
        np.ones(data['cluster']), data['item_count']).transpose()
    data['p_cluster_user'] = np.random.dirichlet(
        np.ones(data['cluster']), data['user_count'])
    data['p_user'] = np.random.dirichlet(
        np.ones(data['user_count']), 1)

    _ps.dump(data, 'test.ps')
def make_init_reco():
    """Create bootstrap recommendation artifacts and upload them to Drive.

    Writes two pickles -- 'reco_cluster_user.ps' (per-user cluster mixtures
    plus a user-id -> row-index mapping) and 'reco_item_cluster.ps'
    (per-cluster item probability tables) -- then uploads both to the
    'demo_reco' Google Drive folder.
    """
    _ps = Pickle_serializer()

    data = {}
    item_count = 100000
    user_count = 1000000
    data['user_count'] = 1000
    data['cluster'] = 8
    data['reco_p_cluster_user'] = np.random.dirichlet(np.ones(data['cluster']), data['user_count'])
    # Random user ids mapped to row indices; duplicate draws overwrite earlier
    # entries, so the dict may hold slightly fewer than user_count keys.
    data['reco_user_id_dic'] = {v: idx for idx, v in enumerate(map(int, np.random.rand(data['user_count'])*user_count))}

    _ps.dump(data, 'reco_cluster_user.ps')

    data = {}
    demo_count = 1000
    data['cluster'] = 8

    def sample():
        """Random item-id -> probability table (PEP 8 prefers def over a named lambda)."""
        # NOTE(review): duplicate item ids collapse dict entries, so the
        # remaining probabilities need not sum to 1 -- confirm this is intended.
        return dict(zip(map(int, np.random.rand(demo_count)*item_count),
                        np.random.dirichlet(np.ones(demo_count))))

    data['reco_p_item_cluster'] = {k: sample() for k in range(data['cluster'])}

    _ps.dump(data, 'reco_item_cluster.ps')

    _gd = Google_drive()
    _gd.upload(folder='demo_reco',
               files={'reco_cluster_user.ps': 'reco_cluster_user.ps',
                      'reco_item_cluster.ps': 'reco_item_cluster.ps'},
               max_data=1)
def make_demo_user():
    """Generate the synthetic user model and upload it to 'demo_user_data'.

    Draws the same Dirichlet(1)-based item/user/cluster distributions as the
    local test generator, dumps them to 'demo_user.ps' and pushes the file to
    Google Drive, keeping only the newest copy (max_data=1).
    """
    _ps = Pickle_serializer()
    data = {}
    data['item_count'] = 100000
    data['user_count'] = 1000000
    data['cluster'] = 8
    data['traffic'] = 200000
    data['timing'] = 15
    # list(range(n)) is the idiomatic form of [i for i in range(n)].
    data['item_idx'] = list(range(data['item_count']))
    data['user_idx'] = list(range(data['user_count']))

    # Symmetric Dirichlet(1) == uniform over the probability simplex;
    # np.ones(k) is numerically identical to the old [1]*k alpha list.
    data['p_item_cluster'] = np.random.dirichlet(np.ones(data['cluster']), data['item_count']).transpose()
    data['p_cluster_user'] = np.random.dirichlet(np.ones(data['cluster']), data['user_count'])
    data['p_user'] = np.random.dirichlet(np.ones(data['user_count']), 1)

    _ps.dump(data, 'demo_user.ps')

    _gd = Google_drive()
    _gd.upload(folder='demo_user_data',
               files={'demo_user.ps': 'demo_user.ps'},
               max_data=1)
# ---- Example 7 ----
class Demo_app():
    """Websocket recommendation server.

    Periodically reloads per-bucket recommendation models from Google Drive,
    serves reco lists to connected demo users over a Sanic websocket route,
    and forwards both reco lists and user feedback to Kafka.
    """

    def __init__(self):
        self.logger = get_logger()
        self._opt = Config(open('config/demo.json').read())
        self._gd = Google_drive('token.pickle')
        self._ps = Pickle_serializer()
        self._kp = Kafka_queue_producer(self._opt.demo_app.kafka.data_center)

        # Per-bucket model state, keyed by bucket name; filled by _data_load().
        self._reco_p_item_cluster = {}
        self._reco_user_id_dic = {}
        self._cluster = {}
        self._reco_p_cluster_user = {}

    def _data_load(self):
        """Re-read the config and refresh every bucket's model from Drive."""
        self._opt = Config(open('config/demo.json').read())
        for bucket in self._opt.demo_app.bucket.values():
            self._gd.download(folder=bucket.google_drive.folder,
                              path=bucket.google_drive.root_path)

            reco_item_cluster = self._ps.load(
                bucket.google_drive.reco_item_cluster_path)
            reco_cluster_user = self._ps.load(
                bucket.google_drive.reco_cluster_user_path)

            self._reco_p_item_cluster[
                bucket.name] = reco_item_cluster['reco_p_item_cluster']
            self._reco_user_id_dic[
                bucket.name] = reco_cluster_user['reco_user_id_dic']
            self._cluster[bucket.name] = reco_cluster_user['cluster']
            self._reco_p_cluster_user[
                bucket.name] = reco_cluster_user['reco_p_cluster_user']
            self.logger.info('Update [{}-{}] bucket...'.format(
                bucket.name, bucket.version))

    async def _task(self):
        """Background task: reload the models roughly once a minute."""
        self.logger.info('Start task...')
        while True:
            begin_t = time.time()
            # to do
            try:
                self._data_load()
            except Exception as e:
                self.logger.warning('Something is wrong : {}'.format(e))
                # A failed reload is treated as fatal for the whole process.
                sys.exit(1)
            # finishing
            sleep_t = max(0, 60 - int(time.time() - begin_t))
            self.logger.info('Sleep {} secs before next start'.format(sleep_t))
            await asyncio.sleep(sleep_t)

    def __make_reco(self, user_id, bucket):
        """Score items for one user in one bucket; return top-10 (item, score) pairs."""
        if user_id in self._reco_user_id_dic[bucket]:
            cluster = self._reco_p_cluster_user[bucket][
                self._reco_user_id_dic[bucket][user_id]]
        else:
            # Unknown user: fall back to a uniform cluster mixture.
            cluster = [
                1. / self._cluster[bucket]
                for _ in range(self._cluster[bucket])
            ]

        # score(item) = sum_k P(item | k) * P(k | user)
        dic = defaultdict(float)
        for k in range(self._cluster[bucket]):
            for item, prob in self._reco_p_item_cluster[bucket][k].items():
                dic[item] += prob * cluster[k]
        return sorted(dic.items(), key=lambda x: -x[1])[:10]

    def sample_bucket(self):
        """Draw a bucket name according to the configured traffic ratios."""
        name, prob = [], []
        for bucket in self._opt.demo_app.bucket.values():
            name.append(bucket.name)
            prob.append(bucket.ratio)
        return np.random.choice(name, p=prob)

    def _make_reco(self, massege):
        """Build a per-user reco payload for every user id in the message."""
        # Was defaultdict(float): the float factory was never used because
        # every value is assigned a dict below, so a plain dict is correct.
        dic = {}
        user_list = massege['value']['user_id']
        for user_id in user_list:
            bucket = self.sample_bucket()
            dic[user_id] = {
                'list': self.__make_reco(user_id, bucket),
                'bucket': bucket
            }
        self.logger.info('Make {} users reco list...'.format(len(dic)))
        return dic

    def _pack_dic_msg(self, val, msg_type):
        """Wrap a payload in the service's message envelope."""
        dic_msg = {}
        dic_msg['type'] = msg_type
        dic_msg['value'] = val
        dic_msg['timestamp'] = time.time()
        # NOTE(review): 'servive' is presumably a typo for 'service', but the
        # key is part of the wire format, so it is kept for compatibility.
        dic_msg['servive'] = 'demo_personal_reco_system'
        return dic_msg

    async def _feed(self, request, ws):
        """Websocket handler: answer user_list requests, relay feedback to Kafka."""
        self.logger.info('Start feed throw :{}:{}'.format(
            request.ip, request.port))
        while True:
            message = json.loads(await ws.recv())
            # to do
            try:
                if message['type'] == 'user_list':
                    reco_user_list = self._make_reco(message)
                    dic_msg = self._pack_dic_msg(val=reco_user_list,
                                                 msg_type='reco_user_list')
                    await ws.send(json.dumps(dic_msg))
                    self._kp.push(dic_msg)
                elif message['type'] == 'user_feedback':
                    self._kp.push(message)
            except Exception as e:
                self.logger.warning('Something is wrong : {}'.format(e))
                sys.exit(1)
            # finishing

    def run(self):
        """Register the reload task and websocket route, then start Sanic."""
        app.add_task(self._task)
        app.add_websocket_route(self._feed,
                                self._opt.demo_app.sanic.websocket_route)
        app.run(host=self._opt.demo_app.sanic.host,
                port=self._opt.demo_app.sanic.port)
from googleapiclient.discovery import build

from Feynman.serialize import Pickle_serializer


# Module-level serializer; main() uses it to load the cached OAuth token.
_ps = Pickle_serializer()


def main():
    """List the first page of files visible to the Drive v3 API.

    Loads cached OAuth credentials from 'token.pickle' and prints up to 10
    file names with their ids.
    """
    creds = _ps.load('token.pickle')
    service = build('drive', 'v3', credentials=creds)

    # Call the Drive v3 API.
    # NOTE(review): only the first page is fetched; nextPageToken is requested
    # in `fields` but never used to paginate further.
    results = service.files().list(
            pageSize=10, fields="nextPageToken, files(id, name)").execute()

    items = results.get('files', [])

    if not items:
        print('No files found.')
    else:
        print('Files:')
        for item in items:
            # f-string replaces the dated u'{0} ({1})'.format(...) form;
            # the printed text is unchanged.
            print(f"{item['name']} ({item['id']})")


if __name__ == '__main__':
    main()
# ---- Example 9 ----
class Google_drive():
    """Thin Google Drive v3 wrapper with retry-on-timeout upload/download.

    Files are versioned by keeping up to ``max_data`` copies per name inside a
    Drive folder; downloads always pick the newest copy of each name.
    """

    def __init__(self, token_path='token.pickle'):
        """Load cached OAuth credentials and build the Drive v3 service.

        token_path -- pickle file holding the credentials.  The default keeps
        the old zero-argument behaviour; other call sites in this project
        already pass the path explicitly.
        """
        self.logger = get_logger()
        self._ps = Pickle_serializer()
        self.creds = self._ps.load(token_path)

        # Retry forever on socket timeouts until the client is built.
        while True:
            try:
                self.service = build('drive', 'v3', credentials=self.creds)
                break
            except socket.timeout:
                self.logger.info('Time-out... try restart...')

    def _get_list(self):
        """Return metadata for every file visible to this account."""
        result = self.service.files().list(fields='*').execute()['files']
        time.sleep(.1)  # crude pacing between API calls
        return result

    def _get_folder_id(self, folder, rlist):
        """Return the id of *folder*, creating it under the fixed root if absent."""
        r = {dic['name']: dic for dic in rlist}
        if folder not in r:
            # Hard-coded parent (root) folder id for all created folders.
            parents_id = '1quTKA43JyrULAZ2kZxS9sQsjfnRNiG-z'
            body = {'name': folder,
                    'parents': [parents_id],
                    'mimeType': 'application/vnd.google-apps.folder'}
            r = self.service.files().create(body=body, fields='*').execute()
            self.logger.info('Create new folder : {}({})'.format(folder, r['id']))
            time.sleep(.1)
            return r['id']
        else:
            return r[folder]['id']

    def _upload(self, folder, files, max_data=3):
        """Upload each {name: local_path} entry, pruning old copies beyond max_data."""
        rlist = self._get_list()
        folder_id = self._get_folder_id(folder, rlist)

        # Snapshot of files already in the target folder (taken pre-upload).
        rlist = [dic for dic in rlist if folder_id in dic['parents']]

        for name, path in files.items():
            body = {'name': name,
                    'parents': [folder_id]}
            media_body = MediaFileUpload(path,
                                         resumable=True)
            r = self.service.files().create(body=body, media_body=media_body).execute()
            time.sleep(.1)
            self.logger.info('Upload new file : {}({})/{}({})'.format(folder, folder_id, name, r['id']))

            # NOTE(review): substring match, so 'a.ps' also matches 'xa.ps'.
            name_list = [dic for dic in rlist if name in dic['name']]
            # Keep the newest (max_data - 1) old copies; the fresh upload makes
            # up the total of max_data.  The previous slice [:-(max_data - 1)]
            # was a no-op for max_data == 1 (since [:-0] == [:0] is empty), so
            # old copies were never deleted; slicing by an explicit count fixes
            # that off-by-one.
            keep = max_data - 1
            if len(name_list) > keep:
                del_list = sorted(name_list, key=lambda x: x['createdTime'])[:len(name_list) - keep]
                for dic in del_list:
                    self.service.files().delete(fileId=dic['id']).execute()
                    self.logger.info('Delete old file : {}({})/{}({})'.format(folder, folder_id, name, dic['id']))
                    time.sleep(.1)

    def upload(self, folder, files, max_data=3):
        """Public upload: retry the whole batch on socket timeouts."""
        while True:
            try:
                self._upload(folder, files, max_data)
                break
            except socket.timeout:
                self.logger.info('Time-out... try restart...')

    def _download(self, folder, path):
        """Download the newest copy of every file in *folder* into path/folder."""
        if not os.path.exists(path):
            os.makedirs(path)
        path = os.path.join(path, folder)
        if not os.path.exists(path):
            os.makedirs(path)

        rlist = self._get_list()
        folder_id = self._get_folder_id(folder, rlist)

        rlist = [dic for dic in rlist if folder_id in dic['parents']]

        fname = {dic['name'] for dic in rlist}
        for name in fname:
            name_list = [dic for dic in rlist if name in dic['name']]
            # Newest copy wins.
            file_id = max(name_list, key=lambda x: x['createdTime'])['id']

            request = self.service.files().get_media(fileId=file_id)
            fh = io.FileIO(os.path.join(path, name), 'wb')
            downloader = MediaIoBaseDownload(fh, request)
            done = False
            while not done:
                status, done = downloader.next_chunk()
            self.logger.info('Download file : {}({})'.format(os.path.join(path, name), file_id))

    def download(self, folder, path):
        """Public download: retry on socket timeouts."""
        while True:
            try:
                self._download(folder, path)
                break
            except socket.timeout:
                self.logger.info('Time-out... try restart...')
# ---- Example 10 ----
 def __init__(self):
     """Set up logging, config, the reco-server websocket URL and Drive/pickle helpers."""
     self.logger = get_logger()
     self._opt = Config(open('config/demo.json').read())
     # Websocket endpoint of the reco server this demo user connects to.
     self._url = self._opt.demo_user.url
     self._gd = Google_drive('token.pickle')
     self._ps = Pickle_serializer()
# ---- Example 11 ----
class Demo_user():
    """Simulated user population.

    Connects to the reco server over a websocket, periodically emits batches
    of demo users (producer) and replies to recommendation lists with
    pass/choice/click feedback (consumer).
    """

    def __init__(self):
        """Set up logging, config, the server URL and Drive/pickle helpers."""
        self.logger = get_logger()
        self._opt = Config(open('config/demo.json').read())
        self._url = self._opt.demo_user.url
        self._gd = Google_drive('token.pickle')
        self._ps = Pickle_serializer()

    def _make_user_list(self):
        """Draw a Poisson-sized batch of active user ids, weighted by p_user."""
        # _traffic appears to be a per-day volume scaled down to one
        # sleep_t-second window -- TODO confirm against the config.
        user_num = self._traffic / 24 / 60 / 60 * self._opt.demo_user.sleep_t
        user_num = np.random.poisson(user_num)
        u_idxs = np.random.choice(range(self._user_count),
                                  user_num,
                                  p=self._p_user[0])
        return {'user_id': list(map(int, u_idxs))}

    def _make_user_choice(self, u_list):
        """For each user: sample a cluster, then an item from that cluster."""
        u_idxs = u_list['user_id']
        u_ks = [
            np.random.choice(range(self._cluster),
                             p=self._p_cluster_user[u_idx]) for u_idx in u_idxs
        ]
        # Each cluster's item row is renormalized before sampling.
        i_idxs = [
            np.random.choice(range(self._item_count),
                             p=self._p_item_cluster[u_k] /
                             sum(self._p_item_cluster[u_k])) for u_k in u_ks
        ]
        return dict(zip(u_idxs, i_idxs))

    def _make_user_interest(self, u_list):
        """Top max_interest items per user, ranked by cluster-mixture score."""
        u_idxs = u_list['user_id']
        u_max_interest = self._opt.demo_user.max_interest
        return {
            u_idx: list(
                np.argsort(-np.dot(self._p_cluster_user[u_idx],
                                   self._p_item_cluster))[:u_max_interest])
            for u_idx in u_idxs
        }

    def _pack_dic_msg(self, val, msg_type):
        """Wrap a payload in the service's message envelope."""
        dic_msg = {}
        dic_msg['type'] = msg_type
        dic_msg['value'] = val
        dic_msg['timestamp'] = time.time()
        # NOTE(review): 'servive' looks like a typo for 'service', but it is
        # part of the wire format shared with the server side.
        dic_msg['servive'] = 'demo_personal_reco_system'
        return dic_msg

    async def _producer(self):
        """Periodically generate a user batch and send it to the server."""
        self.logger.info('Start producer...')
        while True:
            begin_t = time.time()
            # to do
            try:
                # Re-read the config each cycle so settings can change live.
                self._opt = Config(open('config/demo.json').read())

                u_list = self._make_user_list()
                self._u_choice = self._make_user_choice(u_list)
                self._u_interest = self._make_user_interest(u_list)
                self.logger.info('demo user {} generate... '.format(
                    len(u_list['user_id'])))
                dic_msg = self._pack_dic_msg(val=u_list, msg_type='user_list')
                await self.ws.send(json.dumps(dic_msg))
            except Exception as e:
                # Any failure stops this coroutine; _main() reconnects.
                self.logger.warning('Somthing is wrong : {}'.format(e))
                break
            # finishing
            sleep_t = max(
                0, self._opt.demo_user.sleep_t - int(time.time() - begin_t))
            self.logger.info('Sleep {} secs before next start'.format(sleep_t))
            await asyncio.sleep(sleep_t)

    def _make_user_react(self, message):
        """Turn a reco_user_list message into pass/choice/click feedback events."""
        result = []
        reco_user_list = message['value']
        pss, choice, click, unclick = 0, 0, 0, 0
        for user_id in reco_user_list.keys():
            # 40% pass, 30% choice, 30% click.
            stat = np.random.choice(['pass', 'choice', 'click'],
                                    p=[0.4, 0.3, 0.3])
            if stat == 'pass':
                pss += 1
                continue
            elif stat == 'choice' and int(user_id) in self._u_choice:
                # 'choice': the user picks their own pre-sampled item,
                # regardless of what was recommended.
                tmp = {
                    'user_id': user_id,
                    'item_id': str(self._u_choice[int(user_id)]),
                    'bucket': reco_user_list[user_id]['bucket'],
                    'stat': 'choice'
                }
                choice += 1
                result.append(tmp)
            elif stat == 'click' and int(user_id) in self._u_interest:
                # 'click': only items both recommended and in the user's
                # interest set are clickable.
                reco_item = set(list(zip(*reco_user_list[user_id]['list']))[0])
                interest_item = set(self._u_interest[int(user_id)])
                candidate_item = list(reco_item.intersection(interest_item))
                if candidate_item:
                    tmp = {
                        'user_id': user_id,
                        'item_id': str(np.random.choice(candidate_item)),
                        'bucket': reco_user_list[user_id]['bucket'],
                        'stat': 'click'
                    }
                    click += 1
                    result.append(tmp)
                else:
                    unclick += 1
        self.logger.info(
            'Make user feedback -> pass: {}, choice: {}, click: {}, unclick: {}'
            .format(pss, choice, click, unclick))
        return result

    async def _consumer(self):
        """React to reco lists from the server with user_feedback messages."""
        self.logger.info('Start consumer...')
        while True:
            message = json.loads(await self.ws.recv())
            # to do
            try:
                if message['type'] == 'reco_user_list':
                    u_feedback = self._make_user_react(message)
                    dic_msg = self._pack_dic_msg(val=u_feedback,
                                                 msg_type='user_feedback')
                    await self.ws.send(json.dumps(dic_msg))
            except Exception as e:
                # Any failure stops this coroutine; _main() reconnects.
                self.logger.warning('Somthing is wrong : {}'.format(e))
                break
            # finishing

    def _data_load(self):
        """Fetch the synthetic user model from Drive and unpack it into attributes."""
        self._gd.download(folder=self._opt.demo_user.google_drive.folder,
                          path=self._opt.demo_user.google_drive.root_path)

        demo_user = self._ps.load(self._opt.demo_user.google_drive.data_path)

        self._traffic = demo_user['traffic']
        self._user_count = demo_user['user_count']
        self._item_count = demo_user['item_count']
        self._cluster = demo_user['cluster']
        self._p_user = demo_user['p_user']
        self._p_cluster_user = demo_user['p_cluster_user']
        self._p_item_cluster = demo_user['p_item_cluster']
        self._user_idx = demo_user['user_idx']
        self._item_idx = demo_user['item_idx']
        self._u_choice = {}
        self._u_interest = {}

    async def _main(self):
        """Connect and run producer + consumer; on any error retry after 60 s."""
        self.logger.info('Start...')
        while True:
            try:
                self._data_load()
                self.ws = await websockets.connect(self._url)
                await asyncio.gather(self._producer(), self._consumer())
            except Exception as e:
                self.logger.warning('Restart... after {} secs -> {}'.format(
                    60, e))
                await asyncio.sleep(60)
                continue

    def run(self):
        """Entry point: drive the reconnect loop with asyncio.run."""
        asyncio.run(self._main())
class Base_gc():
    """Baseline 'global counter' model builder.

    Hourly counts item clicks/choices from MongoDB, packs them into a
    single-cluster model and uploads it to the 'demo_gc' Drive folder.
    """

    def __init__(self):
        self.logger = get_logger()
        self._ps = Pickle_serializer()
        self._gd = Google_drive()
        self._opt = Config(open('config/demo_dist.json').read())
        self._mg_data_click = Mongodb(self._opt.mongodb.data_click)
        self._mg_data_choice = Mongodb(self._opt.mongodb.data_choice)

    def _init(self):
        """Build the trivial one-cluster model skeleton and the cache dirs."""
        self.data = dict()
        self.data['cluster'] = 1
        # Every user maps to the single cluster with probability 1.
        self.data['reco_p_cluster_user'] = [[1.]]
        self.data['reco_user_id_dic'] = {-1: -1}
        # Placeholder item table; replaced with real counts in _run().
        self.data['reco_p_item_cluster'] = {
            k: {
                -1: 1.
            }
            for k in range(self.data['cluster'])
        }
        if not os.path.exists('cache'):
            os.makedirs('cache')
        if not os.path.exists('cache/demo_gc'):
            os.makedirs('cache/demo_gc')

    def _run(self):
        """Count per-item events and upload the refreshed model files."""
        dic = defaultdict(int)
        for data in self._mg_data_click.find():
            dic[int(data['item_id'])] += 1
        for data in self._mg_data_choice.find():
            dic[int(data['item_id'])] += 1

        # NOTE(review): raw counts (not normalized) and every cluster shares
        # the same dict object; harmless while cluster == 1.
        self.data['reco_p_item_cluster'] = {
            k: dic
            for k in range(self.data['cluster'])
        }

        # NOTE(review): the same full model dict is written to both files;
        # the consumer reads different keys from each.
        self._ps.dump(self.data, 'cache/demo_gc/reco_cluster_user.ps')
        self._ps.dump(self.data, 'cache/demo_gc/reco_item_cluster.ps')

        self._gd.upload(folder='demo_gc',
                        files={
                            'reco_cluster_user.ps':
                            'cache/demo_gc/reco_cluster_user.ps',
                            'reco_item_cluster.ps':
                            'cache/demo_gc/reco_item_cluster.ps'
                        },
                        max_data=1)

    def run(self):
        """Hourly loop around _run(); Ctrl-C exits cleanly."""
        self.logger.info('Start...')
        self._init()
        while True:
            begin_t = time.time()
            # to do
            try:
                self._run()
            except KeyboardInterrupt:
                self.logger.warning('KeyboardInterrupt detect...')
                break
            # finishing
            sleep_t = max(0, 3600 - int(time.time() - begin_t))
            self.logger.info('Sleep {} secs before next start'.format(sleep_t))
            time.sleep(sleep_t)