def predict():
    logger.info('Use history weight: {}'.format(WEIGHT))
    logger.info("Initializing connection to Redis")
    con = get_redis_client(config)
    for model in MODELS:
        if model not in ["tv", "movie", "cartoon"]:
            continue
        data, ids = load_data(model, static=True)
        logger.info('Init sim handler')
        s = Sim(data, ids, weight=WEIGHT[model], static_=True)
        count = 0
        try:
            for cover_id, result in s.process():
                count += 1
                if DEBUG:
                    logger.info('{}  {}'.format(cover_id, result))
                    continue
                con.set(KEY_PATTERN.format(cover_id), json.dumps(result))
                con.expire(KEY_PATTERN.format(cover_id), 2592000)  # 30 days
        except Exception as e:
            logger.error('caught error: {}, processed num: {}, model: {}'.format(e, count, model))
            traceback.print_exc()
            raise Exception('Error')
        logger.info('Num of {} result: {}'.format(model, count))
        print('Num of {} result: {}'.format(model, count))
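For reference, a minimal read-back sketch of how a consumer could fetch one of these cached similarity results; the key template, cover id, and connection details below are assumptions, since the snippet only shows KEY_PATTERN being formatted with a cover_id, stored with json.dumps(), and given a 30-day TTL.

import json
import redis

# Hypothetical values; the real KEY_PATTERN and cover ids come from the module above.
KEY_PATTERN = 'sim:result:{}'
cover_id = '12345'

client = redis.StrictRedis(host='localhost', port=6379, db=0)
raw = client.get(KEY_PATTERN.format(cover_id))
if raw is None:
    print('no cached result for {}'.format(cover_id))
else:
    result = json.loads(raw)  # predict() stored the value with json.dumps()
    print('cached result: {}'.format(result))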
Example #2
def index_orders(redis_url, order_queue, unindex=False):
    redisClient = util.get_redis_client(redis_url)
    while True:
        with util.get_queue_message(order_queue, redisClient) as message:
            if not unindex:
                try:
                    record_order(message, util.Locker(redisClient))
                except Exception:
                    logger.exception("Error recording message")
            else:
                try:
                    delete_order(message, util.Locker(redisClient))
                except Exception:
                    logger.exception("Error deleting record")
Example #3
def populate_blockhash(redis_url, topic_name):
    global blockhash
    r = util.get_redis_client(redis_url)
    # Get block number initially. The pubsub channel will give us block hashes,
    # but from a caching perspective they should all be unique, which is the
    # important part.
    try:
        blockhash = r.get("topic://%s::blocknumber" %
                          topic_name).decode("utf8")
    except Exception:
        pass
    p = r.pubsub()
    p.subscribe(topic_name)
    for message in p.listen():
        if message.get("type") == "message":
            try:
                blockhash = json.loads(message["data"])
            except (KeyError, ValueError):
                # Ignore messages whose payload is missing or not valid JSON
                pass
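A minimal publisher sketch for the subscriber above, assuming redis-py and a hypothetical topic name and connection URL; populate_blockhash() picks the hash up from its p.listen() loop, and the blocknumber key name is copied from the snippet.

import json
import redis

r = redis.StrictRedis.from_url('redis://localhost:6379/0')  # hypothetical URL
topic_name = 'blocks'  # hypothetical topic

# Seed the cached block number that populate_blockhash() reads at startup.
r.set('topic://%s::blocknumber' % topic_name, '1234567')

# Publish a new block hash; subscribers receive it as a pubsub 'message'.
r.publish(topic_name, json.dumps('0x0123abcd'))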
Example #4
def __init__(self, tid):
    self._redis_key = redis_prefix + str(tid)
    self._redis = get_redis_client()
    self._data = self.load() or {}
    self._destoried = False
Example #5
            # If we're doing this repeatedly, we wait until the queue is
            # cleared to load it up again.
            time.sleep(length_check_frequency)
        remaining_time = (start_time + delay) - time.time()
        if remaining_time > 0:
            # If the queue clears faster than the round delay, wait until the
            # time is up to load the queue again.
            time.sleep(remaining_time)


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("redis_url")
    parser.add_argument("publish_queue")
    parser.add_argument("--round-delay", "-d", type=int, default=60)
    parser.add_argument("--length-check-freq", "-f", type=int, default=0.1)
    parser.add_argument("--recur", "-r", action="store_true", default=False)
    parser.add_argument("--queued-max", "-m", type=int, default=1000)
    parser.add_argument("--log-level", "-l", default="info")
    args = parser.parse_args()

    logging.basicConfig(level=getattr(logging, args.log_level.upper()))

    redisClient = util.get_redis_client(args.redis_url)
    if args.recur:
        main(redisClient, args.publish_queue, args.round_delay,
             args.queued_max, args.length_check_freq)
    else:
        queue_orders(redisClient, args.publish_queue, args.queued_max,
                     args.length_check_freq)
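Assuming the snippet above is saved as queue_orders.py (the file name is a guess), the argparse setup means it is launched with the Redis URL and publish queue as positional arguments, e.g. python queue_orders.py redis://localhost:6379/0 orders --recur --round-delay 30 --queued-max 500 --log-level debug.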
Example #6
# encoding:utf8
__author__ = 'brianyang'

import sys
import time
import random
import json

sys.path.append('/home/q/blog')
from util import get_redis_client

redis_client = get_redis_client()

# Python 2-only hack to make utf-8 the default string encoding
reload(sys)
sys.setdefaultencoding('utf8')

import requests
from bs4 import BeautifulSoup

url = 'http://xjh.haitou.cc/bj/page-%d'
scrapy_size = 30
result = []
id_dict = {}


def parse_result(soup, result):
    trs = soup.find_all('tr', attrs={'data-key': True})
    for tr in trs:
        id_ = tr['data-key']
        title_dom = tr.find('td', class_='cxxt-title')
        cancel = title_dom.find('span', class_='badge badge-cancel')