Exemplo n.º 1
0
def fetch_cate_pro(token, cate_id, pgToken=None, times=1):
    """Request one page of product search results for a category.

    Only the request half of the function is visible in this excerpt; the
    code that consumes ``res`` is not shown here.

    :param token: value sent in the ``authorization`` header
    :param cate_id: category id used as the ``categoryId`` search filter
    :param pgToken: optional page token, sent as ``pageToken`` when truthy
    :param times: page counter, used only in the log line
    """
    url = 'https://api.joom.com/1.1/search/products?language=en-US&currency=USD&_=jfs3%s'
    category_filter = {
        'id': 'categoryId',
        'value': {'type': 'categories', 'items': [{'id': cate_id}]},
    }
    params = {'count': 50, 'filters': [category_filter]}
    if pgToken:
        params["pageToken"] = pgToken
    logger.info(u"正在抓取分类%s下第%s页产品" % (cate_id, times))

    def post_search():
        # Each attempt gets its own random URL suffix and a freshly
        # serialised JSON body.
        return requests.post(url % random_key(4),
                             data=json.dumps(params),
                             headers={
                                 "authorization": token,
                                 "content-type": 'application/json'
                             },
                             timeout=15)

    try:
        res = post_search()
    except Exception:
        # One blind retry; a second failure propagates to the caller.
        res = post_search()
Exemplo n.º 2
0
def fetch_cate(token, p_tag=None, level=1, p_id=0):
    """Fetch the children of one category, store them and fan out.

    Every child is saved through ``Category.raw_save``. Children that have
    public children are queued for another ``fetch_cate`` run one level
    deeper; leaves are queued for ``fetch_cate_pro``.

    NOTE(review): the ``.delay`` calls suggest this is registered as a task;
    the decorator is not visible here -- confirm.

    :param token: value sent in the ``authorization`` header
    :param p_tag: id of the category whose children are requested; when
        falsy no ``categoryId`` is sent
    :param level: level stored for the children returned by this call
    :param p_id: parent id stored for the children returned by this call
    """
    url = 'https://api.joom.com/1.1/categoriesHierarchy'
    params = {
        'levels': 1,
        'parentLevels': 1,
        'language': 'en-US',
        'currency': 'USD'
    }
    if p_tag:
        params["categoryId"] = p_tag
    logger.info(u"正在采集id为%s的分类" % p_tag)
    logger.info(u"参数为%s" % params)
    # Without a timeout a stalled connection would block the worker forever.
    res = requests.get(url,
                       params=params,
                       headers={"authorization": token},
                       timeout=15)
    if "unauthorized" in res.content:
        # Token rejected: get a new one and queue the same request again.
        token = get_joom_token()
        fetch_cate.delay(token, p_tag, level, p_id)
        return
    if res.status_code != 200:
        # Previously ignored without a trace; leave a record of the failure.
        logger.error("fetch_cate got HTTP %s for category %s" %
                     (res.status_code, p_tag))
        return

    n_level = level + 1
    content = json.loads(res.content)
    for c_info in content["payload"]["children"]:
        tag = c_info['id']
        name = c_info['name']
        is_leaf = 0 if c_info["hasPublicChildren"] else 1
        # NOTE(review): the meaning of the trailing 31 is not visible here.
        # The return value of raw_save is used as the parent id of the
        # next level.
        n_p_id = Category.raw_save(tag, name, p_id, is_leaf, level, 31)
        if not is_leaf:
            fetch_cate.delay(token, p_tag=tag, level=n_level, p_id=n_p_id)
        else:
            fetch_cate_pro.delay(token, tag)
Exemplo n.º 3
0
 def get_current_user(self):
     """Return the user whose id is stored under ``user_id`` in the session.

     Any exception raised on the way is logged and ``None`` is returned.
     """
     try:
         uid = self.session["user_id"]
         with sessionCM() as db_session:
             return User.find_by_id(db_session, uid)
     except Exception as err:
         logger.info(err.message)
     return None
Exemplo n.º 4
0
def upsert_review(review):
    """Insert one review row into ``joom_review`` (``insert ignore``).

    Despite the name, the statement is ``insert ignore``, not an upsert.
    Failures while executing are logged and swallowed.

    :param review: mapping that supplies every bind parameter named in the
        statement; ``review["review_no"]`` is also used for the log line
    """
    logger.info(u"正在插入评论, no为%s" % review["review_no"])
    connect = db.connect()
    try:
        sql = text('insert ignore  into joom_review (review_no,create_time,update_time,pro_no,variation_id,user_no,joom_review.language,origin_text,new_text,order_id,is_anonymous,colors,star,shop_no,photos) VALUES (:review_no,:create_time,:update_time,:pro_no,:variation_id,:user_no,:language,:origin_text,:new_text,:order_id,:is_anonymous,:colors,:star,:shop_no,:photos) ')
        cursor = connect.execute(sql, **review)
        cursor.close()
    except Exception:
        # format_exc() takes an optional ``limit``, not the exception.
        logger.info(traceback.format_exc())
    finally:
        # Release the connection explicitly instead of leaving it to
        # garbage collection.
        connect.close()
Exemplo n.º 5
0
def upsert_user(user):
    """Insert one user row into ``joom_user`` (``insert ignore``).

    Despite the name, the statement is ``insert ignore``, not an upsert.
    Failures while executing are logged and swallowed.

    :param user: mapping with ``user_no``, ``full_name`` and ``images``
    """
    logger.info(u"正在插入用户, no为%s" % user["user_no"])
    connect = db.connect()
    try:
        sql = text(
            'insert ignore into joom_user (user_no, full_name, images) values (:user_no, :full_name, :images)')
        cursor = connect.execute(sql, **user)
        cursor.close()
    except Exception:
        # format_exc() takes an optional ``limit``, not the exception.
        logger.info(traceback.format_exc())
    finally:
        # Release the connection explicitly instead of leaving it to
        # garbage collection.
        connect.close()
Exemplo n.º 6
0
def upsert_pro(pro):
    """Insert a product into ``joom_pro`` or update it on duplicate key.

    Failures while executing are logged and swallowed.

    :param pro: mapping that supplies every bind parameter named in the
        statement; ``pro["pro_no"]`` is also used for the log line
    """
    logger.info(u"正在插入产品, no为%s" % pro["pro_no"])
    connect = db.connect()
    try:
        sql = text(
            'insert into joom_pro (joom_pro.name,pro_no,shop_no,category_id,image,rate,msrp,discount,real_price,reviews_count,create_time,update_time,cate_id1,cate_id2,cate_id3,cate_id4,cate_id5,origin_price,r_count_30,r_count_7,r_count_7_14,growth_rate,save_count) values (:name,:pro_no,:shop_no,:category_id,:image,:rate,:msrp,:discount,:real_price,:reviews_count,:create_time,:update_time,"","","","","",0,0,0,0,0,0) on duplicate key update joom_pro.name=:name,category_id=:category_id,rate=:rate,msrp=:msrp,discount=:discount,real_price=:real_price,reviews_count=:reviews_count,update_time=:update_time;'
        )
        cursor = connect.execute(sql, **pro)
        cursor.close()
    except Exception:
        # format_exc() takes an optional ``limit``, not the exception.
        logger.info(traceback.format_exc())
    finally:
        # Release the connection explicitly instead of leaving it to
        # garbage collection.
        connect.close()
Exemplo n.º 7
0
def upsert_shop(shop):
    """Insert a shop into ``joom_shop`` or update it on duplicate key.

    Failures while executing are logged and swallowed.

    :param shop: mapping that supplies every bind parameter named in the
        statement; ``shop["shop_no"]`` is also used for the log line
    """
    logger.info(u"正在插入店铺, no为%s" % shop["shop_no"])
    connect = db.connect()
    try:
        # NOTE(review): the duplicate-key branch also overwrites
        # create_time -- confirm that this is intended.
        sql = text(
            'insert into joom_shop (joom_shop.name,shop_no,logo,rate,save_count,create_time,update_time,is_verify,pro_count,reviews_count,r_count_30,r_count_7,r_count_7_14,growth_rate,cate_id) values (:name,:shop_no,:logo,:rate,:save_count,:create_time,:update_time,:is_verify,0,0,0,0,0,0,"") on duplicate key update rate=:rate, save_count=:save_count, create_time=:create_time, update_time=:update_time, is_verify=:is_verify;'
        )
        cursor = connect.execute(sql, **shop)
        cursor.close()
    except Exception:
        # format_exc() takes an optional ``limit``, not the exception.
        logger.info(traceback.format_exc())
    finally:
        # Release the connection explicitly instead of leaving it to
        # garbage collection.
        connect.close()
Exemplo n.º 8
0
def send_mail(subject,
              text,
              to=None,
              cc=None,
              bcc=None,
              name='smtp.qq.com',
              account='*****@*****.**',
              password='******'
              ):
    """Send an HTML mail over SMTP-over-SSL (port 465).

    Background mail task; it can also be called on its own. Delivery
    failures are logged and swallowed, nothing is returned.

    :param subject: value of the ``Subject`` header
    :param text: HTML body; a dict is flattened to ``"key:value;..."``
    :param to: list of primary recipients (``None`` means no recipients)
    :param cc: list of carbon-copy recipients
    :param bcc: list of blind-copy recipients; they are addressed in the
        SMTP envelope only and never written into the message headers
    :param name: SMTP host name
    :param account: login name, also used as the sender address
    :param password: login password
    :raises AssertionError: if ``to``, ``cc`` or ``bcc`` is not a list
    """
    # None defaults avoid sharing one list object between calls.
    to = [] if to is None else to
    cc = [] if cc is None else cc
    bcc = [] if bcc is None else bcc
    assert type(to) == list
    assert type(cc) == list
    assert type(bcc) == list

    fro = "网站myweb<%s>" % account
    msg = MIMEMultipart()
    msg["From"] = fro
    msg["Subject"] = subject
    msg["To"] = COMMASPACE.join(to)
    if cc:
        msg["Cc"] = COMMASPACE.join(cc)
    # No "Bcc" header: sendmail() transmits the message as is, so a header
    # would reveal the hidden recipients to everybody.
    # Build a new list so the caller's ``to`` list is never extended.
    real_to = to + cc + bcc
    msg["Date"] = formatdate(localtime=True)

    if isinstance(text, dict):
        text = ";".join(
            "%s:%s" % (str(k), str(v)) for k, v in text.items())

    msg.attach(MIMEText(text, "html", _charset="UTF8"))

    smtp = None
    try:
        smtp = smtplib.SMTP_SSL(name, 465, timeout=20)
        smtp.login(account, password)
        smtp.sendmail(fro, real_to, msg.as_string())
        smtp.quit()
    except Exception:
        logger.info("this time is to send content: %s" % text)
        logger.info(traceback.format_exc())
        if smtp is not None:
            # quit() was not reached or failed; drop the socket anyway.
            smtp.close()
Exemplo n.º 9
0
def fetch_review(tag, token, page_token=None):
    """Fetch one page of reviews for a product and store them.

    Reviews and their authors are written through ``upsert_review`` and
    ``upsert_user`` on thread pools. When the response names a next page and
    the current page was not empty, the next page is queued.

    :param tag: product id inserted into the request URL
    :param token: value sent in the ``authorization`` header
    :param page_token: optional page token, sent as ``pageToken``
    :return: the result of ``fetch_review.delay`` when a next page is
        queued, otherwise ``None``
    """
    url = "https://api.joom.com/1.1/products/%s/reviews?=all&count=1000&sort=top&language=en-US&currency=USD&_=jfs3%s" % (tag, random_key(4))
    params = {"filter_id": "all", "count": 200, "sort": "top"}
    if page_token:
        params["pageToken"] = page_token
    logger.info(u"正在第%s次抓取产品%s的评论, 参数为%s" % (1,  tag, params))

    def request_reviews():
        return requests.get(url,
                            params=params,
                            headers={"authorization": token},
                            timeout=20)

    try:
        res = request_reviews()
    except Exception:
        # One blind retry; a second failure propagates.
        res = request_reviews()

    if "unauthorized" in res.content:
        # Token rejected: queue the same page again with a new token.
        fetch_review.delay(tag, get_joom_token(), page_token)
        return

    payload = res.json().get("payload")
    if not payload:
        logger.info(u"抓取产品%s的评论失败, 参数为%s" % (tag, params))
        return

    reviews = payload["items"]
    review_datas, review_users, _ = retrieve_review(reviews)
    _store_concurrently(upsert_review, "review", review_datas)
    _store_concurrently(upsert_user, "user", review_users)

    next_token = payload.get("nextPageToken")
    if next_token and len(reviews):
        return fetch_review.delay(tag, token, page_token=next_token)


def _store_concurrently(store, kwarg_name, records):
    """Call ``store(<kwarg_name>=record)`` for every record on 32 threads."""
    with futures.ThreadPoolExecutor(max_workers=32) as executor:
        pending = {
            executor.submit(store, **{kwarg_name: record}): record
            for record in records
        }
        for done in futures.as_completed(pending):
            try:
                done.result()
            except Exception as exc:
                logger.error("%s generated an exception: %s" % (pending[done], exc))
Exemplo n.º 10
0
def aps_callback(req_type, url, data, job_id, other_kwargs=None, retry=1):
    """Scheduler callback: call ``url`` and record the outcome.

    TODO: add a call record.

    :param req_type: request method, ``post`` or anything else for GET
    :param url: request address
    :param data: request data (form body for POST, query string for GET)
    :param job_id: id of the job in the task queue
    :param other_kwargs: extra arguments, only passed along on retry
    :param retry: number of the current attempt; the call is repeated until
        the third attempt has failed
    :return: the decoded JSON response, or ``{"status": 0, "mess": ...}``
        after the last failed attempt
    """
    with sessionCM() as session:
        sched = Scheduler.find_by_scheduler_id(session, job_id)
        job = scheduler.get_job(job_id)
        next_run_time = job.next_run_time.strftime("%Y-%m-%d %H:%M:%S")
        logger.info("正在执行scheduler回调, 第%s次s请求:" % retry)
        logger.info(url)
        logger.info(data)
        try:
            if req_type == "post":
                response = requests.post(url=url, data=data)
            else:
                response = requests.get(url=url, params=data)
            payload = response.json()
            if payload["status"]:
                # One-shot ("date") jobs are finished after a good run.
                sched.update(
                    session,
                    status=ACTIVE if sched.trigger != "date" else END,
                    next_run_time=next_run_time)
            else:
                sched.update(
                    session,
                    status=ERROR,
                    err_mess=payload["message"],
                    next_run_time=next_run_time)
            return payload
        except Exception as exc:
            if retry >= 3:
                logger.info("POST fail {0}".format(exc.message))
                sched.update(
                    session,
                    status=ERROR,
                    err_mess="POST fail {0}".format(exc.message),
                    next_run_time=next_run_time)
                return {"status": 0, "mess": exc.message}
            # The retry runs while this session is still open.
            return aps_callback(req_type,
                                url,
                                data,
                                job_id,
                                other_kwargs=other_kwargs,
                                retry=retry + 1)
Exemplo n.º 11
0
 def on_pong(self, data):
     """Log that a pong arrived; ``data`` is not used.

     NOTE(review): presumably a websocket-handler hook called by the
     framework -- the enclosing class is not visible here.
     """
     logger.info("receive a response of my ping")
Exemplo n.º 12
0
 def on_request(self):
     """Log details of the incoming request, then answer with a 404 page.

     Logged: the request arguments, the full URL, the ``X-Real-IP`` header
     and the ``user_id`` stored in the session.
     """
     logger.info(self.request.arguments)
     logger.info(self.request.full_url())
     # .get(): a missing header must not raise KeyError and so prevent the
     # 404 below from being written.
     logger.info(self.request.headers.get("X-Real-IP"))
     logger.info(self.session.get("user_id"))
     self.write_error(404)
Exemplo n.º 13
0
 def str_to_unicode(word):
     """Convert ``word`` with ``to_unicode``.

     If that raises, the error is logged and ``word`` is decoded as
     ``unicode-escape`` instead.
     """
     try:
         converted = to_unicode(word)
     except Exception as err:
         logger.info(err.message)
         converted = word.decode("unicode-escape")
     return converted
Exemplo n.º 14
0
 def add_my_job(self,
                trigger,
                res_type,
                res_url,
                job_id,
                job_name,
                schedule_args,
                schedule_type,
                func_args,
                user_id=0,
                remark="",
                job_store="default"):
     """Add a job to the scheduler and store a matching ``Scheduler`` row.

     :param trigger: trigger name; must be in ``TRIGGER_LIST``
     :param res_type: first callback argument
     :param res_url: second callback argument, stored as ``request_url``
     :param job_id: job id; a UUID1 string is generated when falsy
     :param job_name: job name, stored as ``action``
     :param schedule_args: trigger arguments, stored JSON-encoded in ``extra``
     :param schedule_type: stored as both ``mold`` and ``type``
     :param func_args: third callback argument, stored JSON-encoded in ``args``
     :param user_id: stored as ``user_id``
     :param remark: stored as ``remark``
     :param job_store: passed to ``add_job_to_scheduler``
     :return: ``{"status": 1, "message": ...}`` on success, otherwise
         ``{"status": 0, "message": ...}``
     :raises ErrorArgumentError: if ``trigger`` is not in ``TRIGGER_LIST``
     """
     if trigger not in TRIGGER_LIST:
         raise ErrorArgumentError
     job_id = job_id or str(uuid.uuid1())
     callback_args = [res_type, res_url, func_args, job_id]
     try:
         job = self.add_job_to_scheduler(trigger, job_id, job_name,
                                         schedule_args, callback_args,
                                         job_store)
         with sessionCM() as session:
             _scheduler = Scheduler.find_by_scheduler_id(session, job.id)
             info = {
                 "request_url": res_url,
                 "trigger": trigger,
                 "action": job_name,
                 "args": json.dumps(func_args),  # JSON-serialised arguments
                 "mold": schedule_type,
                 "type": schedule_type,  # sys
                 "next_run_time":
                 job.next_run_time.strftime("%Y-%m-%d %H:%M:%S"),
                 "user_id": user_id,
                 "scheduler_id": job.id,
                 # trigger-specific arguments
                 "extra": json.dumps(schedule_args),
                 "remark": remark,
                 "status": ACTIVE
             }
             # Create the row on first sight, otherwise refresh it.
             if not _scheduler:
                 Scheduler.create(session, **info)
             else:
                 _scheduler.update(session, **info)
         mess = "job_id为%s的任务添加成功" % job_id
         logger.info(mess)
         return {"status": 1, "message": mess}
     except ConflictingIdError:
         mess = "job_id为%s的任务已经存在" % job_id
         logger.error(mess)
         return {"status": 0, "message": mess}
     except Exception:
         # format_exc() takes an optional ``limit``, not the exception.
         logger.error(traceback.format_exc())
         return {"status": 0, "message": "添加任务失败"}
Exemplo n.º 15
0
def print_datetime():
    """Print the current local time and log the same timestamp."""
    # Read the clock once so stdout and the log show the same value.
    now = datetime.datetime.now()
    # The parenthesised single-argument form prints the same text as a
    # Python 2 statement and as a Python 3 function call.
    print(now)
    logger.info(now)
Exemplo n.º 16
0
        res = requests.post(url % random_key(4),
                            data=json.dumps(params),
                            headers={
                                "authorization": token,
                                "content-type": 'application/json'
                            },
                            timeout=15)
    if "unauthorized" in res.content:
        token = get_joom_token()
        fetch_cate_pro.delay(token, cate_id, pgToken, times)
        return

    content = json.loads(res.content)
    items = content["payload"]["items"]
    if len(items) == 0:
        logger.info(u"分类%s抓取完成!" % cate_id)
    else:
        for item in items:
            logger.info(u'产品id为%s' % item["id"])
            fetch_review.delay(item["id"], token)
        with futures.ThreadPoolExecutor(max_workers=16) as executor:
            future_to_user = {
                executor.submit(fetch_pro, tag=item["id"], token=token):
                item["id"]
                for item in items
            }
            for future in futures.as_completed(future_to_user):
                rev_pro = future_to_user[future]
                try:
                    rp = future.result()
                except Exception as exc:
Exemplo n.º 17
0
    pro_data["discount"] = parent_info["discount"]
    pro_data["Description"] = item["description"]
    pro_data["ProductSKUs"] = list()
    pro_data["images"] = get_images(item)
    pro_data["ProductSKUs"] = get_variants(item["variants"])
    return pro_data, shop_info, pro_info


@celery.task(ignore_result=True)
def fetch_pro(tag, token):
    """Fetch one product and store its shop and product records.

    :param tag: product id inserted into the request URL
    :param token: value sent in the ``authorization`` header
    """
    data_url = 'https://api.joom.com/1.1/products/%s?language=en-US&currency=USD' % tag

    def request_product():
        return requests.get(data_url,
                            headers={"authorization": token},
                            timeout=5)

    try:
        res = request_product()
    except Exception:
        # One blind retry; a second failure propagates.
        res = request_product()

    if "unauthorized" in res.content:
        # Token rejected: run again in-process (not queued) with a new token.
        fetch_pro(tag, get_joom_token())
        return

    # The first element returned by trans_pro is not used here.
    _, shop_info, pro_info = trans_pro(json.loads(res.content))
    upsert_shop(shop_info)
    upsert_pro(pro_info)

    logger.info(u"产品%s保存成功!" % tag)