Example #1
    def select_by_parameter(cls, parameter):

        page = int(parameter['page'])
        limit = int(parameter['limit'])
        status = int(parameter['status'])
        keyword = str(parameter['keyword'])
        sort = int(parameter['sort'])
        try:
            datas = db_session.query(MainUrl).filter(
                MainUrl.sort == sort, MainUrl.status == status,
                MainUrl.webSite.like(
                    "%{}%".format(keyword))).limit(limit).offset(
                        (page - 1) * limit)
            count = db_session.query(MainUrl).filter(
                MainUrl.sort == sort, MainUrl.status == status,
                MainUrl.webSite.like("%{}%".format(keyword))).count()

            # Materialize the rows before closing the session so the lazy
            # query is not evaluated against a closed session.
            data = [item.single_to_dict() for item in datas]
            db_session.close()

            return {
                "code": "200",
                "message": "succeed",
                "data": data,
                "count": count
            }

        except (SqlalchemyIntegrityError, PymysqlIntegrityError,
                InvalidRequestError):
            db_session.close()
            return {"code": "404", "message": "fialed", "data": [], "count": 0}
Example #2
def test_db():
    import threading

    # Run opt_db in a background thread alongside the bulk inserts below.
    th = threading.Thread(target=opt_db)
    th.start()
    jobs = []
    for x in range(10010, 20000):
        job = Job()
        job.task = 2
        job.account = 2
        job.agent = 1
        job.status = 'running11'
        job.track_id = '{}'.format(x)
        job.start_time = datetime.datetime.now()
        jobs.append(job)

    db_lock.acquire()
    print(datetime.datetime.now())
    # ORM-style bulk add: registers one pending Job object per row in the session.
    db_session.add_all(jobs)
    # db_session.commit()
    # Core-style bulk insert: a single multi-row INSERT built from plain dicts.
    db_session.execute(Job.__table__.insert(),
                       [{
                           'task': 1,
                           'account': 3,
                           'agent': 1,
                           'status': 'ffff',
                           'track_id': '{}'.format(x),
                           'start_time': datetime.datetime.now()
                       } for x in range(1000)])
    db_session.commit()
    print(db_session)
    db_session.close()
    db_lock.release()
    print(datetime.datetime.now())
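test_db mixes two insert styles: db_session.add_all(), which registers one ORM object per row, and a Core-level Job.__table__.insert() executed with a list of parameter dicts, which becomes a single executemany. A rough, hypothetical way to time the two side by side (the helper name, row contents and counts are invented; Job and the session come from the surrounding code):

import datetime
import time


def time_bulk_insert(session, n=1000):
    # Hypothetical benchmark comparing the two insert styles used in test_db.
    def make_rows(offset):
        return [{'task': 1, 'account': 3, 'agent': 1, 'status': 'bench',
                 'track_id': str(offset + x),
                 'start_time': datetime.datetime.now()} for x in range(n)]

    start = time.perf_counter()
    session.execute(Job.__table__.insert(), make_rows(0))  # one executemany INSERT
    session.commit()
    core_elapsed = time.perf_counter() - start

    start = time.perf_counter()
    session.add_all([Job(**row) for row in make_rows(n)])  # one ORM object per row
    session.commit()
    orm_elapsed = time.perf_counter() - start

    return core_elapsed, orm_elapsed

Because the intermediate db_session.commit() in test_db is commented out, the objects registered with add_all() are only committed by the final commit at the end of the function.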
Example #3
    def select_all(cls, parameter):

        page = int(parameter['page'])
        limit = int(parameter['limit'])
        sort = int(parameter['sort'])

        try:
            datas = db_session.query(MainUrl).filter(
                MainUrl.sort == sort, MainUrl.spider_name == 0,
                MainUrl.status == 1).limit(limit).offset((page - 1) * limit)
            count = db_session.query(MainUrl).filter(
                MainUrl.sort == sort, MainUrl.spider_name == 0,
                MainUrl.status == 1).count()
            # Materialize the rows before closing the session so the lazy
            # query is not evaluated against a closed session.
            data = [item.single_to_dict() for item in datas]
            db_session.close()

            return {
                "code": "200",
                "message": "succeed",
                "data": data,
                "count": count
            }

        except (SqlalchemyIntegrityError, PymysqlIntegrityError,
                InvalidRequestError):
            db_session.close()
            return {"code": "404", "message": "fialed", "data": [], "count": 0}
Example #4
    def update_task_name(cls, parameter):

        spider_name = int(parameter['task_id'])
        main_url_pids = parameter['main_url_pids']
        operation = str(parameter['operation'])
        if main_url_pids != "":
            try:
                # Parse the pid list with ast.literal_eval rather than eval,
                # so only Python literals are accepted from the request.
                import ast
                pids = ast.literal_eval(main_url_pids)
                if operation == "import":
                    for main_url_pid in pids:
                        main_url = db_session.query(MainUrl).filter(
                            MainUrl.pid == main_url_pid).first()
                        main_url.spider_name = spider_name
                elif operation == "remove":
                    for main_url_pid in pids:
                        main_url = db_session.query(MainUrl).filter(
                            MainUrl.pid == main_url_pid).first()
                        main_url.spider_name = 0
                db_session.commit()
                db_session.close()
                return {"code": "200", "message": "更新成功"}  # update succeeded

            except (SqlalchemyIntegrityError, PymysqlIntegrityError,
                    InvalidRequestError):
                db_session.close()
                return {"code": "404", "message": "更新失败"}  # update failed
        else:
            return {"code": "202", "message": "并没有移除数据"}  # empty pid list, nothing to update
Example #5
    def add_one(cls, parameter):
        mainurl = MainUrl()
        mainurl.address = parameter['address']
        mainurl.webSite = parameter['webSite']
        mainurl.status = 0
        mainurl.remark = ""
        mainurl.sort = parameter['sort']
        db_session.add(mainurl)
        db_session.commit()
        db_session.close()
Example #6
    def start_task(cls, parameter):
        spider_name = int(parameter['id'])
        datas = db_session.query(MainUrl).filter(
            MainUrl.spider_name == spider_name, MainUrl.status == 1).all()
        db_session.close()
        parameters = []
        for item in [item.single_to_dict() for item in datas]:
            parameter = {}
            url = item.get("address")
            try:
                rule = item["rule"]
                if rule is None or rule == "null" or rule == "":
                    crawler_info.info(
                        "{} : has no filtering rules, default algorithm acquisition"
                        .format(url))
                    parameter["rule"] = {
                        'filter_rule': '',
                        'selector': 'xpath',
                        'deep_limit': '1',
                        'fields': {
                            'title': '',
                            'author': '',
                            'publishTime': '',
                            'content': ''
                        }
                    }
                else:
                    filter_rule = json.loads(rule)["filter_rule"]
                    if filter_rule:
                        # Restore '+' characters that are stored as '@'
                        # before parsing the full rule.
                        rule = json.loads(item["rule"].replace("@", "+"))
                        parameter["rule"] = rule
                    else:
                        parameter["rule"] = rule
            except Exception:
                crawler_info.info(
                    "{} : has no filtering rules, default algorithm acquisition"
                    .format(url))
                parameter["rule"] = {
                    'filter_rule': '',
                    'selector': 'xpath',
                    'deep_limit': '1',
                    'fields': {
                        'title': '',
                        'author': '',
                        'publishTime': '',
                        'content': ''
                    }
                }

            parameter["pid"] = item.get("pid")
            parameter["webSite"] = item.get("webSite")
            parameter["url"] = str(url).strip()
            parameters.append(parameter)
        return parameters
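start_task expects MainUrl.rule to hold a JSON string with at least a filter_rule key; when filter_rule is non-empty, the stored string is re-parsed with '@' swapped back to '+'. A hypothetical rule value with the same shape as the default used above (the field contents are invented):

# Hypothetical rule value; the structure mirrors the default dict in
# start_task, the contents are invented.
example_rule = json.dumps({
    'filter_rule': '/news/',   # assumed: some non-empty filter pattern
    'selector': 'xpath',
    'deep_limit': '1',
    'fields': {
        'title': '',
        'author': '',
        'publishTime': '',
        'content': ''
    }
})

Since start_task replaces '@' with '+' before json.loads, any '+' that has to survive inside a stored rule would be written as '@' in the database.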
Example #7
    def add_one(cls, parameter):
        spider_task = SpiderTask()
        spider_task.task_name = parameter['task_name']
        spider_task.status = 0
        spider_task.creater = 'admin'
        spider_task.create_time = time.strftime("%Y-%m-%d %H:%M:%S",
                                                time.localtime())

        db_session.add(spider_task)

        db_session.commit()
        db_session.close()
        return {"code": "200", "message": "succeed"}
Example #8
    def update_mainurl(cls, parameter):
        pid = parameter['pid']
        mainurl = db_session.query(MainUrl).filter(MainUrl.pid == pid).first()
        # Only update the fields that were actually supplied, then commit once
        # so all changes are persisted while the instance is still attached.
        if 'remark' in parameter:
            mainurl.remark = parameter['remark']
        if 'status' in parameter:
            mainurl.status = parameter['status']
        if 'rule' in parameter:
            mainurl.rule = parameter['rule']
        db_session.commit()
        db_session.close()
Example #9
    def delete_one(cls, parameter):
        maininfo = db_session.query(MainUrl).filter(
            MainUrl.pid == parameter["pid"]).first()
        db_session.delete(maininfo)
        db_session.commit()
        db_session.close()
Example #10
    def delete_one(cls, parameter):
        spider_task = db_session.query(SpiderTask).filter(
            SpiderTask.id == parameter["id"]).first()
        db_session.delete(spider_task)
        db_session.commit()
        db_session.close()
Example #11
    def update_status(cls, parameter):
        spider_task = db_session.query(SpiderTask).filter(
            SpiderTask.id == int(parameter["id"])).first()
        spider_task.status = int(parameter["status"])
        db_session.commit()
        db_session.close()