Exemplo n.º 1
0
def queryFieldByTaskJobId(taskJobId):
    """Look up the JobTemplateField rows attached to a task job.

    The filter keeps rows with ``del_flag=0`` whose id is listed in
    ``task_job_re_field`` for the given ``task_job_id`` (the sub-select is
    grouped by ``job_tempalte_field_id``). The filter is handed to ``query``
    with ``type=1``.

    NOTE(review): ``taskJobId`` is interpolated directly into the SQL text;
    confirm that callers never pass untrusted input.

    :param taskJobId: id of the task job, formatted into the filter with ``%s``.
    :return: whatever ``query(JobTemplateField, ..., type=1)`` returns.
    """
    whereTemplate = """ del_flag=0 and id in
              (select job_tempalte_field_id from task_job_re_field where task_job_id='%s' and del_flag=0 group by job_tempalte_field_id)"""
    whereClause = whereTemplate % taskJobId
    return query(JobTemplateField, text(whereClause), type=1)
Exemplo n.º 2
0
def queryJobTemplateFieldAndJobTemplateByJobId(jobId):
    """Collect the template fields referenced by a task job.

    Loads the task job via ``querTaskJob``, fetches every ``TaskJobReField``
    row sharing its ``jobTemplateId`` and runs one ``JobTemplateField`` query
    (``type=1``, ``del_flag=0``) per relation row.

    :param jobId: id passed to ``querTaskJob``.
    :return: ``{"result": [...]}`` holding one query result per relation row,
        or, when there are no relation rows, ``{"result": ...}`` wrapping the
        value of ``queryJobTemplateFieldAndJobTemplateByJobTemplateId``.
    """
    taskJob = querTaskJob(jobId)
    relationRows = Session.query(TaskJobReField).filter(
        TaskJobReField.jobTemplateId == taskJob.jobTemplateId).all()
    fieldResults = [
        query(JobTemplateField,
              text("id ='%s' and del_flag=0" % relation.jobTemplateFieldId),
              type=1)
        for relation in relationRows
    ]
    if not fieldResults:
        # No per-job field relations: fall back to the template's own data.
        fallback = queryJobTemplateFieldAndJobTemplateByJobTemplateId(
            taskJob.jobTemplateId)
        return {"result": fallback}
    return {"result": fieldResults}
Exemplo n.º 3
0
 def parse(self, response):
     """Extract every configured template field from ``response``.

     For each ``JobTemplateField`` returned by ``query(..., type=1)`` the
     field's ``fieldValue`` is evaluated as an XPath against the response,
     the extracted nodes are joined with the field's ``split`` separator
     (empty string when unset) and, when the field has a ``regExp``, the
     joined text is narrowed to the regex match.

     :param response: the response object wrapped in a ``Selector``.
     :return: dict mapping each field's ``fieldNameEn`` to its extracted
         string value.
     """
     # A dict, not a list: values are stored under their field-name keys.
     items = {}
     hxs = Selector(response)
     jobTemplateFieldList = query(JobTemplateField, type=1)
     for jobTemplateField in jobTemplateFieldList:
         separator = jobTemplateField.split
         if separator is None:
             separator = ""
         nodes = hxs.xpath(jobTemplateField.fieldValue).extract()
         value = separator.join(nodes)
         regExp = jobTemplateField.regExp
         if regExp:
             matches = re.compile(regExp).search(value.encode("utf8"))
             # Check for "no match" first; a failed search returns None and
             # must leave the joined value untouched.
             if matches is not None:
                 groups = matches.groups()
                 if groups:
                     # NOTE(review): the groups are joined using the regex
                     # source text as separator; kept as-is, but confirm this
                     # is intended rather than the ``split`` separator.
                     value = regExp.join(groups)
                 else:
                     # Pattern has no capture groups: use the whole match.
                     value = matches.group()
         items[jobTemplateField.fieldNameEn] = value
     self.log('A response from %s just arrived!' % response.url)
     return items
Exemplo n.º 4
0
def queryJobParam(taskJobId):
    """Look up the JobTemplateParam referenced by a relation row.

    The filter keeps rows with ``del_flag=0`` whose id is returned by a
    sub-select on ``task_job_re_template_param`` where ``id`` equals the
    given value, grouped by ``job_template_param_id``. The filter is handed
    to ``query`` with ``type=0``.

    NOTE(review): the value is interpolated directly into the SQL text;
    confirm that callers never pass untrusted input.

    :param taskJobId: value formatted into the sub-select's ``id='%s'`` test.
    :return: whatever ``query(JobTemplateParam, ..., type=0)`` returns.
    """
    whereTemplate = """ id in (select job_template_param_id from task_job_re_template_param where id='%s'
                    and del_flag=0) and del_flag=0 group by job_template_param_id"""
    whereClause = whereTemplate % taskJobId
    return query(JobTemplateParam, text(whereClause), type=0)
Exemplo n.º 5
0
def queryJobTemplateFieldByJobTemplateId(jobTemplateId):
    """Return the non-deleted JobTemplateField rows of a job template.

    :param jobTemplateId: template id formatted into the
        ``job_template_id='%s'`` filter.
    :return: whatever ``query(JobTemplateField, ..., type=1)`` returns.
    """
    whereClause = "job_template_id='%s' and del_flag=0" % jobTemplateId
    return query(JobTemplateField, text(whereClause), type=1)
Exemplo n.º 6
0
def queryJobTemplateParamByJobTemplateIdType(jobTemplateId):
    """Return the non-deleted JobTemplateParam rows of a template with type 0.

    :param jobTemplateId: template id formatted into the
        ``job_template_id='%s'`` filter.
    :return: whatever ``query(JobTemplateParam, ..., type=1)`` returns.
    """
    whereClause = "job_template_id='%s' and del_flag=0 and type=0" % jobTemplateId
    matchingParams = query(JobTemplateParam, text(whereClause), type=1)
    return matchingParams
Exemplo n.º 7
0
def queryJobTemplateFieldAndJobTemplateByJobTemplateId(jobTemplateId):
    """Bundle a job template with its fields and params.

    Runs three lookups in order: the template's non-deleted fields, its
    non-deleted params (both via ``query`` with ``type=1``), and the template
    itself via ``queryJobTemplate``.

    :param jobTemplateId: id of the job template.
    :return: dict with the keys ``jobTemplate``, ``jobTemplateFieldList`` and
        ``jobTemplateParamList``.
    """
    fieldRows = query(
        JobTemplateField,
        text("job_template_id='%s' and del_flag=0" % jobTemplateId),
        type=1)
    paramRows = query(
        JobTemplateParam,
        text("job_template_id='%s' and del_flag=0" % jobTemplateId),
        type=1)
    jobTemplate = queryJobTemplate(jobTemplateId)

    bundle = {}
    bundle["jobTemplate"] = jobTemplate
    bundle["jobTemplateFieldList"] = fieldRows
    bundle["jobTemplateParamList"] = paramRows
    return bundle
Exemplo n.º 8
0
def queryJobTemplate(jobTemplateId):
    """Look up a non-deleted JobTemplate by id.

    :param jobTemplateId: id formatted into the ``id='%s'`` filter.
    :return: whatever ``query(JobTemplate, ..., type=0)`` returns.
    """
    whereClause = "id='%s' and del_flag=0" % jobTemplateId
    return query(JobTemplate, text(whereClause), type=0)
Exemplo n.º 9
0
def querTaskJobParam(taskJobId):
    """Look up a non-deleted TaskJob by id.

    :param taskJobId: id formatted into the ``id='%s'`` filter.
    :return: whatever ``query(TaskJob, ..., type=0)`` returns.
    """
    whereClause = "id='%s' and del_flag=0" % taskJobId
    taskJob = query(TaskJob, text(whereClause), type=0)
    return taskJob
Exemplo n.º 10
0
def childTaskJobByParentId(parentId):
    """Return the non-deleted TaskJob rows whose ``parent_id`` matches.

    :param parentId: parent id formatted into the ``parent_id='%s'`` filter.
    :return: whatever ``query(TaskJob, ..., type=1)`` returns.
    """
    whereClause = """parent_id='%s' and del_flag=0""" % parentId
    return query(TaskJob, text(whereClause), type=1)
Exemplo n.º 11
0
def startCrawlerByTaskJobId(jobId, taskJobParam=None):
    """Start a crawl for the task job identified by ``jobId``.

    Records a history entry, marks the job RUNNING, writes a start log and
    then dispatches on the job's type:

    * ``TaskType.SINGLE``: loads the job's template, ensures the target
      table exists, copies the job's ``url``/``tableName`` onto the template
      and queues it through ``parseUrlAndInsertRedis``.
    * ``TaskType.BATCH``: does the same for every template returned by
      ``TemplateDao.loadTemplateByTaskJobId``.
    * ``TaskType.DEPTH``: queues the job without a template.

    A missing template, or any exception raised while dispatching, marks the
    job FAIL and writes a failure log entry; exceptions are not re-raised.

    :param jobId: id of the task job to start.
    :param taskJobParam: only forwarded as-is for DEPTH jobs; SINGLE and
        BATCH jobs overwrite it with the result of ``crawlerbyKeyWord``.
    :return: None.
    """
    logging.info('------startCrawlerByTaskJobId-------%s' % jobId)
    taskJobHistory = TaskJobDao.addTaskJobHistroy(jobId,
                                                  TaskJobHistoryType.SINGLE)
    taskJob = query(TaskJob, text('id="' + str(jobId) + '"'), type=0)
    TaskJobDao.updateTaskJobStatus(jobId, TaskStatus.RUNNING)
    # Tracks the template being processed so failure logs can reference it.
    jobTemplateId = None
    dbClient = DbClient()
    LoggerDao.addTaskJobLogger(taskJob,
                               LoggerDao.LoggerType.START,
                               taskJobHistoryId=taskJobHistory.id,
                               status=TaskStatus.RUNNING,
                               content=u"任务启动")
    try:
        taskType = str(taskJob.type)
        if TaskType.SINGLE == taskType:
            jobTemplate = TemplateDao.queryJobTemplate(taskJob.jobTemplateId)
            if jobTemplate is None:
                TaskJobDao.updateTaskJobStatus(jobId, TaskStatus.FAIL)
                LoggerDao.addTaskJobLogger(taskJob,
                                           LoggerDao.LoggerType.START,
                                           jobTemplateId=jobTemplateId,
                                           taskJobHistoryId=taskJobHistory.id,
                                           content=u"no jobTemplate")
                logging.error("no jobTemplate")
                return
            jobTemplateId = jobTemplate.id
            taskJobParam = crawlerbyKeyWord(taskJob, jobTemplate.id)
            jobTemplateFieldList = TemplateDao.queryJobTemplateFieldByJobTemplateId(
                jobTemplate.id)
            dbClient.getConnection(taskJob.databaseId
                                   or jobTemplate.databaseId)
            if not dbClient.isTableExist(jobTemplate.tableName):
                dbClient.createTable(jobTemplate.id, jobTemplate.tableName,
                                     jobTemplateFieldList)
            # The job's own url/tableName override the template's values
            # before the template is queued.
            setattr(jobTemplate, "url", taskJob.url)
            setattr(jobTemplate, "tableName", taskJob.tableName)
            LoggerDao.addTaskJobLogger(taskJob,
                                       LoggerDao.LoggerType.START,
                                       jobTemplateId=jobTemplate.id,
                                       taskJobHistoryId=taskJobHistory.id,
                                       status=TaskStatus.RUNNING,
                                       content=u"定向任务任务启动")
            parseUrlAndInsertRedis(taskJob,
                                   taskJobParam=taskJobParam,
                                   taskJobHistory=taskJobHistory,
                                   jobTemplate=jobTemplate)
        elif TaskType.BATCH == taskType:
            jobTemplateList = TemplateDao.loadTemplateByTaskJobId({
                "taskJobId":
                taskJob.id,
                "action":
                "1"
            })
            jobTemplates = jobTemplateList.get("jobTemplateList")
            if jobTemplates is not None and len(jobTemplates) > 0:
                for jobTemplate in jobTemplates:
                    jobTemplateId = jobTemplate.id
                    taskJobParam = crawlerbyKeyWord(taskJob, jobTemplate.id)
                    jobTemplateFieldList = TemplateDao.queryJobTemplateFieldByJobTemplateId(
                        jobTemplate.id)
                    LoggerDao.addTaskJobLogger(
                        taskJob,
                        LoggerDao.LoggerType.START,
                        jobTemplateId=jobTemplate.id,
                        taskJobHistoryId=taskJobHistory.id,
                        status=TaskStatus.RUNNING,
                        content=u"批量任务启动")
                    # Prefer the template's database unless it is unset or
                    # the "-1" placeholder; otherwise use the job's database.
                    if jobTemplate.databaseId != "-1" and jobTemplate.databaseId is not None:
                        databaseId = jobTemplate.databaseId
                    else:
                        databaseId = taskJob.databaseId
                    dbClient.getConnection(databaseId)
                    # NOTE(review): dbClient is always the DbClient() built
                    # above, so this branch looks unreachable; it may have
                    # been meant to test the getConnection() result. Kept
                    # unchanged pending confirmation.
                    if dbClient is None:
                        TaskJobDao.updateTaskJobStatus(jobId, TaskStatus.FAIL)
                        LoggerDao.addTaskJobLogger(
                            taskJob,
                            LoggerDao.LoggerType.START,
                            jobTemplateId=jobTemplateId,
                            taskJobHistoryId=taskJobHistory.id,
                            content=u"no dbClient")
                        logging.error("no dbClient")
                        return
                    if not dbClient.isTableExist(jobTemplate.tableName):
                        dbClient.createTable(jobTemplate.id,
                                             jobTemplate.tableName,
                                             jobTemplateFieldList)
                    parseUrlAndInsertRedis(taskJob,
                                           taskJobParam=taskJobParam,
                                           taskJobHistory=taskJobHistory,
                                           jobTemplate=jobTemplate)
            else:
                TaskJobDao.updateTaskJobStatus(jobId, TaskStatus.FAIL)
                LoggerDao.addTaskJobLogger(taskJob,
                                           LoggerDao.LoggerType.START,
                                           jobTemplateId=jobTemplateId,
                                           taskJobHistoryId=taskJobHistory.id,
                                           content=u"no jobTemplate")
                logging.error("no jobTemplate")
        elif TaskType.DEPTH == taskType:
            parseUrlAndInsertRedis(taskJob,
                                   taskJobParam=taskJobParam,
                                   taskJobHistory=taskJobHistory)
    except Exception as e:
        TaskJobDao.updateTaskJobStatus(jobId, TaskStatus.FAIL)
        LoggerDao.addTaskJobLogger(taskJob,
                                   LoggerDao.LoggerType.START,
                                   jobTemplateId=jobTemplateId,
                                   taskJobHistoryId=taskJobHistory.id,
                                   content=u"解析异常" + str(e))
        # Log the caught exception instance (with traceback), not the
        # Exception class itself.
        logging.exception(repr(e))
Exemplo n.º 12
0
def loadAllTaskFieldById(taskJobId):
    """Return the non-deleted TaskJobReField rows of a task job.

    The id is formatted with ``%s`` (as the other query helpers in this file
    do) so that non-string ids such as integers are accepted instead of
    raising ``TypeError`` on string concatenation.

    NOTE(review): the id is interpolated directly into the SQL text; confirm
    that callers never pass untrusted input.

    :param taskJobId: task job id used in the ``task_job_id='%s'`` filter.
    :return: whatever ``query(TaskJobReField, ..., type=1)`` returns.
    """
    whereClause = "task_job_id='%s' and del_flag=0" % (taskJobId,)
    return query(TaskJobReField, text(whereClause), type=1)