示例#1
0
    def crawlingMarcapStockData(self, dtoList: List[StockRunCrawling]) -> None:
        """Register and launch a marcap crawling task for each "marcap" DTO.

        For every entry of ``dtoList`` whose ``taskId`` is ``"marcap"``, a
        ``ProcessTask`` holding one sub-task per calendar day between
        ``startDateStr`` and ``endDateStr`` (inclusive, ``%Y%m%d``) is added
        to the tasks repository, and an async worker wrapped in a ``Task`` is
        handed to ``tasksRepository.runTask``.

        Entries are skipped when their ``taskId`` is not ``"marcap"``, when
        the repository has no task runner, or when a task with the same
        ``taskId``/``taskUniqueId`` is already registered. An already
        registered entry no longer aborts the rest of the batch.

        Raises:
            ValueError: if a date string does not match ``%Y%m%d``
                (propagated from ``datetime.strptime``).
        """
        self.logger.info("crawlingMarcapStockData", str(len(dtoList)))
        workerLogTag = "runCrawling&marcapTaskWorker"

        async def marcapTaskWorker(runDto: StockRunCrawling,
                                   pool: Pool,
                                   taskPool: TaskPool) -> None:
            """Run one marcap crawl and always give the pool slot back."""
            try:
                self.logger.info(workerLogTag, "start")
                marcapCrawler = MarcapCrawler()
                taskUniqueId = runDto.taskUniqueId
                self.crawlerRepository.addCrawler(taskUniqueId, marcapCrawler)
                self.createListners(marcapCrawler.ee)
                self.logger.info(workerLogTag, f"taskWorker:{taskUniqueId}")
                await marcapCrawler.crawling(runDto)
                self.crawlerRepository.removeCrawler(taskUniqueId)
            except asyncio.CancelledError:
                self.logger.info(workerLogTag, "cancel")
            except Exception:
                # Format once so the log line and the stored error match.
                trace = traceback.format_exc()
                self.logger.error(workerLogTag, f"error: {trace}")
                self.tasksRepository.errorTask(runDto, trace)
            finally:
                # Release the pool on success, cancellation and failure
                # alike; otherwise a failed crawl would keep it occupied.
                taskPool.removeTaskPool(pool)

        for dto in dtoList:
            if dto.taskId != "marcap":
                continue

            workerTask = Task(dto.taskUniqueId, marcapTaskWorker,
                              {"runDto": dto})
            if not self.tasksRepository.taskRunner:
                continue
            if self.tasksRepository.isExistTask(dto.taskId,
                                                dto.taskUniqueId):
                # Skip only this duplicate; later DTOs must still be run.
                continue

            startDate = datetime.strptime(dto.startDateStr, "%Y%m%d")
            endDate = datetime.strptime(dto.endDateStr, "%Y%m%d")
            # One sub-task per calendar day, both endpoints included.
            taskDates = [
                (startDate + timedelta(days=offset)).strftime("%Y%m%d")
                for offset in range((endDate - startDate).days + 1)
            ]
            task = ProcessTask(
                market=dto.market,
                startDateStr=dto.startDateStr,
                endDateStr=dto.endDateStr,
                taskUniqueId=dto.taskUniqueId,
                taskId=dto.taskId,
                count=len(taskDates),
                tasks=deque(taskDates),
                restCount=len(taskDates),
                tasksRet=deque([0] * len(taskDates)),
            )
            task.state = "find worker"
            self.tasksRepository.addTask(task)
            self.tasksRepository.runTask(workerTask)
            self.logger.info("runMarcapTask", f"runTask {task.json()}")
示例#2
0
    def convertFactorFileToDb(self, dto: RunFactorFileConvert) -> None:
        """Register a factor-file conversion task and start its worker.

        A single-step ``ProcessTask`` is added to the tasks repository, then
        the worker coroutine is wrapped in a ``Task`` and passed to
        ``tasksRepository.runTask``.
        """
        self.logger.info("convertFactorFileToDb")

        async def convertFactorFileToDbTask(pool: Pool,
                                            taskPool: TaskPool) -> None:
            """Load factors from file, build DAOs in batches, insert them."""
            logTag = "convertFactorFileToDbTask"

            def moveTo(processTask, stateName: str) -> None:
                # Set the state and push it through the repository.
                processTask.state = stateName
                self.tasksRepository.updateTask(processTask)

            try:
                task = self.tasksRepository.getTask(dto.taskId,
                                                    dto.taskUniqueId)
                fileFactors = await asyncio.create_task(
                    self.factorRepository.getFactorsInFile())
                moveTo(task, "make Factor Object")
                factorDaos = await batchFunction(100, fileFactors,
                                                 self.makeFactorDaoList)
                moveTo(task, "start insert db")
                self.logger.info(logTag,
                                 f"insertCount: {str(len(factorDaos))}")
                await self.factorRepository.insertFactor(factorDaos)
                task.state = "complete"
                self.tasksRepository.completeFactorConvertFileToDbTask(task)
            except asyncio.CancelledError:
                self.logger.info(logTag, "cancel")
            except Exception:
                self.logger.error(logTag,
                                  f"error: {traceback.format_exc()}")
                self.tasksRepository.errorTask(dto, traceback.format_exc())
            finally:
                # Runs on every exit path of the worker.
                taskPool.removeTaskPool(pool)

        convertTask = ProcessTask(
            market="",
            startDateStr="20070101",
            endDateStr="20191231",
            taskUniqueId=dto.taskUniqueId,
            taskId=dto.taskId,
            count=1,
            tasks=["convert"],
            restCount=1,
            tasksRet=[0],
            state="start get file",
        )
        self.tasksRepository.addTask(convertTask)
        self.tasksRepository.runTask(
            Task(dto.taskUniqueId, convertFactorFileToDbTask))
示例#3
0
 def completeFactorDart(self, task: ProcessTask, year: int) -> None:
     """Record one successful factor-dart step and emit a completion event.

     Marks one unit of ``task`` as succeeded, persists it, deletes the task
     once ``restCount`` has dropped to zero or below, then sets the state to
     ``"complete"``, persists again and emits
     ``EVENT_TASK_REPO_TASK_COMPLETE`` with a ``StockUpdateState`` whose
     ``date`` is ``year``.
     """
     self.success(task, 1)
     self.updateTask(task)

     nothingLeft = task.restCount <= 0
     if nothingLeft:
         self.deleteTask(task)

     # NOTE(review): updateTask is called even right after deleteTask;
     # confirm that persisting the task again here is intended.
     task.state = "complete"
     self.updateTask(task)
     self.logger.info("completeFactorDart", "complete")

     updateState = StockUpdateState(
         taskId=task.taskId,
         market=task.market,
         date=year,
         ret=1,
     )
     self.taskEventEmitter.emit(EVENT_TASK_REPO_TASK_COMPLETE, "factorDart",
                                updateState)
示例#4
0
 def success(self, task: ProcessTask, count: int) -> None:
     """Mark the next ``count`` sub-tasks of ``task`` as succeeded.

     Updates the success/rest counters, writes ``SUCCESS`` into
     ``tasksRet`` for the ``count`` positions starting at ``task.index``,
     advances ``task.index``, recomputes ``percent`` and sets ``state`` to
     ``"success"`` when nothing is left, else ``"waiting next task"``.
     """
     task.successCount += count
     task.restCount -= count

     firstPos = task.index
     for pos in range(firstPos, firstPos + count):
         task.tasksRet[pos] = SUCCESS
     task.index = firstPos + count

     processed = task.successCount + task.failCount
     task.percent = processed / task.count * 100
     task.state = "success" if task.restCount <= 0 else "waiting next task"
     self.logger.info("success", f"{task.taskUniqueId}")
示例#5
0
 def fail(self, task: ProcessTask, count: int) -> None:
     """Mark the next ``count`` sub-tasks of ``task`` as failed.

     Updates the fail/rest counters; for the ``count`` positions starting
     at ``task.index`` it appends the sub-task to ``failTasks`` and writes
     ``FAIL`` into ``tasksRet``. Then advances ``task.index``, recomputes
     ``percent`` and sets ``state`` to ``"fail"`` when nothing is left,
     else ``"waiting next task"``.
     """
     task.failCount += count
     task.restCount -= count

     firstPos = task.index
     for pos in range(firstPos, firstPos + count):
         task.failTasks.append(task.tasks[pos])
         task.tasksRet[pos] = FAIL
     task.index = firstPos + count

     processed = task.successCount + task.failCount
     task.percent = processed / task.count * 100
     task.state = "fail" if task.restCount <= 0 else "waiting next task"
     self.logger.info("fail", f"{task.taskUniqueId}")
示例#6
0
    def crawlingFactorDartData(self, dto: DartApiCrawling) -> None:
        """Register a DART factor crawling task and start its worker.

        Builds a ``ProcessTask`` with one sub-task per year from
        ``dto.startYear`` to ``dto.endYear`` (inclusive), adds it to the
        tasks repository and passes the worker, wrapped in a ``Task``, to
        ``tasksRepository.runTask``.
        """
        async def crawlingFactorDartDataTask(pool: Pool,
                                             taskPool: TaskPool) -> None:
            """Crawl with a fresh ``DartApiCrawler`` and free the pool."""
            logTag = "crawlingFactorDartDataTask"
            try:
                dartCrawler = DartApiCrawler()
                self.crawlerRepository.addCrawler(dto.taskUniqueId,
                                                  dartCrawler)
                self.createFactorDartListener(dartCrawler.ee)
                await dartCrawler.crawling(dto)
                self.crawlerRepository.removeCrawler(dto.taskUniqueId)
            except asyncio.CancelledError:
                self.logger.info(logTag, "cancel")
            except Exception:
                self.logger.error(logTag,
                                  f"error: {traceback.format_exc()}")
                self.tasksRepository.errorTask(dto, traceback.format_exc())
            finally:
                # Runs on every exit path of the worker.
                taskPool.removeTaskPool(pool)

        yearCount = dto.endYear - dto.startYear + 1
        dartTask = ProcessTask(
            market="",
            startDateStr=dto.startYear,
            endDateStr=dto.endYear,
            taskUniqueId=dto.taskUniqueId,
            taskId=dto.taskId,
            count=yearCount,
            tasks=list(range(dto.startYear, dto.endYear + 1)),
            restCount=yearCount,
            tasksRet=[0] * yearCount,
            state="find worker",
        )
        self.tasksRepository.addTask(dartTask)
        self.tasksRepository.runTask(
            Task(dto.taskUniqueId, crawlingFactorDartDataTask))
示例#7
0
 def updateTask(self, task: ProcessTask) -> None:
     """Store ``task`` in memory, upsert it and announce the change.

     The task replaces the entry keyed by ``taskUniqueId`` in the ``"list"``
     mapping of its ``taskId``, is passed to ``self.mongod.upsertTask`` as a
     dict, and ``EVENT_TASK_REPO_UPDATE_TASKS`` is emitted with the whole
     ``tasksdto``.
     """
     sameIdTasks = self.tasksdto.tasks[task.taskId]["list"]
     sameIdTasks[task.taskUniqueId] = task
     self.logger.info("updateTask", f"{task.taskUniqueId}")

     taskDocument = task.dict()
     self.mongod.upsertTask(taskDocument)
     self.taskEventEmitter.emit(EVENT_TASK_REPO_UPDATE_TASKS, self.tasksdto)