def crawlingMarcapStockData(self, dtoList: List[StockRunCrawling]) -> None:
    """Register and launch a crawling task for each "marcap" request.

    Requests whose ``taskId`` is not ``"marcap"`` are skipped. For a marcap
    request a worker ``Task`` is built and, only when
    ``self.tasksRepository.taskRunner`` is truthy, a ``ProcessTask`` holding
    one entry per calendar day from ``startDateStr`` to ``endDateStr``
    (both inclusive, ``%Y%m%d`` format) is added to the task repository
    before the worker is handed to ``runTask``.

    Args:
        dtoList: Crawling requests, processed in order.
    """
    self.logger.info("crawlingMarcapStockData", str(len(dtoList)))

    # The worker only closes over ``self`` (the request arrives as
    # ``runDto``), so one definition serves every request in the list.
    async def marcapTaskWorker(runDto: StockRunCrawling, pool: Pool,
                               taskPool: TaskPool) -> None:
        """Run one marcap crawl and always hand the pool slot back."""
        workerTag = "runCrawling&marcapTaskWorker"
        try:
            self.logger.info(workerTag, "start")
            marcapCrawler = MarcapCrawler()
            taskUniqueId = runDto.taskUniqueId
            self.crawlerRepository.addCrawler(taskUniqueId, marcapCrawler)
            self.createListners(marcapCrawler.ee)
            self.logger.info(workerTag, f"taskWorker:{taskUniqueId}")
            await marcapCrawler.crawling(runDto)
            self.crawlerRepository.removeCrawler(taskUniqueId)
        except asyncio.CancelledError:
            self.logger.info(workerTag, "cancel")
        except Exception:
            # Format once so the log and the stored error are identical.
            trace = traceback.format_exc()
            self.logger.error(workerTag, f"error: {trace}")
            self.tasksRepository.errorTask(runDto, trace)
        finally:
            # Release the slot on every exit path (success, error, cancel),
            # matching the other task workers in this file.
            taskPool.removeTaskPool(pool)

    for dto in dtoList:
        if dto.taskId != "marcap":
            continue

        workerTask = Task(dto.taskUniqueId, marcapTaskWorker, {"runDto": dto})
        if not self.tasksRepository.taskRunner:
            continue

        if self.tasksRepository.isExistTask(dto.taskId, dto.taskUniqueId):
            # NOTE(review): this leaves the whole method, so requests that
            # follow a duplicate in dtoList are not processed - confirm
            # that this is intended rather than skipping just this one.
            return

        startDate = datetime.strptime(dto.startDateStr, "%Y%m%d")
        endDate = datetime.strptime(dto.endDateStr, "%Y%m%d")
        # One "YYYYMMDD" entry per day; empty when endDate < startDate.
        taskDates = [
            (startDate + timedelta(days=x)).strftime("%Y%m%d")
            for x in range((endDate - startDate).days + 1)
        ]
        task = ProcessTask(
            **{
                "market": dto.market,
                "startDateStr": dto.startDateStr,
                "endDateStr": dto.endDateStr,
                "taskUniqueId": dto.taskUniqueId,
                "taskId": dto.taskId,
                "count": len(taskDates),
                "tasks": deque(taskDates),
                "restCount": len(taskDates),
                "tasksRet": deque([0] * len(taskDates)),
            })
        task.state = "find worker"
        self.tasksRepository.addTask(task)
        self.tasksRepository.runTask(workerTask)
        self.logger.info("runMarcapTask", f"runTask {task.json()}")
def convertFactorFileToDb(self, dto: RunFactorFileConvert) -> None:
    """Queue a background job that loads factor data from file into the DB.

    A single-step ``ProcessTask`` is added to the task repository, then the
    coroutine defined below is wrapped in a ``Task`` and passed to
    ``runTask``.

    Args:
        dto: Request supplying ``taskId`` and ``taskUniqueId`` for the job.
    """
    self.logger.info("convertFactorFileToDb")

    taskFields = {
        "market": "",
        "startDateStr": "20070101",
        "endDateStr": "20191231",
        "taskUniqueId": dto.taskUniqueId,
        "taskId": dto.taskId,
        "count": 1,
        "tasks": ["convert"],
        "restCount": 1,
        "tasksRet": [0],
        "state": "start get file"
    }

    async def convertFactorFileToDbTask(pool: Pool, taskPool: TaskPool) -> None:
        """Read factors, build DAO objects, insert them, report progress."""
        try:
            progress = self.tasksRepository.getTask(dto.taskId,
                                                    dto.taskUniqueId)

            # Stage 1: read the factor file.
            loadJob = asyncio.create_task(
                self.factorRepository.getFactorsInFile())
            data = await loadJob

            # Stage 2: build DAO objects via batchFunction.
            progress.state = "make Factor Object"
            self.tasksRepository.updateTask(progress)
            daoList = await batchFunction(100, data, self.makeFactorDaoList)

            # Stage 3: write everything to the database.
            progress.state = "start insert db"
            self.tasksRepository.updateTask(progress)
            self.logger.info("convertFactorFileToDbTask",
                             f"insertCount: {str(len(daoList))}")
            await self.factorRepository.insertFactor(daoList)

            progress.state = "complete"
            self.tasksRepository.completeFactorConvertFileToDbTask(progress)
        except asyncio.CancelledError:
            self.logger.info("convertFactorFileToDbTask", "cancel")
        except Exception:
            self.logger.error("convertFactorFileToDbTask",
                              f"error: {traceback.format_exc()}")
            self.tasksRepository.errorTask(dto, traceback.format_exc())
        finally:
            # The pool slot is handed back on every exit path.
            taskPool.removeTaskPool(pool)

    task = ProcessTask(**taskFields)
    self.tasksRepository.addTask(task)
    workerTask = Task(dto.taskUniqueId, convertFactorFileToDbTask)
    self.tasksRepository.runTask(workerTask)
def completeFactorDart(self, task: ProcessTask, year: int) -> None:
    """Record one finished factor-dart step and announce it.

    The task is credited with one success and persisted. Once no work
    remains (``restCount <= 0``) it is deleted, flagged ``"complete"`` and
    persisted again. A ``StockUpdateState`` for ``year`` is then emitted on
    ``EVENT_TASK_REPO_TASK_COMPLETE``.

    Args:
        task: The task being advanced.
        year: Value reported in the ``date`` field of the emitted state.
    """
    self.success(task, 1)
    self.updateTask(task)

    nothingLeft = task.restCount <= 0
    if nothingLeft:
        self.deleteTask(task)
        task.state = "complete"
        # NOTE(review): updateTask stores the task in self.tasksdto again
        # right after deleteTask - confirm this ordering is intended.
        self.updateTask(task)
        self.logger.info("completeFactorDart", "complete")

    stateFields = {
        "taskId": task.taskId,
        "market": task.market,
        "date": year,
        "ret": 1
    }
    self.taskEventEmitter.emit(
        EVENT_TASK_REPO_TASK_COMPLETE,
        "factorDart",
        StockUpdateState(**stateFields))
def success(self, task: ProcessTask, count: int) -> None:
    """Mark the next ``count`` steps of ``task`` as succeeded.

    Updates the success/rest counters, writes ``SUCCESS`` into the matching
    ``tasksRet`` slots, advances ``index``, recomputes ``percent`` and sets
    ``state`` to ``"success"`` when nothing remains, otherwise to
    ``"waiting next task"``.

    Args:
        task: Task whose bookkeeping is updated in place.
        count: Number of consecutive steps, starting at ``task.index``.
    """
    task.successCount += count
    task.restCount -= count

    # Item-by-item assignment: tasksRet may be a deque, which has no slices.
    firstSlot = task.index
    for slot in range(firstSlot, firstSlot + count):
        task.tasksRet[slot] = SUCCESS
    task.index = firstSlot + count

    handled = task.successCount + task.failCount
    task.percent = handled / task.count * 100
    task.state = "success" if task.restCount <= 0 else "waiting next task"
    self.logger.info("success", f"{task.taskUniqueId}")
def fail(self, task: ProcessTask, count: int) -> None:
    """Mark the next ``count`` steps of ``task`` as failed.

    Updates the fail/rest counters, copies each affected entry of
    ``task.tasks`` into ``task.failTasks``, writes ``FAIL`` into the
    matching ``tasksRet`` slots, advances ``index``, recomputes ``percent``
    and sets ``state`` to ``"fail"`` when nothing remains, otherwise to
    ``"waiting next task"``.

    Args:
        task: Task whose bookkeeping is updated in place.
        count: Number of consecutive steps, starting at ``task.index``.
    """
    task.failCount += count
    task.restCount -= count

    # Item-by-item access: tasks/tasksRet may be deques, which have no slices.
    firstSlot = task.index
    for slot in range(firstSlot, firstSlot + count):
        task.failTasks.append(task.tasks[slot])
        task.tasksRet[slot] = FAIL
    task.index = firstSlot + count

    handled = task.successCount + task.failCount
    task.percent = handled / task.count * 100
    task.state = "fail" if task.restCount <= 0 else "waiting next task"
    self.logger.info("fail", f"{task.taskUniqueId}")
def crawlingFactorDartData(self, dto: DartApiCrawling) -> None:
    """Queue a background job that crawls factor data through the Dart API.

    A ``ProcessTask`` with one step per year in ``[startYear, endYear]`` is
    added to the task repository, then the coroutine defined below is
    wrapped in a ``Task`` and passed to ``runTask``.

    Args:
        dto: Request supplying the year range and the task identifiers.
    """
    firstYear, lastYear = dto.startYear, dto.endYear
    count = lastYear - firstYear + 1
    # NOTE(review): the year values are passed unchanged into the
    # "startDateStr"/"endDateStr" fields - confirm ProcessTask accepts them.
    taskFields = {
        "market": "",
        "startDateStr": firstYear,
        "endDateStr": lastYear,
        "taskUniqueId": dto.taskUniqueId,
        "taskId": dto.taskId,
        "count": count,
        "tasks": list(range(firstYear, lastYear + 1)),
        "restCount": count,
        "tasksRet": [0] * count,
        "state": "find worker"
    }

    async def crawlingFactorDartDataTask(pool: Pool, taskPool: TaskPool) -> None:
        """Run the Dart crawler for ``dto`` and release the pool slot."""
        try:
            crawler = DartApiCrawler()
            self.crawlerRepository.addCrawler(dto.taskUniqueId, crawler)
            self.createFactorDartListener(crawler.ee)
            await crawler.crawling(dto)
            # Reached only when crawling finished without raising.
            self.crawlerRepository.removeCrawler(dto.taskUniqueId)
        except asyncio.CancelledError:
            self.logger.info("crawlingFactorDartDataTask", "cancel")
        except Exception:
            self.logger.error("crawlingFactorDartDataTask",
                              f"error: {traceback.format_exc()}")
            self.tasksRepository.errorTask(dto, traceback.format_exc())
        finally:
            # The pool slot is handed back on every exit path.
            taskPool.removeTaskPool(pool)

    task = ProcessTask(**taskFields)
    self.tasksRepository.addTask(task)
    workerTask = Task(dto.taskUniqueId, crawlingFactorDartDataTask)
    self.tasksRepository.runTask(workerTask)
def updateTask(self, task: ProcessTask) -> None:
    """Store ``task`` in memory and in Mongo, then broadcast the task set.

    The task replaces the entry under its ``taskId`` / ``taskUniqueId`` in
    ``self.tasksdto``, its ``dict()`` form is upserted through
    ``self.mongod`` and ``EVENT_TASK_REPO_UPDATE_TASKS`` is emitted with the
    whole ``self.tasksdto``.

    Args:
        task: The task to store; ``tasksdto.tasks[task.taskId]`` must
            already exist, otherwise the lookup raises.
    """
    sameIdTasks = self.tasksdto.tasks[task.taskId]["list"]
    sameIdTasks[task.taskUniqueId] = task
    self.logger.info("updateTask", f"{task.taskUniqueId}")

    serialized = task.dict()
    self.mongod.upsertTask(serialized)

    self.taskEventEmitter.emit(EVENT_TASK_REPO_UPDATE_TASKS, self.tasksdto)