Пример #1
0
    def update_items(self, tab_item, items_data, update_keys=()):
        """
        @summary: Batch-update rows of the table derived from a redis item key.
        ---------
        @param tab_item: name of the items key in redis; the target table is
            the text between ":s_" and the trailing "_item"
        @param items_data: [item.to_dict] list of row dicts
        @param update_keys: columns to update; when empty, every key of the
            first item is used
        ---------
        @result: True when the batch ran (or there was nothing to update),
            False when add_batch reported failure by returning None
        """
        if not items_data:
            # Nothing to write; also avoids IndexError on items_data[0] below.
            return True

        # Anchored with "$" so a table name that itself contains "_item"
        # (e.g. "..:s_book_item_detail_item") is not cut at the first "_item".
        to_table = tools.get_info(tab_item, ":s_(.*?)_item$", fetch_one=True)
        sql, datas = tools.make_batch_sql(
            to_table,
            items_data,
            update_columns=update_keys or list(items_data[0].keys()),
        )
        update_count = self.to_db.add_batch(sql, datas)
        succeeded = update_count is not None

        if not succeeded:
            log.error("更新表 %s 数据失败" % (to_table))
        else:
            # NOTE(review): the count is halved before logging - presumably
            # because the DB reports two affected rows per upserted row;
            # confirm against make_batch_sql / add_batch.
            msg = "共更新 %s 条数据 到 %s" % (update_count // 2, to_table)
            if update_keys:
                msg += " 更新字段为 {}".format(update_keys)
            log.info(msg)

        return succeeded
Пример #2
0
    def __export_to_db(self, tab_item, datas, is_update=False, update_keys=()):
        """
        @summary: Push one batch of items through every configured pipeline.
        ---------
        @param tab_item: name of the items key in redis; the target table is
            the text between ":s_" and the trailing "_item"
        @param datas: batch of item dicts handed to the pipelines
        @param is_update: True to call update_items, False to call save_items
        @param update_keys: columns forwarded to update_items
        ---------
        @result: True when every pipeline accepted the batch, False as soon as
            one of them reports failure
        """
        to_table = tools.get_info(tab_item, ":s_(.*?)_item$", fetch_one=True)

        # Instrumentation / validation hook.
        self.check_datas(table=to_table, datas=datas)

        for pipeline in self._pipelines:
            if is_update:
                # Updates of the task table are only sent to MySQL pipelines.
                if to_table == self._task_table and not isinstance(
                    pipeline, MysqlPipeline
                ):
                    continue

                if not pipeline.update_items(to_table, datas, update_keys=update_keys):
                    log.error(
                        f"{pipeline.__class__.__name__} 更新数据失败. table: {to_table}  items: {datas}"
                    )
                    return False

            elif not pipeline.save_items(to_table, datas):
                log.error(
                    f"{pipeline.__class__.__name__} 保存数据失败. table: {to_table}  items: {datas}"
                )
                return False

        # Task table with no MySQL pipeline configured above: the task update
        # still has to go through the dedicated MySQL pipeline. Its result is
        # checked like every other pipeline instead of being dropped.
        if not self._have_mysql_pipeline and is_update and to_table == self._task_table:
            if not self.mysql_pipeline.update_items(
                to_table, datas, update_keys=update_keys
            ):
                log.error(
                    f"{self.mysql_pipeline.__class__.__name__} 更新数据失败. table: {to_table}  items: {datas}"
                )
                return False

        # Explicit success value; previously the success path fell through
        # and returned None, which is falsy just like the failure value.
        return True
Пример #3
0
    def create(self, sort_keys=False):
        """
        @summary: Parse the lines returned by get_data() into key/value pairs
            and print them as JSON.
        ---------
        @param sort_keys: forwarded to tools.dumps_json
        ---------
        @result: None; the JSON text is printed

        Blank lines and lines starting with ":" are skipped. When a key
        occurs more than once its values are joined with "&".
        """
        # Raw string: "\s" in a plain literal is an invalid escape sequence.
        # Group 1 is the key (up to the first ":" or whitespace), group 2 is
        # the remainder after any run of ":", "|" and whitespace.
        pattern = r"([^:\s]*)[:|\s]*(.*)"

        parsed = {}  # not named `json`, to avoid shadowing the stdlib module
        for content in self.get_data():
            content = content.strip()
            if not content or content.startswith(":"):
                continue

            result = tools.get_info(content, pattern, fetch_one=True)
            key = result[0]
            value = result[1].strip()

            if key in parsed:
                parsed[key] = parsed[key] + "&" + value
            else:
                parsed[key] = value

        print(tools.dumps_json(parsed, sort_keys=sort_keys))
Пример #4
0
    def export_items(self, tab_item, items_data):
        """
        @summary: Batch-insert items into the table derived from a redis key.
        ---------
        @param tab_item: name of the items key in redis; the target table is
            the text between ":s_" and the trailing "_item"
        @param items_data: [item.to_dict] list of row dicts
        ---------
        @result: True when add_batch returned a row count, False when it
            returned None
        """
        # Anchored with "$" so a table name that itself contains "_item"
        # is not cut at the first "_item".
        to_table = tools.get_info(tab_item, ":s_(.*?)_item$", fetch_one=True)
        sql, datas = tools.make_batch_sql(to_table, items_data)
        add_count = self.to_db.add_batch(sql, datas)
        succeeded = add_count is not None

        if succeeded:
            datas_size = len(datas)
            # Rows submitted minus rows the DB reports as added is logged as
            # the number of duplicates.
            log.info("共导出 %s 条数据 到 %s, 重复 %s 条" %
                     (datas_size, to_table, datas_size - add_count))
        else:
            log.error("导出数据到表 %s 失败" % (to_table))

        return succeeded
Пример #5
0
    def export_all(
        self,
        tables,
        auto_update=False,
        batch_count=100,
        every_table_per_export_callback=None,
    ):
        """
        @summary: Export every item table.
        ---------
        @param tables: either a key prefix (e.g. "qidian" exports every key
            matching "qidian*_item") or a list/tuple of redis keys.
            Key names must follow the pattern "...:s_<table>_item", e.g.
            qidian:comment:s_qidian_book_comment_dynamic_item is exported to
            the table qidian_book_comment_dynamic
        @param auto_update: whether existing rows are updated automatically
        @param batch_count: number of items exported per batch
        @param every_table_per_export_callback: called before each table is
            exported to adjust to_table, auto_update and batch_count for that
            table only, e.g.
                def every_table_per_export_callback(to_table, auto_update, batch_count):
                    if to_table == 'xxx':
                        auto_update = True
                    return to_table, auto_update, batch_count
        ---------
        @result: None
        """
        if not isinstance(tables, (list, tuple)):
            tables = self.redisdb.getkeys(tables + "*_item")

        if not tables:
            log.info("无表数据")
            return

        use_callback = callable(every_table_per_export_callback)

        for from_table in tables:
            # str() because the key may not be a str (e.g. bytes from redis).
            to_table = tools.get_info(str(from_table),
                                      ":s_(.*?)_item",
                                      fetch_one=True)

            # Per-table copies: an override returned by the callback for one
            # table must not leak into the tables exported after it.
            table_auto_update, table_batch_count = auto_update, batch_count
            if use_callback:
                to_table, table_auto_update, table_batch_count = (
                    every_table_per_export_callback(
                        to_table, auto_update, batch_count))

            log.info("""
                \r正在导出 %s -> %s""" % (from_table, to_table))
            self.export(from_table, to_table, table_auto_update,
                        table_batch_count)
Пример #6
0
    def __export_to_db(self, tab_item, datas, is_update=False, update_keys=()):
        """
        @summary: Export one batch of items to MySQL and/or redis according
            to setting.ADD_ITEM_TO_MYSQL and setting.ADD_ITEM_TO_REDIS.
        ---------
        @param tab_item: name of the items key in redis; the target table is
            the text between ":s_" and the trailing "_item"
        @param datas: batch of item dicts
        @param is_update: True to update existing rows, False to insert
        @param update_keys: columns forwarded to update_items
        ---------
        @result: result of the MySQL export when only MySQL is enabled; True
            whenever the batch was added to redis; False when neither target
            is enabled for this item

        Each setting is either a plain truthy flag (applies to every item) or
        a list/tuple of substrings; in the latter case the target is used
        only when one of them occurs in "<table>_item".
        """
        to_table = tools.get_info(tab_item, ":s_(.*?)_item$", fetch_one=True)
        item_name = to_table + "_item"

        # Instrumentation / validation hook.
        self.check_datas(table=to_table, datas=datas)

        def target_enabled(option):
            # A list/tuple enables the target when any entry matches; the
            # batch is then exported exactly once, however many entries match.
            if isinstance(option, (list, tuple)):
                return any(pattern in item_name for pattern in option)
            return bool(option)

        export_success = False

        if target_enabled(setting.ADD_ITEM_TO_MYSQL):
            if is_update:
                export_success = self._export_data.update_items(
                    tab_item, datas, update_keys=update_keys)
            else:
                export_success = self._export_data.export_items(tab_item, datas)

        if target_enabled(setting.ADD_ITEM_TO_REDIS):
            self._db.sadd(tab_item, datas)
            # NOTE(review): this overwrites a failed MySQL result with True;
            # kept as in the original - confirm that is intended.
            export_success = True
            log.info("共导出 %s 条数据 到redis %s" % (len(datas), tab_item))

        return export_success