Example #1
    def load(self, mode="ro"):
        try:
            self.create_output_path()
            sh.chmod("700", self.output_path)
        except sh.ErrorReturnCode:
            ## Already mounted read-only.
            pass
        try:
            log.debug("Loading {0}".format(self))

            self.loaded = self.mount_compat("rw")
            if self.loaded:
                try:
                    sh.rm(
                        "-rf", os.path.join(self.output_path, '._.Trashes'),
                        os.path.join(self.output_path, '.Spotlight-V100'),
                        os.path.join(self.output_path, 'lost+found'),
                        os.path.join(self.output_path, '$RECYCLE.BIN'),
                        os.path.join(self.output_path,
                                     'System Volume Information'))
                except sh.ErrorReturnCode:
                    # Junk entries that do not exist are fine to skip.
                    pass
                try:
                    sh.umount(self.output_path)
                except sh.ErrorReturnCode:
                    self.loaded = False
                self.loaded = self.mount_compat(mode)
                return self.loaded
            else:
                return False
        except sh.ErrorReturnCode as e:
            self.unload()
            log.exception(e)
            return False
Example #2
    def load(self):
        self.create_output_path()
        try:

            # patoolib.extract_archive() may block on interactive prompts,
            # so drive patool through pexpect instead.
            command = "patool --non-interactive extract --outdir {0} {1}".format(
                self.output_path, self.path)
            child = pexpect.spawn(command)
            while child.isalive():
                child.sendline("")  # answer any pending prompt with Enter
                time.sleep(1)
            child.close()  # reap the child so exitstatus is populated

            if child.exitstatus == 0:
                return True
            else:
                self.unload()
                return False
        except Exception as e:
            err_str = "Unable to unpack {0} @ {1}".format(self.path,
                                                          self.output_path)
            log.error(err_str)
            log.exception(e)
            return False
Example #3
    def login(self):
        """登录QQ空间"""

        log.run().debug("执行Like.login()")  # 打印日志

        author_info = False
        try:
            log.info().info("正在读取用户信息")  # 打印日志

            with open("config/user.json", "r", encoding="utf-8") as usr:
                infos = json.load(usr)
                account = infos['account']
                password = infos['password']

            author_info = True
            log.info().info("User info loaded")

        except Exception as e:

            log.exception().exception(e)
            log.error().error("Failed to read user info")

        if author_info:
            # Login section
            log.info().info("Logging in to Qzone")
            driver = webdriver.PhantomJS()
            driver.maximize_window()
            url = "https://qzone.qq.com/"
            driver.get(url)
            driver.implicitly_wait(3)

            try:
                driver.switch_to.frame("login_frame")
                try:
                    driver.find_element_by_id('switcher_plogin').click()
                except:
                    log.run().info("Account/password login shown by default; no switch needed")

                driver.find_element_by_id('u').clear()
                driver.find_element_by_id('u').send_keys(account)
                driver.find_element_by_id('p').click()
                driver.find_element_by_id('p').send_keys(password)

                driver.find_element_by_id('login_button').click()
                time.sleep(3)
                driver.implicitly_wait(20)

                log.debug().debug("About to verify the QQ login")

                return self.login_on(driver)  # check whether login succeeded

            except Exception as login_01:

                log.exception().exception(login_01)
                log.error().info("Failed to reach the Qzone login form")

                return 'error'

        else:
            return 'error'
Example #4
    def _put_file(self, file, local_path, dropbox_path):
        size = os.stat(file.fileno()).st_size
        if size < 1000:  # small files (~1 kB) go up in a single request
            self.client.put_file(dropbox_path, file, overwrite=True)
            self.send_progress(local_path, 1.0)
        else:
            chunk_size = 1024 * 1024
            offset = 0
            upload_id = None
            last_block = None
            while offset < size:
                next_chunk_size = min(chunk_size, size - offset)
                if last_block is None:
                    last_block = file.read(next_chunk_size)
                try:
                    (offset, upload_id) = self.client.upload_chunk(
                        last_block, next_chunk_size, offset, upload_id)
                    last_block = None  # acknowledged; read the next chunk
                    self.send_progress(local_path, min(offset, size) / size)
                except dropbox.rest.ErrorResponse as e:
                    log.exception(e)  # retry the same block
            self.client.commit_chunked_upload(
                'auto' + dropbox_path, upload_id,
                overwrite=True, parent_rev=None
            )
Example #5
    def load(self):
        log.debug("loading mappable drive {0}".format(self.path))
        try:
            if not re.search(r"block special",
                             str(sh.file(self.path).stdout, 'utf8'),
                             flags=re.IGNORECASE):
                self.lodev = sh.losetup("-f").split()[0]
                sh.losetup(self.lodev, self.path)
                sh.blkid(self.lodev)
                try:
                    sh.partprobe(self.lodev)
                except sh.ErrorReturnCode:
                    pass  # no partition table is fine
            else:
                sh.blkid(self.path)
                try:
                    sh.partprobe(self.path)
                except sh.ErrorReturnCode:
                    pass  # no partition table is fine
            sh.sync("/dev/")
            self.process_devicemap()
        except Exception as e:
            log.exception(e)
            return False
        return True
Example #6
    def _upload(self, event, dropbox_path):
        if event.isdir:
            if event.type != 'CREATE':
                return
            try:
                self.client.file_create_folder(dropbox_path)
            except dropbox.rest.ErrorResponse as e:
                log.exception(e)
            return

        with open(event.source_absolute, 'rb') as file:
            self._put_file(file, event.source_absolute, dropbox_path)
Example #7
def initialize_database(path=MASTER_DOC):
    global models

    CSVModel.clear()

    csv_docs = xls_parse_from_url(path)
    log.info('Downloaded %s' % path)
    model_instances = {}
    for k, doc in csv_docs.iteritems():
        if k in ['IDMap', 'AllScenarios']:
            continue
        try:
            csv_model = CSVModel(doc).create_model(k)
            models[csv_model.__name__] = csv_model
            model_instances[k] = csv_model.from_csv(doc)
            log.info("Parsed sheet %s" % k)
        except (ArgumentError, TypeError):
            log.exception("Couldn't load %s" % k)
            continue
    # We want a late load so that the order is preserved and deterministic
    from model.refs.parameter_ref import ParameterRef

    log.info('Dropping view')
    drop_dp_view(engine)
    drop_view(engine)
    CSVModel.drop_all(engine)
    CSVModel.create_all(engine)
    log.info('Creating view')
    initialize_view(engine)
    initialize_dp_view(engine)

    for k, v in model_instances.iteritems():
        for inst in v:
            session.add(inst)
            try:
                session.commit()
            except Exception:
                session.rollback()
                from traceback import print_exc
                print_exc()
        log.info("Initialized %s" % k)
    log.info("Initializing Parameter References and Associations")
    pdicts = [(pdict.scenario, pdict.id, pdict.parameter_ids)
              for pdict in model_instances['ParameterDictionary']]
    log.info("Loaded ParameterDictionary into memory")
    params = {p.id: p.scenario for p in model_instances['ParameterDefs']}
    log.info("Loaded Parameters into Memory")
    if engine.name == 'postgresql':
        speedy_parameter_load(pdicts, params)
    else:
        linear_parameter_load(pdicts, params, session)
Example #8
def initialize_saf(database='data/objects_20131126_112742.xls'):
    global models

    CSVModel.clear()

    csv_docs = xls_parse_from_url(database)
    log.info('Loaded %s' % database)

    model_instances = {}
    for k, doc in csv_docs.iteritems():
        try:
            csv_model = CSVModel(doc).create_model('saf_%s' % k)
            models[csv_model.__name__] = csv_model
            model_instances[csv_model.__name__] = csv_model.from_csv(doc)
            log.info("Parsed sheet %s" % k)
        except (ArgumentError, TypeError):
            log.exception("Couldn't load %s" % k)
            continue
    from model.refs.saf_instrument_ref import SAFInstrumentRef

    log.info("Dropping SAF Views")
    drop_saf_instrument_view(engine)
    drop_qc_view(engine)
    log.info("Dropping SAF Models")
    CSVModel.drop_all(engine)
    log.info("Creating SAF Models")
    CSVModel.create_all(engine)
    log.info("Creating SAF Views")
    initialize_saf_instrument_view(engine)
    initialize_qc_view(engine)
    
    for k, v in model_instances.iteritems():
        for inst in v:
            session.add(inst)
            try:
                session.commit()
            except Exception:
                session.rollback()
                from traceback import print_exc
                print_exc()
                raise
        log.info('Initialized %s' % k)
    log.info('Initialized SAF Data instances')
    instruments = model_instances['saf_instrument']
    instruments = [(i.id, i.data_product_list) for i in instruments]
    log.info("Loaded instruments into memory")
    if engine.name == 'postgresql':
        speedy_saf_ref(instruments)
    else:
        linear_saf_ref(instruments, session)
Example #9
def linear_saf_ref(instances, session):
    from model.refs.saf_instrument_ref import SAFInstrumentRef
    for i_id, dp_ids in instances:
        dp_ids = dp_ids.replace(' ', '')  # str.replace returns a new string
        dp_ids = dp_ids.split(',')
        for dp_id in dp_ids:
            inst_ref = SAFInstrumentRef(instrument_id=i_id,
                                        data_product_id=dp_id)
            session.add(inst_ref)
            try:
                session.commit()
            except Exception:
                log.exception("Couldn't load reference")
                session.rollback()
Example #10
def linear_parameter_load(pdicts, params, session):
    from model.refs.parameter_ref import ParameterRef
    for pdict_scenario, pdict_id, pdict_parameter_ids in pdicts:
        param_ids = pdict_parameter_ids.replace(' ', '')  # strip whitespace
        param_ids = param_ids.split(',')
        for param_id in param_ids:
            param_scenario = params[param_id]
            pref = ParameterRef(pdict_id=pdict_id,
                                pdict_scenario=pdict_scenario,
                                param_id=param_id,
                                param_scenario=param_scenario)
            session.add(pref)
            try:
                session.commit()
            except Exception:
                log.exception("Couldn't load reference")
                session.rollback()
Example #11
    def login_on(self, driver):
        """Check whether Qzone is logged in."""
        log.run().debug("Running Like.login_on()")
        try:
            driver.find_element_by_id('QZ_Toolbar_Container')
            log.run().debug("QQ is logged in")

            return driver
        except Exception as login_02:
            log.exception().exception(login_02)
            log.info().info("QQ is not logged in")

            self.screenshot(driver)
            return 'error'
Example #12
def get_metadata(full_name):
    metadata = None
    try:
        parser = createParser(full_name)
        metadata = extractMetadata(parser)
        if parser:
            parser.stream._input.close()
            del parser
    except hachoir.stream.input.InputStreamError:
        ## Path is a directory; nothing to parse.
        metadata = None
    except Exception as err:
        log.exception(err)
        log.error("Cannot extract metadata")
        metadata = None
    return metadata
Example #13
    def mount_compat(self, mode="ro"):
        status = True

        try:
            sh.mount("-o", "{0},{1}".format(mode, self.MOUNT_OPTIONS),
                     self.path, self.output_path)
        except sh.ErrorReturnCode:
            log.debug("Legacy re-mount opts for {0}".format(self))
            try:
                sh.mount("-o", "{0}".format(mode), self.path, self.output_path)
            except sh.ErrorReturnCode:
                try:
                    sh.mount(self.path, self.output_path)
                except Exception as e:
                    log.error("Cannot mount: {0}".format(self))
                    log.exception(e)
                    status = False
        return status
Example #14
    def praise(self, driver):
        """Do the actual liking."""
        log.run().debug("Running Like.praise()")
        log.info().info("Looking for an un-liked post")
        driver.refresh()
        driver.implicitly_wait(10)

        if self.login_on(driver) != 'error':  # check we are still logged in

            # Check whether we already liked this post
            try:

                praise_person = driver.find_element_by_xpath(
                    '//*[@id="feed_friend_list"]/li[1]/div[@class="f-single-foot"]/div[@class="f-like-list f-like _likeInfo"]/div[@class="user-list"]/a'
                ).get_attribute("class")
                if praise_person == 'item _ownerlike q_namecard':
                    result = False
                else:
                    result = True
            except Exception:
                result = True
                log.info().info("Found a target; about to like it")
            # Like the post if needed
            try:
                if result:
                    driver.find_element_by_css_selector(
                        "[class='fui-icon icon-op-praise']").click()
                    log.info().info("Like succeeded")
            except Exception as e106:

                log.exception().exception(e106)
                log.error().error("Error while liking")

            time.sleep(3)
            return driver
        else:
            time.sleep(60)
            log.info().info("Trying to log in again")
            driver_now = self.login()  # re-login

            if driver_now != 'error':  # if it succeeded, keep liking
                return driver_now
            else:
                return 'error'
Example #15
    def create_file(self, path, parent=None):
        file_obj = None
        if stat.S_ISFIFO(os.stat(path).st_mode) or stat.S_ISCHR(
                os.stat(path).st_mode):
            return None

        magic_str, mime_str = self.get_file_magic(path)
        metadata = get_metadata(path)

        for regex, file_class in self.CONTAINER_TYPES_MAP.items():
            if file_class and re.search(regex, magic_str, flags=re.IGNORECASE):
                try:
                    file_obj = file_class(path,
                                          magic_str=magic_str,
                                          mime_type=mime_str,
                                          metadata=metadata,
                                          parent=parent)
                    break
                except IncompatibleFS:
                    log.error(
                        "Attempted to create filesystem from block device without success"
                    )

        if not file_obj:
            for regex, file_class in self.MIME_TYPES_MAP.items():
                if file_class and re.search(
                        regex, mime_str, flags=re.IGNORECASE):
                    try:
                        file_obj = file_class(path,
                                              magic_str=magic_str,
                                              mime_type=mime_str,
                                              metadata=metadata,
                                              parent=parent)
                        break
                    except Exception as e:
                        log.exception(e)

        if not file_obj:
            file_obj = Data(path, magic_str)

        return file_obj
Example #16
    def organize(self, mfile, root_call=True):
        loaded_mfiles = set()

        if root_call:
            log.info("Organizing {0}".format(mfile))
        try:
            if mfile.load():
                log.info("Organizing childs of {0}".format(mfile))
                if mfile.is_source_container():
                    log.debug("{0} is source".format(mfile.path))
                    dump_dir_path = os.path.join(WORK_DIR, SW_PROJECTS_OUTPUT)
                    sh.mkdir("-p", dump_dir_path)

                    dump_dir_path = sh.mktemp(
                        "-d", "-p", dump_dir_path, "--suffix",
                        os.path.basename(mfile.path)).stdout.strip()
                    try:
                        sh.rsync("-rat", mfile.path, dump_dir_path)
                    except sh.ErrorReturnCode_23:
                        ## Rsync exit 23: partial transfer (attrs etc.); ignore.
                        pass

                else:
                    loaded_mfiles.add(mfile)
                    # Dive in a subprocess for isolation, not self.dive(mfile)
                    p = Process(target=Organizer.dive, args=[self, mfile])
                    p.start()
                    p.join()
            else:
                destination_path = self.index.put_file(mfile.path)
                with open("{}.{}".format(destination_path, METAFPATHFILE),
                          'ab') as metapath_file:
                    metapath_file.write(bytes(mfile.path + "\n", 'utf8'))
                try:
                    ordered_path = mfile.get_ordered_path()
                    sh.mkdir("-p", os.path.join(ordered_path, 'NoMeta'))
                    fname = os.path.basename(mfile.path)
                    destination_fname = os.path.basename(destination_path)
                    for link in mfile.gen_ordered_paths():
                        log.debug("{} to {}".format(mfile.path, link))
                        sh.mkdir("-p", link)
                        try:
                            has_ext = re.search(r"(\..*)", fname)
                            extension = has_ext.group(1)
                            link = os.path.join(
                                link, u"{0}{1}".format(destination_fname,
                                                       extension))
                        except AttributeError:
                            link = os.path.join(
                                link, u"{0}".format(destination_fname))
                        log.info(u"File {0} @ {1}".format(
                            str(mfile), ordered_path))
                        sh.ln("-s", destination_path, link)
                except sh.ErrorReturnCode_1:
                    pass
                except sh.ErrorReturnCode as e:
                    log.exception(e)
        except Exception as e:
            log.error("Organizer error {0}".format(mfile.path))
            log.exception(e)
        finally:
            for loaded_mfile in loaded_mfiles:
                try:
                    loaded_mfile.unload()
                except Exception as e:
                    log.error("Error unloading {0}".format(loaded_mfile.path))
                    log.exception(e)
        return True
Example #17
    def deal_article_list(self, req_url, text):
        """
        @summary: 获取文章列表
        分为两种
            1、第一次查看历史消息 返回的是html格式 包含公众号信息
            2、下拉显示更多时 返回json格式
        但是文章列表都是json格式 且合适相同
        抓取思路:
        1、如果是第一种格式,直接解析文章内容,拼接下一页json格式的地址
        2、如果是第二种格式,
        ---------
        @param data:
        ---------
        @result:
        """
        try:
            # A banned account has no article list
            __biz = tools.get_param(req_url, "__biz")

            if "list" in text:
                # Article list inside the HTML response
                if "action=home" in req_url:
                    # Parse the official-account info
                    self.__parse_account_info(text, req_url)

                    # Parse the article list
                    regex = "msgList = '(.*?})';"
                    article_list = tools.get_info(text, regex, fetch_one=True)
                    article_list = article_list.replace("&quot;", '"')
                    publish_time = self.__parse_article_list(
                        article_list, __biz, is_first_page=True)

                    # More articles? If none, move to the next account;
                    # otherwise pull down for more.
                    regex = r"can_msg_continue = '(\d)'"
                    can_msg_continue = tools.get_info(text,
                                                      regex,
                                                      fetch_one=True)
                    if can_msg_continue == "0":  # no more articles
                        log.info("Reached the end of the list; account {} is done".format(__biz))
                        new_last_publish_time = self._task_manager.get_new_last_article_publish_time(
                            __biz)
                        if not new_last_publish_time:
                            # Mark it as a zombie account
                            log.info("Account {} is a zombie account; monitoring stops".format(__biz))
                            self._task_manager.sign_account_is_zombie(__biz)
                        else:
                            self._task_manager.update_account_last_publish_time(
                                __biz, new_last_publish_time)

                    elif publish_time:
                        # Build the "pull down for more history" request
                        # appmsg_token lives in the HTML
                        regex = 'appmsg_token = "(.*?)";'
                        appmsg_token = tools.get_info(text,
                                                      regex,
                                                      fetch_one=True)

                        # The remaining params live in the URL
                        __biz = tools.get_param(req_url, "__biz")
                        pass_ticket = tools.get_param(req_url, "pass_ticket")

                        next_page_url = "https://mp.weixin.qq.com/mp/profile_ext?action=getmsg&__biz={__biz}&f=json&offset={offset}&count=10&is_ok=1&scene=124&uin=777&key=777&pass_ticket={pass_ticket}&wxtoken=&appmsg_token={appmsg_token}&x5=0&f=json".format(
                            __biz=__biz,
                            offset=10,
                            pass_ticket=pass_ticket,
                            appmsg_token=appmsg_token,
                        )
                        return self._task_manager.get_task(
                            next_page_url,
                            tip="Fetching list: next_offset {}, reached {}".format(
                                10, publish_time),
                        )

                else:  # JSON format
                    text = tools.get_json(text)
                    article_list = text.get("general_msg_list", {})
                    publish_time = self.__parse_article_list(
                        article_list, __biz)

                    # More articles? If none, move on; otherwise pull down for more.
                    can_msg_continue = text.get("can_msg_continue")
                    if not can_msg_continue:  # no more articles
                        log.info("Reached the end of the list; account {} is done".format(__biz))
                        new_last_publish_time = self._task_manager.get_new_last_article_publish_time(
                            __biz)
                        self._task_manager.update_account_last_publish_time(
                            __biz, new_last_publish_time)

                    elif publish_time:
                        # Build the "pull down for more" request
                        # These params live in the URL
                        __biz = tools.get_param(req_url, "__biz")
                        pass_ticket = tools.get_param(req_url, "pass_ticket")
                        appmsg_token = tools.get_param(req_url, "appmsg_token")

                        # offset lives in the JSON
                        offset = text.get("next_offset", 0)

                        next_page_url = "https://mp.weixin.qq.com/mp/profile_ext?action=getmsg&__biz={__biz}&f=json&offset={offset}&count=10&is_ok=1&scene=124&uin=777&key=777&pass_ticket={pass_ticket}&wxtoken=&appmsg_token={appmsg_token}&x5=0&f=json".format(
                            __biz=__biz,
                            offset=offset,
                            pass_ticket=pass_ticket,
                            appmsg_token=appmsg_token,
                        )
                        return self._task_manager.get_task(
                            next_page_url,
                            tip="Fetching list: next_offset {}, reached {}".format(
                                offset, publish_time),
                        )

            else:  # this __biz account has been banned
                self._task_manager.sign_account_is_zombie(__biz)

        except Exception as e:
            log.exception(e)

        return self._task_manager.get_task()
Example #18
config.quiet = True


def work(path):
    ffactory = FileFactory()
    organizer = Organizer()
    mfile = ffactory.create_file(path)
    organizer.organize(mfile)


work_queue = Queue()


def worker_loop():
    path = work_queue.get()
    log.info("Ordering {}".format(path))
    work(path)
    log.info("Finished {}".format(path))


if __name__ == "__main__":

    for param in sys.argv[1:]:
        try:
            p = Process(target=work, args=(param, ))
            p.start()
            p.join()
        except Exception as e:
            log.exception(e)

    log.info("Finished all tasks")