Пример #1
0
# Argument bundle passed as the first positional argument of the download
# stage. The os.environ lookup raises KeyError when FINISHED_WARCS_DIR is
# not set, so this statement fails fast on a misconfigured environment.
wpull_args = WpullArgs(default_user_agent=DEFAULT_USER_AGENT,
                       wpull_exe=WPULL_EXE,
                       youtube_dl_exe=YOUTUBE_DL,
                       phantomjs_exe=PHANTOMJS,
                       finished_warcs_dir=os.environ["FINISHED_WARCS_DIR"])

pipeline = Pipeline(
    CheckIP(),
    GetItemFromQueue(control, pipeline_id, downloader, ao_only=env.get('AO_ONLY')),
    StartHeartbeat(control),
    SetFetchDepth(),
    PreparePaths(),
    WriteInfo(),
    DownloadUrlFile(control),
    WgetDownload(
        wpull_args,
        accept_on_exit_code=AcceptAny(),
        env={
            'ITEM_IDENT': ItemInterpolation('%(ident)s'),
            'LOG_KEY': ItemInterpolation('%(log_key)s'),
            'REDIS_URL': REDIS_URL,
            'PATH': os.environ['PATH']
        }
    ),
    RelabelIfAborted(control),
    WriteInfo(),
    MoveFiles(),
    LimitConcurrent(2,
Пример #2
0
# Argument bundle passed as the first positional argument of the download
# stage. The os.environ lookup raises KeyError when FINISHED_WARCS_DIR is
# not set, so this statement fails fast on a misconfigured environment.
wpull_args = WpullArgs(
    default_user_agent=DEFAULT_USER_AGENT,
    wpull_exe=WPULL_EXE,
    youtube_dl_exe=YOUTUBE_DL,
    phantomjs_exe=PHANTOMJS,
    finished_warcs_dir=os.environ["FINISHED_WARCS_DIR"],
    warc_max_size=WARC_MAX_SIZE,
)

# Job pipeline: one stage per line, in the order they are handed to Pipeline.
pipeline = Pipeline(
    CheckIP(),
    # AO_ONLY and LARGE are optional settings; env.get yields None if absent.
    GetItemFromQueue(
        control,
        pipeline_id,
        downloader,
        ao_only=env.get('AO_ONLY'),
        large=env.get('LARGE'),
    ),
    StartHeartbeat(control),
    SetFetchDepth(),
    PreparePaths(),
    WriteInfo(),
    DownloadUrlFile(control),
    WgetDownload(
        wpull_args,
        # NOTE(review): presumably every exit code is accepted so that the
        # following RelabelIfAborted stage can classify the result - confirm.
        accept_on_exit_code=AcceptAny(),
        # Environment for the download task. PATH is copied from this
        # process and raises KeyError if it is unset.
        env={
            'ITEM_IDENT': ItemInterpolation('%(ident)s'),
            'LOG_KEY': ItemInterpolation('%(log_key)s'),
            'REDIS_URL': REDIS_URL,
            'PATH': os.environ['PATH'],
        },
    ),
    RelabelIfAborted(control),
    WriteInfo(),
    MoveFiles(),
    # NOTE(review): the leading 2 looks like a concurrency cap on the wrapped
    # upload task - confirm against LimitConcurrent's signature.
    LimitConcurrent(
        2,
        RsyncUpload(
            target=RSYNC_URL,
            target_source_path=ItemInterpolation("%(data_dir)s"),
            files=ItemValue("all_target_files"),
            extra_args=['--partial', '--partial-dir', '.rsync-tmp'],
        ),
    ),
    StopHeartbeat(),
    MarkItemAsDone(control, EXPIRE_TIME),
)
Пример #3
0
    youtube_dl_exe=YOUTUBE_DL,
    finished_warcs_dir=os.environ["FINISHED_WARCS_DIR"],
    warc_max_size=WARC_MAX_SIZE,
    monitor_disk=WPULL_MONITOR_DISK,
    monitor_memory=WPULL_MONITOR_MEMORY,
)

# Job pipeline: one stage per line, in the order they are handed to Pipeline.
pipeline = Pipeline(
    CheckIP(),
    CheckLocalWebserver(),
    # AO_ONLY and LARGE are optional settings; env.get yields None if absent.
    GetItemFromQueue(
        control,
        pipeline_id,
        downloader,
        ao_only=env.get('AO_ONLY'),
        large=env.get('LARGE'),
        version_check=(VERSION, pipeline_version),
    ),
    StartHeartbeat(control),
    SetFetchDepth(),
    PreparePaths(),
    WriteInfo(),
    DownloadUrlFile(control),
    Wpull(
        wpull_args,
        # NOTE(review): presumably every exit code is accepted so that the
        # following RelabelIfAborted stage can classify the result - confirm.
        accept_on_exit_code=AcceptAny(),
        # Environment for the download task. PATH is copied from this
        # process and raises KeyError if it is unset.
        env={
            'ITEM_IDENT': ItemInterpolation('%(ident)s'),
            'LOG_KEY': ItemInterpolation('%(log_key)s'),
            'REDIS_URL': REDIS_URL,
            'PATH': os.environ['PATH'],
        },
    ),
    RelabelIfAborted(control),
    CompressLogIfFailed(),
    WriteInfo(),
    # The target directory comes from the environment; the lookup raises
    # KeyError when FINISHED_WARCS_DIR is not set.
    MoveFiles(target_directory=os.environ["FINISHED_WARCS_DIR"]),
    StopHeartbeat(),
    MarkItemAsDone(control, EXPIRE_TIME),
)


def stop_control():
    #control.flag_logging_thread_for_termination()