def main(args, stdout, stderr, api_client=None, keep_client=None,
         install_sig_handlers=True):
    parser = arg_parser()

    job_order_object = None
    arvargs = parser.parse_args(args)

    if len(arvargs.storage_classes.strip().split(',')) > 1:
        logger.error(str(u"Multiple storage classes are not supported currently."))
        return 1

    arvargs.use_container = True
    arvargs.relax_path_checks = True
    arvargs.print_supported_versions = False

    if install_sig_handlers:
        arv_cmd.install_signal_handlers()

    if arvargs.update_workflow:
        if arvargs.update_workflow.find('-7fd4e-') == 5:
            want_api = 'containers'
        elif arvargs.update_workflow.find('-p5p6p-') == 5:
            want_api = 'jobs'
        else:
            want_api = None
        if want_api and arvargs.work_api and want_api != arvargs.work_api:
            logger.error(str(u'--update-workflow arg {!r} uses {!r} API, but --api={!r} specified').format(
                arvargs.update_workflow, want_api, arvargs.work_api))
            return 1
        arvargs.work_api = want_api

    if (arvargs.create_workflow or arvargs.update_workflow) and not arvargs.job_order:
        job_order_object = ({}, "")

    add_arv_hints()

    for key, val in viewitems(cwltool.argparser.get_default_args()):
        if not hasattr(arvargs, key):
            setattr(arvargs, key, val)

    try:
        if api_client is None:
            api_client = arvados.safeapi.ThreadSafeApiCache(
                api_params={"model": OrderedJsonModel(), "timeout": arvargs.http_timeout},
                keep_params={"num_retries": 4})
            keep_client = api_client.keep
            # Make an API object now so errors are reported early.
            api_client.users().current().execute()
        if keep_client is None:
            keep_client = arvados.keep.KeepClient(api_client=api_client, num_retries=4)
        executor = ArvCwlExecutor(api_client, arvargs, keep_client=keep_client, num_retries=4)
    except Exception:
        logger.exception("Error creating the Arvados CWL Executor")
        return 1

    # Note that unless in debug mode, some stack traces related to user
    # workflow errors may be suppressed. See ArvadosJob.done().
    if arvargs.debug:
        logger.setLevel(logging.DEBUG)
        logging.getLogger('arvados').setLevel(logging.DEBUG)

    if arvargs.quiet:
        logger.setLevel(logging.WARN)
        logging.getLogger('arvados').setLevel(logging.WARN)
        logging.getLogger('arvados.arv-run').setLevel(logging.WARN)

    if arvargs.metrics:
        metrics.setLevel(logging.DEBUG)
        logging.getLogger("cwltool.metrics").setLevel(logging.DEBUG)

    if arvargs.log_timestamps:
        arvados.log_handler.setFormatter(logging.Formatter(
            '%(asctime)s %(name)s %(levelname)s: %(message)s',
            '%Y-%m-%d %H:%M:%S'))
    else:
        arvados.log_handler.setFormatter(logging.Formatter('%(name)s %(levelname)s: %(message)s'))

    return cwltool.main.main(args=arvargs,
                             stdout=stdout,
                             stderr=stderr,
                             executor=executor.arv_executor,
                             versionfunc=versionstring,
                             job_order_object=job_order_object,
                             logger_handler=arvados.log_handler,
                             custom_schema_callback=add_arv_hints,
                             loadingContext=executor.loadingContext,
                             runtimeContext=executor.runtimeContext)
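
# --- Illustration (not part of the original source) ----------------------
# Why ".find('-7fd4e-') == 5" identifies the object type: Arvados UUIDs
# have the form "<5-char cluster id>-<5-char type infix>-<15-char suffix>",
# so the type infix, with its flanking dashes, always starts at index 5.
# "7fd4e" is the workflow infix (containers API) and "p5p6p" the pipeline
# template infix (jobs API); the cluster id "zzzzz" below is hypothetical.
_example_uuid = "zzzzz-7fd4e-0123456789abcde"
assert _example_uuid.find('-7fd4e-') == 5
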
def main(args, stdout, stderr, api_client=None, keep_client=None,
         install_sig_handlers=True):
    parser = arg_parser()

    job_order_object = None
    arvargs = parser.parse_args(args)

    if len(arvargs.storage_classes.strip().split(',')) > 1:
        logger.error(str(u"Multiple storage classes are not supported currently."))
        return 1

    arvargs.use_container = True
    arvargs.relax_path_checks = True
    arvargs.print_supported_versions = False

    if install_sig_handlers:
        arv_cmd.install_signal_handlers()

    if arvargs.update_workflow:
        if arvargs.update_workflow.find('-7fd4e-') == 5:
            want_api = 'containers'
        else:
            want_api = None
        if want_api and arvargs.work_api and want_api != arvargs.work_api:
            logger.error(str(u'--update-workflow arg {!r} uses {!r} API, but --api={!r} specified').format(
                arvargs.update_workflow, want_api, arvargs.work_api))
            return 1
        arvargs.work_api = want_api

    if (arvargs.create_workflow or arvargs.update_workflow) and not arvargs.job_order:
        job_order_object = ({}, "")

    add_arv_hints()

    for key, val in viewitems(cwltool.argparser.get_default_args()):
        if not hasattr(arvargs, key):
            setattr(arvargs, key, val)

    try:
        if api_client is None:
            api_client = arvados.safeapi.ThreadSafeApiCache(
                api_params={"model": OrderedJsonModel(), "timeout": arvargs.http_timeout},
                keep_params={"num_retries": 4})
            keep_client = api_client.keep
            # Make an API object now so errors are reported early.
            api_client.users().current().execute()
        if keep_client is None:
            keep_client = arvados.keep.KeepClient(api_client=api_client, num_retries=4)
        executor = ArvCwlExecutor(api_client, arvargs, keep_client=keep_client, num_retries=4)
    except Exception:
        logger.exception("Error creating the Arvados CWL Executor")
        return 1

    # Note that unless in debug mode, some stack traces related to user
    # workflow errors may be suppressed.
    if arvargs.debug:
        logger.setLevel(logging.DEBUG)
        logging.getLogger('arvados').setLevel(logging.DEBUG)

    if arvargs.quiet:
        logger.setLevel(logging.WARN)
        logging.getLogger('arvados').setLevel(logging.WARN)
        logging.getLogger('arvados.arv-run').setLevel(logging.WARN)

    if arvargs.metrics:
        metrics.setLevel(logging.DEBUG)
        logging.getLogger("cwltool.metrics").setLevel(logging.DEBUG)

    if arvargs.log_timestamps:
        arvados.log_handler.setFormatter(logging.Formatter(
            '%(asctime)s %(name)s %(levelname)s: %(message)s',
            '%Y-%m-%d %H:%M:%S'))
    else:
        arvados.log_handler.setFormatter(logging.Formatter('%(name)s %(levelname)s: %(message)s'))

    if stdout is sys.stdout:
        # cwltool.main has code to work around encoding issues with
        # sys.stdout and unix pipes (they default to ASCII encoding,
        # we want utf-8), so when stdout is sys.stdout set it to None
        # to take advantage of that.  Don't override it for all cases
        # since we still want to be able to capture stdout for the
        # unit tests.
        stdout = None

    return cwltool.main.main(args=arvargs,
                             stdout=stdout,
                             stderr=stderr,
                             executor=executor.arv_executor,
                             versionfunc=versionstring,
                             job_order_object=job_order_object,
                             logger_handler=arvados.log_handler,
                             custom_schema_callback=add_arv_hints,
                             loadingContext=executor.loadingContext,
                             runtimeContext=executor.runtimeContext,
                             input_required=not (arvargs.create_workflow or arvargs.update_workflow))
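
# --- Illustration (not part of the original source) ----------------------
# A minimal sketch of calling this entry point programmatically instead of
# via the installed arvados-cwl-runner script. The import path "arvados_cwl"
# is an assumption about where main() lives; main() returns an exit code,
# so it can be handed straight to sys.exit().
if __name__ == "__main__":
    import sys
    from arvados_cwl import main  # assumed import path
    sys.exit(main(sys.argv[1:], sys.stdout, sys.stderr))
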
def main(arguments=None, stdout=sys.stdout, stderr=sys.stderr,
         install_sig_handlers=True):
    global api_client

    args = parse_arguments(arguments)
    logger = logging.getLogger('arvados.arv_put')
    if args.silent:
        logger.setLevel(logging.WARNING)
    else:
        logger.setLevel(logging.INFO)
    status = 0

    request_id = arvados.util.new_request_id()

    formatter = ArvPutLogFormatter(request_id)
    logging.getLogger('arvados').handlers[0].setFormatter(formatter)

    if api_client is None:
        api_client = arvados.api('v1', request_id=request_id)

    if install_sig_handlers:
        arv_cmd.install_signal_handlers()

    # Determine the name to use
    if args.name:
        if args.stream or args.raw:
            logger.error("Cannot use --name with --stream or --raw")
            sys.exit(1)
        elif args.update_collection:
            logger.error("Cannot use --name with --update-collection")
            sys.exit(1)
        collection_name = args.name
    else:
        collection_name = "Saved at {} by {}@{}".format(
            datetime.datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S UTC"),
            pwd.getpwuid(os.getuid()).pw_name,
            socket.gethostname())

    if args.project_uuid and (args.stream or args.raw):
        logger.error("Cannot use --project-uuid with --stream or --raw")
        sys.exit(1)

    # Determine the parent project
    try:
        project_uuid = desired_project_uuid(api_client, args.project_uuid,
                                            args.retries)
    except (apiclient_errors.Error, ValueError) as error:
        logger.error(error)
        sys.exit(1)

    if args.progress:
        reporter = progress_writer(human_progress)
    elif args.batch_progress:
        reporter = progress_writer(machine_progress)
    else:
        reporter = None

    # Split storage-classes argument
    storage_classes = None
    if args.storage_classes:
        storage_classes = args.storage_classes.strip().split(',')
        if len(storage_classes) > 1:
            logger.error("Multiple storage classes are not supported currently.")
            sys.exit(1)

    # Set up the exclude regex from all the --exclude arguments provided
    name_patterns = []
    exclude_paths = []
    exclude_names = None
    if len(args.exclude) > 0:
        # We're supporting 2 kinds of exclusion patterns:
        # 1)   --exclude '*.jpg'   (file/dir name patterns, will only match
        #                           the name, wherever the file is on the tree)
        # 2.1) --exclude 'foo/bar' (file/dir path patterns, will match the
        #                           entire path, and should be relative to
        #                           any input dir argument)
        # 2.2) --exclude './*.jpg' (special case for excluding files/dirs
        #                           placed directly underneath the input dir)
        for p in args.exclude:
            # Only relative path patterns allowed
            if p.startswith(os.sep):
                logger.error("Cannot use absolute paths with --exclude")
                sys.exit(1)
            if os.path.dirname(p):
                # We don't support path patterns containing '..'
                p_parts = p.split(os.sep)
                if '..' in p_parts:
                    logger.error(
                        "Cannot use path patterns that include '..'")
                    sys.exit(1)
                # Path search pattern
                exclude_paths.append(p)
            else:
                # Name-only search pattern
                name_patterns.append(p)
        # For name-only matching, we can combine all patterns into a single
        # regexp, for better performance.
        exclude_names = re.compile('|'.join(
            [fnmatch.translate(p) for p in name_patterns]
        )) if len(name_patterns) > 0 else None
        # Show the user the patterns to be used, just in case they weren't
        # specified inside quotes and got changed by the shell expansion.
        logger.info("Exclude patterns: {}".format(args.exclude))

    # If this is used by a human, and there's at least one directory to be
    # uploaded, the expected bytes calculation can take a moment.
    if args.progress and any([os.path.isdir(f) for f in args.paths]):
        logger.info("Calculating upload size, this could take some time...")
    try:
        writer = ArvPutUploadJob(paths=args.paths,
                                 resume=args.resume,
                                 use_cache=args.use_cache,
                                 filename=args.filename,
                                 reporter=reporter,
                                 api_client=api_client,
                                 num_retries=args.retries,
                                 replication_desired=args.replication,
                                 put_threads=args.threads,
                                 name=collection_name,
                                 owner_uuid=project_uuid,
                                 ensure_unique_name=True,
                                 update_collection=args.update_collection,
                                 storage_classes=storage_classes,
                                 logger=logger,
                                 dry_run=args.dry_run,
                                 follow_links=args.follow_links,
                                 exclude_paths=exclude_paths,
                                 exclude_names=exclude_names)
    except ResumeCacheConflict:
        logger.error("\n".join([
            "arv-put: Another process is already uploading this data.",
            "         Use --no-cache if this is really what you want."]))
        sys.exit(1)
    except ResumeCacheInvalidError:
        logger.error("\n".join([
            "arv-put: Resume cache contains invalid signature: it may have expired",
            "         or been created with another Arvados user's credentials.",
            "         Switch user or use one of the following options to restart upload:",
            "         --no-resume to start a new resume cache.",
            "         --no-cache to disable resume cache."]))
        sys.exit(1)
    except CollectionUpdateError as error:
        logger.error("\n".join([
            "arv-put: %s" % str(error)]))
        sys.exit(1)
    except ArvPutUploadIsPending:
        # Dry run check successful, return proper exit code.
        sys.exit(2)
    except ArvPutUploadNotPending:
        # No files pending for upload
        sys.exit(0)
    except PathDoesNotExistError as error:
        logger.error("\n".join([
            "arv-put: %s" % str(error)]))
        sys.exit(1)

    if not args.dry_run and not args.update_collection and args.resume and writer.bytes_written > 0:
        logger.warning("\n".join([
            "arv-put: Resuming previous upload from last checkpoint.",
            "         Use the --no-resume option to start over."]))

    if not args.dry_run:
        writer.report_progress()
    output = None
    try:
        writer.start(save_collection=not (args.stream or args.raw))
    except arvados.errors.ApiError as error:
        logger.error("\n".join([
            "arv-put: %s" % str(error)]))
        sys.exit(1)

    if args.progress:  # Print newline to split stderr from stdout for humans.
        logger.info("\n")

    if args.stream:
        if args.normalize:
            output = writer.manifest_text(normalize=True)
        else:
            output = writer.manifest_text()
    elif args.raw:
        output = ','.join(writer.data_locators())
    else:
        try:
            if args.update_collection:
                logger.info(u"Collection updated: '{}'".format(writer.collection_name()))
            else:
                logger.info(u"Collection saved as '{}'".format(writer.collection_name()))
            if args.portable_data_hash:
                output = writer.portable_data_hash()
            else:
                output = writer.manifest_locator()
        except apiclient_errors.Error as error:
            logger.error(
                "arv-put: Error creating Collection on project: {}.".format(
                    error))
            status = 1

    # Print the locator (uuid) of the new collection.
    if output is None:
        status = status or 1
    elif not args.silent:
        stdout.write(output)
        if not output.endswith('\n'):
            stdout.write('\n')

    if install_sig_handlers:
        arv_cmd.restore_signal_handlers()

    if status != 0:
        sys.exit(status)

    # Success!
    return output
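
# --- Illustration (not part of the original source) ----------------------
# A minimal sketch of driving arv-put from Python. The import path
# "arvados.commands.put" and the input path "data/sample.txt" are
# assumptions for illustration; on success main() returns the same locator
# (or manifest) it writes to stdout, and on failure it calls sys.exit().
def _upload_example():
    import io
    import sys
    from arvados.commands.put import main  # assumed import path
    captured = io.StringIO()
    # --portable-data-hash requests the collection's PDH instead of the
    # manifest locator (see the args.portable_data_hash branch above).
    return main(['--portable-data-hash', 'data/sample.txt'],
                stdout=captured, stderr=sys.stderr,
                install_sig_handlers=False)
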
def main(args, stdout, stderr, api_client=None, keep_client=None,
         install_sig_handlers=True):
    parser = arg_parser()

    job_order_object = None
    arvargs = parser.parse_args(args)

    if len(arvargs.storage_classes.strip().split(',')) > 1:
        logger.error(
            str(u"Multiple storage classes are not supported currently."))
        return 1

    arvargs.use_container = True
    arvargs.relax_path_checks = True
    arvargs.print_supported_versions = False

    if install_sig_handlers:
        arv_cmd.install_signal_handlers()

    if arvargs.update_workflow:
        if arvargs.update_workflow.find('-7fd4e-') == 5:
            want_api = 'containers'
        elif arvargs.update_workflow.find('-p5p6p-') == 5:
            want_api = 'jobs'
        else:
            want_api = None
        if want_api and arvargs.work_api and want_api != arvargs.work_api:
            logger.error(
                str(u'--update-workflow arg {!r} uses {!r} API, but --api={!r} specified').format(
                    arvargs.update_workflow, want_api, arvargs.work_api))
            return 1
        arvargs.work_api = want_api

    if (arvargs.create_workflow or arvargs.update_workflow) and not arvargs.job_order:
        job_order_object = ({}, "")

    add_arv_hints()

    for key, val in viewitems(cwltool.argparser.get_default_args()):
        if not hasattr(arvargs, key):
            setattr(arvargs, key, val)

    try:
        if api_client is None:
            api_client = arvados.safeapi.ThreadSafeApiCache(
                api_params={"model": OrderedJsonModel(),
                            "timeout": arvargs.http_timeout},
                keep_params={"num_retries": 4})
            keep_client = api_client.keep
            # Make an API object now so errors are reported early.
            api_client.users().current().execute()
        if keep_client is None:
            keep_client = arvados.keep.KeepClient(api_client=api_client,
                                                  num_retries=4)
        executor = ArvCwlExecutor(api_client, arvargs,
                                  keep_client=keep_client, num_retries=4)
    except Exception as e:
        logger.error(e)
        return 1

    if arvargs.debug:
        logger.setLevel(logging.DEBUG)
        logging.getLogger('arvados').setLevel(logging.DEBUG)

    if arvargs.quiet:
        logger.setLevel(logging.WARN)
        logging.getLogger('arvados').setLevel(logging.WARN)
        logging.getLogger('arvados.arv-run').setLevel(logging.WARN)

    if arvargs.metrics:
        metrics.setLevel(logging.DEBUG)
        logging.getLogger("cwltool.metrics").setLevel(logging.DEBUG)

    if arvargs.log_timestamps:
        arvados.log_handler.setFormatter(logging.Formatter(
            '%(asctime)s %(name)s %(levelname)s: %(message)s',
            '%Y-%m-%d %H:%M:%S'))
    else:
        arvados.log_handler.setFormatter(
            logging.Formatter('%(name)s %(levelname)s: %(message)s'))

    return cwltool.main.main(args=arvargs,
                             stdout=stdout,
                             stderr=stderr,
                             executor=executor.arv_executor,
                             versionfunc=versionstring,
                             job_order_object=job_order_object,
                             logger_handler=arvados.log_handler,
                             custom_schema_callback=add_arv_hints,
                             loadingContext=executor.loadingContext,
                             runtimeContext=executor.runtimeContext)
def main(arguments=None, stdout=sys.stdout, stderr=sys.stderr,
         install_sig_handlers=True):
    global api_client

    args = parse_arguments(arguments)
    logger = logging.getLogger('arvados.arv_put')
    if args.silent:
        logger.setLevel(logging.WARNING)
    else:
        logger.setLevel(logging.INFO)
    status = 0

    request_id = arvados.util.new_request_id()

    formatter = ArvPutLogFormatter(request_id)
    logging.getLogger('arvados').handlers[0].setFormatter(formatter)

    if api_client is None:
        api_client = arvados.api('v1', request_id=request_id)

    if install_sig_handlers:
        arv_cmd.install_signal_handlers()

    # Determine the name to use
    if args.name:
        if args.stream or args.raw:
            logger.error("Cannot use --name with --stream or --raw")
            sys.exit(1)
        elif args.update_collection:
            logger.error("Cannot use --name with --update-collection")
            sys.exit(1)
        collection_name = args.name
    else:
        collection_name = "Saved at {} by {}@{}".format(
            datetime.datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S UTC"),
            pwd.getpwuid(os.getuid()).pw_name,
            socket.gethostname())

    if args.project_uuid and (args.stream or args.raw):
        logger.error("Cannot use --project-uuid with --stream or --raw")
        sys.exit(1)

    # Determine the parent project
    try:
        project_uuid = desired_project_uuid(api_client, args.project_uuid,
                                            args.retries)
    except (apiclient_errors.Error, ValueError) as error:
        logger.error(error)
        sys.exit(1)

    if args.progress:
        reporter = progress_writer(human_progress)
    elif args.batch_progress:
        reporter = progress_writer(machine_progress)
    else:
        reporter = None

    # Split storage-classes argument
    storage_classes = None
    if args.storage_classes:
        storage_classes = args.storage_classes.strip().split(',')
        if len(storage_classes) > 1:
            logger.error("Multiple storage classes are not supported currently.")
            sys.exit(1)

    # Set up the exclude regex from all the --exclude arguments provided
    name_patterns = []
    exclude_paths = []
    exclude_names = None
    if len(args.exclude) > 0:
        # We're supporting 2 kinds of exclusion patterns:
        # 1)   --exclude '*.jpg'   (file/dir name patterns, will only match
        #                           the name, wherever the file is on the tree)
        # 2.1) --exclude 'foo/bar' (file/dir path patterns, will match the
        #                           entire path, and should be relative to
        #                           any input dir argument)
        # 2.2) --exclude './*.jpg' (special case for excluding files/dirs
        #                           placed directly underneath the input dir)
        for p in args.exclude:
            # Only relative path patterns allowed
            if p.startswith(os.sep):
                logger.error("Cannot use absolute paths with --exclude")
                sys.exit(1)
            if os.path.dirname(p):
                # We don't support path patterns containing '..'
                p_parts = p.split(os.sep)
                if '..' in p_parts:
                    logger.error(
                        "Cannot use path patterns that include '..'")
                    sys.exit(1)
                # Path search pattern
                exclude_paths.append(p)
            else:
                # Name-only search pattern
                name_patterns.append(p)
        # For name-only matching, we can combine all patterns into a single
        # regexp, for better performance.
        exclude_names = re.compile('|'.join(
            [fnmatch.translate(p) for p in name_patterns]
        )) if len(name_patterns) > 0 else None
        # Show the user the patterns to be used, just in case they weren't
        # specified inside quotes and got changed by the shell expansion.
        logger.info("Exclude patterns: {}".format(args.exclude))

    # If this is used by a human, and there's at least one directory to be
    # uploaded, the expected bytes calculation can take a moment.
    if args.progress and any([os.path.isdir(f) for f in args.paths]):
        logger.info("Calculating upload size, this could take some time...")
    try:
        writer = ArvPutUploadJob(paths=args.paths,
                                 resume=args.resume,
                                 use_cache=args.use_cache,
                                 filename=args.filename,
                                 reporter=reporter,
                                 api_client=api_client,
                                 num_retries=args.retries,
                                 replication_desired=args.replication,
                                 put_threads=args.threads,
                                 name=collection_name,
                                 owner_uuid=project_uuid,
                                 ensure_unique_name=True,
                                 update_collection=args.update_collection,
                                 storage_classes=storage_classes,
                                 logger=logger,
                                 dry_run=args.dry_run,
                                 follow_links=args.follow_links,
                                 exclude_paths=exclude_paths,
                                 exclude_names=exclude_names)
    except ResumeCacheConflict:
        logger.error("\n".join([
            "arv-put: Another process is already uploading this data.",
            "         Use --no-cache if this is really what you want."]))
        sys.exit(1)
    except CollectionUpdateError as error:
        logger.error("\n".join([
            "arv-put: %s" % str(error)]))
        sys.exit(1)
    except ArvPutUploadIsPending:
        # Dry run check successful, return proper exit code.
        sys.exit(2)
    except ArvPutUploadNotPending:
        # No files pending for upload
        sys.exit(0)
    except PathDoesNotExistError as error:
        logger.error("\n".join([
            "arv-put: %s" % str(error)]))
        sys.exit(1)

    if not args.dry_run and not args.update_collection and args.resume and writer.bytes_written > 0:
        logger.warning("\n".join([
            "arv-put: Resuming previous upload from last checkpoint.",
            "         Use the --no-resume option to start over."]))

    if not args.dry_run:
        writer.report_progress()
    output = None
    try:
        writer.start(save_collection=not (args.stream or args.raw))
    except arvados.errors.ApiError as error:
        logger.error("\n".join([
            "arv-put: %s" % str(error)]))
        sys.exit(1)

    if args.progress:  # Print newline to split stderr from stdout for humans.
        logger.info("\n")

    if args.stream:
        if args.normalize:
            output = writer.manifest_text(normalize=True)
        else:
            output = writer.manifest_text()
    elif args.raw:
        output = ','.join(writer.data_locators())
    else:
        try:
            if args.update_collection:
                logger.info("Collection updated: '{}'".format(writer.collection_name()))
            else:
                logger.info("Collection saved as '{}'".format(writer.collection_name()))
            if args.portable_data_hash:
                output = writer.portable_data_hash()
            else:
                output = writer.manifest_locator()
        except apiclient_errors.Error as error:
            logger.error(
                "arv-put: Error creating Collection on project: {}.".format(
                    error))
            status = 1

    # Print the locator (uuid) of the new collection.
    if output is None:
        status = status or 1
    elif not args.silent:
        stdout.write(output)
        if not output.endswith('\n'):
            stdout.write('\n')

    if install_sig_handlers:
        arv_cmd.restore_signal_handlers()

    if status != 0:
        sys.exit(status)

    # Success!
    return output
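
# --- Illustration (not part of the original source) ----------------------
# Standalone sketch of the exclude-name matching technique used above:
# fnmatch.translate() turns each shell-style pattern into an anchored
# regex, and joining the results with '|' yields one compiled matcher
# covering every name-only pattern. The sample patterns are hypothetical.
import fnmatch
import re

_name_patterns = ['*.jpg', '*.tmp']
_exclude_names = re.compile('|'.join(
    fnmatch.translate(p) for p in _name_patterns))

for _name in ['photo.jpg', 'notes.txt', 'scratch.tmp']:
    # match() anchors at the start; translate() anchors the end with \Z,
    # so this only fires on full-name matches.
    print(_name, bool(_exclude_names.match(_name)))
# Output: photo.jpg True / notes.txt False / scratch.tmp True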