def Run(self, args): """This is what gets called when the user runs this command. Args: args: argparse.Namespace, All the arguments that were provided to this command invocation. Raises: files.Error: A file argument could not be read. GenomicsError: User input was invalid. HttpException: An http error response was received while executing api request. Returns: Operation representing the running pipeline. """ v2 = False pipeline = None apitools_client = genomics_util.GetGenomicsClient('v1alpha2') genomics_messages = genomics_util.GetGenomicsMessages('v1alpha2') if args.pipeline_file: if args.command_line: raise exceptions.GenomicsError( '--command_line cannot be used with --pipeline-file.') pipeline = genomics_util.GetFileAsMessage( args.pipeline_file, genomics_messages.Pipeline, self.context[lib.STORAGE_V1_CLIENT_KEY]) pipeline.projectId = genomics_util.GetProjectId() if not pipeline.docker: v2 = True apitools_client = genomics_util.GetGenomicsClient('v2alpha1') genomics_messages = genomics_util.GetGenomicsMessages( 'v2alpha1') pipeline = genomics_util.GetFileAsMessage( args.pipeline_file, genomics_messages.Pipeline, self.context[lib.STORAGE_V1_CLIENT_KEY]) elif args.command_line: v2 = True apitools_client = genomics_util.GetGenomicsClient('v2alpha1') genomics_messages = genomics_util.GetGenomicsMessages('v2alpha1') pipeline = genomics_messages.Pipeline(actions=[ genomics_messages.Action(imageUri=args.docker_image, commands=['-c', args.command_line], entrypoint='bash') ]) else: raise exceptions.GenomicsError( 'Either --pipeline-file or --command_line is required.') arg_inputs = _ValidateAndMergeArgInputs(args) request = None if v2: # Create messages up front to avoid checking for None everywhere. if not pipeline.resources: pipeline.resources = genomics_messages.Resources() resources = pipeline.resources if not resources.virtualMachine: resources.virtualMachine = genomics_messages.VirtualMachine( machineType='n1-standard-1') virtual_machine = resources.virtualMachine if not virtual_machine.serviceAccount: virtual_machine.serviceAccount = genomics_messages.ServiceAccount( ) # Always set the project id. resources.projectId = genomics_util.GetProjectId() # Update the pipeline based on arguments. if args.memory or args.cpus: # Default to n1-standard1 sizes. virtual_machine.machineType = 'custom-%d-%d' % ( args.cpus or 1, (args.memory or 3.84) * 1000) if args.preemptible: virtual_machine.preemptible = args.preemptible if args.zones: resources.zones = args.zones elif not resources.zones and properties.VALUES.compute.zone.Get(): resources.zones = [properties.VALUES.compute.zone.Get()] if args.regions: resources.regions = args.regions elif not resources.regions and properties.VALUES.compute.region.Get( ): resources.regions = [properties.VALUES.compute.region.Get()] if args.service_account_email != 'default': virtual_machine.serviceAccount.email = args.service_account_email if args.service_account_scopes: virtual_machine.serviceAccount.scopes = args.service_account_scopes # Always add a scope for GCS in case any arguments need it. virtual_machine.serviceAccount.scopes.append( 'https://www.googleapis.com/auth/devstorage.read_write') # Generate paths for inputs and outputs in a shared location and put them # into the environment for actions based on their name. env = {} if arg_inputs: input_generator = _SharedPathGenerator('input') for name, value in arg_inputs.items(): if genomics_util.IsGcsPath(value): env[name] = input_generator.Generate() pipeline.actions.insert( 0, genomics_messages.Action( imageUri=CLOUD_SDK_IMAGE, commands=[ '/bin/sh', '-c', 'gsutil -q cp %s ${%s}' % (value, name) ])) else: env[name] = value if args.outputs: output_generator = _SharedPathGenerator('output') for name, value in args.outputs.items(): env[name] = output_generator.Generate() pipeline.actions.append( genomics_messages.Action(imageUri=CLOUD_SDK_IMAGE, commands=[ '/bin/sh', '-c', 'gsutil -q cp ${%s} %s' % (name, value) ])) # Merge any existing pipeline arguments into the generated environment and # update the pipeline. if pipeline.environment: for val in pipeline.environment.additionalProperties: if val.key not in env: env[val.key] = val.value pipeline.environment = genomics_messages.Pipeline.EnvironmentValue( additionalProperties=genomics_util. ArgDictToAdditionalPropertiesList( env, genomics_messages.Pipeline.EnvironmentValue. AdditionalProperty)) if arg_inputs or args.outputs: virtual_machine.disks.append( genomics_messages.Disk(name=SHARED_DISK)) for action in pipeline.actions: action.mounts.append( genomics_messages.Mount(disk=SHARED_DISK, path='/' + SHARED_DISK)) if args.logging: pipeline.actions.append( genomics_messages.Action( imageUri=CLOUD_SDK_IMAGE, commands=[ '/bin/sh', '-c', 'gsutil -q cp /google/logs/output ' + args.logging ], flags=[(genomics_messages.Action. FlagsValueListEntryValuesEnum.ALWAYS_RUN)])) # Update disk sizes if specified, potentially including the shared disk. if args.disk_size: disk_sizes = {} for disk_encoding in args.disk_size.split(','): parts = disk_encoding.split(':', 1) try: disk_sizes[parts[0]] = int(parts[1]) except: raise exceptions.GenomicsError('Invalid --disk-size.') for disk in virtual_machine.disks: size = disk_sizes[disk.name] if size: disk.sizeGb = size request = genomics_messages.RunPipelineRequest( pipeline=pipeline, labels=labels_util.ParseCreateArgs( args, genomics_messages.RunPipelineRequest.LabelsValue)) else: inputs = genomics_util.ArgDictToAdditionalPropertiesList( arg_inputs, genomics_messages.RunPipelineArgs.InputsValue. AdditionalProperty) outputs = genomics_util.ArgDictToAdditionalPropertiesList( args.outputs, genomics_messages.RunPipelineArgs.OutputsValue. AdditionalProperty) # Set "overrides" on the resources. If the user did not pass anything on # the command line, do not set anything in the resource: preserve the # user-intent "did not set" vs. "set an empty value/list" resources = genomics_messages.PipelineResources( preemptible=args.preemptible) if args.memory: resources.minimumRamGb = args.memory if args.cpus: resources.minimumCpuCores = args.cpus if args.disk_size: resources.disks = [] for disk_encoding in args.disk_size.split(','): disk_args = disk_encoding.split(':', 1) resources.disks.append( genomics_messages.Disk(name=disk_args[0], sizeGb=int(disk_args[1]))) # Progression for picking the right zones... # If specified on the command line, use them. # If specified in the Pipeline definition, use them. # If there is a GCE default zone in the local configuration, use it. # Else let the API select a zone if args.zones: resources.zones = args.zones elif pipeline.resources and pipeline.resources.zones: pass elif properties.VALUES.compute.zone.Get(): resources.zones = [properties.VALUES.compute.zone.Get()] request = genomics_messages.RunPipelineRequest( ephemeralPipeline=pipeline, pipelineArgs=genomics_messages.RunPipelineArgs( inputs=genomics_messages.RunPipelineArgs.InputsValue( additionalProperties=inputs), outputs=genomics_messages.RunPipelineArgs.OutputsValue( additionalProperties=outputs), clientId=args.run_id, logging=genomics_messages.LoggingOptions( gcsPath=args.logging), labels=labels_util.ParseCreateArgs( args, genomics_messages.RunPipelineArgs.LabelsValue), projectId=genomics_util.GetProjectId(), serviceAccount=genomics_messages.ServiceAccount( email=args.service_account_email, scopes=args.service_account_scopes), resources=resources)) result = apitools_client.pipelines.Run(request) log.status.Print('Running [{0}].'.format(result.name)) return result
def Run(self, args): """This is what gets called when the user runs this command. Args: args: argparse.Namespace, All the arguments that were provided to this command invocation. Raises: files.Error: A file argument could not be read. GenomicsError: User input was invalid. HttpException: An http error response was received while executing api request. Returns: Operation representing the running pipeline. """ apitools_client = genomics_util.GetGenomicsClient('v1alpha2') genomics_messages = genomics_util.GetGenomicsMessages('v1alpha2') pipeline = genomics_util.GetFileAsMessage( args.pipeline_file, genomics_messages.Pipeline, self.context[lib.STORAGE_V1_CLIENT_KEY]) pipeline.projectId = genomics_util.GetProjectId() arg_inputs = _ValidateAndMergeArgInputs(args) inputs = genomics_util.ArgDictToAdditionalPropertiesList( arg_inputs, genomics_messages.RunPipelineArgs.InputsValue.AdditionalProperty) outputs = genomics_util.ArgDictToAdditionalPropertiesList( args.outputs, genomics_messages.RunPipelineArgs.OutputsValue.AdditionalProperty) # Set "overrides" on the resources. If the user did not pass anything on # the command line, do not set anything in the resource: preserve the # user-intent "did not set" vs. "set an empty value/list" resources = genomics_messages.PipelineResources( preemptible=args.preemptible) if args.memory: resources.minimumRamGb = args.memory if args.disk_size: resources.disks = [] for disk_encoding in args.disk_size.split(','): disk_args = disk_encoding.split(':', 1) resources.disks.append(genomics_messages.Disk( name=disk_args[0], sizeGb=int(disk_args[1]) )) # Progression for picking the right zones... # If specified on the command line, use them. # If specified in the Pipeline definition, use them. # If there is a GCE default zone in the local configuration, use it. # Else let the API select a zone if args.zones: resources.zones = args.zones elif pipeline.resources and pipeline.resources.zones: pass elif properties.VALUES.compute.zone.Get(): resources.zones = [properties.VALUES.compute.zone.Get()] request = genomics_messages.RunPipelineRequest( ephemeralPipeline=pipeline, pipelineArgs=genomics_messages.RunPipelineArgs( inputs=genomics_messages.RunPipelineArgs.InputsValue( additionalProperties=inputs), outputs=genomics_messages.RunPipelineArgs.OutputsValue( additionalProperties=outputs), clientId=args.run_id, logging=genomics_messages.LoggingOptions(gcsPath=args.logging), labels=labels_util.Diff.FromCreateArgs(args).Apply( genomics_messages.RunPipelineArgs.LabelsValue), projectId=genomics_util.GetProjectId(), serviceAccount=genomics_messages.ServiceAccount( email=args.service_account_email, scopes=args.service_account_scopes), resources=resources)) result = apitools_client.pipelines.Run(request) log.status.Print('Running [{0}].'.format(result.name)) return result