Пример #1
0
    def _DoAddRows(self, json_file, args):
        """Insert the rows described by json_file into args.target_table.

        Each non-blank line of json_file is converted to a row message and
        queued. Whenever the queue reaches _MAX_ROWS_PER_REQUEST rows (when that
        limit is truthy) it is sent as one insert request; rows still queued
        after the last line are sent in a final request. Processing stops at the
        first request that reports insert errors.

        Args:
          json_file: an iterable of text lines, one row per line.
          args: an argparse namespace; target_table and quiet are read here.

        Raises:
          bigquery.Error: if a line cannot be converted to a row, or if an
            insert request reports errors.
        """
        context = self.context
        client = context[commands.APITOOLS_CLIENT_KEY]
        messages = context[commands.BIGQUERY_MESSAGES_MODULE_KEY]
        registry = context[commands.BIGQUERY_REGISTRY_KEY]
        table_reference = message_conversions.TableResourceToReference(
            messages,
            registry.Parse(args.target_table, collection='bigquery.tables'))
        builder = bigquery_json_object_messages.MessageBuilder(messages)

        pending_rows = []
        lines_read = 0
        insert_failed = False

        for lines_read, raw_line in enumerate(json_file, 1):
            text = raw_line.strip()
            if not text:
                # Blank lines are counted but contribute no row.
                continue
            try:
                row = builder.Build(text)
            except bigquery.ClientError as e:
                raise bigquery.Error(
                    'Line {num}: {msg}'.format(num=lines_read, msg=str(e)),
                    None, [])
            pending_rows.append(row)

            batch_is_full = (_MAX_ROWS_PER_REQUEST
                             and len(pending_rows) == _MAX_ROWS_PER_REQUEST)
            if not batch_is_full:
                continue

            result = TablesAddRows._InsertTableRows(
                table_reference, pending_rows, client, messages)
            # Empty the list in place so the same object is reused for the
            # next batch.
            del pending_rows[:]
            if result.insertErrors:
                insert_failed = True
                break

        if lines_read == 0:
            log.status.Print('JSON file was empty.')
            return

        # Send whatever is left over, unless an earlier batch already failed.
        if pending_rows and not insert_failed:
            result = TablesAddRows._InsertTableRows(
                table_reference, pending_rows, client, messages)
            insert_failed = bool(result.insertErrors)

        if not insert_failed:
            if not args.quiet:
                log.status.Print('Rows added successfully.')
            return

        # insert_failed is only ever set right after assigning result, so
        # result is always bound here.
        for entry in result.insertErrors:
            log.err.Print(
                'Record {0} could not be inserted:'.format(entry.index))
            for error in entry.errors:
                log.err.Print(
                    '\t{0}: {1}'.format(error.reason, error.message))
        raise bigquery.Error('Rows not added')
Пример #2
0
    def Run(self, args):
        """Wait for a BigQuery job to reach the DONE state and return it.

        If args.job_id is not set, the project's current jobs are scanned and
        the single job in state PENDING or RUNNING is waited on.

        Args:
          args: an argparse namespace, All the arguments that were provided to
            this command invocation. max_wait, job_id and changed_status_only
            are read here.

        Raises:
          bigquery.ClientError: if args.max_wait cannot be converted to an int.
          bigquery.Error: if no job id was provided and the number of pending
            or running jobs was not exactly one.
          bigquery.TimeoutError: on time out.

        Returns:
          A Job message for the job we were waiting for.
        """
        apitools_client = self.context[commands.APITOOLS_CLIENT_KEY]
        bigquery_messages = self.context[commands.BIGQUERY_MESSAGES_MODULE_KEY]
        resource_parser = self.context[commands.BIGQUERY_REGISTRY_KEY]
        project_id = properties.VALUES.core.project.Get(required=True)

        try:
            # sys.maxsize stands in for "no limit"; unlike sys.maxint it exists
            # on both Python 2.6+ and Python 3.
            max_wait = int(args.max_wait) if args.max_wait else sys.maxsize
        except ValueError:
            raise bigquery.ClientError('Invalid wait time: {0}'.format(
                args.max_wait))

        if args.job_id:
            job_resource = resource_parser.Parse(args.job_id,
                                                 collection='bigquery.jobs')
            job_reference = message_conversions.JobResourceToReference(
                bigquery_messages, job_resource)
        else:
            project = bigquery.Project(project_id)
            running_jobs = [
                job for job in project.GetCurrentJobsGenerator()
                if job.state in ('PENDING', 'RUNNING')
            ]
            # Without an explicit id the choice must be unambiguous: exactly
            # one pending/running job.
            if len(running_jobs) != 1:
                raise bigquery.Error(
                    'No job ID provided, found {0} running jobs'.format(
                        len(running_jobs)))
            job_reference = running_jobs[0].jobReference

        start_time = bigquery.CurrentTimeInSec()
        job = None
        reporting_mode = (job_progress.STATUS_REPORTING_CHANGES
                          if args.changed_status_only
                          else job_progress.STATUS_REPORTING_PERIODIC)
        progress_reporter = job_progress.ProgressReporter(reporting_mode)

        # Create an iterator for polling intervals that yields 1,1,1,1,1,1,1,1,
        # 2,5,8,11,14,17,20,23,26,29, 30,30,30,...
        polling_intervals = itertools.chain(itertools.repeat(1, 8),
                                            range(2, 30, 3),
                                            itertools.repeat(30))

        total_wait_so_far = 0
        current_status = 'UNKNOWN'

        while total_wait_so_far < max_wait:

            try:
                request = bigquery_messages.BigqueryJobsGetRequest(
                    projectId=job_reference.projectId,
                    jobId=job_reference.jobId)
                job = apitools_client.jobs.Get(request)
                current_status = job.status.state
                if current_status == 'DONE':
                    progress_reporter.Print(job_reference.jobId,
                                            total_wait_so_far, current_status)
                    break
            except apitools_base.HttpError as server_error:
                # Convert the HTTP error to a bigquery error and raise it so
                # the handler below can select on its type; any other error
                # type propagates to the caller.
                try:
                    raise bigquery.Error.ForHttpError(server_error)
                except (bigquery.CommunicationError,
                        bigquery.BackendError) as e:
                    # Communication errors and temporary server errors while
                    # waiting on a job are okay.
                    log.status.Print(
                        'Transient error during job status check: {0}'.format(
                            e))

            # Every second of this polling interval, update the display of the time
            # waited so far:
            seconds_in_interval = next(polling_intervals)
            total_wait_so_far = bigquery.CurrentTimeInSec() - start_time
            for _ in range(seconds_in_interval):
                # Stop sleeping once the limit is reached, so the timeout is
                # not overshot by the remainder of a long polling interval.
                if total_wait_so_far >= max_wait:
                    break
                progress_reporter.Print(job_reference.jobId, total_wait_so_far,
                                        current_status)
                bigquery.Wait(1)
                total_wait_so_far = bigquery.CurrentTimeInSec() - start_time

        else:
            # The while condition became false without the job reaching DONE.
            raise bigquery.TimeoutError(
                ('Wait timed out. Operation not finished, in state {0}'.format(
                    current_status)), None, [])
        progress_reporter.Done()
        return job