Example #1
import json
import logging
import time

# Project-internal dependencies (BigQuery, TaskCreator, CopyJobRequest,
# CopyJobResult, configuration, AppInfo, ErrorReporting, DATASET_ID,
# TIMEOUT, PERIOD, LoadDatastoreBackupsToBigQueryException) are assumed
# to be importable from the surrounding package; their import paths are
# not shown in this example.


class ResultCheck(object):
    __MAX_RETRY_COUNT = 6

    def __init__(self):
        self.BQ = BigQuery()

    def check(self, result_check_request):
        # Fetch the copy job state and either process its result (when
        # the job is done) or schedule another result check task.
        self.__copy_job_type_id = result_check_request.copy_job_type_id
        self.__post_copy_action_request = result_check_request.post_copy_action_request
        job_json = self.BQ.get_job(result_check_request.job_reference)

        logging.info('Checking result (retryCount=%s) of job: %s',
                     result_check_request.retry_count, json.dumps(job_json))

        copy_job_result = CopyJobResult(job_json)

        if copy_job_result.is_done():
            logging.info('Copy job %s complete',
                         result_check_request.job_reference)
            self.__process_copy_job_result(
                copy_job_result,
                result_check_request.retry_count
            )
        else:
            logging.info(
                "Copy job '%s' not completed yet. Another result check "
                "is put on the queue.",
                result_check_request.job_reference)
            TaskCreator.create_copy_job_result_check(result_check_request)

    def __process_copy_job_result(self, job_result, retry_count):
        if job_result.has_errors():
            logging.error('Copy job failed (retry_count=%s): %s',
                          retry_count, job_result.error_message)
            if self.__should_retry(job_result.error_result) \
                    and retry_count < self.__MAX_RETRY_COUNT:
                # Retryable error below the retry limit - re-enqueue the
                # copy job with an incremented retry count.
                logging.error('Re-triggering the copy job.')
                retry_count += 1
                TaskCreator.create_copy_job(
                    CopyJobRequest(
                        task_name_suffix=None,
                        copy_job_type_id=self.__copy_job_type_id,
                        source_big_query_table=job_result.source_bq_table,
                        target_big_query_table=job_result.target_bq_table,
                        create_disposition=job_result.create_disposition,
                        write_disposition=job_result.write_disposition,
                        retry_count=retry_count,
                        post_copy_action_request=self.__post_copy_action_request
                    )
                )
                return

        if self.__post_copy_action_request is not None:
            TaskCreator.create_post_copy_action(
                copy_job_type_id=self.__copy_job_type_id,
                post_copy_action_request=self.__post_copy_action_request,
                job_json=job_result.get_raw_job_json()
            )

    @staticmethod
    def __should_retry(error_result):
        reason = error_result['reason']
        if reason == 'backendError':  # BigQuery error, retry
            return True
        if reason == 'internalError':  # BigQuery error, retry
            return True
        if reason == 'quotaExceeded':  # copy jobs quota exceeded
            return True
        if reason == 'duplicate':  # table exists already
            return False
        if reason == 'invalid':  # invalid table type, check it
            return False
        return False
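
A minimal sketch of how ResultCheck might be driven from a task handler, assuming a ResultCheckRequest value object (hypothetical name) that carries the fields check() reads: job_reference, retry_count, copy_job_type_id and post_copy_action_request. The payload keys are purely illustrative.

# Hypothetical usage sketch - ResultCheckRequest and the payload keys
# are assumptions, not taken from the example above.
def handle_result_check_task(payload):
    result_check_request = ResultCheckRequest(
        copy_job_type_id=payload['copyJobTypeId'],
        job_reference=payload['jobReference'],
        retry_count=payload.get('retryCount', 0),
        post_copy_action_request=payload.get('postCopyActionRequest')
    )
    ResultCheck().check(result_check_request)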


class LoadDatastoreBackupsToBigQueryService(object):
    def __init__(self, date):
        self.date = date
        self.big_query = BigQuery()
        self.location = AppInfo().get_location()

    def load(self, source_uri, kinds):
        # Create the target dataset, then submit one load job per
        # Datastore kind and report whether all of them finished
        # successfully.
        self.big_query.create_dataset(
            configuration.metadata_storage_project_id, DATASET_ID,
            self.location)

        load_jobs = []
        for kind in kinds:
            job_reference = self.big_query.insert_job(
                project_id=configuration.backup_project_id,
                body=self.__create_job_body(source_uri, kind))
            load_jobs.append(job_reference)

        return self.__all_finished_with_success(load_jobs)

    def __create_job_body(self, source_uri, kind):
        # Build the load job configuration for a single Datastore
        # backup kind exported to GCS.
        return {
            "projectId": configuration.backup_project_id,
            "location": self.location,
            "configuration": {
                "load": {
                    "sourceFormat":
                    "DATASTORE_BACKUP",
                    "writeDisposition":
                    "WRITE_TRUNCATE",
                    "sourceUris": [
                        "{}/all_namespaces/kind_{}/all_namespaces_kind_{}"
                        ".export_metadata".format(source_uri, kind, kind)
                    ],
                    "destinationTable": {
                        "projectId": configuration.metadata_storage_project_id,
                        "datasetId": DATASET_ID,
                        "tableId": kind + "_" + self.date
                    }
                }
            }
        }

    def __all_finished_with_success(self, load_jobs):
        result = True
        for load_job in load_jobs:
            if not self.__is_finished_with_success(load_job):
                result = False
        return result

    def __is_finished_with_success(self, load_job):
        # __wait_till_done blocks until the job is DONE (or raises on
        # errors); the job is treated as failed when the wait exceeds
        # TIMEOUT.
        finish_time = time.time() + TIMEOUT
        self.__wait_till_done(load_job)

        if time.time() > finish_time:
            ErrorReporting().report("Timeout (%d seconds) exceeded" %
                                    TIMEOUT)
            logging.warning("Export from GCS to BQ timed out.")
            return False
        logging.info("Export from GCS to BQ finished successfully.")
        return True

    def __wait_till_done(self, load_job):
        # Poll the job status until it reaches the DONE state, raising
        # if the job reports errors.
        while True:
            result = self.big_query.get_job(load_job)
            if 'errors' in result['status']:
                raise LoadDatastoreBackupsToBigQueryException(
                    "Export from GCS to BQ failed, job reference: {}".format(
                        load_job))
            if result['status']['state'] == 'DONE':
                return

            logging.info(
                "Export from GCS to BQ still in progress (job: %s). "
                "Waiting %d seconds before checking the results again.",
                load_job, PERIOD)
            time.sleep(PERIOD)
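
A minimal usage sketch for LoadDatastoreBackupsToBigQueryService, assuming a date string suitable for appending to a table id and a GCS prefix produced by a Datastore export; the bucket path, kind names and date format below are placeholders.

# Hypothetical usage sketch - the bucket path, kind names and date
# format are placeholders.
service = LoadDatastoreBackupsToBigQueryService(date="20240101")
all_loaded = service.load(
    source_uri="gs://example-backups/2024-01-01T00:00:00_12345",
    kinds=["User", "Order"])
if not all_loaded:
    logging.warning("Not every Datastore kind was loaded successfully.")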