def __validate_restoration_date(date):
    try:
        datetime.strptime(date, '%Y-%m-%d')
    except ValueError:
        raise ParameterValidationException(
            "Wrong date value format for parameter 'restoration_date'. "
            "Should be 'YYYY-mm-dd'")
def __get_backup_entities(backup_items):
    try:
        backup_keys = [i.backup_key for i in backup_items]
        for key, entity in \
                zip(backup_keys,
                    ndb.get_multi(backup_keys,
                                  use_cache=False,
                                  use_memcache=False)):
            if not entity:
                error_message = "Backup entity (key={}) doesn't exist " \
                                "in datastore.".format(key)
                raise ParameterValidationException(error_message)
            yield entity
    except BadRequestError, e:
        error_message = "Couldn't obtain backup entity in datastore. " \
                        "Error: \n{}".format(e.message)
        raise ParameterValidationException(error_message)
def __get_source_table_entity(backup_entity):
    source_table_entity = Table.get_table_from_backup(backup_entity)
    if not source_table_entity:
        error_message = "Backup ancestor doesn't exist: '{}:{}'." \
            .format(backup_entity.dataset_id, backup_entity.table_id)
        raise ParameterValidationException(error_message)
    return source_table_entity
def parse_url_safe_key(url_safe_key):
    try:
        return ndb.Key(urlsafe=url_safe_key)
    except (TypeError, ProtocolBufferDecodeError), e:
        raise ParameterValidationException(
            "Unable to parse url safe key: {}, error type: {}, "
            "error message: {}".format(url_safe_key,
                                       type(e).__name__,
                                       e.message))
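
# A minimal round-trip sketch for parse_url_safe_key. The model kind name
# 'ExampleBackup' is hypothetical and used for illustration only.
key = ndb.Key('ExampleBackup', 123)
parsed_key = parse_url_safe_key(key.urlsafe())
assert parsed_key == key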
def test(self, big_query_table_metadata, table_entity):
    if big_query_table_metadata.is_daily_partitioned() \
            and not big_query_table_metadata.is_partition():
        raise ParameterValidationException(
            "Partition id is required for partitioned table "
            "in on-demand mode")
    table_validation_status, table_validation_message = \
        self._is_possible_to_copy_table(big_query_table_metadata)
    if not table_validation_status:
        if table_validation_message == "Table not found":
            raise NotFoundException(table_validation_message)
        else:
            raise ParameterValidationException(table_validation_message)
    logging.info("Performing on-demand backup for %s. "
                 "It is performed without checking "
                 "if table already has up to date backup",
                 big_query_table_metadata.table_reference())
    return True
def parse_bq_key(backup_bq_key):
    try:
        key_parts = backup_bq_key.decode('base64') \
            .replace("\"", "").replace(" ", "").split(",")
        if len(key_parts) != 4:
            raise ParameterValidationException(
                "Unable to parse backup BQ key: {}, "
                "key doesn't consist of 4 parts".format(backup_bq_key))
        table_kind = key_parts[0]
        table_id = int(key_parts[1])
        backup_kind = key_parts[2]
        backup_id = int(key_parts[3])
        return ndb.Key(backup_kind, backup_id,
                       parent=ndb.Key(table_kind, table_id))
    except (Error, ValueError), e:
        raise ParameterValidationException(
            "Unable to parse backup BQ key: {}, error type: {}, "
            "error message: {}".format(backup_bq_key,
                                       type(e).__name__,
                                       e.message))
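
# A minimal parsing sketch for parse_bq_key, assuming the input is the
# base64 form of a text like '"Table", 123456, "Backup", 789' (two kind/id
# pairs separated by commas, as implied by the 4-part split above). The
# literal values are hypothetical and for illustration only.
encoded_key = '"Table", 123456, "Backup", 789'.encode('base64')
backup_key = parse_bq_key(encoded_key)
# Expected result: ndb.Key('Table', 123456, 'Backup', 789)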
def validate_parameters(self, project_id, dataset_id, target_project_id,
                        target_dataset_id, max_partition_days):
    if target_project_id is None:
        raise ParameterValidationException(
            "Required target project id parameter is None")
    any_backup = self.__get_backup(project_id, dataset_id,
                                   max_partition_days)
    self.__validate_locations(any_backup, target_project_id,
                              target_dataset_id)
def __validate_locations(self, any_backup, target_project_id,
                         target_dataset_id):
    target_location = self.__get_target_dataset_location(
        target_project_id, target_dataset_id)
    if target_location is None:
        return
    backup_location = self.__get_backup_dataset_location(any_backup)
    if target_location != backup_location:
        raise ParameterValidationException(
            "Target dataset already exists and has a different location "
            "than the backup dataset")
def create(cls, date, location, project):
    """
    :return: Dataset id for the specified project and location in
        'year_week_location_project' format. If date, location or project
        is not specified, throws ParameterValidationException.
    """
    if date is None:
        raise ParameterValidationException(
            'No date specified, attribute is mandatory.')
    if location is None:
        raise ParameterValidationException(
            'No location specified, attribute is mandatory.')
    if project is None:
        raise ParameterValidationException(
            'No project id specified, attribute is mandatory.')
    year = str(date.year)
    week = format(date.isocalendar()[1], '02')
    return '_'.join((year, week, location, project)).replace('-', '_')
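
# A minimal usage sketch, assuming this classmethod lives on a dataset id
# creator class (referred to here as DatasetIdCreator, a hypothetical name).
# The values below are illustrative only.
from datetime import date

dataset_id = DatasetIdCreator.create(date(2017, 12, 1), 'EU',
                                     'example-proj-name')
# 1 Dec 2017 falls in ISO week 48, so the result is
# '2017_48_EU_example_proj_name' (dashes replaced with underscores).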
def create(cls, project_id, dataset_id, table_id, timestamp,
           partition_id=None):
    """
    :return: Backup id for the specified project, dataset, table, timestamp
        and partition (optional). If the created id exceeds 1024 characters,
        the last 24 characters are replaced with '_' and an 18-20 character
        hash. If project, dataset, table or timestamp is not specified,
        throws ParameterValidationException.
    """
    if project_id is None:
        raise ParameterValidationException(
            'No project specified, attribute is mandatory.')
    if dataset_id is None:
        raise ParameterValidationException(
            'No dataset specified, attribute is mandatory.')
    if table_id is None:
        raise ParameterValidationException(
            'No table specified, attribute is mandatory.')
    if timestamp is None:
        raise ParameterValidationException(
            'No timestamp specified, attribute is mandatory.')
    name = '_'.join(
        (timestamp.strftime("%Y%m%d_%H%M%S"),
         project_id.replace('-', '_'),
         dataset_id,
         table_id)) + ('' if partition_id is None
                       else '_partition_' + str(partition_id))
    if len(name) > 1024:
        # checksum returns long int with a sign, 18-20 characters long
        checksum = str(hash(name)).replace('-', '_')
        return '_'.join((name[:1000], checksum))[:1024]
    else:
        return name
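
# A minimal usage sketch, assuming this classmethod lives on a backup id
# creator class (referred to here as BackupIdCreator, a hypothetical name).
# The values below are illustrative only.
from datetime import datetime

backup_id = BackupIdCreator.create('example-proj-name', 'example_dataset',
                                   'example_table',
                                   datetime(2017, 12, 1, 12, 0, 0),
                                   partition_id='20171201')
# Expected result:
# '20171201_120000_example_proj_name_example_dataset_example_table_partition_20171201'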
def start(table_reference):
    big_query_table_metadata = BigQueryTableMetadata.get_table_by_reference(
        table_reference)
    if big_query_table_metadata.is_daily_partitioned() \
            and not big_query_table_metadata.is_partition():
        raise ParameterValidationException(
            "Partition id is required for partitioned table "
            "in on-demand mode")
    BackupProcess(
        table_reference=table_reference,
        big_query=BigQuery(),
        big_query_table_metadata=big_query_table_metadata,
        should_backup_predicate=OnDemandBackupPredicate()).start()
def __get_backup(self, project_id, dataset_id, max_partition_days):
    logging.info(
        "Getting backups for project '%s' for dataset '%s'"
        " with max_partition_days '%s'",
        project_id, dataset_id, max_partition_days)
    table_entities_found = False
    tables = self.__get_tables(project_id, dataset_id, max_partition_days)
    for table in tables:
        table_entities_found = True
        table_backup = table.last_backup
        if table_backup is not None:
            return table_backup
    if not table_entities_found:
        # @refactor: this should be NotFoundException that is mapped to 404,
        # not ParameterValidationException that is mapped to 400
        raise ParameterValidationException(
            "No tables were found in Datastore for project {}, dataset {}"
            .format(project_id, dataset_id))
    # @refactor: same as above
    raise ParameterValidationException(
        "No backups were found in Datastore for project {}, dataset {}"
        .format(project_id, dataset_id))
def validate_restore_request_params(
        source_project_id=None, source_dataset_id=None,
        target_project_id=None, target_dataset_id=None,
        create_disposition=None, write_disposition=None):
    try:
        if source_project_id:
            validate_project_id(source_project_id)
        if source_dataset_id:
            validate_dataset_id(source_dataset_id)
        if target_project_id:
            validate_project_id(target_project_id)
        if target_dataset_id:
            validate_dataset_id(target_dataset_id)
        if write_disposition:
            validate_write_disposition(write_disposition)
        if create_disposition:
            validate_create_disposition(create_disposition)
    except (WrongDatasetNameException, WrongProjectNameException,
            WrongWriteDispositionException,
            WrongCreateDispositionException), e:
        raise ParameterValidationException(e.message)
class TestOnDemandTableBackupHandler(unittest.TestCase):
    def setUp(self):
        patch('googleapiclient.discovery.build').start()
        app = on_demand_table_backup_handler.app
        self.under_test = webtest.TestApp(app)
        self.testbed = testbed.Testbed()
        self.testbed.activate()
        self.testbed.init_memcache_stub()

    def tearDown(self):
        self.testbed.deactivate()
        patch.stopall()

    @patch.object(OnDemandTableBackup, 'start')
    def test_on_demand_request_for_partitioned_table_is_properly_parsed(
            self, on_demand_table_backup_start):
        # given
        table_reference = TableReference('example-proj-name',
                                         'example-dataset-name',
                                         'example-table-name',
                                         '20171201')
        url = '/tasks/backups/on_demand/table/{}/{}/{}/{}' \
            .format(table_reference.get_project_id(),
                    table_reference.get_dataset_id(),
                    table_reference.get_table_id(),
                    table_reference.get_partition_id())

        # when
        self.under_test.get(url)

        # then
        on_demand_table_backup_start.assert_called_with(table_reference)

    @patch.object(OnDemandTableBackup, 'start')
    def test_on_demand_request_for_non_partitioned_table_is_properly_parsed(
            self, on_demand_table_backup_start):
        # given
        table_reference = TableReference('example-proj-name',
                                         'example-dataset-name',
                                         'example-table-name')
        url = '/tasks/backups/on_demand/table/{}/{}/{}'.format(
            table_reference.get_project_id(),
            table_reference.get_dataset_id(),
            table_reference.get_table_id())

        # when
        self.under_test.get(url)

        # then
        on_demand_table_backup_start.assert_called_with(table_reference)

    @patch.object(OnDemandTableBackup, 'start',
                  side_effect=ParameterValidationException("error msg"))
    def test_on_demand_request_for_partitioned_but_without_passing_partition_should_cause_400(
            self, on_demand_table_backup_start):
        # given
        table_reference = TableReference('example-proj-name',
                                         'example-dataset-name',
                                         'example-table-name')
        url = '/tasks/backups/on_demand/table/{}/{}/{}'.format(
            table_reference.get_project_id(),
            table_reference.get_dataset_id(),
            table_reference.get_table_id())

        # when
        response = self.under_test.get(url, expect_errors=True)

        # then
        self.assertEquals(400, response.status_int)