def startMapReduce(self, json_data):
    """Store the seeding configuration and launch the mapper job.

    Args:
        json_data: value saved in the ``json`` property of a newly created
            DataSeederConfigurationSheet.

    Returns:
        The result of the start_map call.
    """
    sheet = DataSeederConfigurationSheet(json=json_data)
    # Save first; the string form of the sheet's key is handed to
    # start_map as the only reader parameter.
    sheet.put()
    sheet_key = str(sheet.key())

    return start_map(
        self.MAPPER_NAME,
        self.HANDLER_SPEC,
        self.READER_SPEC,
        {'configuration_sheet_key': sheet_key},
        self.SHARD_COUNT,
        queue_name=self.QUEUE_NAME,
    )
def startMapReduce(self, json):
    """Save the given configuration and kick off the mapper over it.

    Args:
        json: value saved in the ``json`` property of a newly created
            DataSeederConfigurationSheet. (The name shadows the ``json``
            module inside this method; the module is not used here.)

    Returns:
        The result of the start_map call.
    """
    stored_sheet = DataSeederConfigurationSheet(json=json)
    stored_sheet.put()

    # The reader is told which sheet to process via its stringified key.
    mapper_params = dict(configuration_sheet_key=str(stored_sheet.key()))

    return start_map(
        self.MAPPER_NAME,
        self.HANDLER_SPEC,
        self.READER_SPEC,
        mapper_params,
        self.SHARD_COUNT,
        queue_name=self.QUEUE_NAME,
    )
def split_input(cls, mapper_spec):
    """Returns a list of input readers for the input spec.

    The configuration sheet named by the mapper parameters is decoded as
    JSON and iterated; every element is re-encoded, stored in its own
    DataSeederConfigurationSheet, and paired with one input reader.

    Args:
        mapper_spec: The MapperSpec for this InputReader.

    Returns:
        A list of InputReaders, one per element of the decoded
        configuration sheet.

    Raises:
        BadReaderParamsError: required parameters are missing or invalid,
            or no configuration sheet exists for the given key.
    """
    if mapper_spec.input_reader_class() != cls:
        raise BadReaderParamsError("Input reader class mismatch")

    params = mapper_spec.params
    if cls.CONFIGURATION_SHEET_KEY_PARAM not in params:
        raise BadReaderParamsError("Missing mapper parameter '%s'" %
                                   cls.CONFIGURATION_SHEET_KEY_PARAM)

    configuration_sheet_key = params[cls.CONFIGURATION_SHEET_KEY_PARAM]
    configuration_sheet = DataSeederConfigurationSheet.get(
        Key(configuration_sheet_key))
    # NOTE(review): assumes get() yields None for an unknown key (datastore
    # Model.get behaviour); report that as a bad parameter rather than
    # failing later with an AttributeError on None.
    if configuration_sheet is None:
        raise BadReaderParamsError(
            "No configuration sheet found for key '%s'" %
            configuration_sheet_key)

    data = simplejson.loads(configuration_sheet.json)

    shards = []
    for model in data:
        # Each model gets its own stored sheet so a reader can be given
        # just that sheet's key.
        model_json = simplejson.dumps(model)
        model_configuration_sheet = DataSeederConfigurationSheet(
            json=model_json)
        model_configuration_sheet.put()
        key = str(model_configuration_sheet.key())
        # Reader arguments: sheet key, 0, and the model's 'number' entry
        # (presumably a start/end range of entities to seed -- confirm
        # against the reader's constructor).
        shards.append(cls(key, 0, int(model['number'])))

    return shards
def seed_model(configuration_sheet_key):
    """Create and store one model described by a configuration sheet.

    Args:
        configuration_sheet_key: key passed to
            DataSeederConfigurationSheet.get to load the sheet whose JSON
            describes the model.
    """
    sheet = DataSeederConfigurationSheet.get(configuration_sheet_key)
    model_data = simplejson.loads(sheet.json)

    # References are processed before validation, and the model is only
    # built once validation has run.
    seeder_logic.processReferences(model_data)
    seeder_logic.validateModel(model_data)

    entity = seeder_logic.getModel(model_data)
    db.put(entity)

    # Back references are handled after the entity has been stored.
    processBackReferences(entity, model_data)
def split_input(cls, mapper_spec):
    """Returns a list of input readers for the input spec.

    Loads the configuration sheet named by the mapper parameters, decodes
    its JSON, and for every element stores a separate
    DataSeederConfigurationSheet and creates one input reader for it.

    Args:
        mapper_spec: The MapperSpec for this InputReader.

    Returns:
        A list of InputReaders, one per element of the decoded
        configuration sheet.

    Raises:
        BadReaderParamsError: required parameters are missing or invalid,
            or no configuration sheet exists for the given key.
    """
    if mapper_spec.input_reader_class() != cls:
        raise BadReaderParamsError("Input reader class mismatch")

    params = mapper_spec.params
    if cls.CONFIGURATION_SHEET_KEY_PARAM not in params:
        raise BadReaderParamsError("Missing mapper parameter '%s'" %
                                   cls.CONFIGURATION_SHEET_KEY_PARAM)

    configuration_sheet_key = params[cls.CONFIGURATION_SHEET_KEY_PARAM]
    configuration_sheet = DataSeederConfigurationSheet.get(
        Key(configuration_sheet_key))
    # NOTE(review): assumes get() yields None for an unknown key (datastore
    # Model.get behaviour); surface that as a parameter error instead of an
    # AttributeError on None below.
    if configuration_sheet is None:
        raise BadReaderParamsError(
            "No configuration sheet found for key '%s'" %
            configuration_sheet_key)

    shards = []
    for model in json.loads(configuration_sheet.json):
        # Store each model's description on its own sheet so the reader
        # only needs that sheet's key.
        model_configuration_sheet = DataSeederConfigurationSheet(
            json=json.dumps(model))
        model_configuration_sheet.put()
        key = str(model_configuration_sheet.key())
        # Reader arguments: sheet key, 0, and the model's 'number' entry
        # (presumably a start/end range of entities to seed -- confirm
        # against the reader's constructor).
        shards.append(cls(key, 0, int(model['number'])))

    return shards