def run(self):
    """
    Run sanity check against the MongoDB collection.

    Verifies that the collection holds at least ``min_total_results``
    documents in which every field in ``non_null_fields`` is non-null,
    then spot-checks specific ids, and finally writes the output token
    to mark completion.

    Raises:
        MongoDBTaskException: if fewer than ``min_total_results``
            matching documents are found.
    """
    col = self._get_collection()

    # check that the collection contains at least min_total_results entries
    fields = []
    for field in self.non_null_fields:
        # BUG FIX: the original appended the set literal {field, None},
        # which is not a valid MongoDB filter document. A "field is
        # non-null" condition is expressed as {field: {"$ne": None}}.
        fields.append({field: {'$ne': None}})
    if fields:
        limit = self.min_total_results
        # count(True) respects the limit, so we never scan more
        # documents than we need to prove the threshold is met
        num_results = col.find({"$and": fields}).limit(limit).count(True)
        if num_results < limit:
            exception_string = 'Sanity check failed: only found %s / %s expected results in collection %s' % \
                (num_results, limit, self.collection_name())
            logger.warn(exception_string)
            raise MongoDBTaskException(exception_string)

    # do a check on specific ids
    self._sanity_check_ids(col)

    # write token to note completion
    target_factory.write_file(self.output_token())
def run(self):
    """
    Run sanity check against the DynamoDB table.

    Scans for at least ``min_total_results`` items whose
    ``non_null_fields`` are all populated, spot-checks specific ids,
    and writes the output token on success.

    Raises:
        DynamoDBTaskException: if too few matching items are found.
    """
    client = DynamoDBClient()
    table = client.get_table(self.table_name())

    # build a scan that only returns items with every required field set
    limit = self.min_total_results
    scan_kwargs = {'limit': limit}
    for field in self.non_null_fields:
        scan_kwargs['%s__null' % field] = False
    matching = list(table.scan(**scan_kwargs))
    num_results = len(matching)
    if num_results < limit:
        exception_string = 'Sanity check failed: only found %s / %s expected results in table %s' % \
            (num_results, limit, self.table_name())
        logger.warn(exception_string)
        raise DynamoDBTaskException(exception_string)

    # do a check on specific ids
    self._sanity_check_ids(table)

    # write token to note completion
    target_factory.write_file(self.output_token())
def run(self):
    """
    Run a Mortar job using the Mortar API.

    This method writes out several "tokens" as it executes to ensure
    idempotence:

    * `running_token`: This token indicates that the job is currently
      running. If a token exists at this path, Luigi will poll the
      currently running job instead of starting a new one.
    * `success_token`: This token indicates that the job has already
      completed successfully. If this token exists, Luigi will not
      rerun the task.
    """
    api = self._get_api()
    if self.running_token().exists():
        # a previous invocation already launched a job; resume
        # polling that job instead of starting a duplicate
        job_id = self.running_token().open().read().strip()
    else:
        job_id = self._run_job(api)
        # to guarantee idempotence, record that the job is running
        target_factory.write_file(self.running_token(), text=job_id)
    job = self._poll_job_completion(api, job_id)
    final_job_status_code = job.get('status_code')
    # record that the job has finished
    self.running_token().remove()
    if final_job_status_code != jobs.STATUS_SUCCESS:
        # on failure, remove partial output so a rerun starts clean
        for out in self.script_output():
            logger.info('Mortar script failed: removing incomplete data in %s' % out)
            out.remove()
        raise Exception('Mortar job_id [%s] failed with status_code: [%s], error details: %s' % (job_id, final_job_status_code, job.get('error')))
    else:
        # success: write the token that prevents Luigi from rerunning
        target_factory.write_file(self.success_token())
        logger.info('Mortar job_id [%s] completed successfully' % job_id)
def run(self):
    """
    Run a sanity check on the table, ensuring that data was loaded
    appropriately.

    Raises a :py:class:`DBMSTaskException` if the sanity check fails.
    """
    # hold one connection reference so the cursor and the close()
    # both operate on the same connection object (repeated
    # get_connection() calls might hand back different connections)
    connection = self.get_connection()
    cur = connection.cursor()
    try:
        overall_query = self._create_overall_query()
        cur.execute(overall_query)
        rows = cur.fetchall()
        if len(rows) < self.min_total_results:
            # FIX: this is a relational table, not a Mongo collection —
            # the message now matches table_name()
            exception_string = 'Sanity check failed: only found %s / %s expected results in table %s' % \
                (len(rows), self.min_total_results, self.table_name())
            logger.warn(exception_string)
            raise DBMSTaskException(exception_string)

        # do a check on specific ids
        self._sanity_check_ids()
    finally:
        # always release DB resources, even when a check raises
        cur.close()
        connection.close()

    # write token to note completion
    target_factory.write_file(self.output_token())
def run(self):
    """
    Check that the API responds correctly, then mark this task done.
    """
    self._verify_api()
    # success: drop a token at the first output location so Luigi
    # knows this task finished
    completion_token = self.output()[0]
    target_factory.write_file(completion_token)
def run(self):
    """
    Execute the table-creation query against the database, commit it,
    and write the output token to record completion.
    """
    conn = self.get_connection()
    cursor = conn.cursor()
    ddl = self.create_table_query()
    cursor.execute(ddl)
    conn.commit()
    cursor.close()
    conn.close()
    # token marks the table as created
    target_factory.write_file(self.output_token())
def run(self):
    """
    Update DynamoDB table throughput.
    """
    client = DynamoDBClient()
    desired_throughput = {
        'read': self.read_throughput,
        'write': self.write_throughput,
    }
    client.update_throughput(self.table_name(), desired_throughput)
    # write token to note completion
    target_factory.write_file(self.output_token())
def run(self):
    """
    Create database table.
    """
    conn = self.get_connection()
    cursor = conn.cursor()
    cursor.execute(self._create_table_query())
    conn.commit()
    cursor.close()
    conn.close()
    # write token to acknowledge table creation
    target_factory.write_file(self.output_token())
def run(self):
    """
    Apply the configured read/write throughput to the DynamoDB table,
    then write the completion token.
    """
    DynamoDBClient().update_throughput(
        self.table_name(),
        {'read': self.read_throughput, 'write': self.write_throughput})
    # note completion
    target_factory.write_file(self.output_token())
def run(self):
    """
    Create the DynamoDB table.
    """
    client = DynamoDBClient()
    # key schema: hash key is mandatory, range key is optional
    key_schema = [HashKey(self.hash_key, data_type=self.hash_key_type)]
    if self.range_key:
        key_schema.append(
            RangeKey(self.range_key, data_type=self.range_key_type))
    capacity = {
        'read': self.read_throughput,
        'write': self.write_throughput,
    }
    if self.indexes:
        client.create_table(self.table_name(), key_schema, capacity,
                            indexes=self.generate_indexes())
    else:
        client.create_table(self.table_name(), key_schema, capacity)
    # write token to note completion
    target_factory.write_file(self.output_token())
def run(self):
    """
    Run an R script using the Rscript program.

    Pipes stdout and stderr back to the logging facility.
    """
    cmd = self._subprocess_command()
    proc = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE,
                            stderr=subprocess.STDOUT, bufsize=1)
    # stream the combined stdout/stderr into the log as it arrives
    for raw_line in iter(proc.stdout.readline, b''):
        logger.info(raw_line)
    proc.communicate()
    if proc.returncode != 0:
        raise RuntimeError('%s returned non-zero error code %s'
                           % (self._subprocess_command(), proc.returncode))
    target_factory.write_file(self.output_token())
def run(self):
    """
    Execute the configured shell command, record its output on the
    task, and write the output token when no stderr was produced.
    """
    cmd = self.subprocess_commands()
    proc = subprocess.Popen(
        cmd,
        shell=True,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE)
    stdout_data, stderr_data = proc.communicate()

    # generate output message
    message = self._create_message(cmd, stdout_data, stderr_data)
    self._check_error(stderr_data, message)
    self.cmd_output = {
        'cmd': cmd,
        'stdout': stdout_data,
        'stderr': stderr_data,
    }
    logger.debug('%s - output:%s' % (self.__class__.__name__, message))
    # only mark the task complete when the command wrote no stderr
    if stderr_data == '':
        target_factory.write_file(self.output_token())
def run(self):
    """
    Run an R script using the Rscript program.

    Pipes stdout and stderr back to the logging facility.
    """
    command = self._subprocess_command()
    child = subprocess.Popen(command,
                             shell=True,
                             stdout=subprocess.PIPE,
                             stderr=subprocess.STDOUT,
                             bufsize=1)
    # relay each line of combined output to the logger until EOF
    line = child.stdout.readline()
    while line != b'':
        logger.info(line)
        line = child.stdout.readline()
    child.communicate()
    return_code = child.returncode
    if return_code != 0:
        raise RuntimeError('%s returned non-zero error code %s'
                           % (self._subprocess_command(), return_code))
    target_factory.write_file(self.output_token())
def run(self):
    """
    Run the mortar job.

    Writes a `running_token` (containing the job id) while the job is
    in flight so a rerun polls the existing job instead of launching a
    duplicate, and a `success_token` on completion so Luigi does not
    rerun the task.
    """
    api = self._get_api()
    if self.running_token().exists():
        # resume polling a job launched by a previous invocation
        job_id = self.running_token().open().read().strip()
    else:
        job_id = self._run_job(api)
        # to guarantee idempotence, record that the job is running
        target_factory.write_file(self.running_token(), text=job_id)
    job = self._poll_job_completion(api, job_id)
    final_job_status_code = job.get('status_code')
    # record that the job has finished
    self.running_token().remove()
    if final_job_status_code != jobs.STATUS_SUCCESS:
        # remove partial output so a rerun starts from a clean slate
        for out in self.script_output():
            logger.info('Mortar script failed: removing incomplete data in %s' % out)
            out.remove()
        raise Exception('Mortar job_id [%s] failed with status_code: [%s], error details: %s' % (job_id, final_job_status_code, job.get('error')))
    else:
        # success: token prevents Luigi from rerunning this task
        target_factory.write_file(self.success_token())
        logger.info('Mortar job_id [%s] completed successfully' % job_id)
def run(self):
    """
    Create the DynamoDB table.
    """
    dynamodb_client = DynamoDBClient()
    # required hash key, plus an optional range key
    schema = [HashKey(self.hash_key, data_type=self.hash_key_type)]
    if self.range_key:
        schema.append(RangeKey(self.range_key,
                               data_type=self.range_key_type))
    provisioned = {'read': self.read_throughput,
                   'write': self.write_throughput}
    if self.indexes:
        dynamodb_client.create_table(
            self.table_name(), schema, provisioned,
            indexes=self._generate_indexes())
    else:
        dynamodb_client.create_table(
            self.table_name(), schema, provisioned)
    # write token to note completion
    target_factory.write_file(self.output_token())
def run(self):
    """
    Execute the configured shell command, capturing stdout, stderr and
    return code; write the output token when no stderr was produced.
    """
    cmd = self.subprocess_commands()
    proc = subprocess.Popen(
        cmd,
        shell=True,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE)
    out_data, err_data = proc.communicate()
    return_code = proc.returncode

    # generate output message
    message = self._create_message(cmd, out_data, err_data, return_code)
    self._check_error(return_code, err_data, message)
    self.cmd_output = {
        'cmd': cmd,
        'stdout': out_data,
        'stderr': err_data,
        'return_code': return_code,
    }
    logger.debug('%s - output:%s' % (self.__class__.__name__, message))
    # only mark the task complete when the command wrote no stderr
    if err_data == '':
        target_factory.write_file(self.output_token())
def run(self):
    """
    Run a Mortar job using the Mortar API.

    This method writes out several "tokens" as it executes to ensure
    idempotence:

    * `running_token`: This token indicates that the job is currently
      running. If a token exists at this path, Luigi will poll the
      currently running job instead of starting a new one.
    * `success_token`: This token indicates that the job has already
      completed successfully. If this token exists, Luigi will not
      rerun the task.
    """
    api = self._get_api()
    if self.running_token().exists():
        # resume polling a job launched by a previous invocation
        job_id = self.running_token().open().read().strip()
    else:
        job_id = self._run_job(api)
        # to guarantee idempotence, record that the job is running
        target_factory.write_file(self.running_token(), text=job_id)
    job = self._poll_job_completion(api, job_id)
    final_job_status_code = job.get('status_code')
    # record that the job has finished
    self.running_token().remove()
    if final_job_status_code != jobs.STATUS_SUCCESS:
        # remove partial output so a rerun starts from a clean slate
        for out in self.script_output():
            logger.info(
                'Mortar script failed: removing incomplete data in %s' % out)
            out.remove()
        raise Exception(
            'Mortar job_id [%s] failed with status_code: [%s], error details: %s' % (job_id, final_job_status_code, job.get('error')))
    else:
        # success: token prevents Luigi from rerunning this task
        target_factory.write_file(self.success_token())
        logger.info('Mortar job_id [%s] completed successfully' % job_id)
def run(self):
    """
    Set up the tables, then mark the task complete.
    """
    self._set_tables()
    # write an output token to S3 to confirm that we finished
    done_marker = self.output()[0]
    target_factory.write_file(done_marker)