def init_luigi_logging():
    """
    Configure logging from Luigi's own settings.

    The ``luigi.setup_logging`` module is tried first. If it cannot be
    imported, the function falls back to
    ``luigi_interface.setup_interface_logging`` when that attribute exists.
    If neither is available, a message is written to stderr.

    Raises:
        Exception: on the fallback path, when a logging configuration file
            is named in the Luigi settings but does not exist on disk.
    """
    env_params = luigi_interface.core()

    try:
        # Since Luigi 2.8.1
        logging_module = importlib.import_module('luigi.setup_logging')
        interface_logging = logging_module.InterfaceLogging
        # NOTE(review): presumably clears the "already configured" flag so
        # that setup() applies the configuration again -- confirm against
        # Luigi's InterfaceLogging implementation.
        interface_logging._configured = False
        interface_logging.setup(env_params)
        return
    except ImportError:
        pass

    if not hasattr(luigi_interface, 'setup_interface_logging'):
        # Otherwise
        sys.stderr.write("Cannot configure logger.")
        return

    # Before Luigi 2.8.1
    conf_path = env_params.logging_conf_file
    conf_is_missing = conf_path != '' and not os.path.exists(conf_path)
    if conf_is_missing:
        raise Exception(
            "Error: Unable to locate specified logging configuration file!"
        )

    # The 'no_configure_logging' option in the [core] section disables setup.
    skip_setup = configuration.get_config().getboolean(
        'core', 'no_configure_logging', False)
    if skip_setup:
        return
    luigi_interface.setup_interface_logging(conf_path, env_params.log_level)
def main():
    """
    Validate the recorded update markers, then process every new export date.

    For each date returned by ``mongo_get_update_markers()`` the function
    checks that all bulk tasks are recorded (``MongoDeleteTask`` is not
    required for the configured full export date). It then queues
    ``MongoDeleteTask`` for every export file date that has no update marker
    yet and runs it through a ``BulkWorker``.

    Raises:
        AssertionError: if any date in the update markers lacks one of the
            required bulk tasks.
    """
    update_markers = mongo_get_update_markers()

    # Make sure the updates have all mongo classes
    bulk_tasks = [
        MongoCollectionIndexTask,
        MongoCollectionEventTask,
        MongoCatalogueTask,
        MongoTaxonomyTask,
        # MongoMultimediaTask,
        MongoSiteTask,
        UnpublishTask,
        MongoDeleteTask
    ]

    def _get_task_names(tasks):
        """
        Return the task family name of every task class in ``tasks``.

        The classes are instantiated because the family name, not the class
        name, is needed (MongoDeleteTask => DeleteTask).

        @param tasks: iterable of task classes accepting a ``date`` argument
        @return: list of unicode task family names
        """
        return [unicode(task(date=0).task_family) for task in tasks]

    full_export_date = int(config.get('keemu', 'full_export_date'))

    for date, update_marker in update_markers.iteritems():
        # If this is the full export date, MongoDeleteTask is not required
        if full_export_date and date == full_export_date:
            required_tasks = [
                task for task in bulk_tasks if task is not MongoDeleteTask
            ]
        else:
            required_tasks = bulk_tasks
        bulk_task_names = _get_task_names(required_tasks)

        # Every date must have all the bulk tasks. This is raised explicitly
        # (rather than via ``assert``) so the check still runs under
        # ``python -O``; the exception type and message are unchanged.
        # Sorting keeps the message deterministic.
        missing_tasks = sorted(set(bulk_task_names) - set(update_marker))
        if missing_tasks:
            raise AssertionError(
                'There are missing mongo tasks for date %s: %s' % (date, missing_tasks)
            )

    # Get a list of all export files to process. Membership is tested on the
    # dict itself: on Python 2, ``.keys()`` builds a new list for every test.
    export_dates = [
        d for d in get_export_file_dates() if d not in update_markers
    ]

    # Configure luigi's interface logging before creating scheduler and worker
    setup_interface_logging()

    sch = scheduler.CentralPlannerScheduler()
    w = BulkWorker(scheduler=sch)

    # NOTE(review): the original indentation of this section was not visible;
    # run() is assumed to execute once per export date and stop() once at the
    # end -- confirm.
    try:
        for export_date in export_dates:
            log.info('Processing date %s', export_date)
            # We only need to call the mongo delete task, as all other tasks are a requirement
            # NB: This doesn't delete anything from CKAN - if that's needed change this to DeleteTask
            w.add(MongoDeleteTask(date=export_date, force=True))
            w.run()
    finally:
        # Stop the worker even when a run raises.
        w.stop()
# NOTE(review): source(), schema() and configuration() take ``self`` (and
# source() reads ``self.day``); they are presumably methods of a task class
# whose header lies before this excerpt -- confirm their placement.
def source(self):
    """Return the ``path`` of ``target.storage_mail_path(self.day)``."""
    return target.storage_mail_path(self.day).path


def schema(self):
    """
    Return the column definitions as a list of dicts.

    Each dict carries the keys ``name``, ``type`` and ``mode``.
    """
    return [
        {"name": "datetime", "type": "timestamp", "mode": "nullable"},
        {"name": "name", "type": "string", "mode": "nullable"},
        # NOTE(review): an "email" column typed as integer looks suspicious;
        # left unchanged -- confirm against the actual data.
        {"name": "email", "type": "integer", "mode": "nullable"},
        {"name": "campaign", "type": "string", "mode": "nullable"},
        # The key used to be "integer" here, which left the column without a
        # "mode" entry, unlike every other field.
        {"name": "id", "type": "string", "mode": "nullable"},
    ]


def configuration(self):
    """Return the load options; the source format is CSV."""
    return {
        'sourceFormat': "CSV"
    }


class ErrorAll(luigi.WrapperTask):
    """Wrapper task that requires ``ErrorStorageToBigQuery`` for 2015-11-23."""

    def requires(self):
        return [
            ErrorStorageToBigQuery(datetime.date(2015, 11, 23))
        ]


if __name__ == "__main__":
    # Configure logging and the default client before handing over to luigi.
    setup_interface_logging('examples/logging.ini')
    load_default_client("examples", "examples")
    luigi.run()
def google_default_api():
    """
    Create the module-level ``gclient`` on first use and register it.

    Does nothing when ``gclient`` has already been set.
    """
    global gclient
    if gclient is not None:
        return
    # NOTE(review): the original indentation was not visible; registering the
    # default client is assumed to happen only when the client is first
    # created -- confirm.
    gclient = GCloudClient()
    gcore.set_default_client(gclient)


class AllExamples(luigi.WrapperTask):
    """Wrapper task that requires every example task listed in ``requires``."""

    def requires(self):
        example_tasks = [
            CopyAllLocalToStorage(),
            DataProcExamples(),
            CopyViaDataFlowToStorage(datetime.date(2015, 11, 23)),
            CopyBigQueryToStorage(datetime.date(2015, 11, 24)),
        ]
        return example_tasks


class DataProcExamples(luigi.WrapperTask):
    """Wrapper task that requires the two DataProc copy tasks."""

    def requires(self):
        dataproc_tasks = [
            DataProcPigCopy(datetime.date(2015, 11, 23)),
            DataProcSparkCopy(datetime.date(2015, 11, 24)),
        ]
        return dataproc_tasks


if __name__ == "__main__":
    # Load the default client first, then configure logging, then run luigi.
    load_default_client("examples", "examples")
    setup_interface_logging('examples/logging.ini')
    luigi.run()
# NOTE(review): this excerpt opens mid-definition. The ``return`` below is the
# tail of a method whose ``def`` line and enclosing class header are not
# visible here; it and the following run() presumably belong to that class.
return luigi.LocalTarget('/tmp/a-created')


def run(self):
    """Create this task's output file by shelling out to ``touch``."""
    os.system('touch "%s"' % self.output().path)


class DepTask(DependencyTriggeredTask):
    """Task that requires ``SimpleTask`` and creates ``/tmp/b-created``."""

    def requires(self):
        """Return the single upstream task, ``SimpleTask``."""
        return [SimpleTask()]

    def output(self):
        """Return the local target ``/tmp/b-created``."""
        return luigi.LocalTarget('/tmp/b-created')

    def run(self):
        """Create the output file by shelling out to ``touch``."""
        os.system('touch "%s"' % self.output().path)


if __name__ == '__main__':
    # Start from a clean slate: remove both marker files.
    os.system('rm -f /tmp/a-created /tmp/b-created')
    interface.setup_interface_logging()
    sch = scheduler.CentralPlannerScheduler()
    w = worker.Worker(scheduler=sch)
    # First pass: run SimpleTask, then DepTask.
    w.add(SimpleTask())
    w.run()
    w.add(DepTask())
    w.run()
    # Second pass: remove both marker files and schedule DepTask again.
    os.system('rm -f /tmp/a-created /tmp/b-created')
    w.add(DepTask())
    w.run()
    # Third pass: remove only /tmp/a-created and schedule DepTask again.
    os.system('rm -f /tmp/a-created')
    w.add(DepTask())
    w.run()
# NOTE(review): run() takes ``self`` and calls ``self.output()``; it is
# presumably a method of a task class whose header lies before this excerpt.
def run(self):
    """Create this task's output file by shelling out to ``touch``."""
    created_path = self.output().path
    os.system('touch "%s"' % created_path)


class DepTask(DependencyTriggeredTask):
    """Task that requires ``SimpleTask`` and creates ``/tmp/b-created``."""

    def requires(self):
        """Return the single upstream task, ``SimpleTask``."""
        upstream = SimpleTask()
        return [upstream]

    def output(self):
        """Return the local target ``/tmp/b-created``."""
        return luigi.LocalTarget('/tmp/b-created')

    def run(self):
        """Create the output file by shelling out to ``touch``."""
        created_path = self.output().path
        os.system('touch "%s"' % created_path)


if __name__ == '__main__':
    # Start from a clean slate: remove both marker files.
    os.system('rm -f /tmp/a-created /tmp/b-created')
    interface.setup_interface_logging()
    central_scheduler = scheduler.CentralPlannerScheduler()
    task_worker = worker.Worker(scheduler=central_scheduler)

    # First pass: run SimpleTask, then DepTask. Each task is instantiated
    # only when its turn comes.
    for task_cls in (SimpleTask, DepTask):
        task_worker.add(task_cls())
        task_worker.run()

    # Next passes: remove both marker files, then only /tmp/a-created,
    # scheduling DepTask again after each removal.
    for cleanup_command in ('rm -f /tmp/a-created /tmp/b-created',
                            'rm -f /tmp/a-created'):
        os.system(cleanup_command)
        task_worker.add(DepTask())
        task_worker.run()
def main():
    """
    Validate the recorded update markers, then process every new export date.

    For each date returned by ``mongo_get_update_markers()`` the function
    checks that all bulk tasks are recorded (``MongoDeleteTask`` is not
    required for the configured full export date). It then queues
    ``MongoDeleteTask`` for every export file date that has no update marker
    yet and runs it through a ``BulkWorker``.

    Raises:
        AssertionError: if any date in the update markers lacks one of the
            required bulk tasks.
    """
    update_markers = mongo_get_update_markers()

    # Make sure the updates have all mongo classes
    bulk_tasks = [
        MongoCollectionIndexTask,
        MongoCollectionEventTask,
        MongoCatalogueTask,
        MongoTaxonomyTask,
        # MongoMultimediaTask,
        MongoSiteTask,
        UnpublishTask,
        MongoDeleteTask
    ]

    def _get_task_names(tasks):
        """
        Return the task family name of every task class in ``tasks``.

        The classes are instantiated because the family name, not the class
        name, is needed (MongoDeleteTask => DeleteTask).

        @param tasks: iterable of task classes accepting a ``date`` argument
        @return: list of unicode task family names
        """
        return [unicode(task(date=0).task_family) for task in tasks]

    full_export_date = int(config.get('keemu', 'full_export_date'))

    for date, update_marker in update_markers.iteritems():
        # If this is the full export date, MongoDeleteTask is not required
        if full_export_date and date == full_export_date:
            required_tasks = [
                task for task in bulk_tasks if task is not MongoDeleteTask
            ]
        else:
            required_tasks = bulk_tasks
        bulk_task_names = _get_task_names(required_tasks)

        # Every date must have all the bulk tasks. This is raised explicitly
        # (rather than via ``assert``) so the check still runs under
        # ``python -O``; the exception type and message are unchanged.
        # Sorting keeps the message deterministic.
        missing_tasks = sorted(set(bulk_task_names) - set(update_marker))
        if missing_tasks:
            raise AssertionError(
                'There are missing mongo tasks for date %s: %s' % (date, missing_tasks)
            )

    # Get a list of all export files to process. Membership is tested on the
    # dict itself: on Python 2, ``.keys()`` builds a new list for every test.
    export_dates = [
        d for d in get_export_file_dates() if d not in update_markers
    ]

    # Configure luigi's interface logging before creating scheduler and worker
    setup_interface_logging()

    sch = scheduler.CentralPlannerScheduler()
    w = BulkWorker(scheduler=sch)

    # NOTE(review): the original indentation of this section was not visible;
    # run() is assumed to execute once per export date and stop() once at the
    # end -- confirm.
    try:
        for export_date in export_dates:
            log.info('Processing date %s', export_date)
            # We only need to call the mongo delete task, as all other tasks are a requirement
            # NB: This doesn't delete anything from CKAN - if that's needed change this to DeleteTask
            w.add(MongoDeleteTask(date=export_date, force=True))
            w.run()
    finally:
        # Stop the worker even when a run raises.
        w.stop()