def postProcessing(options):
    """
    Re-tags mygoblet.org course records from "Training Material" to "Event".

    Every record matching the selection conditions is read, the whole
    matching set is deleted, and each record whose link still resolves
    (according to util.existURL) is inserted again with
    resource_type ["Event"]. Only the fields listed in the insert below are
    carried over to the new record.

        * options {dict or None} specific configurations for initialization.
            ds_name: specific dataset/database to use with the DB manager.
              When options is None or has no 'ds_name', None is passed to
              the DB factory.

    NOTE(review): records whose link does not resolve, or whose insert
    fails, are not restored after the delete -- confirm this is intended.
    """
    # NOTE(review): this module defines postProcessing a second time further
    # down; the later definition is the one bound after import.
    init_logger()
    logger.info('> Executing ckan postprocessing... ')

    # Tolerate a missing options object, as main_options does.
    ds_name = None
    if options is not None and 'ds_name' in options:
        ds_name = options['ds_name']

    # Credentials are optional: on any failure reading them both stay None
    # and the DB manager is requested without user information.
    user = None
    passw = None
    try:
        config = ConfigParser.RawConfigParser()
        config.read('ConfigFile.properties')
        # Single tuple assignment so that user is never set without passw.
        user, passw = (
            config.get('AuthenticationSection', 'database.user'),
            config.get('AuthenticationSection', 'database.password'),
        )
    except Exception as e:
        logger.info("Not user info found, using anonymous user... ")
        logger.info(e)

    dbFactory = DBFactory()
    dbManager = dbFactory.get_default_db_manager_with_username(
        ds_name, user, passw)

    # We want to change all courses from mygoblet.org tagged as
    # Training Materials
    ckan_conditions = [
        ['AND', [
            ['EQ', 'link',
             'http://www.mygoblet.org//training-portal/courses/*'],
            ['EQ', 'resource_type', ["Training Material"]],
        ]],
    ]

    # NOTE(review): both counts are currently unused; the calls are kept so
    # the sequence of DB manager operations is unchanged.
    previous_count = dbManager.count_data_by_conditions(ckan_conditions)
    results = dbManager.get_data_by_conditions(ckan_conditions)

    # Delete all of them, and then insert them again modified, because there
    # is no update operation yet (to be implemented in AbstractManager).
    dbManager.delete_data_by_conditions(ckan_conditions)
    new_count = dbManager.count_data_by_conditions(ckan_conditions)

    # Guard against a manager that reports "no data" as None.
    if results is None:
        results = []

    numSuccess = 0
    for result in results:
        # Skip records whose link no longer resolves.
        if not util.existURL(result.get("link")):
            continue
        success = dbManager.insert_data({
            "title": result.get("title"),
            "description": result.get("description"),
            "field": result.get("field"),
            "source": result.get("source"),
            # Now they are Events, not Training Materials!
            "resource_type": ["Event"],
            "insertion_date": result.get("insertion_date"),
            "created": result.get("created"),
            "audience": result.get("audience"),
            "link": result.get("link"),
        })
        if success:
            numSuccess += 1

    logger.info('Changed ' + str(numSuccess) +
                ' mygoblet.org records tagged as Training Materials to Events')
    logger.info('< Finished ckan postprocessing')
def main_options(options):
    """
    Executes the main functionality of this script: it extracts information
    from iAnn events data and inserts some of it into the DB.

        * options {dict or None} specific configurations for initialization.
            ds_name: specific dataset/database to use with the DB manager
            delete_all_old_data {boolean} specifies if we should first delete
              the records whose 'source' equals get_source_field()
              (default False)
            registriesFromTime {datetime} time from which registries will be
              obtained; passed as is to get_iann_data (default None)
            updateRegistries {boolean} if we want to get new registries or
              not (default True)

    In this script we will insert these fields into each registry:
        "title" {string} Title for the event;
        "start" - Date the event starts;
        "end" - Date the event ends;
        "city" {string} City where the event is hosted;
        "country" {string} Country where the event is hosted;
        "field" {string} Branches of science in molecular biology.
        "provider" {string} Organization hosting the event;
        "link" {string} Link to the data registry.
        "source" Value returned by get_source_field();
        "resource_type" Value returned by get_resource_type_field();
        "insertion_date" Value returned by get_insertion_date_field();
        "created" {date} Date and time of creation of the original registry.
    See more eg: http://iann.pro/iann-web-services

    Only items whose link resolves (according to util.existURL) are inserted.
    """
    # NOTE(review): this module defines main_options a second time further
    # down; the later definition is the one bound after import.
    # IannDataLocking.lock()
    init_logger()

    ds_name = None
    delete_all_old_data = False
    registriesFromTime = None
    updateRegistries = True

    if options is not None:
        logger.info('>> Starting iann importing process... params: ')
        if 'ds_name' in options:
            ds_name = options['ds_name']
            # str() so a non-string ds_name cannot break the log line.
            logger.info('ds_name=' + str(ds_name))
        if 'delete_all_old_data' in options:
            delete_all_old_data = options['delete_all_old_data']
            logger.info('delete_all_old_data=' + str(delete_all_old_data))
        if 'registriesFromTime' in options:
            registriesFromTime = options['registriesFromTime']
            logger.info('registriesFromTime=' + str(registriesFromTime))
        if 'updateRegistries' in options:
            updateRegistries = options['updateRegistries']
            logger.info('updateRegistries=' + str(updateRegistries))
    else:
        logger.info('>> Starting iann importing process...')

    # Fetch the remote data before touching the DB.
    iann_data = None
    if updateRegistries:
        iann_data = get_iann_data(registriesFromTime)

    # Credentials are optional: on any failure reading them both stay None
    # and the DB manager is requested without user information.
    user = None
    passw = None
    try:
        config = ConfigParser.RawConfigParser()
        config.read('ConfigFile.properties')
        # Single tuple assignment so that user is never set without passw.
        user, passw = (
            config.get('AuthenticationSection', 'database.user'),
            config.get('AuthenticationSection', 'database.password'),
        )
    except Exception as e:
        logger.info("Not user info found, using anonymous user... ")
        logger.info(e)

    dbFactory = DBFactory()
    dbManager = dbFactory.get_default_db_manager_with_username(
        ds_name, user, passw)

    if delete_all_old_data:
        iann_conditions = [['EQ', 'source', get_source_field()]]
        previous_count = dbManager.count_data_by_conditions(iann_conditions)
        dbManager.delete_data_by_conditions(iann_conditions)
        new_count = dbManager.count_data_by_conditions(iann_conditions)
        # Either count may be None; only report when both are available.
        if previous_count is not None and new_count is not None:
            logger.info('Deleted ' + str(previous_count - new_count) +
                        ' registries')

    if iann_data is not None:
        numSuccess = 0
        for result in iann_data:
            if result is None:
                continue
            # Fixed: this used the undefined name `record` instead of
            # `result`, raising NameError on the first non-None item.
            if not util.existURL(get_link(result)):
                continue
            success = dbManager.insert_data({
                "title": get_title(result),
                "start": get_start(result),
                "end": get_end(result),
                "city": get_city(result),
                "country": get_country(result),
                "field": get_field(result),
                "provider": get_provider(result),
                "link": get_link(result),
                "source": get_source_field(),
                "resource_type": get_resource_type_field(),
                "insertion_date": get_insertion_date_field(),
                "created": get_creation_date_field(result),
            })
            if success:
                numSuccess += 1
        logger.info('Inserted ' + str(numSuccess) + ' new registries')

    logger.info('<< Finished iann importing process.')
def postProcessing(options):
    """
    Re-tags mygoblet.org course records from "Training Material" to "Event".

    Every record matching the selection conditions is read, the whole
    matching set is deleted, and each record whose link still resolves
    (according to util.existURL) is inserted again with
    resource_type ["Event"]. Only the fields listed in the insert below are
    carried over to the new record.

        * options {dict or None} specific configurations for initialization.
            ds_name: specific dataset/database to use with the DB manager.
              When options is None or has no 'ds_name', None is passed to
              the DB factory.

    NOTE(review): records whose link does not resolve, or whose insert
    fails, are not restored after the delete -- confirm this is intended.
    """
    init_logger()
    logger.info('> Executing ckan postprocessing... ')

    # Tolerate a missing options object, as main_options does.
    ds_name = None
    if options is not None and 'ds_name' in options:
        ds_name = options['ds_name']

    # Credentials are optional: on any failure reading them both stay None
    # and the DB manager is requested without user information.
    user = None
    passw = None
    try:
        config = ConfigParser.RawConfigParser()
        config.read('ConfigFile.properties')
        # Single tuple assignment so that user is never set without passw.
        user, passw = (
            config.get('AuthenticationSection', 'database.user'),
            config.get('AuthenticationSection', 'database.password'),
        )
    except Exception as e:
        logger.info("Not user info found, using anonymous user... ")
        logger.info(e)

    dbFactory = DBFactory()
    dbManager = dbFactory.get_default_db_manager_with_username(
        ds_name, user, passw)

    # We want to change all courses from mygoblet.org tagged as
    # Training Materials
    ckan_conditions = [
        ['AND', [
            ['EQ', 'link',
             'http://www.mygoblet.org//training-portal/courses/*'],
            ['EQ', 'resource_type', ["Training Material"]],
        ]],
    ]

    # NOTE(review): both counts are currently unused; the calls are kept so
    # the sequence of DB manager operations is unchanged.
    previous_count = dbManager.count_data_by_conditions(ckan_conditions)
    results = dbManager.get_data_by_conditions(ckan_conditions)

    # Delete all of them, and then insert them again modified, because there
    # is no update operation yet (to be implemented in AbstractManager).
    dbManager.delete_data_by_conditions(ckan_conditions)
    new_count = dbManager.count_data_by_conditions(ckan_conditions)

    # Guard against a manager that reports "no data" as None.
    if results is None:
        results = []

    numSuccess = 0
    for result in results:
        # Skip records whose link no longer resolves.
        if not util.existURL(result.get("link")):
            continue
        success = dbManager.insert_data({
            "title": result.get("title"),
            "description": result.get("description"),
            "field": result.get("field"),
            "source": result.get("source"),
            # Now they are Events, not Training Materials!
            "resource_type": ["Event"],
            "insertion_date": result.get("insertion_date"),
            "created": result.get("created"),
            "audience": result.get("audience"),
            "link": result.get("link"),
        })
        if success:
            numSuccess += 1

    logger.info('Changed ' + str(numSuccess) +
                ' mygoblet.org records tagged as Training Materials to Events')
    logger.info('< Finished ckan postprocessing')
def main_options(options):
    """
    Executes the main functionality of this script: it extracts information
    from iAnn events data and inserts some of it into the DB.

        * options {dict or None} specific configurations for initialization.
            ds_name: specific dataset/database to use with the DB manager
            delete_all_old_data {boolean} specifies if we should first delete
              the records whose 'source' equals get_source_field()
              (default False)
            registriesFromTime {datetime} time from which registries will be
              obtained; passed as is to get_iann_data (default None)
            updateRegistries {boolean} if we want to get new registries or
              not (default True)

    In this script we will insert these fields into each registry:
        "title" {string} Title for the event;
        "start" - Date the event starts;
        "end" - Date the event ends;
        "city" {string} City where the event is hosted;
        "country" {string} Country where the event is hosted;
        "field" {string} Branches of science in molecular biology.
        "provider" {string} Organization hosting the event;
        "link" {string} Link to the data registry.
        "source" Value returned by get_source_field();
        "resource_type" Value returned by get_resource_type_field();
        "insertion_date" Value returned by get_insertion_date_field();
        "created" {date} Date and time of creation of the original registry.
    See more eg: http://iann.pro/iann-web-services

    Only items whose link resolves (according to util.existURL) are inserted.
    """
    # IannDataLocking.lock()
    init_logger()

    ds_name = None
    delete_all_old_data = False
    registriesFromTime = None
    updateRegistries = True

    if options is not None:
        logger.info('>> Starting iann importing process... params: ')
        if 'ds_name' in options:
            ds_name = options['ds_name']
            # str() so a non-string ds_name cannot break the log line.
            logger.info('ds_name=' + str(ds_name))
        if 'delete_all_old_data' in options:
            delete_all_old_data = options['delete_all_old_data']
            logger.info('delete_all_old_data=' + str(delete_all_old_data))
        if 'registriesFromTime' in options:
            registriesFromTime = options['registriesFromTime']
            logger.info('registriesFromTime=' + str(registriesFromTime))
        if 'updateRegistries' in options:
            updateRegistries = options['updateRegistries']
            logger.info('updateRegistries=' + str(updateRegistries))
    else:
        logger.info('>> Starting iann importing process...')

    # Fetch the remote data before touching the DB.
    iann_data = None
    if updateRegistries:
        iann_data = get_iann_data(registriesFromTime)

    # Credentials are optional: on any failure reading them both stay None
    # and the DB manager is requested without user information.
    user = None
    passw = None
    try:
        config = ConfigParser.RawConfigParser()
        config.read('ConfigFile.properties')
        # Single tuple assignment so that user is never set without passw.
        user, passw = (
            config.get('AuthenticationSection', 'database.user'),
            config.get('AuthenticationSection', 'database.password'),
        )
    except Exception as e:
        logger.info("Not user info found, using anonymous user... ")
        logger.info(e)

    dbFactory = DBFactory()
    dbManager = dbFactory.get_default_db_manager_with_username(
        ds_name, user, passw)

    if delete_all_old_data:
        iann_conditions = [['EQ', 'source', get_source_field()]]
        previous_count = dbManager.count_data_by_conditions(iann_conditions)
        dbManager.delete_data_by_conditions(iann_conditions)
        new_count = dbManager.count_data_by_conditions(iann_conditions)
        # Either count may be None; only report when both are available.
        if previous_count is not None and new_count is not None:
            logger.info('Deleted ' + str(previous_count - new_count) +
                        ' registries')

    if iann_data is not None:
        numSuccess = 0
        for result in iann_data:
            if result is None:
                continue
            # Fixed: this used the undefined name `record` instead of
            # `result`, raising NameError on the first non-None item.
            if not util.existURL(get_link(result)):
                continue
            success = dbManager.insert_data({
                "title": get_title(result),
                "start": get_start(result),
                "end": get_end(result),
                "city": get_city(result),
                "country": get_country(result),
                "field": get_field(result),
                "provider": get_provider(result),
                "link": get_link(result),
                "source": get_source_field(),
                "resource_type": get_resource_type_field(),
                "insertion_date": get_insertion_date_field(),
                "created": get_creation_date_field(result),
            })
            if success:
                numSuccess += 1
        logger.info('Inserted ' + str(numSuccess) + ' new registries')

    logger.info('<< Finished iann importing process.')