def convert_all_to_rdf(self, start_from = 0):
    """Convert every entry returned by ``self.get_files()`` to RDF.

    Entries whose position is below ``start_from`` are ignored entirely,
    which allows an interrupted run to be resumed. For each remaining
    entry the progress line and the entry name are printed and appended
    to ``rdf_conversion.log`` (stored under ``self.log_folder``). Known
    bookkeeping names are then skipped; note that they are still
    printed and logged before being skipped. For every other entry a
    default wiki page is generated and created, and the resource is
    transformed with the ``'default-tranformation-configuration'``
    configuration. The transformation message and return code are
    written to ``<resource_id>.log`` under ``self.conversion_log_folder``.

    :param start_from: index of the first entry to process (default 0).
    """
    # Names in the listing that are bookkeeping entries, not resources.
    skipped_names = (
        ".analyzed",
        '.all-resources',
        '.broken_retrieved',
        "files.tar.gz",
    )
    rdf_log = DatabasePlainFiles(self.conversion_log_folder)
    progress_log = DatabasePlainFiles(self.log_folder)
    progress_file = "rdf_conversion.log"
    resource_ids = self.get_files()
    total = len(resource_ids)

    for position, resource_id in enumerate(resource_ids):
        if position < start_from:
            continue

        progress = "Converting resource to RDF " + str(position) + " out of " + str(total)
        id_text = str(resource_id)
        # Echo to stdout first, then persist the same two lines to the log.
        print(progress)
        print(id_text)
        progress_log.addDbaseRaw(progress_file, progress + "\n")
        progress_log.addDbaseRaw(progress_file, id_text + "\n")

        if resource_id in skipped_names:
            continue

        resource = ckaninterface.Resource(resource_id)

        # Create the wiki page and log whatever create_wiki_page returns.
        progress_log.addDbaseRaw(progress_file, "creating wiki page for resource" + "\n")
        page = resource.generate_default_wiki_page()
        creation_result = str(resource.create_wiki_page(page))
        progress_log.addDbaseRaw(progress_file, creation_result)

        # Transform to RDF and keep the tool output in a per-resource log.
        message, code = resource.transform_to_rdf('default-tranformation-configuration')
        rdf_log.addDbaseRaw(resource_id + '.log', message + "\n" + str(code))
def download_all_csv_resources(self):
    """Download every CSV resource listed by CKAN and log the outcome.

    The list of resources comes from
    ``ckaninterface.CKAN_Application().get_csv_resource_list()``. Each
    entry is wrapped in a ``ckaninterface.Resource`` and downloaded via
    ``_download()``; whatever ``_download()`` returns is appended to
    ``download_all_log.txt`` under ``self.log_folder``.

    NOTE(review): handling of inaccessible resources (404 / 503) and the
    mimetype check mentioned in earlier documentation are not performed
    in this method; presumably they live in ``Resource._download`` --
    confirm.
    """
    db = DatabasePlainFiles(self.log_folder)
    download_all_log = "download_all_log.txt"
    ckan = ckaninterface.CKAN_Application()
    csv_resource_list = ckan.get_csv_resource_list()
    # Iterate the list directly: the previous ``range(len(...) - 1)``
    # bound silently skipped the last resource.
    for resource_id in csv_resource_list:
        resource = ckaninterface.Resource(resource_id)
        db.addDbaseRaw(download_all_log, resource._download())
def download_n_random_csv(self, n):
    """Download ``n`` randomly chosen CSV resources.

    Resources are drawn with replacement from
    ``ckaninterface.CKAN_Application().get_csv_resource_list()``, so the
    same resource may be picked more than once. The index of every pick
    is appended to ``random_csv.txt`` under ``self.log_folder`` before
    the download is attempted. A failed download is reported on stdout
    and does not stop the remaining picks.

    If the resource list is empty nothing is downloaded or logged.

    :param n: number of random picks to make.
    """
    import random

    db = DatabasePlainFiles(self.log_folder)
    random_csv_filename = "random_csv.txt"
    ckan = ckaninterface.CKAN_Application()
    csv_resource_list = ckan.get_csv_resource_list()
    # randint is inclusive on both ends, hence the "- 1".
    csv_resource_list_max = len(csv_resource_list) - 1
    if csv_resource_list_max < 0:
        # Nothing to pick from; randint(0, -1) would raise ValueError.
        return
    for _ in range(n):
        rand = random.randint(0, csv_resource_list_max)
        db.addDbaseRaw(random_csv_filename, str(rand) + "\n")
        resource = ckaninterface.Resource(csv_resource_list[rand])
        try:
            resource._download()
        except Exception as error:
            # Best-effort: keep going, but do not swallow KeyboardInterrupt
            # or SystemExit, and leave a trace of what went wrong.
            print("Download failed for resource at index " + str(rand) + ": " + repr(error))