def mirror_ckan(source, target, api_key, dryrun, update):
    sourceCKAN = ckanclient.CkanClient(base_location=source)
    targetCKAN = ckanclient.CkanClient(base_location=target, api_key=api_key)
    indent = '   '
    for name in sourceCKAN.package_register_get():
        #if name == 'hospital-compare':
        sourceCKAN.package_entity_get(name)  # Get the dataset description.
        dataset = sourceCKAN.last_message
        altID = source.replace('/api', '') + '/dataset/' + dataset['id']
        altName = source.replace('/api', '') + '/dataset/' + dataset['name']
        dataset['extras']['prov_alternateOf'] = altName
        # Would like to assert two alternates, but their model is limiting.
        if not dryrun:
            del dataset['id']  # DELETING
        print name + ' ' + dataset['name']
        if 'download_url' in dataset:
            print indent + 'download_url: ' + dataset['download_url']
        if 'url' in dataset:
            print indent + 'url: ' + dataset['url']
        for resource in dataset['resources']:
            if not dryrun:
                del resource['id']  # DELETING
            if 'url' in resource:
                print indent + 'resource: ' + resource['url']
                print indent + 'format: ' + resource['format']
                # Formats seen on healthdata.gov:
                # CSV Text XLS XML Feed Query API Widget RDF
        #print json.dumps(dataset, sort_keys=True, indent=4)
        if not dryrun:
            try:
                # See if dataset is listed in targetCKAN
                targetCKAN.package_entity_get(dataset['name'])
                if update:
                    # Update target's existing entry from source's
                    targetCKAN.package_entity_put(dataset)
                else:
                    print('NOTE: skipping ' + dataset['name'] + ' ' +
                          'b/c already listed at ' + target)
                #update = targetCKAN.last_message
                #update['notes'] = 'Updated.'
                #targetCKAN.package_entity_put(update)
            except ckanclient.CkanApiNotFoundError:
                # Dataset is not listed on this CKAN
                print 'INFO: adding ' + dataset['name'] + ' to ' + target
                try:
                    targetCKAN.package_register_post(dataset)  # POST
                except ckanclient.CkanApiConflictError:
                    print('WARNING: ' +
                          'Conflict error when trying to POST ' + dataset['name'])
def publish_to_ckan():
    """Updates the dataset in the CKAN repository or creates a new dataset

    Returns:
        None
    """
    global ckan_client

    # Initialize the CKAN client
    ckan_client = ckanclient.CkanClient(base_location=args.ckan_api,
                                        api_key=args.ckan_api_key)

    # Create the name of the dataset on the CKAN instance
    dataset_id = args.ckan_dataset_name_prefix + args.dataset_name

    # Get the dataset from CKAN
    dataset_entity = get_remote_dataset(dataset_id)

    # Check to see if the dataset exists on CKAN or not
    if dataset_entity is None:
        # Create a new dataset
        create_dataset(dataset_id)
    else:
        # Update an existing dataset
        update_dataset(dataset_entity)

    # Update the dataset version on the CKAN repository
    # (causes the last-modified date to be updated)
    if args.increment != "none":
        update_dataset_version()
def update_dataset_version():
    """Updates the dataset version number on the CKAN repository

    Returns:
        None
    """
    global args

    logger.info('Updating CKAN dataset version')

    # Initialize CKAN client
    ckan = ckanclient.CkanClient(base_location=args.ckan_api,
                                 api_key=args.ckan_api_key)

    # Create the name of the dataset on the CKAN instance
    dataset_id = args.ckan_dataset_name_prefix + args.dataset_name

    try:
        # Get the dataset
        dataset_entity = ckan.package_entity_get(dataset_id)

        # Increment the version number
        version = dataset_entity['version']
        version = increment_version(version, args.increment)
        dataset_entity['version'] = version

        # Update the dataset
        ckan.package_entity_put(dataset_entity)
    except ckanclient.CkanApiNotFoundError:
        logger.info(" Dataset " + dataset_id + " not found on OpenColorado")
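# increment_version() is not shown in the snippet above. A purely illustrative
# sketch of what such a helper could look like, assuming "major.minor" version
# strings and an increment argument of 'major' or 'minor' (not the author's code):
def increment_version(version, increment):
    if not version:
        return '1.0'
    parts = (version.split('.') + ['0'])[:2]
    major, minor = int(parts[0]), int(parts[1])
    if increment == 'major':
        return '%d.0' % (major + 1)
    return '%d.%d' % (major, minor + 1)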
def searcher(valid_ids, invalid_ids, apikey, server, times=1, count=50):
    import time
    import random
    import ckanclient

    errors = []
    for _ in range(times):
        valid = random.randint(5, count)
        invalid = random.randint(5, count)
        samples = random.sample(valid_ids, valid)
        samples.extend(random.sample(invalid_ids, invalid))

        s = time.time()
        ckan = ckanclient.CkanClient(base_location=server, api_key=apikey)
        opts = {'offset': 0, 'limit': 0}
        q = ' OR '.join(samples)
        try:
            search_results = ckan.package_search(q, opts)
            datasets = list(search_results['results'])
            log.info("%d items found from %d ids in %s" %
                     (len(datasets), valid + invalid, time.time() - s))
        except:
            log.error(
                "Search failed with %d valid and %d invalid items in query" % (
                    valid,
                    invalid,
                ))
def main():
    args = parser.parse_args()
    client = ckanclient.CkanClient(args.url)
    rows = []
    for pkg_name in client.package_register_get():
        pkg = client.package_entity_get(pkg_name)
        for extra, value in pkg.get('extras', {}).items():
            pkg['extras_' + extra] = value
        if 'extras' in pkg:
            del pkg['extras']
        resources = pkg.get('resources', [])
        for resource in resources:
            rpkg = pkg.copy()
            for resprop, value in resource.items():
                rpkg['resource_' + resprop] = value
            rows.append(rpkg)
        if not len(resources):
            rows.append(pkg)
        del pkg['resources']
        print pkg_name
    headers = set()
    for row in rows:
        headers.update(row.keys())
    fh = open(args.outfile, 'wb')
    writer = csv.DictWriter(fh, headers)
    writer.writerow(dict(zip(headers, headers)))
    for row in rows:
        row_ = {}
        for column, value in row.items():
            if isinstance(value, unicode):
                value = value.encode('utf-8')
            row_[column] = value
        writer.writerow(row_)
    fh.close()
def __init__(self):
    self.apikey = open(API_KEY_FILENAME).read()
    self.ckan = ckanclient.CkanClient(base_location=API_URL,
                                      api_key=self.apikey)
    self.package_list = self.ckan.package_register_get()
    self.started = False
    self.start_key = ''
    pass
def __init__(self):
    self.apikey = open(API_KEY_FILENAME).read()
    self.ckan = ckanclient.CkanClient(base_location=API_URL,
                                      api_key=self.apikey)
    self.package_list = self.ckan.package_register_get()
    self.started = True
    self.start_key = ''
    # Russian tag value: 'микроблоги' = 'microblogs'
    self.tags = [u'twitter', u'микроблоги']
    pass
def __init__(self):
    self.apikey = open(API_KEY_FILENAME).read()
    self.ckan = ckanclient.CkanClient(base_location=API_URL,
                                      api_key=self.apikey)
    self.package_list = self.ckan.package_register_get()
    self.started = True
    self.start_key = ''
    # Russian tag values: 'архивы' = 'archives', 'видеоканалы' = 'video channels'
    self.tags = [u'youtube', u'архивы', u'видеоканалы']
    pass
def __init__(self):
    self.apikey = open(API_KEY_FILENAME).read()
    self.ckan = ckanclient.CkanClient(base_location=API_URL,
                                      api_key=self.apikey)
    self.package_list = self.ckan.package_register_get()
    self.started = True
    self.start_key = ''
    # Russian tag values: 'ЕГЭ' = 'Unified State Exam', 'экзамены' = 'exams',
    # 'статистика' = 'statistics'
    self.tags = [u'ЕГЭ', u'экзамены', u'статистика']
    pass
def process_ckan_datasets():
    global ckan_host

    # Initialize the CKAN client
    ckan_client = ckanclient.CkanClient(base_location=ckan_host)

    package_id_list = ckan_client.package_register_get()
    # print package_id_list

    index = 0
    for package_id in package_id_list:
        # Get the package details
        package = ckan_client.package_entity_get(package_id)

        # Get the package name (slug)
        package_name = package['name']
        #print package_name

        print "------------------------------"
        print "Processing dataset " + str(index) + " of " + \
            str(len(package_id_list)) + ": " + package_name
        print "Created: " + package['metadata_created'] + \
            ", modified: " + package['metadata_modified']

        shapefile_found = False
        resources = package['resources']
        for resource in resources:
            ## Look for a shapefile resource
            if (resource['mimetype'] and 'shp' in resource['mimetype'].lower()) or \
               (resource['mimetype_inner'] and 'shp' in resource['mimetype_inner'].lower()) or \
               (resource['format'] and 'shp' in resource['format'].lower()) or \
               (resource['format'] and 'shapefile' in resource['format'].lower()) or \
               (resource['name'] and 'shp' in resource['name'].lower()) or \
               (resource['name'] and 'shapefile' in resource['name'].lower()) or \
               (resource['description'] and 'shp' in resource['description'].lower()) or \
               (resource['description'] and 'shapefile' in resource['description'].lower()):

                shapefile_found = True
                print "Shapefile found! Attempting download..."

                # Get the resource URL
                url = resource["url"]

                # Download the shapefile
                shapefile = download_shapefile(package_name, url)
                reproject_shapefile(package_name, shapefile)

        if shapefile_found == False:
            print "No shapefile found."

        index = index + 1
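# Optional refactoring sketch (not from the original script): the long resource
# test above could be wrapped in a small helper like this, at the cost of also
# checking 'shapefile' against the two mimetype fields.
def looks_like_shapefile(resource):
    for field in ('mimetype', 'mimetype_inner', 'format', 'name', 'description'):
        value = resource.get(field)
        if value and ('shp' in value.lower() or 'shapefile' in value.lower()):
            return True
    return False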
def urispace_of_dataset(ckan_loc='http://datahub.io',
                        dataset_name='2000-us-census-rdf'):
    ckan = ckanclient.CkanClient(base_location=ckan_loc + '/api')
    dataset = ckan.package_entity_get(dataset_name)
    # u'extras': {u'namespace': u'http://www.rdfabout.com/rdf/usgov/geo/'
    if 'extras' in dataset:
        if 'namespace' in dataset['extras']:
            print dataset['extras']['namespace']
def run(directory):
    url = 'http://iatiregistry.org/api'
    registry = ckanclient.CkanClient(base_location=url)
    for pkg_name in registry.package_register_get():
        pkg = registry.package_entity_get(pkg_name)
        for resource in pkg.get('resources', []):
            print resource.get('url')
            try:
                save_file(pkg_name, resource.get('url'), directory)
            except Exception, e:
                print "Failed:", e
def index(request):
    out = []
    # Instantiate the CKAN client.
    #ckan = ckanclient.CkanClient(base_location='http://open.alberta.ca/api')
    ckan = ckanclient.CkanClient(base_location='https://datahub.io/api')
    #ckan = ckanclient.CkanClient(base_location='http://opendata.aragon.es/api')
    i = 0
    out = {}
    out['nodes'] = []
    out['links'] = []
    title = []
    org = []
    # Get the package list.
    package_list = ckan.package_register_get()
    for pack in package_list:
        if i < 15:
            ckan.package_entity_get(pack)
            package_entity = ckan.last_message
            if package_entity.has_key('organization'):
                #print True
                title.append(package_entity['title'])
                org.append(package_entity['organization']['title'])
            i += 1
        else:
            break
    for ti in title:
        nodos = ast.literal_eval('{"name":"' + ti.encode('utf-8') + '","group":"uno"}')
        out['nodes'].append(nodos)
    j = 0
    for res in org:
        k = 0
        for lov in org:
            if res == lov:
                links = ast.literal_eval('{"source":' + str(j) +
                                         ',"target":' + str(k) + ',"weight":1}')
                out['links'].append(links)
                print res, " ", j, ",", k
            k += 1
        j += 1
    os.getcwd()
    os.path.exists("profundidad_ckan")
    with open('profundidad_ckan' + '/static/profundidad_ckan/data.json', 'w') as fs:
        json.dump(out, fs)
    return render(request, "index.html", {
        'out': out['nodes'],
        'tam': len(package_list)
    })
def __init__(self, base_location, api_key=None, is_remote=True, ckan_version=2.2):
    self.ckan_target = ckanclient.CkanClient(base_location, api_key)
    self.ckanapi = ckanapi.RemoteCKAN(
        base_location,
        apikey=api_key,
        user_agent='CkanApiScript (+http://TBD)')
    '''TODO: support the local API'''
def __init__(self):
    self.apikey = open(API_KEY_FILENAME).read()
    self.ckan = ckanclient.CkanClient(base_location=API_URL,
                                      api_key=self.apikey)
    self.package_list = self.ckan.package_register_get()
    self.started = True
    self.start_key = ''
    # Russian values: 'политика' = 'politics', 'выборы' = 'elections',
    # 'политические партии' = 'political parties', 'финансы' = 'finance';
    # 'ЦИК России' = 'Central Election Commission of Russia'
    self.tags = [
        u'политика', u'выборы', u'политические партии', u'финансы'
    ]
    self.package_keys = {'govbody': u'ЦИК России'}
    pass
def __init__(self):
    self.apikey = open(API_KEY_FILENAME).read()
    self.ckan = ckanclient.CkanClient(base_location=API_URL,
                                      api_key=self.apikey)
    self.package_list = self.ckan.package_register_get()
    self.started = True
    self.start_key = ''
    # Russian values: 'минюст россии' / 'Минюст России' = 'Ministry of Justice
    # of Russia', 'статистика' = 'statistics'
    self.tags = [
        u'минюст россии',
        u'статистика',
    ]
    self.package_keys = {'govbody': u'Минюст России'}
    pass
def __init__(self, ckan_host, store, talisuser, talispassword):
    api_key = None

    # ckan connection
    if not ckan_host.startswith('http://'):
        ckan_host = 'http://' + ckan_host
    ckan_host = ckan_host + '/api'
    self.ckan = ckanclient.CkanClient(base_location=ckan_host, api_key=api_key)

    # talis connection
    talis.TalisLogin.init(store, talisuser, talispassword)
    self._talis = talis.Talis()
def __init__(self):
    self.apikey = open(API_KEY_FILENAME).read()
    self.ckan = ckanclient.CkanClient(base_location=API_URL,
                                      api_key=self.apikey)
    self.package_list = self.ckan.package_register_get()
    self.started = True
    self.start_key = ''
    # Russian values: 'фас россии' = 'FAS Russia (Federal Antimonopoly Service)',
    # 'новости' = 'news', 'официально' = 'official', 'госсайты' = 'government sites'
    self.tags = [
        u'фас россии', 'RSS', u'новости', u'официально', u'госсайты'
    ]
    self.feedtype = 'RSS'
    pass
def load_registry(url='http://iatiregistry.org/api'):
    import ckanclient
    transactions = []
    registry = ckanclient.CkanClient(base_location=url)
    for pkg_name in registry.package_register_get():
        pkg = registry.package_entity_get(pkg_name)
        for resource in pkg.get('resources', []):
            print resource.get('url')
            try:
                transactions.extend(
                    load_file(resource.get('url'),
                              {'registry_package': pkg_name}))
            except Exception, e:
                print "Failed:", e
def __init__(self):
    self.apikey = open(API_KEY_FILENAME).read()
    self.ckan = ckanclient.CkanClient(base_location=API_URL,
                                      api_key=self.apikey)
    self.package_list = self.ckan.package_register_get()
    self.started = True
    self.start_key = ''
    # Russian values: 'ЕГЭ' = 'Unified State Exam', 'экзамены' = 'exams',
    # 'статистика' = 'statistics', 'Удмуртская республика' = 'Udmurt Republic'
    self.tags = [
        u'ЕГЭ', u'экзамены', u'статистика', u'удмуртская республика'
    ]
    self.package_keys = {
        'region_code': u'18',
        'region': u'Удмуртская республика'
    }
    pass
def command(self):
    super(SchemaChecker, self)._load_config()
    context = self.create_context()

    data = {
        'field_paths': defaultdict(int),
        'broken_rules': defaultdict(dict),
        'datasets_per_portal': defaultdict(set),
        'invalid_datasets': 0,
        'valid_datasets': 0
    }

    if len(self.args) == 0:
        context = {
            'model': model,
            'session': model.Session,
            'ignore_auth': True
        }

        validator = schema_checker.SchemaChecker()
        num_datasets = 0
        for i, dataset in enumerate(iterate_local_datasets(context)):
            print 'Processing dataset %s' % i
            normalize_action_dataset(dataset)
            validator.process_record(dataset)
            num_datasets += 1

        general = {'num_datasets': num_datasets}
        validator.redis_client.set('general', general)

    elif len(self.args) == 2 and self.args[0] == 'remote':
        endpoint = self.args[1]
        ckan = ckanclient.CkanClient(base_location=endpoint)

        rows = 1000
        total = self.get_dataset_count(ckan)
        steps = int(ceil(total / float(rows)))
        for i in range(0, steps):
            if i == steps - 1:
                rows = total - (i * rows)
            datasets = self.get_datasets(ckan, rows, i)
            self.validate_datasets(datasets, data)

        self.write_validation_result(self.render_template(data))
def iterate_remote_datasets(endpoint, max_rows=1000):
    ckan = ckanclient.CkanClient(base_location=endpoint)

    print 'Retrieve total number of datasets'
    total = ckan.action('package_search', rows=1)['count']

    steps = int(ceil(total / float(max_rows)))
    rows = max_rows
    for i in range(0, steps):
        if i == steps - 1:
            rows = total - (i * rows)
        datasets = (i * max_rows) + 1
        print 'Retrieve datasets %s - %s' % (datasets, datasets + rows - 1)
        # Page offsets are based on max_rows, not on the possibly shrunken
        # rows value of the final page.
        records = ckan.action('package_search', rows=rows, start=max_rows * i)
        records = records['results']
        for record in records:
            yield record
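# Usage sketch (not part of the snippet above): iterate_remote_datasets() is a
# generator, so results can be consumed lazily; the endpoint and page size below
# are placeholder assumptions.
for dataset in iterate_remote_datasets('http://demo.ckan.org/api', max_rows=500):
    print dataset['name']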
def dump_ckan_to_pickle(keyfile):
    #Connect
    [ckankeys, googlekeys] = read_keys(keyfile)
    fout = open("pickled_ckan_contents.pk1", "wb")
    ckan = ckanclient.CkanClient(base_location=ckankeys['url'] + 'api',
                                 api_key=ckankeys['apikey'])

    #tag list
    tag_list = ckan.tag_register_get()
    pickle.dump(tag_list, fout, -1)  #force pickle to use highest protocol available

    #packages
    package_entities = {}
    package_list = ckan.package_register_get()
    print package_list
    for package_name in package_list:
        ckan.package_entity_get(package_name)
        package_entities[package_name] = ckan.last_message
    pickle.dump(package_entities, fout, -1)

    #groups
    groups = {}
    group_list = ckan.group_register_get()
    print group_list
    for group_name in group_list:
        groups[group_name] = ckan.group_entity_get(group_name)
    pickle.dump(groups, fout, -1)

    ###datasets
    ##datasets = {}
    ##dataset_list = ckan.dataset_register_get()
    ##for dataset_name in dataset_list:
    ##    datasets[dataset_name] = ckan.dataset_entity_get(dataset_name)
    ##pickle.dump(datasets, fout, -1)

    fout.close()
    return ()
def update_dataset_version():
    global args

    # Initialize ckan client
    ckan = ckanclient.CkanClient(base_location=ckan_api, api_key=ckan_api_key)

    # Create the name of the dataset on the CKAN instance
    dataset_id = ckan_dataset_prefix + args.catalog_dataset

    try:
        # Get the dataset
        dataset_entity = ckan.package_entity_get(dataset_id)

        # Increment the version number
        version = dataset_entity['version']
        version = increment_minor_version(version)
        dataset_entity['version'] = version

        # Update the dataset
        ckan.package_entity_put(dataset_entity)
    except ckanclient.CkanApiNotFoundError:
        info(" Dataset " + dataset_id + " not found on OpenColorado")
import os, json
import ckanclient

# see https://github.com/okfn/ckanclient README
# Get latest download URL from http://pypi.python.org/pypi/ckanclient#downloads
# sudo easy_install http://pypi.python.org/packages/source/c/ckanclient/ckanclient-0.10.tar.gz
# See also https://github.com/timrdf/DataFAQs/wiki/CKAN
#   section "Automatically publish dataset on CKAN"

source = 'http://hub.healthdata.gov/api'
target = 'http://aquarius.tw.rpi.edu/projects/healthdata/api'

MIRROR = False             # Modify target CKAN with listings from source CKAN.
UPDATE = MIRROR and False  # If a dataset already exists in target, update it.

sourceCKAN = ckanclient.CkanClient(base_location=source)

api_key = os.environ['X_CKAN_API_Key']  # api_key must be defined to POST/PUT.
targetCKAN = ckanclient.CkanClient(base_location=target, api_key=api_key)

indent = '   '
for name in sourceCKAN.package_register_get():
    if name == 'hospital-compare':
        sourceCKAN.package_entity_get(name)  # Get the dataset description.
        dataset = sourceCKAN.last_message
        altID = source.replace('/api', '') + '/dataset/' + dataset['id']
        altName = source.replace('/api', '') + '/dataset/' + dataset['name']
        dataset['extras']['prov_alternateOf'] = altName
        # Would like to assert two alternates, but their model is limiting.
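# A minimal, hypothetical sketch of the "Automatically publish dataset on CKAN"
# pattern referenced in the comments above; the endpoint and dataset values are
# assumptions, not taken from the original script.
import os
import ckanclient

ckan = ckanclient.CkanClient(base_location='http://example.org/ckan/api',
                             api_key=os.environ['X_CKAN_API_Key'])
dataset = {
    'name': 'my-example-dataset',   # URL slug; must be unique on the target CKAN
    'title': 'My example dataset',
    'notes': 'Registered via ckanclient.',
}
try:
    ckan.package_register_post(dataset)   # create a new dataset entry
except ckanclient.CkanApiConflictError:
    ckan.package_entity_put(dataset)      # name already taken: update instead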
# Get latest download URL from http://pypi.python.org/pypi/ckanclient#downloads
# sudo easy_install http://pypi.python.org/packages/source/c/ckanclient/ckanclient-0.10.tar.gz
# See also https://github.com/timrdf/DataFAQs/wiki/CKAN
#   section "Automatically publish dataset on CKAN"

#source = 'http://hub.healthdata.gov/api'
target = 'http://healthdata.tw.rpi.edu/hub/api'

MIRROR = False             # Modify target CKAN with listings from source CKAN.
UPDATE = MIRROR and False  # If a dataset already exists in target, update it.

#sourceCKAN = ckanclient.CkanClient(base_location=target)

api_key = os.environ['X_CKAN_API_Key']  # api_key must be defined to POST/PUT.
print api_key
ckan = ckanclient.CkanClient(base_location=target, api_key=api_key)

indent = ' '
for name in ckan.package_register_get():
    print name
    ckan.package_entity_get(name)  # Get the dataset description.
    dataset = ckan.last_message

    resources = dict([(r['name'], r) for r in dataset['resources']])

    if "Data Dictionary" not in resources and "Data Dictionary" in dataset['extras']:
        print "datadict"
        ddURL = dataset['extras']['Data Dictionary']
        ddFormat = ddURL.split(".")[-1]
        if len(ddFormat) > 5:
            ddFormat = None
    s = dateutil.parser.parse(s)
    return s


def shorten(longURL):
    result = None
    f = urllib.urlopen("http://tinyurl.com/api-create.php?url=%s" % longURL)
    try:
        result = f.read()
    finally:
        f.close()
    return result


now = datetime.datetime.now()

ckan = ckanclient.CkanClient(base_location='http://dati.trentino.it/api')
package_list = ckan.package_register_get()

with open('ckan_packages.csv', 'wb') as csvfile:
    csvoutput = csv.writer(csvfile, delimiter=';', quoting=csv.QUOTE_ALL)
    csvoutput.writerow([
        "name", "author", "maintainer", "url", "metadata_created",
        "metadata_modified", "dayaftercreation"
    ])
    for package in package_list:
        ckan.package_entity_get(package)
        package_entity = ckan.last_message
        # Italian tweet text: "Published today the dataset ... #opendatatrentino"
        message = "Pubblicato oggi il dataset %s %s #opendatatrentino" % (
            package_entity['title'], shorten(package_entity['ckan_url']))
        maintainer = package_entity['maintainer']
        ckanurl = package_entity['ckan_url']
        #name = package_entity('name')
def main():
    pstat = {
        'status': {},
        'text': {},
        'short': {},
    }

    now = time.strftime("%Y-%m-%d %H:%M:%S")
    jid = os.getpid()
    ckanlistrequests = ['package_list', 'group_list', 'tag_list']

    ## Get options and arguments
    args = get_args(ckanlistrequests)

    # Output instance
    OUT = Output(pstat, now, jid, args)
    logger = OUT.setup_custom_logger('root', args.verbose)

    ## Settings for CKAN client and API
    ckanapi3 = 'http://' + args.ckan + '/api/3'
    if PY2:
        ckan = ckanclient.CkanClient(ckanapi3)
    else:
        auth = '12345'
        ckan = CKAN_CLIENT(args.ckan, auth)
    ckan_limit = 500000

    start = time.time()

    if args.request.endswith('list'):
        try:
            if args.request == 'community_list':
                action = 'group_list'
            else:
                action = args.request
            if PY2:
                answer = ckan.action(action, rows=ckan_limit)
            else:
                answer = ckan.action(action)
        except ckanclient.CkanApiError as e:
            print('\t\tError %s Supported list requests are %s.' %
                  (e, ckanlistrequests))
            sys.exit(1)
        ## print '|- The list of %ss :\n\t%s' % (args.request.split('_')[0], '\n\t'.join(answer).encode('utf8'))
        print('\n\t%s' % '\n\t'.join(answer).encode('utf8'))
        sys.exit(0)

    # create CKAN search pattern :
    ckan_pattern = ''
    sand = ''
    pattern = ' '.join(args.pattern)
    if (args.community):
        ckan_pattern += "groups:%s" % args.community
        sand = " AND "
    if (args.pattern):
        ckan_pattern += sand + pattern

    print(' | - Search\n\t|- in\t%s\n\t|- for\t%s\n' % (args.ckan, ckan_pattern))

    if args.request == 'package_search':
        if PY2:
            answer = ckan.action('package_search', q=ckan_pattern, rows=ckan_limit)
        else:
            answer = ckan.action('package_search', {"q": ckan_pattern})
    for key, value in answer.items():
        logger.warning('answer has key %s' % key)
    if PY2:
        tcount = answer['count']
    else:
        tcount = answer['result']['count']
    print(' | - Results:\n\t|- %d records found in %d sec' %
          (tcount, time.time() - start))

    # Read in B2FIND metadata schema and fields
    schemafile = '%s/mapfiles/b2find_schema.json' % (os.getcwd())
    with open(schemafile, 'r') as f:
        b2findfields = json.loads(f.read(), object_pairs_hook=OrderedDict)

    if tcount > 0 and args.keys is not None:
        if len(args.keys) == 0:
            akeys = []
        else:
            if args.keys[0] == 'B2FIND.*':
                # All supported fields, sorted by name.
                akeys = sorted(b2findfields.keys())
            else:
                akeys = args.keys

        suppid = b2findfields.keys()

        fh = io.open(args.output, "w", encoding='utf8')
        record = {}

        totlist = []
        count = {}
        count['id'] = 0
        statc = {}
        # Iterate over a copy so unsupported keys can be removed safely.
        for outt in list(akeys):
            if outt not in suppid:
                print(' [WARNING] Not supported key %s is removed' % outt)
                akeys.remove(outt)
            else:
                count[outt] = 0
                statc[outt] = Counter()

        printfacets = ''
        if (len(akeys) > 0):
            printfacets = "and related facets %s " % ", ".join(akeys)

        print('\t|- IDs %sare written to %s ...' % (printfacets, args.output))

        counter = 0
        cstart = 0
        oldperc = 0
        start2 = time.time()

        while (cstart < tcount):
            if (cstart > 0):
                if PY2:
                    answer = ckan.action('package_search',
                                         q=ckan_pattern,
                                         rows=ckan_limit,
                                         start=cstart)
                else:
                    answer = ckan.action('package_search', {
                        "q": ckan_pattern,
                        "rows": ckan_limit,
                        "start": cstart
                    })
            if PY2:
                if len(answer['results']) == 0:
                    break
            #HEW-D else:
            ##HEW-D     if len(answer['result']['results']) == 0:
            ##HEW-D         break

            # loop over found records
            if PY2:
                results = answer['results']
            else:
                results = answer['result']['results']
            for ds in results:  #### answer['results']:
                counter += 1
                logger.debug('    | %-4d | %-40s |' % (counter, ds['name']))
                perc = int(counter * 100 / tcount)
                bartags = perc / 5
                if perc % 10 == 0 and perc != oldperc:
                    oldperc = perc
                    print('\r\t[%-20s] %5d (%3d%%) in %d sec' %
                          ('=' * int(bartags), counter, perc, time.time() - start2))
                    sys.stdout.flush()

                record['id'] = '%s' % (ds['name'])
                outline = record['id']

                # loop over facets
                for facet in akeys:
                    ##HEW-T print 'facet : %s' % facet
                    ckanFacet = b2findfields[facet]["ckanName"]
                    if ckanFacet in ds:  ## CKAN default field
                        if facet == 'Group':
                            record[facet] = ds[ckanFacet][0]['display_name']
                        else:
                            record[facet] = ds[ckanFacet]
                    else:  ## CKAN extra field
                        ##HEW-T print 'ds extras %s' % ds['extras']
                        efacet = [e for e in ds['extras'] if e['key'] == facet]
                        if efacet:
                            ##HEW-T print 'rrrr %s effff %s' % (record[facet], efacet[0]['value'])
                            record[facet] = efacet[0]['value']
                        else:
                            record[facet] = 'N/A'
                    if record[facet] is None:
                        record[facet] = 'None'
                        statc[facet][record[facet]] += 1
                    else:
                        if not isinstance(record[facet], list):
                            words = record[facet].split(';')
                        else:
                            words = record[facet]
                        for word in words:
                            if isinstance(word, dict):
                                word = word['name']
                            statc[facet][word] += 1
                    if not (record[facet] == 'N/A' or
                            record[facet] == 'Not Stated') and len(record[facet]) > 0:
                        count[facet] += 1
                    outline += '\t | %-30s' % record[facet][:30]
                fh.write(outline + '\n')
            cstart += len(results)
            logger.warning('%d records done, %d in total' % (cstart, tcount))
        fh.close()

        if len(akeys) > 0:
            statfh = io.open('stat_' + args.output, "w", encoding='utf8')
            ##print "\n|- Statistics :\n\t| %-16s | %-10s | %6s |\n\t%s " % ('Facet', 'Occurence', '%', "-" * 50)
            print('|- Statistics written to file %s' % ('stat_' + args.output))

            statline = u""
            for outt in akeys:
                statline += "| %-16s\n\t| %-15s | %-6d | %3d |\n" % (
                    outt, '-Total-', count[outt], int(count[outt] * 100 / tcount))
                for word in statc[outt].most_common(10):
                    statline += '\t| %-15s | %-6d | %3d |\n' % (
                        word[0][:100], word[1], int(word[1] * 100 / tcount))

            statfh.write(statline)
            statfh.close()
Sara-Jayne Farmer
2013
'''
import ckanclient
import pickle

#Connect
fin = open("../key.txt", 'rb')
key = fin.read().strip()
fin.close()

fout = open("pickled_ckan_contents.pk1", "wb")
ckan = ckanclient.CkanClient(
    base_location='http://ec2-54-228-69-142.eu-west-1.compute.amazonaws.com/api',
    api_key=key)

#tag list
tag_list = ckan.tag_register_get()
pickle.dump(tag_list, fout, -1)  #force pickle to use highest protocol available

#packages
package_entities = {}
package_list = ckan.package_register_get()
print package_list
for package_name in package_list:
    ckan.package_entity_get(package_name)
    package_entities[package_name] = ckan.last_message
pickle.dump(package_entities, fout, -1)
def __init__(self): self.apikey = open("apikey.txt").read() self.ckan = ckanclient.CkanClient(base_location=API_URL, api_key=self.apikey) self.package_list = self.ckan.package_register_get() pass