def cmd_create(self, vocabulary_name, has_relations=False, *args, **kwargs):
    """
    Create vocabulary

    syntax: create vocabulary_name [has_relations,default=no]

    has_relations given as text is true only for yes/y/true/t/1/on
    (case-insensitive); any other text, including "no", is false.
    """
    # bool() of any non-empty string is True, so a textual "no" or "false"
    # would wrongly enable relations. Text values are therefore matched
    # against an explicit list of affirmative words.
    # NOTE(review): presumably the command runner passes arguments as
    # strings - confirm against the caller.
    if hasattr(has_relations, 'lower'):
        affirmative = ('yes', 'y', 'true', 't', '1', 'on')
        with_relations = has_relations.strip().lower() in affirmative
    else:
        # Non-text values (e.g. the False default) keep plain truthiness.
        with_relations = bool(has_relations)
    Vocabulary.create(vocabulary_name, with_relations)
def cmd_rename_term(self, vocabulary_name, old_term, new_term, *args, **kwargs):
    """
    Rename occurrences of vocabulary old_term to new_term

    syntax: rename_term vocabulary_name old_term new_term
    """
    vocabulary = Vocabulary.get(vocabulary_name)
    # The value returned by rename_term_in_extras is reported to the user
    # as the number of updated datasets.
    updated = vocabulary.rename_term_in_extras(old_term, new_term)
    message = u'Updated {} datasets.'.format(updated)
    print(message)
def cmd_list(self, *args, **kwargs):
    """
    List vocabularies
    """
    # Output layout: a heading, then for every vocabulary its name and
    # has_relations flag followed by a blank line, then a closing marker.
    print(_('Vocabularies:'))
    for vocabulary in Vocabulary.get_all():
        name_line = _('vocabulary name: {}').format(vocabulary.name)
        print(name_line)
        relations_line = _(' has relations: {}').format(vocabulary.has_relations)
        print(relations_line)
        print()
    print(_('[end of vocabularies list]'))
def fao_datatype(value, context):
    """
    Validate a datatype value against the datatype vocabulary.

    :param value: term to validate; may be empty
    :param context: validator context (not used here)
    :returns: the default read from config under CONFIG_FAO_DATATYPE when
        value is empty and such a default is set, otherwise value itself
        once the vocabulary accepts it
    :raises Invalid: when the term is rejected by the vocabulary, or when
        any other error occurs while looking it up
    """
    default_datatype = config.get(CONFIG_FAO_DATATYPE)
    if not value and default_datatype:
        return default_datatype
    # An empty value with no configured default falls through and is
    # checked against the vocabulary like any other value.
    try:
        vocabulary = Vocabulary.get(Vocabulary.VOCABULARY_DATATYPE)
        if not vocabulary.valid_term(value):
            raise ValueError(_("Term not valid"))
        return value
    except Exception as err:
        # Every failure (including the ValueError raised above) is reported
        # to the caller as a single Invalid carrying the original message.
        raise Invalid(_("Invalid datatype value: {}: {}").format(value, err))
def test_vocabulary_create(self):
    """
    Test vocabulary command items
    """
    commands = VocabularyCommands('vocabulary')

    # Creating through the command must make the vocabulary retrievable.
    commands.cmd_create('test')
    created = Vocabulary.get('test')
    self.assertIsNotNone(created)
    self.assertEqual(created.name, 'test')

    # Listing must run without raising, and usage text must be available.
    commands.cmd_list()
    self.assertIsNotNone(commands.usage)
def fao_m49_regions(key, flattened_data, errors, context):
    """
    Validate the M49 region terms stored under key, updating data in place.

    :param key: key of the field inside flattened_data / errors
    :param flattened_data: flattened data dict; flattened_data[key] is
        replaced with the list of validated terms (empty list when the
        value is missing)
    :param errors: errors dict; problems are appended to errors[key]
    :param context: validator context (not used here)
    :returns: None; results are communicated through flattened_data and
        errors
    """
    # we use extended api to update data dict in-place
    # this way we avoid various errors in harvesters,
    # which don't populate extras properly
    value = flattened_data[key]
    if isinstance(value, Missing) or value is None:
        # Nothing to validate: normalise to an empty list and stop, so a
        # missing value is never iterated below.
        flattened_data[key] = []
        return

    value = _deserialize_from_array(value)
    validated = []
    try:
        vocabulary = Vocabulary.get(Vocabulary.VOCABULARY_M49_REGIONS)
        for term in value:
            if not vocabulary.valid_term(term):
                errors[key].append(
                    ValueError(_("Term not valid: {}").format(term)))
                # Stop at the first invalid term; terms accepted before it
                # are still stored below.
                break
            validated.append(term)
        flattened_data[key] = validated
    except Exception as err:
        errors[key].append(
            Invalid(_("Invalid m49 regions: {} {}").format(value, err)))
def cmd_import_agrovoc(self, in_file, *args, **kwargs):
    """
    Import AGROVOC terms from RDF file

    syntax: import_agrovoc rdf_file

    The file is parsed in 'nt' format. Every SKOS concept is converted
    into a CSV row (parent, term, one label column per offered language,
    list of all parents) and the resulting CSV is loaded into the AGROVOC
    vocabulary, which is created with relations if it does not exist yet.
    Datasets that use terms missing from the vocabulary are printed.
    """
    # split() without an argument ignores repeated/surrounding whitespace;
    # split(' ') would turn it into empty language codes and thus bogus
    # 'lang:' columns.
    offered_langs = (config.get('ckan.locales_offered') or 'en es fr de it').lower().split()
    lang_columns = tuple('lang:{}'.format(lang) for lang in offered_langs)
    header = ('parent', 'term',) + lang_columns + ('property:parents',)
    rdata = [header]

    g = Graph()
    g.parse(in_file, format='nt')

    # The pattern fixes predicate and object, so only the first element of
    # each matched triple (the concept itself) varies.
    for concept, _predicate, _concept_type in g.triples((None, RDF.type, SKOS.Concept)):
        # The term id is the last path segment of the concept URI.
        row = {'term': str(concept).split('/')[-1]}

        for label_triple in g.triples((concept, SKOS.prefLabel, None)):
            label = label_triple[2]
            if label.language not in offered_langs:
                continue
            row['lang:{}'.format(label.language)] = label.value

        parents = [str(broader[-1]).split('/')[-1]
                   for broader in g.triples((concept, SKOS.broader, None))]
        # 'parent' holds the last broader concept found (None when the
        # concept has none); the full list goes into 'property:parents'.
        row['parent'] = parents[-1] if parents else None
        row['property:parents'] = ','.join(parents)

        row_data = []
        for col in header:
            if col.startswith('lang'):
                # Missing translations fall back to English, then French.
                val = row.get(col) or row.get('lang:en') or row.get('lang:fr')
            else:
                val = row[col]
            row_data.append(val.encode('utf-8') if isinstance(val, unicode) else val)

        if row['parent']:
            rdata.append(row_data)
        else:
            # top-level should be first
            rdata.insert(1, row_data)

    # rdata[0] is the header row, which is not a term.
    log.info('AGROVOC terms parsed: %s', len(rdata) - 1)

    csvdata = StringIO()
    csv.writer(csvdata).writerows(rdata)
    csvdata.seek(0)

    voc_name = Vocabulary.VOCABULARY_AGROVOC
    try:
        # Only the existence check matters; the object itself is not used.
        Vocabulary.get(voc_name)
    except ValueError:
        Vocabulary.create(voc_name, has_relations=True)
    count = load_vocabulary(voc_name, csvdata)
    log.info('AGROVOC terms imported: %s', count)

    cleanup_stats = find_unused_terms(voc_name, 'fao_agrovoc')
    if cleanup_stats['datasets']:
        print("Following dataset have terms not present in vocabulary:")
        for dname, tvals in sorted(cleanup_stats['datasets'].items()):
            print(' dataset', dname, ':', ','.join(tvals))
def cmd_import_m49(self, in_file, *args, **kwargs):
    """
    Convert xlsx file with m49 data into vocabulary

    syntax: import_m49 in_file

    Rows are read from the active sheet starting at row 6. Level-1
    entries and countries are collected separately (keyed by their code,
    so repeated entries collapse), written to an in-memory CSV with
    level-1 rows first, and loaded into the M49 regions vocabulary, which
    is created with relations if it does not exist yet.
    """
    workbook = load_workbook(in_file)
    sheet = workbook.active

    # Column positions are 1-based; they are converted to 0-based indexes
    # when a row is accessed. Columns 10/11 (level-2 code/name) are not
    # imported.
    IDX_COUNTRY_M49 = 1
    IDX_COUNTRY_ISO3 = 2
    IDX_COUNTRY_NAME = 3
    IDX_L1_M49 = 8
    IDX_L1_NAME = 9

    countries = {}  # key: id , value : row
    level1 = {}  # key: id , value : row

    # (data columns, parent column or None, destination dict)
    groups = (
        ((IDX_COUNTRY_M49, IDX_COUNTRY_NAME, IDX_COUNTRY_ISO3), IDX_L1_M49, countries),
        ((IDX_L1_M49, IDX_L1_NAME), None, level1),
    )

    for row in sheet.iter_rows(min_row=6):
        for columns, parent_col, target in groups:
            # record layout: (parent, data..)
            record = [row[parent_col - 1].value if parent_col else None]
            for col in columns:
                cell_value = row[col - 1].value
                if cell_value:
                    try:
                        cell_value = cell_value.replace('(M49)', '').replace('(MDG=M49)', '')
                    except AttributeError:
                        # prolly it has been parsed as a number
                        pass
                if isinstance(cell_value, unicode):
                    cell_value = cell_value.encode('utf-8')
                record.append(cell_value)
            if any(record):
                # l1 and l2 may be repeated
                target[record[1]] = record

    csvdata = StringIO()
    writer = csv.writer(csvdata)
    writer.writerow(['parent', 'term', 'property:country_code', 'lang:en', 'lang:fr', 'lang:es'])
    # Level-1 rows go first; they carry no country code.
    for parent, term, name in level1.values():
        writer.writerow([parent, term, "", name, name + " [FR]", name + " [ES]"])
    for parent, term, name, iso3 in countries.values():
        writer.writerow([parent, term, iso3, name, name + " [FR]", name + " [ES]"])
    csvdata.seek(0)

    voc_name = Vocabulary.VOCABULARY_M49_REGIONS
    try:
        Vocabulary.get(voc_name)
    except ValueError:
        Vocabulary.create(voc_name, has_relations=True)
    count = load_vocabulary(voc_name, csvdata)
    print(_('loaded {} terms from {} to {} vocabulary').format(count, in_file, voc_name))