def _load_one_csv_file(self, name):
    path, csv_name = os.path.split(name)
    assert csv_name.endswith('.csv'), csv_name
    resource_name = csv_name[:-4]
    print resource_name
    chromo = get_chromo(resource_name)
    dataset_type = chromo['dataset_type']
    # upsert needs a primary key to match on; insert is the fallback
    method = 'upsert' if chromo.get('datastore_primary_key') else 'insert'
    lc = LocalCKAN()
    for org_name, records in csv_data_batch(name, chromo):
        results = lc.action.package_search(
            q='type:%s organization:%s' % (dataset_type, org_name),
            rows=2)['results']
        if not results:
            print 'type:%s organization:%s not found!' % (
                dataset_type, org_name)
            return 1
        if len(results) > 1:
            print 'type:%s organization:%s multiple found!' % (
                dataset_type, org_name)
            return 1
        for r in results[0]['resources']:
            if r['name'] == resource_name:
                break
        else:
            # no break: the dataset exists but lacks the expected resource
            print 'type:%s organization:%s missing resource:%s' % (
                dataset_type, org_name, resource_name)
            return 1
        print '-', org_name, len(records)
        lc.action.datastore_upsert(
            method=method, resource_id=r['id'], records=records)
    return 0
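# Hedged usage sketch (not from the source): _load_one_csv_file returns a
# shell-style exit code, 0 on success and 1 on the first lookup failure, so
# a command wrapper can pass it straight to sys.exit(). The wrapper function
# and the CSV path below are illustrative assumptions.
import sys

def load_csv_files(command, paths):
    for name in paths:
        rc = command._load_one_csv_file(name)
        if rc:
            return rc  # stop at the first failing file, propagate its code
    return 0

# e.g. sys.exit(load_csv_files(cmd, ['/tmp/contracts.csv']))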
def rebuild(command_name, csv_files=None, solr_url=None, strict=True):
    """
    Implement rebuild command

    :param csv_files: sequence of paths to .csv files for input
    :type csv_files: sequence of str

    :return: Nothing
    :rtype: None
    """
    clear_index(command_name, solr_url, False)

    conn = solr_connection(command_name, solr_url)
    lc = LocalCKAN()
    if csv_files:
        for csv_file in csv_files:
            print csv_file + ':'
            prev_org = None
            unmatched = None
            firstpart, filename = os.path.split(csv_file)
            assert filename.endswith('.csv')
            resource_name = filename[:-4]
            chromo = get_chromo(resource_name)
            geno = get_geno(chromo['dataset_type'])
            for org_id, records in csv_data_batch(
                    csv_file, chromo, strict=strict):
                records = [
                    dict((k, safe_for_solr(v)) for k, v in row_dict.items())
                    for row_dict in records]
                if org_id != prev_org:
                    # organization boundary: reset the unmatched buffer
                    unmatched = None
                    prev_org = org_id
                try:
                    org_detail = lc.action.organization_show(id=org_id)
                except NotFound:
                    continue
                print " {0:s} {1}".format(org_id, len(records))
                unmatched = _update_records(
                    records, org_detail, conn, resource_name, unmatched)
    else:
        for org in lc.action.organization_list():
            count = 0
            org_detail = lc.action.organization_show(id=org)
            unmatched = None
            for resource_name, records in data_batch(
                    org_detail['id'], lc, command_name):
                unmatched = _update_records(
                    records, org_detail, conn, resource_name, unmatched)
                count += len(records)
            print org, count
    print "commit"
    conn.commit()
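# Hedged invocation sketch for the two paths above; the 'pd' command name,
# file path and Solr URL are illustrative assumptions only.

# Full reindex from the datastore: walks every organization via data_batch().
rebuild('pd', solr_url='http://localhost:8983/solr/pd')

# Targeted reindex from an exported CSV; strict=False is passed through to
# csv_data_batch, presumably relaxing row validation (an assumption -- that
# helper's implementation is not shown here).
rebuild('pd', csv_files=['/tmp/contracts.csv'],
        solr_url='http://localhost:8983/solr/pd', strict=False)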
def rebuild(command_name, csv_files=None, solr_url=None):
    """
    Implement rebuild command

    :param csv_files: sequence of paths to .csv files for input
    :type csv_files: sequence of str

    :return: Nothing
    :rtype: None
    """
    clear_index(command_name, solr_url, False)

    conn = solr_connection(command_name, solr_url)
    lc = LocalCKAN()
    if csv_files:
        for csv_file in csv_files:
            print csv_file + ':'
            prev_org = None
            unmatched = None
            firstpart, filename = os.path.split(csv_file)
            assert filename.endswith('.csv')
            resource_name = filename[:-4]
            chromo = get_chromo(resource_name)
            geno = get_geno(chromo['dataset_type'])
            for org_id, records in csv_data_batch(csv_file, chromo):
                records = [
                    dict((k, safe_for_solr(v)) for k, v in row_dict.items())
                    for row_dict in records]
                if org_id != prev_org:
                    # organization boundary: reset the unmatched buffer
                    unmatched = None
                    prev_org = org_id
                try:
                    org_detail = lc.action.organization_show(id=org_id)
                except NotFound:
                    continue
                print " {0:s} {1}".format(org_id, len(records))
                unmatched = _update_records(
                    records, org_detail, conn, resource_name, unmatched)
    else:
        for org in lc.action.organization_list():
            count = 0
            org_detail = lc.action.organization_show(id=org)
            unmatched = None
            for resource_name, records in data_batch(
                    org_detail['id'], lc, command_name):
                unmatched = _update_records(
                    records, org_detail, conn, resource_name, unmatched)
                count += len(records)
            print org, count
    print "commit"
    conn.commit()
def _rebuild(self, csv_files=None, solr_url=None, strict=True):
    """
    Implement rebuild command

    :param csv_files: sequence of paths to .csv files for input
    :type csv_files: sequence of str

    :return: Nothing
    :rtype: None
    """
    self._clear_index(solr_url, False)

    conn = solr_connection('ati', solr_url)
    lc = LocalCKAN()
    if csv_files:
        for csv_file in csv_files:
            print csv_file + ':'
            firstpart, filename = os.path.split(csv_file)
            assert filename.endswith('.csv')
            resource_name = filename[:-4]
            chromo = get_chromo(resource_name)
            geno = get_geno(chromo['dataset_type'])
            assert geno.get('target_dataset') == TARGET_DATASET
            for org_id, records in csv_data_batch(
                    csv_file, chromo, strict=strict):
                records = [
                    dict((k, safe_for_solr(v)) for k, v in row_dict.items())
                    for row_dict in records]
                try:
                    org_detail = lc.action.organization_show(id=org_id)
                except NotFound:
                    continue
                print " {0:s} {1}".format(org_id, len(records))
                _update_records(records, org_detail, conn)
    else:
        for org_id in lc.action.organization_list():
            count = 0
            org_detail = lc.action.organization_show(id=org_id)
            for resource_name, records in data_batch(
                    org_detail['id'], lc, TARGET_DATASET):
                _update_records(records, org_detail, conn)
                count += len(records)
            print org_id, count
    print "commit"
    conn.commit()
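# The single conn.commit() at the end of _rebuild matters: documents sent to
# Solr only become searchable once a commit is issued. A minimal sketch of
# that contract, assuming a pysolr-style client (an assumption -- the real
# client comes from the project's solr_connection() helper):
import pysolr

solr = pysolr.Solr('http://localhost:8983/solr/ati')
solr.add([{'id': 'doc-1'}], commit=False)  # indexed but not yet visible
solr.commit()                              # flushed: doc-1 is now searchable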
def _load_one_csv_file(self, name):
    path, csv_name = os.path.split(name)
    assert csv_name.endswith('.csv'), csv_name
    resource_name = csv_name[:-4]
    print resource_name
    chromo = get_chromo(resource_name)
    dataset_type = chromo['dataset_type']
    method = 'upsert' if chromo.get('datastore_primary_key') else 'insert'
    lc = LocalCKAN()
    for org_name, records in csv_data_batch(name, chromo):
        results = lc.action.package_search(
            q='type:%s organization:%s' % (dataset_type, org_name),
            include_private=True,
            rows=2)['results']
        if not results:
            print 'type:%s organization:%s not found!' % (
                dataset_type, org_name)
            return 1
        if len(results) > 1:
            print 'type:%s organization:%s multiple found!' % (
                dataset_type, org_name)
            return 1
        for res in results[0]['resources']:
            if res['name'] == resource_name:
                break
        else:
            print 'type:%s organization:%s missing resource:%s' % (
                dataset_type, org_name, resource_name)
            return 1
        # convert comma-separated values in '_text' (text[]) columns to lists
        list_fields = [f['datastore_id'] for f in chromo['fields']
                       if f['datastore_type'] == '_text']
        if list_fields:
            for r in records:
                for k in list_fields:
                    if not r[k]:
                        r[k] = []
                    else:
                        r[k] = r[k].split(',')
        print '-', org_name, len(records)
        lc.action.datastore_upsert(
            method=method, resource_id=res['id'], records=records)
    return 0
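# Standalone sketch of the '_text' conversion performed above: empty strings
# become empty lists, comma-separated strings become lists of values. The
# field names here are illustrative, not from any real chromo definition.
record = {'keywords': 'health,census', 'aliases': ''}
for k in ('keywords', 'aliases'):
    record[k] = record[k].split(',') if record[k] else []
assert record == {'keywords': ['health', 'census'], 'aliases': []}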
def _rebuild(self, csv_files=None, solr_url=None):
    """
    Implement rebuild command

    :param csv_files: sequence of paths to .csv files for input
    :type csv_files: sequence of str

    :return: Nothing
    :rtype: None
    """
    self._clear_index(solr_url, False)

    conn = solr_connection('ati', solr_url)
    lc = LocalCKAN()
    if csv_files:
        for csv_file in csv_files:
            print csv_file + ':'
            firstpart, filename = os.path.split(csv_file)
            assert filename.endswith('.csv')
            resource_name = filename[:-4]
            chromo = get_chromo(resource_name)
            geno = get_geno(chromo['dataset_type'])
            assert geno.get('target_dataset') == TARGET_DATASET
            for org_id, records in csv_data_batch(csv_file, chromo):
                records = [
                    dict((k, safe_for_solr(v)) for k, v in row_dict.items())
                    for row_dict in records]
                try:
                    org_detail = lc.action.organization_show(id=org_id)
                except NotFound:
                    continue
                print " {0:s} {1}".format(org_id, len(records))
                _update_records(records, org_detail, conn)
    else:
        for org_id in lc.action.organization_list():
            count = 0
            org_detail = lc.action.organization_show(id=org_id)
            for resource_name, records in data_batch(
                    org_detail['id'], lc, TARGET_DATASET):
                _update_records(records, org_detail, conn)
                count += len(records)
            print org_id, count
    print "commit"
    conn.commit()