def _initialize_columns(context, col_name, ds, total, reference_resource):
    """Make sure the datastore table holds every column used for interlinking.

    The required columns are, in order: the first reference field, the
    bookkeeping columns ``int__score``, ``int__checked_flag`` and
    ``int__all_results``, and then the remaining reference fields. If at
    least one of them is missing from ``ds['fields']`` the datastore table
    is dropped and re-created with exactly these columns.

    :param context: CKAN action context handed to the datastore actions.
    :param col_name: not used by this function; kept for interface
        compatibility.
    :param ds: dict describing the datastore; ``'fields'`` and
        ``'resource_id'`` are read from it.
    :param total: not used by this function; kept for interface
        compatibility.
    :param reference_resource: reference resource passed to
        ``lucene_access.getFields``.
    :returns: the list of field definitions when the table was re-created,
        ``None`` when every required column already existed, or the
        non-list value returned by ``lucene_access.getFields`` (an error
        code) when the reference fields could not be retrieved.
    """
    # Get reference dataset's fields that should be stored in datastore
    reference_field_names = lucene_access.getFields(reference_resource, True)
    if not isinstance(reference_field_names, list):
        # It carries -1 value as an error code
        return reference_field_names

    # NOTE(review): an empty list raises IndexError here, exactly as before.
    search_field = reference_field_names[0]

    # Fields as they are supposed to be stored in the datastore
    final_fields = [
        {'id': search_field, 'type': 'text'},
        {'id': u"int__score", 'type': 'text'},
        {'id': u"int__checked_flag", 'type': 'boolean'},
        {'id': u"int__all_results", 'type': 'text'},
    ]
    final_fields.extend(
        {'id': field, 'type': 'text'}
        for field in reference_field_names
        if field != search_field
    )

    # A datastore without a 'fields' entry is treated as having no columns
    # instead of failing while iterating None.
    current_ids = set(
        current_field['id'] for current_field in (ds.get('fields') or [])
    )
    if all(final_field['id'] in current_ids for final_field in final_fields):
        # Nothing to do: every required column is already present.
        return None

    resource_id = ds['resource_id']

    # Drop and recreate datastore table
    p.toolkit.get_action('datastore_delete')(
        context, {'resource_id': resource_id, 'force': True})

    # Update fields with datastore_create
    p.toolkit.get_action('datastore_create')(context, {
        'resource_id': resource_id,
        'force': True,
        'allow_update_with_id': True,
        'fields': final_fields,
    })
    return final_fields
def _build_interlinked_record(record_id, suggestions, best_suggestion=None):
    """Build the datastore row stored for one original record.

    Values are copied from ``best_suggestion``; when it is ``None`` (no
    suggestion was found) every value column is an empty string.
    """
    fields = suggestions['fields']
    # The first field is the field on which the search was run
    search_field = fields[0]
    has_best = best_suggestion is not None

    nrec = {
        '_id': record_id,
        search_field: best_suggestion[search_field] if has_best else "",
        'int__score': best_suggestion['scoreField'] if has_best else "",
        'int__checked_flag': False,
        'int__all_results': json.dumps(suggestions),
    }
    for field in fields:
        if field != search_field and field != 'scoreField':
            nrec[field] = best_suggestion[field] if has_best else ""
    return nrec


def _interlink_column(context, res, col_name, original_ds, new_ds, reference, ref_fields):
    """Interlink one column of a resource against a reference resource.

    Steps performed:

    1. Mark ``col_name`` in the resource's ``interlinking_columns_status``
       with ``reference`` (only if the column is already listed there).
    2. Store ``col_name`` as ``interlinked_column`` on the parent resource
       (``res['interlinking_parent_id']``) and flag ``res`` as an
       interlinking resource whose status is ``'undergoing'``.
    3. Read the original datastore in pages of 100 records ordered by
       ``_id``, look every ``col_name`` value up with
       ``lucene_access.search`` and upsert one row per record into the
       datastore of ``new_ds``. The row holds the highest-scoring
       suggestion, or empty strings when nothing was suggested.

    :param context: CKAN action context handed to every action call.
    :param res: dict of the interlinking resource.
    :param col_name: name of the column being interlinked.
    :param original_ds: dict of the source datastore; ``'resource_id'`` and
        ``'total'`` are read from it.
    :param new_ds: dict of the target datastore; ``'resource_id'`` is read
        from it.
    :param reference: reference resource passed to ``lucene_access``.
    :param ref_fields: reference fields, stored JSON-encoded on the resource
        as ``reference_fields``.
    :returns: ``new_ds`` on success, or ``-1`` when ``lucene_access.search``
        returns an int or ``lucene_access.getFields`` returns a non-list.
        Rows upserted before the failure are not rolled back.
    """
    res_id = original_ds.get('resource_id')
    total = original_ds.get('total')

    columns = json.loads(res.get('interlinking_columns_status', '{}'))
    # The interlinked column is marked with the reference resource with
    # which it is interlinked.
    if col_name in columns:
        columns[col_name] = reference
    columns = json.dumps(columns)

    original_res = p.toolkit.get_action('resource_show')(
        context, {'id': res.get('interlinking_parent_id')})
    original_res['interlinked_column'] = col_name
    p.toolkit.get_action('resource_update')(context, original_res)

    res = p.toolkit.get_action('resource_show')(context, res)
    res['interlinking_resource'] = True
    res['interlinking_columns_status'] = columns
    res['interlinking_status'] = 'undergoing'
    res['reference_fields'] = json.dumps(ref_fields)
    p.toolkit.get_action('resource_update')(context, res)

    STEP = 100
    # Placeholder "no results" payload; fetched lazily on the first record
    # without suggestions and reused afterwards, since it only depends on
    # `reference`.
    empty_suggestions = None

    for k in range(0, int(ceil(total / float(STEP)))):
        recs = p.toolkit.get_action('datastore_search')(context, {
            'resource_id': res_id,
            'offset': k * STEP,
            'limit': STEP,
            'sort': '_id',
        }).get('records')

        nrecs = []
        for rec in recs:
            original_term = rec.get(col_name)
            suggestions = lucene_access.search(original_term, reference, 'search')
            if isinstance(suggestions, int):
                # An int result is an error code from lucene_access.
                return -1

            # If any suggestions were returned
            if len(suggestions['records']) > 0:
                # max() keeps the first of equally scored suggestions, just
                # like a strict ">" scan over the list.
                best_suggestion = max(
                    suggestions['records'],
                    key=lambda suggestion: suggestion['scoreField'])
                nrecs.append(_build_interlinked_record(
                    rec.get('_id'), suggestions, best_suggestion))
                continue

            # No suggestions were returned
            if empty_suggestions is None:
                real_fields = lucene_access.getFields(reference, False)
                if not isinstance(real_fields, list):
                    return -1
                empty_suggestions = {
                    "fields": real_fields,
                    "records": [],
                }
            nrecs.append(_build_interlinked_record(
                rec.get('_id'), empty_suggestions))

        p.toolkit.get_action('datastore_upsert')(context, {
            'resource_id': new_ds.get('resource_id'),
            'allow_update_with_id': True,
            'force': True,
            'records': nrecs,
        })

    return new_ds