def interlinking_star_search(context, data_dict):
    '''Search lucene for terms using a trailing '*' wildcard.

    The wildcard is positioned at the end of the search string, so the
    lookup is run in the backend's 'like' mode.

    :param context: action context; an optional 'schema' entry overrides
        the default validation schema.
    :param data_dict: request parameters; after validation the 'term' and
        'reference_resource' entries are read from it.
    :returns: the value returned by ``lucene_access.search``, or an empty
        string when that value is an int (presumably an error code --
        confirm against ``lucene_access``).
    :raises p.toolkit.ValidationError: if ``data_dict`` fails validation.
    '''
    # The default schema is built unconditionally, exactly as it would be
    # when passed inline as the fallback argument of ``dict.get``.
    default_schema = dsschema.interlinking_star_search_schema()
    schema = context.get('schema', default_schema)

    data_dict, errors = _validate(data_dict, schema, context)
    if errors:
        raise p.toolkit.ValidationError(errors)

    search_term = data_dict.get('term')
    reference_resource = data_dict.get('reference_resource')
    matches = lucene_access.search(search_term, reference_resource, 'like')

    # An int result is reported to the caller as "nothing found".
    return '' if isinstance(matches, int) else matches
def _build_interlinked_row(rec_id, fields, best, all_results):
    '''Build the datastore row holding the interlinking outcome of one record.

    :param rec_id: '_id' of the original record the row belongs to.
    :param fields: field names of the reference; the first one is the field
        on which the search was run.
    :param best: the chosen suggestion dict, or None when nothing matched
        (every value is then stored as an empty string).
    :param all_results: structure serialised into 'int__all_results'.
    :returns: dict ready to be passed to ``datastore_upsert``.
    '''
    # The first field is the field on which the search was run
    search_field = fields[0]
    matched = best is not None
    row = {
        '_id': rec_id,
        search_field: best[search_field] if matched else "",
        'int__score': best['scoreField'] if matched else "",
        'int__checked_flag': False,
        'int__all_results': json.dumps(all_results),
    }
    # Copy the remaining reference fields; the score is already stored
    # under 'int__score' and the search field was set above.
    for field in fields:
        if field != search_field and field != 'scoreField':
            row[field] = best[field] if matched else ""
    return row


def _interlink_column(context, res, col_name, original_ds, new_ds, reference, ref_fields):
    '''Interlink one column of a datastore resource against a reference.

    The function first records the interlinking state on both resources:
    the parent resource (``res['interlinking_parent_id']``) gets
    'interlinked_column' set, and ``res`` is flagged as an interlinking
    resource with status 'undergoing'. It then pages through the original
    datastore table, asks lucene for suggestions for every value of
    ``col_name`` and upserts one row per record into the new datastore
    table, keeping the suggestion with the highest 'scoreField'.

    :param context: action context passed to every CKAN action call.
    :param res: resource dict of the interlinking resource.
    :param col_name: name of the column being interlinked.
    :param original_ds: dict providing 'resource_id' and 'total' of the
        original datastore table.
    :param new_ds: dict providing 'resource_id' of the table to write to.
    :param reference: reference resource the column is interlinked with.
    :param ref_fields: stored JSON-encoded in ``res['reference_fields']``.
    :returns: ``new_ds`` on success, or -1 as soon as a lucene call does
        not return the expected structure.

    NOTE(review): on the -1 path the resource status stays 'undergoing'
    and pages that were already upserted are not rolled back.
    '''
    res_id = original_ds.get('resource_id')
    total = original_ds.get('total')

    # The interlinked column is marked with the reference resource with
    # which it is interlinked. A direct membership test replaces iterating
    # the dict while updating it.
    columns = json.loads(res.get('interlinking_columns_status', '{}'))
    if col_name in columns:
        columns[col_name] = reference
    columns = json.dumps(columns)

    get_action = p.toolkit.get_action

    original_res = get_action('resource_show')(
        context, {'id': res.get('interlinking_parent_id')})
    original_res['interlinked_column'] = col_name
    original_res = get_action('resource_update')(context, original_res)

    res = get_action('resource_show')(context, res)
    res['interlinking_resource'] = True
    res['interlinking_columns_status'] = columns
    res['interlinking_status'] = 'undergoing'
    res['reference_fields'] = json.dumps(ref_fields)
    res = get_action('resource_update')(context, res)

    STEP = 100
    # Field list used for records without suggestions. It is fetched lazily,
    # at most once per run; this assumes the reference's fields do not
    # change while the column is being interlinked.
    fallback_fields = None

    for page in range(int(ceil(total / float(STEP)))):
        recs = get_action('datastore_search')(context, {
            'resource_id': res_id,
            'offset': page * STEP,
            'limit': STEP,
            'sort': '_id'}).get('records')

        nrecs = []
        for rec in recs:
            original_term = rec.get(col_name)
            suggestions = lucene_access.search(original_term, reference, 'search')
            if isinstance(suggestions, int):
                return -1

            candidates = suggestions['records']
            if len(candidates) > 0:
                # max() returns the first of several equally scored
                # suggestions, matching a strict '>' scan from the start.
                best_suggestion = max(candidates, key=lambda s: s['scoreField'])
                nrecs.append(_build_interlinked_row(
                    rec.get('_id'), suggestions['fields'],
                    best_suggestion, suggestions))
            else:
                # No suggestions were returned
                if fallback_fields is None:
                    real_fields = lucene_access.getFields(reference, False)
                    if not isinstance(real_fields, list):
                        return -1
                    fallback_fields = real_fields
                nrecs.append(_build_interlinked_row(
                    rec.get('_id'), fallback_fields, None,
                    {"fields": fallback_fields, "records": []}))

        get_action('datastore_upsert')(context, {
            'resource_id': new_ds.get('resource_id'),
            'allow_update_with_id': True,
            'force': True,
            'records': nrecs})

    return new_ds