def anotateshowonresults(query_filtered, user, isadvanced, query_reference):
    """Record which fingerprints were returned for a search.

    Runs ``query_filtered`` through ``CoreEngine.search_fingerprint`` and, for
    every hit whose id does not start with ``"questionaire_"``, saves a
    ``FingerprintReturnedAdvanced`` (when ``isadvanced`` is truthy) or a
    ``FingerprintReturnedSimple`` row tying the fingerprint to ``user`` and
    ``query_reference``. Hits without a matching ``Fingerprint`` are reported
    and skipped.

    :returns: always ``0``.
    """
    print("start annotating database appearing on results")
    engine = CoreEngine()
    for hit in engine.search_fingerprint(query_filtered):
        hit_id = hit['id']
        if hit_id.startswith("questionaire_"):
            continue
        try:
            fp = Fingerprint.objects.get(fingerprint_hash=hit_id)
            print("processing " + str(fp))
            # Both record types take the same arguments; only the class differs.
            record_cls = (FingerprintReturnedAdvanced if isadvanced
                          else FingerprintReturnedSimple)
            record = record_cls(fingerprint=fp, searcher=user,
                                query_reference=query_reference)
            record.save()
        except Fingerprint.DoesNotExist:
            print(hit_id + ' doesnt exist on db')
    print("ends annotation of databases appearing on results")
    return 0
def get_query_from_more_like_this(request, doc_id, type, maxx=100):
    """Build a boosted Solr id query from the "more like this" hits of a document.

    :param request: request whose ``user`` is looked up in ``EmifProfile``.
    :param doc_id: id of the reference document.
    :param type: forwarded to ``CoreEngine.more_like_this``.
    :param maxx: forwarded as ``maxx`` to ``CoreEngine.more_like_this``.
    :returns: ``(queryString, database_name)``. ``queryString`` has the form
        ``"id:(<id>^<score> <id>^<score> )"`` or is ``None`` when there are no
        hits; ``database_name`` is the ``database_name_t`` of ``doc_id`` or
        ``""`` when it is not found.
    """
    try:
        eprofile = EmifProfile.objects.get(user=request.user)
    except EmifProfile.DoesNotExist:
        # Previously execution fell through with ``eprofile`` unbound and the
        # next line crashed; a missing profile is now handled explicitly.
        print("-- ERROR: Couldn't get emif profile for user")
        eprofile = None

    if eprofile is None or eprofile.restricted == True:
        # NOTE(review): the restriction clause is computed but never passed to
        # more_like_this(), so it has no effect on the hits below -- confirm
        # whether it should be applied.
        query = restriction(request.user)

    c = CoreEngine()
    results = c.more_like_this(doc_id, type, maxx=maxx)
    if len(results) > 0:
        # One "<id>^<score> " term per hit that carries an id.
        boosted_terms = "".join(
            r["id"] + "^" + str(r["score"]) + " " for r in results if "id" in r)
        queryString = "id:(" + boosted_terms + ")"
    else:
        queryString = None

    # A second lookup is needed to learn the reference document's own name.
    database_name = ""
    results = c.search_fingerprint("id:" + doc_id, start=0, rows=1,
                                   fl="database_name_t")
    for r in results:
        if "database_name_t" in r:
            database_name = r["database_name_t"]
    return (queryString, database_name)
def anotateshowonresults(query_filtered, user, isadvanced, query_reference):
    """Record which fingerprints were returned for a search.

    Runs ``query_filtered`` through ``CoreEngine.search_fingerprint`` and, for
    every hit whose id does not start with ``"questionaire_"``, saves a
    ``FingerprintReturnedAdvanced`` (when ``isadvanced`` is truthy) or a
    ``FingerprintReturnedSimple`` row tying the fingerprint to ``user`` and
    ``query_reference``. Hits without a matching ``Fingerprint`` are reported
    and skipped.

    :returns: always ``0``.
    """
    print("start annotating database appearing on results")
    engine = CoreEngine()
    for hit in engine.search_fingerprint(query_filtered):
        hit_id = hit['id']
        if hit_id.startswith("questionaire_"):
            continue
        try:
            fp = Fingerprint.objects.get(fingerprint_hash=hit_id)
            print("processing " + str(fp))
            # Both record types take the same arguments; only the class differs.
            record_cls = (FingerprintReturnedAdvanced if isadvanced
                          else FingerprintReturnedSimple)
            record = record_cls(fingerprint=fp, searcher=user,
                                query_reference=query_reference)
            record.save()
        except Fingerprint.DoesNotExist:
            print(hit_id + ' doesnt exist on db')
    print("ends annotation of databases appearing on results")
    return 0
def get(self, request, *args, **kw):
    """Tell whether a database with the requested name is already indexed.

    Reads the ``name`` query parameter, searches ``database_name_t`` for it as
    a quoted phrase, then compares it case-insensitively (ignoring surrounding
    whitespace) with each hit, because the Solr match alone is not exact.

    :returns: ``Response({'contains': bool})`` with HTTP 200.
    """
    database_name = request.GET['name']
    # The name is user input placed inside a quoted Solr phrase: escape
    # backslashes and double quotes so it cannot break or alter the query.
    escaped_name = database_name.replace('\\', '\\\\').replace('"', '\\"')
    c = CoreEngine()
    results = c.search_fingerprint('database_name_t:"' + escaped_name + '"')

    wanted = database_name.lower().strip()
    contain = False
    for r in results:
        try:
            if wanted == r['database_name_t'].lower().strip():
                contain = True
                break
        except (KeyError, AttributeError):
            # Hit without a string database_name_t: it cannot match, skip it.
            continue

    result = {'contains': contain}
    return Response(result, status=status.HTTP_200_OK)
def generate_statistics_from_multiple_choice(question_slug):
    """Run one fingerprint search per choice of a multiple-choice question.

    NOTE(review): this function looks unfinished. ``q`` is not defined inside
    it (presumably the question identified by ``question_slug`` -- confirm
    where it should come from), ``question_slug`` itself is never used, and
    neither ``total_values`` nor the per-choice hit counts are returned or
    stored.
    """
    choices = Choice.objects.filter(question=q)
    total_values = calculate_total_values()
    c = CoreEngine()
    for choice in choices:
        # Search for this choice's value. The original concatenated the
        # literal text "choice.value", so every iteration ran the same query.
        query = "question_slug:" + choice.value
        results = c.search_fingerprint(query)
        number_results = len(results)
def reindexQuestionnaires():
    """Reindex the questionnaires on Solr, then drop the cache marker.

    The ``'reindexingQuestionnaires'`` cache entry (presumably set by the
    caller as an "in progress" flag -- confirm) is deleted even when the
    reindex raises, so a failed run does not leave it behind. The exception
    still propagates to the caller.
    """
    print("Reindexing questionnaires on solr")
    try:
        c = CoreEngine()
        c.reindex_quest_solr()
    finally:
        cache.delete('reindexingQuestionnaires')
def merge_highlight_results(query, resultHighlights):
    """Bundle result highlights with the question highlights for ``query``.

    :param query: when truthy, it is passed to
        ``CoreEngine.highlight_questions``.
    :param resultHighlights: stored unchanged under ``"results"``.
    :returns: dict with ``"results"`` and, when ``query`` is truthy,
        ``"questions"`` (the ``highlighting`` attribute of the question search).
    """
    engine = CoreEngine()
    merged = {"results": resultHighlights}
    if not query:
        return merged
    merged["questions"] = engine.highlight_questions(query).highlighting
    return merged
def querylog_created(sender, **kwargs):
    """Index a query-log instance in the ``suggestions`` core.

    Takes the object under ``kwargs['instance']`` and indexes a reduced copy
    of its fields: the stripped, lower-cased ``query`` plus ``user_id``,
    ``id`` and ``created_date``. ``sender`` is not used.
    """
    instance = kwargs['instance']
    engine = CoreEngine(core='suggestions')
    fields = instance.__dict__
    engine.index_fingerprint({
        'query': fields['query'].strip().lower(),
        'user_id': fields['user_id'],
        'id': fields['id'],
        'created_date': fields['created_date'],
    })
def get(self, request, *args, **kw):
    """Return up to ten "more like this" suggestions for the current user.

    Anonymous users get an empty 403 response. Otherwise the list is read
    from the cache under ``'recommendations_<user id>'``; on a miss it is
    rebuilt from the ``more_like_this`` hits of every active subscription
    (fused through ``self.__merge``), sorted by descending ``score``, cut to
    ten entries, decorated with ``name`` and ``href`` where the fingerprint
    is still valid, and cached with a timeout of 720.

    :returns: ``Response({'mlt': [...]})`` with HTTP 200, or an empty 403.
    """
    self.__subscribed = []
    self.__mlt_fused = {}

    if not request.user.is_authenticated():
        return Response({}, status=status.HTTP_403_FORBIDDEN)

    cache_key = 'recommendations_' + str(request.user.id)
    ordered = cache.get(cache_key)
    if ordered is None:
        maxx = 100
        subscriptions = FingerprintSubscription.active().filter(
            user=request.user)

        # Collect the already subscribed databases first, since they wont
        # appear on suggestions.
        self.__subscribed.extend(
            sub.fingerprint.fingerprint_hash for sub in subscriptions)

        engine = CoreEngine()
        for sub in subscriptions:
            subscribed_fp = sub.fingerprint
            similar = engine.more_like_this(subscribed_fp.fingerprint_hash,
                                            subscribed_fp.questionnaire.slug,
                                            maxx=maxx)
            self.__merge(similar)

        ranked = sorted(self.__mlt_fused.values(), reverse=True,
                        key=lambda x: x['score'])
        ordered = ranked[:10]

        for entry in ordered:
            try:
                suggested = Fingerprint.valid().get(
                    fingerprint_hash=entry['id'])
            except Fingerprint.DoesNotExist:
                continue
            entry['name'] = suggested.findName()
            entry['href'] = 'fingerprint/' + suggested.fingerprint_hash + '/1/'

        cache.set(cache_key, ordered, 720)

    return Response({'mlt': ordered}, status=status.HTTP_200_OK)
def index_all():
    """Rebuild the Solr index from every valid fingerprint.

    Deletes all documents matching ``type_t:*``, builds each fingerprint's
    document with ``indexFingerprint(batch_mode=True)`` and sends the whole
    batch through ``CoreEngine.index_fingerprints``.
    """
    documents = []
    engine = CoreEngine()
    valid_fingerprints = Fingerprint.valid()
    engine.deleteQuery('type_t:*')
    for fp in valid_fingerprints:
        print("-- Indexing fingerprint hash " + str(fp.fingerprint_hash))
        document = fp.indexFingerprint(batch_mode=True)
        documents.append(document)
    print("-- Committing to solr")
    engine.index_fingerprints(documents)
def calculate_databases_per_location():
    """Count indexed databases per country of the owning user's profile.

    For every ``EmifProfile`` the documents matching
    ``subject_id_t:<user id>`` are counted and added to the total of the
    profile's country.

    :returns: dict mapping country name to the number of matching documents.
    """
    users = EmifProfile.objects.all()
    c = CoreEngine()
    # Must be a dict: the original used a list and called has_key() on it.
    contries = {}
    for u in users:
        # Count number of DB's for each user (the id is numeric, hence str()).
        query = "subject_id_t:" + str(u.user.id)
        number_of_dbs = len(c.search_fingerprint(query))
        country_name = u.contry.name
        contries[country_name] = contries.get(country_name, 0) + number_of_dbs
    return contries
def get_databases_from_solr_with_highlight(request, query="*:*", sort="", rows=100, start=0, hlfl="*"):
    """Search fingerprints with highlighting, honouring the user's restriction.

    :param request: request whose ``user`` selects the ``EmifProfile``.
    :param query: Solr query; the clause returned by
        ``restriction(request.user)`` is appended for restricted users.
    :param sort: forwarded to ``CoreEngine.search_highlight``.
    :param rows: forwarded to ``CoreEngine.search_highlight``.
    :param start: forwarded to ``CoreEngine.search_highlight``.
    :param hlfl: forwarded to ``CoreEngine.search_highlight``.
    :returns: ``(list_databases, results.hits, results.highlighting)``.
    """
    try:
        eprofile = EmifProfile.objects.get(user=request.user)
    except EmifProfile.DoesNotExist:
        # Previously execution fell through with ``eprofile`` unbound and the
        # next line crashed.
        print("-- ERROR: Couldn't get emif profile for user")
        eprofile = None

    # NOTE(review): a user without a profile is treated as restricted (fail
    # closed) -- confirm this is the desired policy.
    if eprofile is None or eprofile.restricted == True:
        query += restriction(request.user)

    c = CoreEngine()
    results = c.search_highlight(query, sort=sort, rows=rows, start=start,
                                 hlfl=hlfl)
    list_databases = get_databases_process_results(results)
    return (list_databases, results.hits, results.highlighting)
def validate_fingerprint(user, fingerprintID):
    """
    Check whether a fingerprint shows up among the given user's documents.

    Searches ``user_t:"<username>"`` and compares ``fingerprintID`` with the
    ``id`` of each returned document.

    :param user: object providing ``username``.
    :param fingerprintID: id to look for.
    :returns: ``True`` when a returned document has that id, else ``False``.
    """
    engine = CoreEngine()
    owned = engine.search_fingerprint('user_t:"' + user.username + '"')
    return any(fingerprintID == hit['id'] for hit in owned)
def get_databases_from_solr_v2(request, query="*:*", sort="", rows=100, start=0, fl='',post_process=None):
    """Search fingerprints, honouring the user's restriction.

    :param request: request whose ``user`` selects the ``EmifProfile``.
    :param query: Solr query; the clause returned by
        ``restriction(request.user)`` is appended for restricted users.
    :param sort: forwarded to ``CoreEngine.search_fingerprint``.
    :param rows: forwarded to ``CoreEngine.search_fingerprint``.
    :param start: forwarded to ``CoreEngine.search_fingerprint``.
    :param fl: forwarded to ``CoreEngine.search_fingerprint``.
    :param post_process: optional callable ``(results, list_databases)`` whose
        return value replaces ``list_databases``.
    :returns: ``(list_databases, results.hits)``.
    """
    try:
        eprofile = EmifProfile.objects.get(user=request.user)
    except EmifProfile.DoesNotExist:
        # Previously execution fell through with ``eprofile`` unbound and the
        # restriction check crashed.
        print("-- ERROR: Couldn't get emif profile for user")
        eprofile = None

    c = CoreEngine()
    # NOTE(review): a user without a profile is treated as restricted (fail
    # closed) -- confirm this is the desired policy.
    if eprofile is None or eprofile.restricted == True:
        query += restriction(request.user)

    results = c.search_fingerprint(query, sort=sort, rows=rows, start=start,
                                   fl=fl)
    list_databases = get_databases_process_results(results)
    if post_process:
        list_databases = post_process(results, list_databases)
    return (list_databases, results.hits)
def get(self, request, *args, **kw):
    """Return up to ten "more like this" suggestions for the current user.

    Anonymous users get an empty 403 response. Otherwise the list is read
    from the cache under ``'recommendations_<user id>'``; on a miss it is
    rebuilt from the ``more_like_this`` hits of every active subscription
    (fused through ``self.__merge``), sorted by descending ``score``, cut to
    ten entries, decorated with ``name`` and ``href`` where the fingerprint
    is still valid, and cached with a timeout of 720.

    :returns: ``Response({'mlt': [...]})`` with HTTP 200, or an empty 403.
    """
    self.__subscribed = []
    self.__mlt_fused = {}

    if not request.user.is_authenticated():
        return Response({}, status=status.HTTP_403_FORBIDDEN)

    cache_key = 'recommendations_' + str(request.user.id)
    ordered = cache.get(cache_key)
    if ordered is None:
        maxx = 100
        subscriptions = FingerprintSubscription.active().filter(
            user=request.user)

        # Collect the already subscribed databases first, since they wont
        # appear on suggestions.
        self.__subscribed.extend(
            sub.fingerprint.fingerprint_hash for sub in subscriptions)

        engine = CoreEngine()
        for sub in subscriptions:
            subscribed_fp = sub.fingerprint
            similar = engine.more_like_this(subscribed_fp.fingerprint_hash,
                                            subscribed_fp.questionnaire.slug,
                                            maxx=maxx)
            self.__merge(similar)

        ranked = sorted(self.__mlt_fused.values(), reverse=True,
                        key=lambda x: x['score'])
        ordered = ranked[:10]

        for entry in ordered:
            try:
                suggested = Fingerprint.valid().get(
                    fingerprint_hash=entry['id'])
            except Fingerprint.DoesNotExist:
                continue
            entry['name'] = suggested.findName()
            entry['href'] = 'fingerprint/' + suggested.fingerprint_hash + '/1/'

        cache.set(cache_key, ordered, 720)

    return Response({'mlt': ordered}, status=status.HTTP_200_OK)
def get_database_from_id(id):
    """Load a ``Database`` object from the Solr document with the given id.

    Walks the hits for ``id:<id>`` and stops at the first one that yields
    ``id``, ``created_t`` (through ``convert_date``) and ``database_name_t``.
    A hit that fails part-way is reported and the next one is tried; as
    before, the partially filled object from the last failed hit is what gets
    returned if no hit succeeds.

    :param id: document id, concatenated into the query as-is.
    :returns: a ``Database`` instance, or ``None`` when there are no hits.
    """
    c = CoreEngine()
    results = c.search_fingerprint("id:" + id)
    database_aux = None
    for r in results:
        try:
            database_aux = Database()
            database_aux.id = r['id']
            database_aux.date = convert_date(r['created_t'])
            database_aux.name = r['database_name_t']
            break
        except Exception as e:
            # Was a bare ``except: pass``: keep the best-effort behaviour but
            # stop hiding the reason (and stop trapping KeyboardInterrupt).
            print("-- ERROR: Couldn't read database from solr document: "
                  + repr(e))
    return database_aux
def get_database_from_id(id):
    """Load a ``Database`` object from the Solr document with the given id.

    Walks the hits for ``id:<id>`` and stops at the first one that yields
    ``id``, ``created_t`` (through ``convert_date``) and ``database_name_t``.
    A hit that fails part-way is reported and the next one is tried; as
    before, the partially filled object from the last failed hit is what gets
    returned if no hit succeeds.

    :param id: document id, concatenated into the query as-is.
    :returns: a ``Database`` instance, or ``None`` when there are no hits.
    """
    c = CoreEngine()
    results = c.search_fingerprint("id:" + id)
    database_aux = None
    for r in results:
        try:
            database_aux = Database()
            database_aux.id = r['id']
            database_aux.date = convert_date(r['created_t'])
            database_aux.name = r['database_name_t']
            break
        except Exception as e:
            # Was a bare ``except: pass``: keep the best-effort behaviour but
            # stop hiding the reason (and stop trapping KeyboardInterrupt).
            print("-- ERROR: Couldn't read database from solr document: "
                  + repr(e))
    return database_aux
def indexFingerprint(self, batch_mode=False):
    """Build this fingerprint's Solr document and index it (or return it).

    The document holds the fingerprint's own fields (``id``, ``type_t``,
    modification/creation dates, ``user_t``, ``percentage_d``), one entry per
    answer whose question set allows indexing (``database_name`` is always
    indexed), answer comments, and the derived ``text_t`` / ``mlt_t`` fields.

    :param batch_mode: when true, return the document instead of indexing it.
    :returns: the document dict in batch mode, otherwise ``None``.
    """
    yes_no_types = ('choice-yesno', 'choice-yesnocomment',
                    'choice-yesnodontknow')

    def is_if_yes_no(question):
        # Exact membership test. The previous ``question.type in
        # 'choice-yesno'`` was a substring check on a string, so a type such
        # as 'choice' was wrongly treated as a yes/no question.
        return question.type in yes_no_types

    d = {}
    # Get parameters that are only on fingerprint
    d['id'] = self.fingerprint_hash
    d['type_t'] = self.questionnaire.slug
    d['date_last_modification_t'] = self.last_modification.strftime(
        '%Y-%m-%d %H:%M:%S.%f')
    d['created_t'] = self.created.strftime('%Y-%m-%d %H:%M:%S.%f')
    d['user_t'] = self.unique_users_string()
    d['percentage_d'] = self.fill

    # Texts of yes/no questions answered with "yes"; appended to the free text.
    adicional_parts = []

    # Add answers
    answers = Answer.objects.filter(fingerprint_id=self)
    for answer in answers:
        question = answer.question
        # We try to get permissions preferences for this question
        permissions = self.getPermissions(
            QuestionSet.objects.get(id=question.questionset.id))
        slug = question.slug_fk.slug1
        if permissions.allow_indexing or slug == 'database_name':
            setProperFields(d, question, slug, answer.data)
            if is_if_yes_no(question) and 'yes' in answer.data:
                adicional_parts.append(question.text + " ")
            if answer.comment is not None:
                d['comment_question_' + slug + '_t'] = answer.comment

    d['text_t'] = generateFreeText(d) + " " + "".join(adicional_parts)
    d['mlt_t'] = generateMltText(d)

    if batch_mode:
        return d

    print("-- Indexing unique fingerprint hash " + str(self.fingerprint_hash))
    c = CoreEngine()
    results = c.search_fingerprint("id:" + self.fingerprint_hash)
    if len(results) == 1:
        # Delete old entry if any
        c.delete(results.docs[0]['id'])
    c.index_fingerprint_as_json(d)
def fix_wrong_names_on_observational():
    """Rename the mis-named "repeated measurements" field on Solr documents.

    For every ``type_t:observationaldatasources`` document, the value stored
    under either wrongly named key is moved to
    ``If_yes__repeated_measurements_t`` and the document is sent back through
    ``CoreEngine.update``. Documents holding neither key are not updated.
    """
    wrong_keys = ("If_yes_:_repeated_measurements__t",
                  "If_yes_:_repeated_measurements_t")
    c = CoreEngine()
    documents = c.search_fingerprint("type_t:observationaldatasources")
    for doc in documents:
        # ``_version_`` is dropped before updating. A document without it is
        # still processed; the old bare ``except`` silently skipped the first
        # wrong key in that case.
        doc.pop("_version_", None)
        old = None
        for key in wrong_keys:
            # When both keys exist the later one wins, as before.
            if key in doc:
                old = doc.pop(key)
        if old is not None:
            print("Replacing")
            doc["If_yes__repeated_measurements_t"] = old
            c.update(doc)
def fix_wrong_names_on_observational():
    """Rename the mis-named "repeated measurements" field on Solr documents.

    For every ``type_t:observationaldatasources`` document, the value stored
    under either wrongly named key is moved to
    ``If_yes__repeated_measurements_t`` and the document is sent back through
    ``CoreEngine.update``. Documents holding neither key are not updated.
    """
    wrong_keys = ('If_yes_:_repeated_measurements__t',
                  'If_yes_:_repeated_measurements_t')
    c = CoreEngine()
    documents = c.search_fingerprint("type_t:observationaldatasources")
    for doc in documents:
        # ``_version_`` is dropped before updating. A document without it is
        # still processed; the old bare ``except`` silently skipped the first
        # wrong key in that case.
        doc.pop('_version_', None)
        old = None
        for key in wrong_keys:
            # When both keys exist the later one wins, as before.
            if key in doc:
                old = doc.pop(key)
        if old is not None:
            print("Replacing")
            doc['If_yes__repeated_measurements_t'] = old
            c.update(doc)
def __indexQueryLog(self):
    """Index every ``QueryLog`` row in the ``suggestions`` core.

    Each row's fields are sent with the ``query`` text stripped and
    lower-cased, in batches of 200 documents.
    """
    c = CoreEngine(core='suggestions')
    queries = QueryLog.objects.all()
    print("-- Indexing " + str(len(queries)) + " simple queries")
    temp_array = []
    for query in queries:
        # Work on a copy: the original deleted '_state' from, and rewrote
        # 'query' in, the model instance's own __dict__.
        qdict = dict(query.__dict__)
        del qdict['_state']
        qdict['query'] = qdict['query'].strip().lower()
        temp_array.append(qdict)
        if len(temp_array) == 200:
            c.index_fingerprints(temp_array)
            temp_array = []
    # Send whatever is left over from the last, incomplete batch.
    if temp_array:
        c.index_fingerprints(temp_array)
def get_database_from_id_with_tlv(db):
    """Attach a list of tag/value pairs from Solr to ``db``.

    Looks up the document ``id:<db.id>`` and, for its first hit only, turns
    every field except ``created_t``, ``type_t`` and ``_version_`` into a
    ``Tag``. The tag text is the matching ``Slugs`` description for the
    document's questionnaire (or the raw key when there is none); tag and
    value are both cut to 75 characters followed by ``'..'``.

    :param db: object providing ``id``; its ``fields`` attribute is set.
    :returns: the same ``db`` object.
    """
    engine = CoreEngine()
    hits = engine.search_fingerprint('id:' + db.id)

    class Tag:
        tag = ''
        value = ''

    skipped_keys = ['created_t', 'type_t', '_version_']
    fields = []
    for hit in hits:
        questionnaire = Questionnaire.objects.filter(slug=hit['type_t'])[0]
        for key in hit:
            if key in skipped_keys:
                continue
            entry = Tag()
            slugs = Slugs.objects.filter(
                slug1=key,
                question__questionset__questionnaire=questionnaire.pk)
            label = slugs[0].description if len(slugs) > 0 else key
            entry.tag = label[:75] + (label[75:] and '..')
            shown = clean_value(str(hit[key]))
            entry.value = shown[:75] + (shown[75:] and '..')
            fields.append(entry)
        # Only the first hit is used.
        break
    db.fields = fields
    return db
def get_database_from_id_with_tlv(db):
    """Attach a list of tag/value pairs from Solr to ``db``.

    Looks up the document ``id:<db.id>`` and, for its first hit only, turns
    every field except ``created_t``, ``type_t`` and ``_version_`` into a
    ``Tag``. The tag text is the matching ``Slugs`` description for the
    document's questionnaire (or the raw key when there is none); tag and
    value are both cut to 75 characters followed by ``'..'``.

    :param db: object providing ``id``; its ``fields`` attribute is set.
    :returns: the same ``db`` object.
    """
    engine = CoreEngine()
    hits = engine.search_fingerprint('id:' + db.id)

    class Tag:
        tag = ''
        value = ''

    skipped_keys = ['created_t', 'type_t', '_version_']
    fields = []
    for hit in hits:
        questionnaire = Questionnaire.objects.filter(slug=hit['type_t'])[0]
        for key in hit:
            if key in skipped_keys:
                continue
            entry = Tag()
            slugs = Slugs.objects.filter(
                slug1=key,
                question__questionset__questionnaire=questionnaire.pk)
            label = slugs[0].description if len(slugs) > 0 else key
            entry.tag = label[:75] + (label[75:] and '..')
            shown = clean_value(str(hit[key]))
            entry.value = shown[:75] + (shown[75:] and '..')
            fields.append(entry)
        # Only the first hit is used.
        break
    db.fields = fields
    return db
def indexFingerprint(self, batch_mode=False):
    """Build this fingerprint's Solr document and index it (or return it).

    The document holds the fingerprint's own fields (``id``, ``type_t``,
    modification/creation dates, ``user_t``, ``percentage_d``), one entry per
    answer whose question set allows indexing (``database_name`` is always
    indexed), answer comments, and the derived ``text_t`` / ``mlt_t`` fields.

    :param batch_mode: when true, return the document instead of indexing it.
    :returns: the document dict in batch mode, otherwise ``None``.
    """
    yes_no_types = ('choice-yesno', 'choice-yesnocomment',
                    'choice-yesnodontknow')

    def is_if_yes_no(question):
        # Exact membership test. The previous ``question.type in
        # 'choice-yesno'`` was a substring check on a string, so a type such
        # as 'choice' was wrongly treated as a yes/no question.
        return question.type in yes_no_types

    d = {}
    # Get parameters that are only on fingerprint
    d['id'] = self.fingerprint_hash
    d['type_t'] = self.questionnaire.slug
    d['date_last_modification_t'] = self.last_modification.strftime(
        '%Y-%m-%d %H:%M:%S.%f')
    d['created_t'] = self.created.strftime('%Y-%m-%d %H:%M:%S.%f')
    d['user_t'] = self.unique_users_string()
    d['percentage_d'] = self.fill

    # Texts of yes/no questions answered with "yes"; appended to the free text.
    adicional_parts = []

    # Add answers
    answers = Answer.objects.filter(fingerprint_id=self)
    for answer in answers:
        question = answer.question
        # We try to get permissions preferences for this question
        permissions = self.getPermissions(
            QuestionSet.objects.get(id=question.questionset.id))
        slug = question.slug_fk.slug1
        if permissions.allow_indexing or slug == 'database_name':
            setProperFields(d, question, slug, answer.data)
            if is_if_yes_no(question) and 'yes' in answer.data:
                adicional_parts.append(question.text + " ")
            if answer.comment is not None:
                d['comment_question_' + slug + '_t'] = answer.comment

    d['text_t'] = generateFreeText(d) + " " + "".join(adicional_parts)
    d['mlt_t'] = generateMltText(d)

    if batch_mode:
        return d

    print("-- Indexing unique fingerprint hash " + str(self.fingerprint_hash))
    c = CoreEngine()
    results = c.search_fingerprint("id:" + self.fingerprint_hash)
    if len(results) == 1:
        # Delete old entry if any
        c.delete(results.docs[0]['id'])
    c.index_fingerprint_as_json(d)
def get(self, request, *args, **kw):
    """Tell whether a database with the requested name is already indexed.

    Reads the ``name`` query parameter, searches ``database_name_t`` for it as
    a quoted phrase, then compares it case-insensitively (ignoring surrounding
    whitespace) with each hit, because the Solr match alone is not exact.

    :returns: ``Response({'contains': bool})`` with HTTP 200.
    """
    database_name = request.GET['name']
    # The name is user input placed inside a quoted Solr phrase: escape
    # backslashes and double quotes so it cannot break or alter the query.
    escaped_name = database_name.replace('\\', '\\\\').replace('"', '\\"')
    c = CoreEngine()
    results = c.search_fingerprint('database_name_t:"' + escaped_name + '"')

    wanted = database_name.lower().strip()
    contain = False
    for r in results:
        try:
            if wanted == r['database_name_t'].lower().strip():
                contain = True
                break
        except (KeyError, AttributeError):
            # Hit without a string database_name_t: it cannot match, skip it.
            continue

    result = {'contains': contain}
    return Response(result, status=status.HTTP_200_OK)
def index_all():
    """Rebuild the Solr index from every valid fingerprint.

    Deletes all documents matching ``type_t:*``, builds each fingerprint's
    document with ``indexFingerprint(batch_mode=True)`` and sends the whole
    batch through ``CoreEngine.index_fingerprints``.
    """
    documents = []
    engine = CoreEngine()
    valid_fingerprints = Fingerprint.valid()
    engine.deleteQuery('type_t:*')
    for fp in valid_fingerprints:
        print("-- Indexing fingerprint hash " + str(fp.fingerprint_hash))
        document = fp.indexFingerprint(batch_mode=True)
        documents.append(document)
    print("-- Committing to solr")
    engine.index_fingerprints(documents)
class Statistic(object):
    """Answer-frequency statistics for a single question, read from Solr."""

    def __init__(self, question):
        self.question = question
        self.search = CoreEngine()

    def get_percentage(self):
        """Count how often each answer token occurs for this question.

        Searches documents of the question's questionnaire that have the
        question's ``<slug>_t`` field, splits each returned value on ``#``
        and tallies the non-empty tokens.

        :returns: ``None`` when the question has no slug, ``[]`` when the
            search result is falsy, otherwise a one-element list holding the
            ``{token: count}`` dict.
        """
        field = self.question.slug_fk.slug1
        questionnaire_slug = self.question.questionset.questionnaire.slug
        if field is None:
            return None

        solr_field = field + "_t"
        hits = self.search.search_fingerprint(
            solr_field + ":* AND type_t:" + questionnaire_slug,
            0, 100, solr_field)
        if not hits:
            return []

        counts = dict()
        for hit in hits:
            # Each hit's first value is the one that gets split and counted.
            for token in list(hit.values())[0].split("#"):
                if token:
                    counts[token] = counts.get(token, 0) + 1
        return [counts]

    def tag_cloud(self):
        # http://www.jason-palmer.com/2011/05/creating-a-tag-cloud-with-solr-and-php/
        # solr = query = "(.................. )"
        # solr.search([solrquery],facet = 'on' ,** {'facet.field' : ['fieldname']})
        pass
def convertFieldsOnSolr(fields, new_type):
    """Store typed copies of the given text fields on every Solr document.

    For each document and each field, the ``<field>_t`` value is converted
    with ``convert_value`` and stored under ``<field><suffix>``, where the
    suffix comes from ``assert_suffix(new_type)``. Every document is then
    deleted and indexed again. Missing fields and failed conversions are
    reported and skipped.

    :param fields: iterable of field names, without the ``_t`` suffix.
    :param new_type: target type, as understood by ``assert_suffix`` and
        ``convert_value``.
    :returns: ``False`` when ``new_type`` has no suffix, otherwise ``True``.
    """
    engine = CoreEngine()
    suffix = assert_suffix(new_type)
    if suffix is None:
        print('-- Invalid new type, process cancelled.')
        return False

    documents = engine.search_fingerprint("*:*")
    print("Started converting fields on all databases, number of databases: "
          + str(len(documents)))
    for doc in documents:
        del doc['_version_']
        for field in fields:
            source_key = str(field) + '_t'
            try:
                converted = convert_value(doc[source_key], new_type)
                if converted is None:
                    print("-- Couldn't convert field " + str(field)
                          + " for database " + doc['id'] + ". "
                          + str(doc[source_key]) + " is not of type "
                          + str(new_type))
                else:
                    doc[str(field) + suffix] = converted
            except KeyError:
                print("-- " + str(doc['id']) + ' doesn\'t have the field '
                      + str(field) + ', ignoring this field on this database.')
        # Replace the stored document with the extended one.
        engine.delete(doc['id'])
        engine.index_fingerprint_as_json(doc)
    print("Done converting fields on all databases")
    return True
def __indexQueryLog(self):
    """Index every ``QueryLog`` row in the ``suggestions`` core.

    Each row's fields are sent with the ``query`` text stripped and
    lower-cased, in batches of 200 documents.
    """
    c = CoreEngine(core='suggestions')
    queries = QueryLog.objects.all()
    print("-- Indexing " + str(len(queries)) + " simple queries")
    temp_array = []
    for query in queries:
        # Work on a copy: the original deleted '_state' from, and rewrote
        # 'query' in, the model instance's own __dict__.
        qdict = dict(query.__dict__)
        del qdict['_state']
        qdict['query'] = qdict['query'].strip().lower()
        temp_array.append(qdict)
        if len(temp_array) == 200:
            c.index_fingerprints(temp_array)
            temp_array = []
    # Send whatever is left over from the last, incomplete batch.
    if temp_array:
        c.index_fingerprints(temp_array)
def convertFieldsOnSolr(fields, new_type):
    """Store typed copies of the given text fields on every Solr document.

    For each document and each field, the ``<field>_t`` value is converted
    with ``convert_value`` and stored under ``<field><suffix>``, where the
    suffix comes from ``assert_suffix(new_type)``. Every document is then
    deleted and indexed again. Missing fields and failed conversions are
    reported and skipped.

    :param fields: iterable of field names, without the ``_t`` suffix.
    :param new_type: target type, as understood by ``assert_suffix`` and
        ``convert_value``.
    :returns: ``False`` when ``new_type`` has no suffix, otherwise ``True``.
    """
    engine = CoreEngine()
    suffix = assert_suffix(new_type)
    if suffix is None:
        print('-- Invalid new type, process cancelled.')
        return False

    documents = engine.search_fingerprint("*:*")
    print("Started converting fields on all databases, number of databases: "
          + str(len(documents)))
    for doc in documents:
        del doc['_version_']
        for field in fields:
            source_key = str(field) + '_t'
            try:
                converted = convert_value(doc[source_key], new_type)
                if converted is None:
                    print("-- Couldn't convert field " + str(field)
                          + " for database " + doc['id'] + ". "
                          + str(doc[source_key]) + " is not of type "
                          + str(new_type))
                else:
                    doc[str(field) + suffix] = converted
            except KeyError:
                print("-- " + str(doc['id']) + ' doesn\'t have the field '
                      + str(field) + ', ignoring this field on this database.')
        # Replace the stored document with the extended one.
        engine.delete(doc['id'])
        engine.index_fingerprint_as_json(doc)
    print("Done converting fields on all databases")
    return True
def __init__(self, question):
    """Keep the question and create the ``CoreEngine`` used for searches."""
    self.question, self.search = question, CoreEngine()
def old_publications_to_comments():
    """Copy the old publications field from Solr into answer comments.

    For every ``observationaldatasources`` document that is not on the
    greenlist, the value of
    ``list_of_peer_reviewed_papers_based_on_your_data_ba_t`` (``""`` when
    absent) becomes the comment of the fingerprint's answer to the
    ``Publications`` question; the answer is created when it does not exist.
    Documents without a matching fingerprint are reported and skipped. The
    function stops early when the questionnaire or the question is missing.
    """
    # ars, gepard, hsd csd lpd, ipci, maas, pedianet, thin
    greenlist = [
        '768185357ce7e4e0aeae6d2e69f6d7e0',
        '45b7ccb3aca47bc37f9bd82504f09b3b',
        '52d4981701f0126d947014244744efea',
        '54d8384917b21fb7928ba72a1e72326b',
        '7b128593480b53409ac83c9582badbb7',
        '5d8f88d91f1dc3e2806d825f61260b76',
        '7a205644571c31bc50965c68d7565622',
    ]
    try:
        this_questionnaire = Questionnaire.objects.get(
            slug="observationaldatasources")
    except Questionnaire.DoesNotExist:
        print("-- Cant find observational data sources questionnaire")
        return

    try:
        this_question = Question.objects.get(
            slug_fk__slug1='Publications',
            questionset__questionnaire=this_questionnaire)
        print("\n----------------------------------------------")
        print("Start looking through publications ")
        print("-----------------------------------------------")

        # Walk every document of this questionnaire type.
        engine = CoreEngine()
        for document in engine.search_fingerprint(
                "type_t:observationaldatasources"):
            doc_id = document['id']
            if doc_id in greenlist:
                continue
            print(" Processing id " + str(document.get('id')))
            publications_comment = document.get(
                'list_of_peer_reviewed_papers_based_on_your_data_ba_t', "")
            try:
                this_fingerprint = Fingerprint.objects.get(
                    fingerprint_hash=doc_id)
                try:
                    this_answer = Answer.objects.get(
                        fingerprint_id=this_fingerprint,
                        question=this_question)
                    print(this_answer.comment)
                    this_answer.comment = publications_comment
                    this_answer.save()
                except Answer.DoesNotExist:
                    print("--- Answer does not exist, creating new answer.")
                    print(publications_comment)
                    this_answer = Answer(question=this_question, data="",
                                         comment=publications_comment,
                                         fingerprint_id=this_fingerprint)
                    this_answer.save()
                # Solr is not reindexed here; that has to happen separately.
            except Fingerprint.DoesNotExist:
                print("--- ERROR: Fingerprint with id "
                      + str(document.get('id')) + 'does not exist')

        print("-----------------------------------------------")
        print(" End")
        print("-----------------------------------------------")
    except Question.DoesNotExist:
        print("--- ERROR: Theres no question with slug Publications_t")
def unindexFingerprint(fingerprint_id):
    """Delete the document with id ``fingerprint_id`` through ``CoreEngine``."""
    CoreEngine().delete(fingerprint_id)
def add_questionnaires():
    """Attach a questionnaire to the fingerprint of every Solr document.

    Every document with a ``type_t`` gets (or creates) its ``Fingerprint``,
    which is given the user with id 1 as owner and the questionnaire returned
    by ``get_questionnaire`` for that type; it is saved only when a
    questionnaire was found. Afterwards every fingerprint left without a
    questionnaire is deleted.
    """
    print("\n----------------------------------------------")
    print("Start adding ")
    print("-----------------------------------------------")

    # Find all questionnaire types
    questionnaires = Questionnaire.objects.all()
    c = CoreEngine()
    documents = c.search_fingerprint("*:*")

    # for each solr document
    for doc in documents:
        this_id = doc['id']
        # Skip questionnaire documents. The original looked for
        # "questionnaire_ " (with a trailing space), which never matches; the
        # rest of this file spells the id prefix "questionaire_", so both
        # spellings are accepted here.
        if "questionaire_" in this_id or "questionnaire_" in this_id:
            print("Passing " + this_id)
            continue

        try:
            quest_type = doc['type_t']
        except KeyError:
            print("Found database " + this_id + " on solr without type_t")
            continue

        # get fingerprint reference
        try:
            fingerprint = Fingerprint.objects.get(fingerprint_hash=this_id)
        except Fingerprint.DoesNotExist:
            fingerprint = Fingerprint(fingerprint_hash=this_id)

        quest = get_questionnaire(questionnaires, quest_type)
        fingerprint.owner = User.objects.get(id=1)
        if quest is not None:
            fingerprint.questionnaire = quest
            print(fingerprint.fingerprint_hash + " - "
                  + fingerprint.questionnaire.slug)
            fingerprint.save()
        else:
            print("-- ERROR: cant find quest_type " + quest_type)

    # we need to check if there are dummy fingerprints ids on the table
    for f in Fingerprint.objects.all():
        if f.questionnaire is None:
            print("Deleting, questionnaire not on " + f.fingerprint_hash)
            f.delete()

    print("-----------------------------------------------")
    print(" End")
    print("-----------------------------------------------")
def forwards(self, orm):
    """Copy the answers stored on Solr documents into ``Answer`` rows.

    First renames the mis-named "repeated measurements" field on
    observational documents. Then, for every Solr document whose id does not
    contain ``"questionaire_"``, updates the matching ``Fingerprint`` (dates,
    owner, shared users) and creates or updates one ``Answer`` per ``*_t``
    field that maps to a question of the fingerprint's questionnaire.
    Documents without a fingerprint and fields without a question are
    reported and skipped. ``orm`` is not used.
    """
    c = CoreEngine()

    # Document keys that are metadata, not answers.
    ignorelist = ['id', 'type_t', '_version_', 'text_t',
                  'date_last_modification_t', 'user_t', 'created_t',
                  'list_of_peer_reviewed_papers_based_on_your_data_ba_t']

    def getFingerprint(fingerprint_hash):
        # Fingerprint with this hash, or None when there is none.
        try:
            return Fingerprint.objects.get(fingerprint_hash=fingerprint_hash)
        except Fingerprint.DoesNotExist:
            return None

    def getQuestion(questions, slug):
        # Question whose slug equals the key minus its last two characters.
        for q in questions:
            if q.slug_fk.slug1 == slug[:-2]:
                return q
        return None

    def fix_wrong_names_on_observational():
        # Move values stored under the wrongly named keys to the right key.
        wrong_keys = ('If_yes_:_repeated_measurements__t',
                      'If_yes_:_repeated_measurements_t')
        c = CoreEngine()
        documents = c.search_fingerprint("type_t:observationaldatasources")
        for doc in documents:
            # A document without ``_version_`` is still processed; the old
            # bare ``except`` skipped the first wrong key in that case.
            doc.pop('_version_', None)
            old = None
            for key in wrong_keys:
                # When both keys exist the later one wins, as before.
                if key in doc:
                    old = doc.pop(key)
            if old is not None:
                print("Replacing")
                doc['If_yes__repeated_measurements_t'] = old
                c.update(doc)

    def convertAnswerToJson(question, value):
        '''
        Placeholder: returns ``value`` unchanged for every question type.

        TYPES : TO DO
            open, open-button, open-upload-image, open-textfield,
            choice-yesno, choice-yesnocomment, choice-yesnodontknow,
            datepicker, email, url, comment, choice, choice-freeform,
            choice-multiple, choice-multiple-freeform,
            choice-multiple-freeform-options, range, timeperiod, custom,
            publication, numeric, sameas
        '''
        return value

    def getOwners(owners_string):
        # Split a ' \ '-separated username list into (owner, shared users).
        owners = []
        for o in owners_string.split(' \\ '):
            try:
                owners.append(User.objects.get(username=o))
            except User.DoesNotExist:
                print("Couldnt find user " + o)
        if len(owners) == 0:
            # Empty list rather than None, so the caller can iterate it.
            return (None, [])
        elif len(owners) == 1:
            return (owners[0], [owners[0]])
        else:
            return (owners[0], owners[1:])

    def importDocument(doc, this_id):
        # Update the fingerprint for one document and store its answers.
        fingerprint_id = getFingerprint(this_id)
        if fingerprint_id is None:
            # Previously the None result was dereferenced and crashed.
            print("-- ERROR: fingerprint " + this_id
                  + " doesnt exist on db, skipping")
            return

        date_last_modification = doc.get('date_last_modification_t',
                                         "1970-01-01 00:00:00.000000")
        date_create = doc['created_t']
        user = doc['user_t']

        # update parameters from data from solr
        if date_last_modification and date_create and user:
            fingerprint_id.last_modification = datetime.datetime.strptime(
                date_last_modification, '%Y-%m-%d %H:%M:%S.%f')
            fingerprint_id.created = datetime.datetime.strptime(
                date_create, '%Y-%m-%d %H:%M:%S.%f')
            (owner, shared) = getOwners(user)
            # Keep the current owner when none of the users could be found.
            if owner is not None:
                fingerprint_id.owner = owner
            for share in shared:
                fingerprint_id.shared.add(share)
            fingerprint_id.save()

        questions_possible = fingerprint_id.questionnaire.questions()
        for key in doc:
            if (key in ignorelist or key.startswith('comment_question_')
                    or not key.endswith('_t')):
                continue
            question = getQuestion(questions_possible, key)
            if question is None:
                # No question to attach the answer to; report and move on
                # instead of saving an answer with question=None.
                print("EMPTY KEY ON:" + key)
                continue
            data = convertAnswerToJson(question, doc[key])
            comment = doc.get('comment_question_' + key)
            try:
                existing_answer = Answer.objects.get(
                    question=question, fingerprint_id=fingerprint_id)
                existing_answer.data = data
                existing_answer.comment = comment
                existing_answer.save()
            except Answer.DoesNotExist:
                ans = Answer(question=question, data=data, comment=comment,
                             fingerprint_id=fingerprint_id)
                ans.save()

    # First we need to fix wrongly named fields on observational_data_sources
    fix_wrong_names_on_observational()

    documents = c.search_fingerprint("*:*")
    for doc in documents:
        this_id = doc['id']
        print("-- Processing ---------------------- " + this_id + "\n")
        if "questionaire_" not in this_id:
            importDocument(doc, this_id)
        print("---------------------------------------- ")
        print(" ")
def forwards(self, orm):
    """Copy the answers stored on Solr documents into ``Answer`` rows.

    First renames the mis-named "repeated measurements" field on
    observational documents. Then, for every Solr document whose id does not
    contain ``"questionaire_"``, updates the matching ``Fingerprint`` (dates,
    owner, shared users) and creates or updates one ``Answer`` per ``*_t``
    field that maps to a question of the fingerprint's questionnaire.
    Documents without a fingerprint and fields without a question are
    reported and skipped. ``orm`` is not used.
    """
    c = CoreEngine()

    # Document keys that are metadata, not answers.
    ignorelist = [
        "id",
        "type_t",
        "_version_",
        "text_t",
        "date_last_modification_t",
        "user_t",
        "created_t",
        "list_of_peer_reviewed_papers_based_on_your_data_ba_t",
    ]

    def getFingerprint(fingerprint_hash):
        # Fingerprint with this hash, or None when there is none.
        try:
            return Fingerprint.objects.get(fingerprint_hash=fingerprint_hash)
        except Fingerprint.DoesNotExist:
            return None

    def getQuestion(questions, slug):
        # Question whose slug equals the key minus its last two characters.
        for q in questions:
            if q.slug_fk.slug1 == slug[:-2]:
                return q
        return None

    def fix_wrong_names_on_observational():
        # Move values stored under the wrongly named keys to the right key.
        wrong_keys = ("If_yes_:_repeated_measurements__t",
                      "If_yes_:_repeated_measurements_t")
        c = CoreEngine()
        documents = c.search_fingerprint("type_t:observationaldatasources")
        for doc in documents:
            # A document without ``_version_`` is still processed; the old
            # bare ``except`` skipped the first wrong key in that case.
            doc.pop("_version_", None)
            old = None
            for key in wrong_keys:
                # When both keys exist the later one wins, as before.
                if key in doc:
                    old = doc.pop(key)
            if old is not None:
                print("Replacing")
                doc["If_yes__repeated_measurements_t"] = old
                c.update(doc)

    def convertAnswerToJson(question, value):
        """
        Placeholder: returns ``value`` unchanged for every question type.

        TYPES : TO DO
            open, open-button, open-upload-image, open-textfield,
            choice-yesno, choice-yesnocomment, choice-yesnodontknow,
            datepicker, email, url, comment, choice, choice-freeform,
            choice-multiple, choice-multiple-freeform,
            choice-multiple-freeform-options, range, timeperiod, custom,
            publication, numeric, sameas
        """
        return value

    def getOwners(owners_string):
        # Split a ' \ '-separated username list into (owner, shared users).
        owners = []
        for o in owners_string.split(" \\ "):
            try:
                owners.append(User.objects.get(username=o))
            except User.DoesNotExist:
                print("Couldnt find user " + o)
        if len(owners) == 0:
            # Empty list rather than None, so the caller can iterate it.
            return (None, [])
        elif len(owners) == 1:
            return (owners[0], [owners[0]])
        else:
            return (owners[0], owners[1:])

    def importDocument(doc, this_id):
        # Update the fingerprint for one document and store its answers.
        fingerprint_id = getFingerprint(this_id)
        if fingerprint_id is None:
            # Previously the None result was dereferenced and crashed.
            print("-- ERROR: fingerprint " + this_id
                  + " doesnt exist on db, skipping")
            return

        date_last_modification = doc.get("date_last_modification_t",
                                         "1970-01-01 00:00:00.000000")
        date_create = doc["created_t"]
        user = doc["user_t"]

        # update parameters from data from solr
        if date_last_modification and date_create and user:
            fingerprint_id.last_modification = datetime.datetime.strptime(
                date_last_modification, "%Y-%m-%d %H:%M:%S.%f")
            fingerprint_id.created = datetime.datetime.strptime(
                date_create, "%Y-%m-%d %H:%M:%S.%f")
            (owner, shared) = getOwners(user)
            # Keep the current owner when none of the users could be found.
            if owner is not None:
                fingerprint_id.owner = owner
            for share in shared:
                fingerprint_id.shared.add(share)
            fingerprint_id.save()

        questions_possible = fingerprint_id.questionnaire.questions()
        for key in doc:
            if (key in ignorelist or key.startswith("comment_question_")
                    or not key.endswith("_t")):
                continue
            question = getQuestion(questions_possible, key)
            if question is None:
                # No question to attach the answer to; report and move on
                # instead of saving an answer with question=None.
                print("EMPTY KEY ON:" + key)
                continue
            data = convertAnswerToJson(question, doc[key])
            comment = doc.get("comment_question_" + key)
            try:
                existing_answer = Answer.objects.get(
                    question=question, fingerprint_id=fingerprint_id)
                existing_answer.data = data
                existing_answer.comment = comment
                existing_answer.save()
            except Answer.DoesNotExist:
                ans = Answer(question=question, data=data, comment=comment,
                             fingerprint_id=fingerprint_id)
                ans.save()

    # First we need to fix wrongly named fields on observational_data_sources
    fix_wrong_names_on_observational()

    documents = c.search_fingerprint("*:*")
    for doc in documents:
        this_id = doc["id"]
        print("-- Processing ---------------------- " + this_id + "\n")
        if "questionaire_" not in this_id:
            importDocument(doc, this_id)
        print("---------------------------------------- ")
        print(" ")
def add_questionnaires():
    """Attach a questionnaire to the fingerprint of every Solr document.

    Every document with a ``type_t`` gets (or creates) its ``Fingerprint``,
    which is given the user with id 1 as owner and the questionnaire returned
    by ``get_questionnaire`` for that type; it is saved only when a
    questionnaire was found. Afterwards every fingerprint left without a
    questionnaire is deleted.
    """
    print("\n----------------------------------------------")
    print("Start adding ")
    print("-----------------------------------------------")

    # Find all questionnaire types
    questionnaires = Questionnaire.objects.all()
    c = CoreEngine()
    documents = c.search_fingerprint("*:*")

    # for each solr document
    for doc in documents:
        this_id = doc['id']
        # Skip questionnaire documents. The original looked for
        # "questionnaire_ " (with a trailing space), which never matches; the
        # rest of this file spells the id prefix "questionaire_", so both
        # spellings are accepted here.
        if "questionaire_" in this_id or "questionnaire_" in this_id:
            print("Passing " + this_id)
            continue

        try:
            quest_type = doc['type_t']
        except KeyError:
            print("Found database " + this_id + " on solr without type_t")
            continue

        # get fingerprint reference
        try:
            fingerprint = Fingerprint.objects.get(fingerprint_hash=this_id)
        except Fingerprint.DoesNotExist:
            fingerprint = Fingerprint(fingerprint_hash=this_id)

        quest = get_questionnaire(questionnaires, quest_type)
        fingerprint.owner = User.objects.get(id=1)
        if quest is not None:
            fingerprint.questionnaire = quest
            print(fingerprint.fingerprint_hash + " - "
                  + fingerprint.questionnaire.slug)
            fingerprint.save()
        else:
            print("-- ERROR: cant find quest_type " + quest_type)

    # we need to check if there are dummy fingerprints ids on the table
    for f in Fingerprint.objects.all():
        if f.questionnaire is None:
            print("Deleting, questionnaire not on " + f.fingerprint_hash)
            f.delete()

    print("-----------------------------------------------")
    print(" End")
    print("-----------------------------------------------")
def old_publications_to_comments():
    """Copy the legacy publications text from Solr into Answer comments.

    Looks up the "observationaldatasources" questionnaire and its
    'Publications' question, then walks every Solr document of that type.
    For each document whose id is not in the greenlist, the value of
    ``list_of_peer_reviewed_papers_based_on_your_data_ba_t`` (empty string
    when absent) is stored as the comment of the fingerprint's answer to the
    Publications question; the answer is created with empty data when it does
    not exist yet. Returns None; progress and errors are printed.
    """
    # ars, gepard, hsd csd lpd, ipci, maas, pedianet, thin
    greenlist = frozenset([
        '768185357ce7e4e0aeae6d2e69f6d7e0',
        '45b7ccb3aca47bc37f9bd82504f09b3b',
        '52d4981701f0126d947014244744efea',
        '54d8384917b21fb7928ba72a1e72326b',
        '7b128593480b53409ac83c9582badbb7',
        '5d8f88d91f1dc3e2806d825f61260b76',
        '7a205644571c31bc50965c68d7565622',
    ])

    try:
        this_questionnaire = Questionnaire.objects.get(slug="observationaldatasources")
    except Questionnaire.DoesNotExist:
        print("-- Cant find observational data sources questionnaire")
        return

    # Only the lookup itself is guarded: the handler used to wrap the whole
    # function body, which hid where a Question.DoesNotExist could come from.
    try:
        this_question = Question.objects.get(
            slug_fk__slug1='Publications',
            questionset__questionnaire=this_questionnaire)
    except Question.DoesNotExist:
        print("--- ERROR: Theres no question with slug Publications_t")
        return

    print("\n----------------------------------------------")
    print("Start looking through publications ")
    print("-----------------------------------------------")

    documents = CoreEngine().search_fingerprint("type_t:observationaldatasources")

    for document in documents:
        doc_id = document['id']
        if doc_id in greenlist:
            continue

        print(" Processing id " + str(doc_id))
        publications_comment = document.get(
            'list_of_peer_reviewed_papers_based_on_your_data_ba_t', "")

        try:
            this_fingerprint = Fingerprint.objects.get(fingerprint_hash=doc_id)
        except Fingerprint.DoesNotExist:
            # The message previously lacked the space before "does not exist".
            print("--- ERROR: Fingerprint with id " + str(doc_id) + ' does not exist')
            continue

        try:
            this_answer = Answer.objects.get(fingerprint_id=this_fingerprint,
                                             question=this_question)
        except Answer.DoesNotExist:
            print("--- Answer does not exist, creating new answer. ")
            print(publications_comment)
            this_answer = Answer(question=this_question, data="",
                                 comment=publications_comment,
                                 fingerprint_id=this_fingerprint)
        else:
            # Show the comment that is about to be replaced.
            print(this_answer.comment)
            this_answer.comment = publications_comment
        this_answer.save()

        # NOTE: after saving, the fingerprint is not reindexed here, so solr
        # is not updated by this function (indexFingerprint() was disabled).

    print("-----------------------------------------------")
    print(" End")
    print("-----------------------------------------------")
def validate_and_save(user, data):
    """
    Verify that the json structure is correct and create/update the values
    of a fingerprint.

    :param user: user performing the request; passed to
        ``validate_fingerprint`` and stored on newly created fields.
    :param data: mapping expected to hold ``fingerprintID`` and ``values``
        (a mapping of field name to text value).
    :return: dict with one status message per processed field, or a single
        ``'error'`` entry when the structure or the ownership check fails.

    The text of every field that was actually saved is appended to the
    ``text_t`` field of the matching Solr document, which is then reindexed.
    """
    result = {}

    # Verify if json structure is valid. Returning here also avoids the
    # NameError the old code hit when it later built the Solr query from a
    # fingerprintID that was never assigned.
    if 'fingerprintID' not in data:
        result['error'] = "No fingerprintID detected"
        return result
    fingerprintID = data['fingerprintID']

    # Verify if fingerprint belongs to user
    if not validate_fingerprint(user, fingerprintID):
        result['error'] = "Error find FingerprintID"
        return result

    # No values key in JSON structure
    if 'values' not in data:
        result['error'] = "No values detected"
        return result

    values = data['values']
    saved_text = []  # one ' <value>' piece per field that was persisted

    for f in values:
        value = values[f]

        # A single get() replaces the previous filter() + get() pair.
        try:
            fp = FingerprintAPI.objects.get(fingerprintID=fingerprintID, field=f)
        except FingerprintAPI.DoesNotExist:
            fp = None
        except FingerprintAPI.MultipleObjectsReturned:
            result[f] = "Error to update field"
            continue

        if fp is not None:
            # Field already exists: append the new value when it differs.
            try:
                if str(fp.value) != str(value):
                    piece = ' ' + value
                    fp.value += piece
                    fp.save()
                    # Accumulate instead of overwriting, so text collected
                    # for earlier fields is not lost, and only after save()
                    # so unsaved text is never indexed.
                    saved_text.append(piece)
                    result[f] = "Updated successfully"
                else:
                    result[f] = "Not updated"
            except Exception:
                # Per-field best effort: report the failure and carry on.
                result[f] = "Error to update field"
        else:
            # Field does not exist: create a new field-value row.
            try:
                piece = ' ' + value
                FingerprintAPI(fingerprintID=fingerprintID, field=f,
                               value=value, user=user).save()
                saved_text.append(piece)
                result[f] = "Created successfully"
            except Exception:
                result[f] = "Error to create new field"

    fields_text = ''.join(saved_text)
    if not fields_text:
        # Nothing new was stored, so the index document is left untouched.
        return result

    c = CoreEngine()
    results = c.search_fingerprint("id:" + fingerprintID)
    document = next(iter(results), None)  # only the first hit is used
    if document is not None:
        document['text_t'] = document.get('text_t', '') + fields_text
        c.index_fingerprint_as_json(document)

    return result
def post(self, request, *args, **kw):
    """Search databases by free text and return one page of results.

    Only authenticated users that are staff or belong to the 'exporters' or
    'developers' group are served; everyone else gets a 401 response.

    Request data (read from ``request.DATA``):
        search      required, non-blank text matched against ``text_t``
        rows        optional page size, defaults to 20
        offset      optional start position, defaults to 0
        sort_field  optional, one of name, type_name, id, last_activity,
                    date; defaults to name
        sort_order  optional, 'asc' or 'desc'; defaults to 'asc'
        schema      optional, restricts results to that ``type_t``

    Invalid input yields a 400 response carrying an 'Error' message.
    """
    sortmap = {
        'name': 'database_name_t',
        'type_name': 'type_t',
        'id': 'id',
        'last_activity': 'date_last_modification_t',
        'date': 'created_t',
    }

    def bad_request(message):
        # Every validation failure shares this response shape.
        return Response(
            {
                'status': 'Authenticated',
                'method': 'POST',
                'Error': message
            },
            status=status.HTTP_400_BAD_REQUEST)

    user = request.user
    allowed = user.is_authenticated() and (
        user.is_staff
        or user.emif_profile.has_group('exporters')
        or user.emif_profile.has_group('developers'))
    if not allowed:
        return Response({
            'status': 'NOT authenticated',
            'method': 'POST'
        }, status=status.HTTP_401_UNAUTHORIZED)

    search = request.DATA.get('search', None)
    rows = request.DATA.get('rows', None)
    offset = request.DATA.get('offset', None)
    sort_field = request.DATA.get('sort_field', None)
    sort_order = request.DATA.get('sort_order', None)
    schema = request.DATA.get('schema', None)

    if search is None or len(search.strip()) == 0:
        return bad_request('Must specify a search text filter')

    # defaults
    if rows is None:
        rows = 20
    if offset is None:
        offset = 0
    if sort_field is None:
        sort_field = 'name'
    if sort_order is None:
        sort_order = 'asc'

    # Paging values come straight from the client; reject anything that is
    # not an integer here instead of forwarding it to the search backend.
    try:
        rows = int(rows)
        offset = int(offset)
    except (TypeError, ValueError):
        return bad_request('rows and offset must be integers')

    if sort_order != 'asc' and sort_order != 'desc':
        return bad_request('Available sort orders are "asc" and "desc"')

    try:
        sortFilter = sortmap[sort_field] + " " + sort_order
    except (KeyError, TypeError):
        # KeyError: unknown field name; TypeError: unhashable value
        # (for example a JSON list) used as the lookup key.
        return bad_request(
            'sort_field can only be name, type_name, id, last_activity or date.')

    filter_value = ''
    if schema is not None:
        filter_value = 'AND type_t: "%s"' % escapeSolrArg(schema)

    query = 'text_t:"%s" %s' % (escapeSolrArg(search), filter_value)
    (list_databases, _hits) = get_databases_from_solr_v2(
        request, query, sort=sortFilter, rows=rows, start=offset)

    return Response(
        {
            'link': {
                'status': 'Authenticated',
                'method': 'POST'
            },
            'filters': {
                'search': search,
                'rows': rows,
                'offset': offset
            },
            'result': {
                # count is the size of the returned page
                'count': len(list_databases),
                'databases': [d.__dict__ for d in list_databases]
            }
        },
        status=status.HTTP_200_OK)
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from fingerprint.models import Fingerprint
from searchengine.search_indexes import CoreEngine
import sys

# Script: create a Fingerprint row for every fingerprint id found in the
# search index. Ids starting with "questionaire_" are printed but not stored.
print('\nbegin import of fingerprint hashes to database...\n')

c = CoreEngine()
results = c.search_fingerprint('*:*')

for result in results:
    fingerprint_id = result['id']
    print(fingerprint_id)

    if fingerprint_id.startswith("questionaire_"):
        continue

    try:
        Fingerprint(fingerprint_hash=fingerprint_id).save()
    except Exception:
        # A failed save is treated as "row already present". Catching
        # Exception rather than using a bare except keeps Ctrl-C and
        # SystemExit able to stop the import.
        print(fingerprint_id + ' already in DB')

print('\nend!')