import csv

from flask import render_template, request
from werkzeug.utils import secure_filename

# PDFReader and Ranker are project-local classes; their import paths are assumed.


def upload_file():
    if request.method == 'POST':
        f = request.files['zipfile']
        # Sanitize the filename once and reuse it for both saving and reading.
        filename = secure_filename(f.filename)
        f.save(filename)
        # job_desc = request.form.get("jobdesc")
        # num_rank = request.form.get("rank")
        # req_details = [job_desc, num_rank]

        # Normalize the comma-separated skills: trim whitespace and replace internal
        # spaces with underscores (e.g. "machine learning" -> "machine_learning").
        jd_skills = request.form.get("skills")
        final_skills = []
        for skill in jd_skills.split(','):
            skill = skill.strip().replace(" ", "_")
            final_skills.append(skill)

        reader = PDFReader()
        allresumes = reader.extract_resumes(filename)
        resumes_scores = {}

        # Initialize the headers by writing them to the CSV; rows are appended below.
        # Row columns: id, filename, name, score, rank.
        with open('cv_list.csv', 'w', newline='') as csvFile:
            writer = csv.writer(csvFile)
            writer.writerow(["ID", "filename", "Name", "Score", "Rank"])

        for index in range(len(allresumes[0])):
            analysed_list = reader.analyze_resume(allresumes[0][index],
                                                  allresumes[1][index],
                                                  index + 1, final_skills)
            resume_score = analysed_list[0]
            name = analysed_list[1]
            resumes_scores[allresumes[1][index]] = resume_score
            row = [index + 1, allresumes[1][index], name, resume_score]
            with open('cv_list.csv', 'a', newline='') as csvFile:
                writer = csv.writer(csvFile)
                writer.writerow(row)

        ranker = Ranker()
        ranked_list = ranker.rank_csv()
        return render_template('printresult.html',
                               scores=resumes_scores,
                               title="View Scores",
                               skills=final_skills,
                               ranked_list=ranked_list)
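# Hypothetical wiring sketch (not part of the original source): the handler above expects
# a multipart POST with a "zipfile" file field and a "skills" text field, so the route
# registration and the form posting to it might look roughly like this. The URL and form
# markup are assumptions.
from flask import Flask

app = Flask(__name__)
app.add_url_rule('/upload', view_func=upload_file, methods=['POST'])

# Illustrative form:
# <form action="/upload" method="post" enctype="multipart/form-data">
#   <input type="file" name="zipfile">
#   <input type="text" name="skills" placeholder="python, machine learning">
#   <button type="submit">Upload</button>
# </form>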
def mutate(cls, _, args, context, info):
    query = Learners.get_query(context)
    learner = query.filter(LearnerModel.email == args.get('email')).first()
    if learner is None:
        learner = LearnerModel(email=args.get('email'),
                               timestamp=args.get('ts'),
                               role=args.get('role'),
                               availability=args.get('availability'),
                               org_level=args.get('org_level'),
                               org=args.get('org'),
                               interest=args.get('interest'),
                               change_track=args.get('change_track'),
                               outside_org=args.get('outside_org'),
                               requests=args.get('requests'),
                               identify_as=args.get('identify_as'),
                               fullname=args.get('full_name'),
                               manager_email=args.get('manager_email'),
                               # Sent_Wecome_Email__date_=args.get('welcome_email_date'),
                               # Sent_Manager_Approval_Email__date_=args.get('manager_approval_email_date'),
                               # Manager_Approved__date_=args.get('manager_approved_date'),
                               # Mentee_Limit=args.get('mentee_limit'),
                               # Gender=args.get('gender'),
                               )
        db_session.add(learner)
    else:
        learner.timestamp = args.get('ts')
        learner.role = args.get('role')
        learner.availability = args.get('availability')
        learner.org_level = args.get('org_level')
        learner.org = args.get('org')
        learner.interest = args.get('interest')
        learner.change_track = args.get('change_track')
        learner.outside_org = args.get('outside_org')
        learner.requests = args.get('requests')
        learner.identify_as = args.get('identify_as')
        learner.fullname = args.get('full_name')
        learner.manager_email = args.get('manager_email')
    db_session.commit()

    # On successful commit, compute the ranking and update or overwrite it in the
    # rankings table.
    rankings = LearnerMentorRankingsModel(learner=args.get('email'),
                                          mentor='testmentor2',
                                          ranking=NaiveRanker.rank_mentors(args.get('email')))
    db_session.add(rankings)
    db_session.commit()

    # Return the top 3 rankings for the learner.
    ok = True
    return createOrUpdateLearner(learner=learner, rankings=rankings, ok=ok)
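# Illustrative only: a GraphQL document that could drive the mutation above. The mutation
# name and argument spellings are assumptions inferred from the createOrUpdateLearner
# return value and the args.get(...) keys; the real schema may differ.
EXAMPLE_LEARNER_MUTATION = '''
mutation {
  createOrUpdateLearner(email: "learner@example.com", ts: "2020-01-01T00:00:00",
                        role: "engineer", fullName: "Test Learner") {
    ok
  }
}
'''
# result = schema.execute(EXAMPLE_LEARNER_MUTATION, context_value=some_context)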
def mutate(cls, _, args, context, info):
    query = Mentors.get_query(context)
    mentor = query.filter(MentorModel.email == args.get('email')).first()
    if mentor is None:
        mentor = MentorModel(email=args.get('email'),
                             timestamp=args.get('ts'),
                             role=args.get('role'),
                             availability=args.get('availability'),
                             org_level=args.get('org_level'),
                             org=args.get('org'),
                             expertise=args.get('expertise'),
                             outside_org=args.get('outside_org'),
                             requests=args.get('requests'),
                             identify_as=args.get('identify_as'),
                             fullname=args.get('full_name'),
                             manager_email=args.get('manager_email'),
                             # Sent_Wecome_Email__date_=args.get('welcome_email_date'),
                             # Sent_Manager_Approval_Email__date_=args.get('manager_approval_email_date'),
                             # Manager_Approved__date_=args.get('manager_approved_date'),
                             # Mentee_Limit=args.get('mentee_limit'),
                             # Gender=args.get('gender'),
                             )
        db_session.add(mentor)
    else:
        mentor.timestamp = args.get('ts')
        mentor.role = args.get('role')
        mentor.availability = args.get('availability')
        mentor.org_level = args.get('org_level')
        mentor.org = args.get('org')
        mentor.expertise = args.get('expertise')
        mentor.outside_org = args.get('outside_org')
        mentor.requests = args.get('requests')
        mentor.identify_as = args.get('identify_as')
        mentor.fullname = args.get('full_name')
        mentor.manager_email = args.get('manager_email')
    db_session.commit()

    # On successful commit, compute the ranking and update or overwrite it in the
    # rankings table.
    rankings = MentorLearnerRankingsModel(mentor=args.get('email'),
                                          learner='testlearner2',
                                          ranking=NaiveRanker.rank_learners(args.get('email')))
    db_session.add(rankings)
    db_session.commit()

    ok = True
    return createOrUpdateMentor(mentor=mentor, rankings=rankings, ok=ok)
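# Note: despite the "update or overwrite" comments, both mutations above unconditionally
# add a fresh rankings row on every call. A hedged upsert sketch (model and field names
# taken from the code above; the query style assumes the same SQLAlchemy session):
def upsert_mentor_rankings(email, ranking):
    row = db_session.query(MentorLearnerRankingsModel).filter_by(mentor=email).first()
    if row is None:
        # No existing row for this mentor: insert one.
        row = MentorLearnerRankingsModel(mentor=email, learner='testlearner2',
                                         ranking=ranking)
        db_session.add(row)
    else:
        # Overwrite the existing ranking in place.
        row.ranking = ranking
    db_session.commit()
    return row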
import math
from collections import Counter
from typing import Any, Callable

# Sieve and Ranker are project-local classes; their import paths are assumed.


def evaluate(self, query: str, options: dict, ranker: Ranker,
             callback: Callable[[dict], Any]) -> None:
    """
    Evaluates the given query, doing N-out-of-M ranked retrieval. I.e., for a supplied
    query having M terms, a document is considered to be a match if it contains at least
    N <= M of those terms. The matching documents are ranked by the supplied ranker, and
    only the "best" matches are returned to the client via the supplied callback function.

    The client can supply a dictionary of options that controls this query evaluation
    process: The value of N is inferred from the query via the "recall_threshold" (float)
    option, and the maximum number of documents to return to the client is controlled via
    the "hit_count" (int) option.
    """
    # Create one posting-list iterator per unique term in the query.
    terms = self._inverted_index.get_terms(query)
    qcounter = Counter(terms)
    unique_terms = tuple(qcounter.keys())
    multi = tuple(qcounter.values())
    sieve = Sieve(options["hit_count"])
    iter_list = [self._inverted_index.get_postings_iterator(t) for t in unique_terms]

    # Compute how many of the query terms must occur in a document for it to match.
    recall_threshold = options["recall_threshold"]
    min_treff = max(1, math.floor(recall_threshold * len(iter_list)))

    # peek holds the current (initially the first) posting from each iterator.
    peek = [next(it, None) for it in iter_list]
    antall_iter = len(iter_list)

    def finn_minste():
        # Find the posting in peek with the lowest document ID, skipping exhausted lists.
        minste = None
        for p in peek:
            if p is not None and (minste is None or p.document_id < minste.document_id):
                minste = p
        return minste

    while antall_iter >= min_treff:
        min_doc = finn_minste()
        if min_doc is None:
            break
        # Check whether the lowest document ID satisfies the recall threshold.
        ranker.reset(min_doc.document_id)
        antall_treff = 0
        for n in range(len(peek)):
            if peek[n] is not None and peek[n].document_id == min_doc.document_id:
                antall_treff += 1
                ranker.update(unique_terms[n], multi[n], peek[n])
        # Evaluate the ranking and toss the score into the sieve.
        if antall_treff >= min_treff:
            sieve.sift(ranker.evaluate(), min_doc.document_id)
        # Advance past this document in the affected lists.
        for n in range(len(iter_list)):
            if peek[n] is not None and peek[n].document_id == min_doc.document_id:
                peek[n] = next(iter_list[n], None)
                if peek[n] is None:
                    antall_iter -= 1
        # Rinse and repeat.

    for score, document_id in sieve.winners():
        callback({
            "score": int(score),
            "document": self._corpus.get_document(document_id)
        })
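# Hypothetical usage sketch: engine and ranker stand in for an instance of the
# surrounding class and a project Ranker, and how the index and corpus get built is
# project-specific; the option keys come from the docstring above.
matches = []
engine.evaluate("to be or not to be",
                {"recall_threshold": 0.5, "hit_count": 10},
                ranker,
                lambda match: matches.append(match))
for match in matches:
    print(match["score"], match["document"])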
def evaluate(self, query: str, options: dict, ranker: Ranker,
             callback: Callable[[dict], Any]) -> None:
    """
    Evaluates the given query, doing N-out-of-M ranked retrieval. I.e., for a supplied
    query having M terms, a document is considered to be a match if it contains at least
    N <= M of those terms. The matching documents are ranked by the supplied ranker, and
    only the "best" matches are returned to the client via the supplied callback function.

    The client can supply a dictionary of options that controls this query evaluation
    process: The value of N is inferred from the query via the "match_threshold" (float)
    option, and the maximum number of documents to return to the client is controlled via
    the "hit_count" (int) option.

    The callback function supplied by the client will receive a dictionary having the keys
    "score" (float) and "document" (Document).
    """
    # Print verbose debug information?
    debug = options.get("debug", False)

    # Produce the query terms. We must use the same string processing here as we used when
    # building up the inverted index. Some terms might be duplicated (e.g., as in the query
    # "to be or not to be").
    query_terms = self._inverted_index.get_terms(query)
    unique_query_terms = list(Counter(query_terms).items())

    # Get the posting lists for the unique query terms.
    posting_lists = [self._inverted_index[term] for (term, _) in unique_query_terms]

    # We require that at least N of the M query terms are present in the document,
    # for the document to be considered part of the result set. What should the minimum
    # value of N be?
    # TODO: Take multiplicity into account, and not just uniqueness.
    match_threshold = max(0.0, min(1.0, options.get("match_threshold", 0.5)))
    required_minimum = max(1, min(len(unique_query_terms),
                                  int(match_threshold * len(unique_query_terms))))

    # When traversing the posting lists using document-at-a-time traversal, we need to
    # keep track of where we are in each of the posting lists. Initially, all the cursors
    # "point to" the first entry in each posting list. Keep track of which posting lists
    # remain to be fully traversed.
    all_cursors = [next(p, None) for p in posting_lists]
    remaining_cursor_ids = [i for i in range(len(all_cursors)) if all_cursors[i]]

    # We're doing ranked retrieval. Assess relevance scores per document as we go along,
    # as we're doing document-at-a-time traversal. Keep track of the K highest-scoring
    # documents.
    sieve = Sieve(max(1, min(100, options.get("hit_count", 10))))

    # We're doing at least N-of-M matching. As we reach the end of the posting lists, we
    # can abort when the number of non-exhausted lists drops below the required minimum N.
    while len(remaining_cursor_ids) >= required_minimum:
        # The posting lists are sorted by document identifier in ascending order. Define
        # the "frontier" as the subset of non-exhausted posting lists that mention the
        # lowest document identifier. In a sense, if we imagine scanning the posting lists
        # from left to right, the frontier is the subset that has the "leftmost" cursors.
        # TODO: This can easily be done in a single pass over the remaining lists.
        document_id = min(all_cursors[i].document_id for i in remaining_cursor_ids)
        frontier_cursor_ids = [i for i in remaining_cursor_ids
                               if all_cursors[i].document_id == document_id]

        # The number of elements on the frontier needs to be at least N. Otherwise, these
        # documents don't contain enough of the query terms, and aren't part of the
        # result set.
        if len(frontier_cursor_ids) >= required_minimum:
            ranker.reset(document_id)
            for i in frontier_cursor_ids:
                ranker.update(unique_query_terms[i][0], unique_query_terms[i][1],
                              all_cursors[i])
            score = ranker.evaluate()
            sieve.sift(score, document_id)
            if debug:
                print("*** MATCH")
                print("document =", self._corpus[document_id])
                print("matches =", {unique_query_terms[i][0]: all_cursors[i]
                                    for i in frontier_cursor_ids})
                print("score =", score)

        # Move along the cursors on the frontier. The cursors not on the frontier remain
        # where they are. We may or may not reach the end of some posting lists when we
        # advance, so the set of remaining non-exhausted lists might shrink.
        for i in frontier_cursor_ids:
            all_cursors[i] = next(posting_lists[i], None)
        remaining_cursor_ids = [i for i in range(len(all_cursors)) if all_cursors[i]]

    # Alert the client about the best-matching documents, using the supplied callback
    # function. Emit documents sorted according to their relevancy scores.
    for (score, document_id) in sieve.winners():
        callback({"score": score, "document": self._corpus[document_id]})
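# The TODO above notes that the frontier can be found in a single pass over the remaining
# lists instead of one pass for the minimum and another for the matching cursors. A hedged
# sketch (variable names reuse those above; not part of the original source):
def find_frontier(all_cursors, remaining_cursor_ids):
    # Returns the lowest document ID among the remaining cursors, plus the indices of
    # all cursors positioned at that document, in one pass.
    lowest, frontier = None, []
    for i in remaining_cursor_ids:
        doc_id = all_cursors[i].document_id
        if lowest is None or doc_id < lowest:
            lowest, frontier = doc_id, [i]
        elif doc_id == lowest:
            frontier.append(i)
    return lowest, frontier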
from os import listdir

import matplotlib.pyplot as plt
import numpy as np
import torch

# ResNet34, SiameseNetwork, ImageFlickrFeatures, Ranker, and the metric helpers
# (map, recall_ratio_tot, recall_prec_tot) are project-local; their import paths are
# assumed. Note that the project's `map` helper shadows the Python builtin.


class Evaluator:

    def __init__(self, path_weight: str, path_data: str, similarity, path_feat: str):
        self.path_weight = path_weight
        self.path_data = path_data
        self.similarity = similarity
        self.flickr_dataset = ImageFlickrFeatures(path_feat)  # dbs/features_contrastive.db
        # self.ranking = ranking
        imagenet_net = ResNet34()
        sketches_net = ResNet34()
        # print("Adapting output layers...")
        siamese_net = SiameseNetwork(sketches_net, imagenet_net)
        # Load the trained checkpoint,
        # e.g. r'C:\Users\aleja\Desktop\Tareas\Reconocimiento Virtual con Deep Learning\T2\best_SiameseNetwork_contrastive.pth'
        siamese_net.load_state_dict(torch.load(self.path_weight))
        self.net = siamese_net
        self.ranking = Ranker(self.path_data,
                              image_dataset_features=self.flickr_dataset,
                              feature_extractor=self.net,
                              similarity_fn=self.similarity)

    def calc_rank(self, path_img):
        rank = self.ranking.get_rank(path_img)
        return rank

    def calc_all_ranks(self, path_querys):
        # Rank every query image found in the given directory.
        self.imgs_names = [path_querys + '/' + name for name in listdir(path_querys)]
        self.classes = []
        self.ranks = []
        for img_name in self.imgs_names:
            c, rank = self.ranking.get_rank(img_name)
            self.classes.append(c)
            self.ranks.append(rank)

    def calc_map(self):
        # `map` is the project's mean-average-precision helper, not the builtin.
        mean_ap = map(self.classes, self.ranks)
        return mean_ap

    def calc_recall_ratio(self, len_class_path: str):
        x, y = recall_ratio_tot(self.classes, self.ranks, len_class_path)
        plt.plot(x, y)
        plt.xlabel('Recall')
        plt.ylabel('Retrieved images')
        plt.title('Recall ratio Curve')
        return x, y

    def calc_recall_prec(self, len_class_path: str):
        rp = recall_prec_tot(self.classes, self.ranks, len_class_path)
        rec = np.array([0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0])
        plt.plot(rec, rp)
        plt.xlabel('Recall')
        plt.ylabel('Precision')
        plt.title('Recall-Precision Curve')
        return rp
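# Hypothetical usage sketch (the paths and the similarity function are assumptions; the
# method names come from the Evaluator class above):
evaluator = Evaluator(path_weight='best_SiameseNetwork_contrastive.pth',
                      path_data='dbs/data.db',
                      similarity=torch.nn.functional.cosine_similarity,
                      path_feat='dbs/features_contrastive.db')
evaluator.calc_all_ranks('queries')  # Rank every image in the queries/ directory.
print('mAP:', evaluator.calc_map())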
def evaluate(self, query: str, options: dict, ranker: Ranker,
             callback: Callable[[dict], Any]) -> None:
    """
    Evaluates the given query, doing N-out-of-M ranked retrieval. I.e., for a supplied
    query having M terms, a document is considered to be a match if it contains at least
    N <= M of those terms. The matching documents are ranked by the supplied ranker, and
    only the "best" matches are returned to the client via the supplied callback function.

    The client can supply a dictionary of options that controls this query evaluation
    process: The value of N is inferred from the query via the "match_threshold" (float)
    option, and the maximum number of documents to return to the client is controlled via
    the "hit_count" (int) option.

    The callback function supplied by the client will receive a dictionary having the keys
    "score" (float) and "document" (Document).

    If the query contains M unique query terms, each document in the result set should
    contain at least N of these M terms.
    """
    terms = list(self._inverted_index.get_terms(query))
    threshold = options.get("match_threshold")
    debug = options.get("debug", False)
    counter_terms = Counter(terms)
    sieve = Sieve(options.get('hit_count'))
    m = len(terms)
    n = max(1, min(m, int(threshold * m)))

    class Aktiv(object):
        # Wraps a posting-list iterator together with its term and the term's
        # multiplicity in the query.
        def __init__(self, invertedindex, term, multiplicity):
            self.term = term
            self.iterator = invertedindex.get_postings_iterator(term)
            self.posting = next(self.iterator, None)
            self.multiplicity = multiplicity
            self.hasBeenRanked = False

        @property
        def document_id(self):
            return self.posting.document_id

        def neste_posting(self):
            self.posting = next(self.iterator, None)

    aktive = []  # List of active posting-list iterators.
    for term in terms:
        aktiv = Aktiv(self._inverted_index, term, counter_terms[term])
        if aktiv.posting is not None:
            aktive.append(aktiv)

    forrige_minste = None
    while len(aktive) > 0:
        (minste, index) = min((v.document_id, i) for i, v in enumerate(aktive))
        current = aktive[index]
        if minste != forrige_minste:
            aktive_docids = [a for a in aktive if a.document_id == minste]
            ranker.reset(current.document_id)
            evaluated_terms = []
            # Walk through aktive_docids to feed each term and its frequency to the
            # ranker exactly once.
            for a in aktive_docids:
                if a.term not in evaluated_terms:
                    ranker.update(a.term, a.multiplicity, a.posting)
                    evaluated_terms.append(a.term)
            score = ranker.evaluate()
            if threshold == 1:
                if not len(aktive_docids) < n and score >= n:
                    sieve.sift(score, minste)
            else:
                if score >= n and len(aktive_docids) >= n:
                    sieve.sift(score, minste)
        forrige_minste = minste
        current.neste_posting()
        if current.posting is None:
            aktive.pop(index)

    # Report the winners to the client.
    for win in sieve.winners():
        doc = self._corpus.get_document(win[1])
        callback({'score': win[0], 'document': doc})
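# All three evaluate() variants above funnel candidate (score, document_id) pairs through
# a Sieve and read the survivors back via winners(). A minimal sketch of such a class,
# assuming it keeps the K highest-scoring entries in a min-heap and yields winners best
# first; the real project class may differ.
import heapq


class Sieve:
    # Keeps the K highest-scoring (score, item) pairs seen so far.
    def __init__(self, size: int):
        self._size = size
        self._heap = []  # Min-heap; the smallest retained score sits at the root.

    def sift(self, score, item) -> None:
        if len(self._heap) < self._size:
            heapq.heappush(self._heap, (score, item))
        elif score > self._heap[0][0]:
            # Evict the current lowest-scoring entry in favor of this one.
            heapq.heapreplace(self._heap, (score, item))

    def winners(self):
        # Yield the retained (score, item) pairs, highest score first.
        return sorted(self._heap, key=lambda pair: pair[0], reverse=True)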