def _raw_kw(self, tokens, lang, max_nb_returned=-1, min_ratio=0): # TEST
    """Return the texts matching ``tokens`` best, best match first.

    Every line of every day of every month found in
    ``self._get_data(lang)['data']`` is lower-cased and scored with a
    ``matcher.Matcher`` built from ``tokens`` and ``lang``. Each day
    contributes exactly one ``TextRatio`` describing its best line.

    If the matcher reports a low score and manages to split its tokens,
    the whole search is run again with the matcher's new tokens.

    tokens = a list of keywords
    lang = a string defining which language should be used
    max_nb_returned = an int, the maximum number of results returned
        (-1 = all)
    min_ratio = an int or a float, the minimum ratio a result must reach
        (0 = all)

    Returns a list of ``TextRatio(month, day, matching_line, ratio)``
    sorted by descending ratio. ``month`` and ``day`` are 1-based;
    ``matching_line`` is the 0-based index of the best line of the day,
    or 0 when no line of that day scored above 0.
    """
    results = []
    token_matcher = matcher.Matcher(tokens, lang)
    for i, month in enumerate(self._get_data(lang)['data']):
        for j, day in enumerate(month):
            lowered = [line.lower() for line in day]
            day_ratio = 0
            # Reset for every day: without this, a day in which no line
            # scores above 0 would reuse the index found for the previous
            # day, or raise UnboundLocalError if it is the very first day.
            matching_line = 0
            for k, line in enumerate(lowered):
                line_ratio = token_matcher.fuzzer(line)
                if line_ratio > day_ratio:
                    day_ratio = line_ratio
                    matching_line = k
            results.append(
                TextRatio(i + 1, j + 1, matching_line, day_ratio))
    results.sort(key=lambda item: item.ratio, reverse=True)

    # Second chance: retry with the tokens produced by the splitter.
    if token_matcher.is_score_low() and token_matcher.splitter():
        results = self._raw_kw(
            token_matcher.tokens, lang, max_nb_returned, min_ratio)

    results = [result for result in results if result.ratio >= min_ratio]
    return results if max_nb_returned == -1 else results[:max_nb_returned]
def test_is_score_low():
    """Exercise Matcher.is_score_low with the default and a custom floor."""
    subject = matcher.Matcher(tokens, 'en')

    # A freshly built matcher is expected to report a low score.
    assert subject.is_score_low() == True

    # Every token at 1 except 'maglor' at 0: still low.
    subject._best_score = dict.fromkeys(subject._best_score, 1)
    subject._best_score['maglor'] = 0
    assert subject.is_score_low() == True

    # 'maglor' at 0.84: low with the default floor, not low with 0.83.
    subject._best_score['maglor'] = 0.84
    assert subject.is_score_low() == True
    assert subject.is_score_low(floor=0.83) == False
def info_getter(self, video_code, file_path, file_name, url_choose=0):
    """Fetch the raw info for ``video_code`` and run it through the matcher.

    The raw info is obtained from ``self.__proxy_using(video_code,
    url_choose)``. When that call returns something truthy, it is passed
    together with ``video_code``, ``file_path`` and ``file_name`` to
    ``matcher.Matcher().info_matcher`` and the matcher's result is returned.

    Returns None when no raw info could be obtained.
    """
    raw_info = self.__proxy_using(video_code, url_choose)
    if not raw_info:
        return None

    # NOTE: a retry with url_choose=1 when raw_info == 404 was sketched here
    # in the past but is disabled.
    return matcher.Matcher().info_matcher(
        video_code, file_path, file_name, raw_info)
def hello(): #Handle POST request if request.method == 'POST': start = datetime.now() #Check if the database has been proccesed if not list_images: calc_calculate_sift() img = request.files['pic'] #TODO, SAVE IMAGE TO FOLDER. name = secure_filename(img.filename) img.save(os.path.join(ROOT, "tmp/" + name)) query = ImageItem("tmp/" + name, name) matcher = match.Matcher() r = matcher.search(query, list_images) if not r: end = datetime.now() delta = end - start print delta return abort(404) name = str(r[0][1]) img_item = Item.query.filter_by(url=name).first() if img_item is None: return abort(404) t = { "title": img_item.title, "origin": img_item.origin, "category": img_item.category.name, "description": img_item.description, "ingredients": img_item.ingredients } end = datetime.now() delta = end - start print delta return jsonify(t) #Handle GET request elif request.method == 'GET': return '''
def __init__(self):
    """Create a ``matcher.Matcher`` with no arguments and keep it as ``self.match``."""
    self.match = matcher.Matcher()
def inversons(mots_bruts, Annee, debut, fin, plus=False, langue='fr', exit=True):
    """Search ``Annee`` for the feasts matching ``mots_bruts``.

    Arguments:
    - mots_bruts : the search terms, a string or a list of strings ;
    - Annee : a LiturgicalCalendar object, indexed by date ;
    - debut : a datetime.date, the first day searched ;
    - fin : a datetime.date, the last day searched ; when it equals
      ``debut`` the whole calendar year of ``debut`` is searched instead ;
    - plus : a bool ; when true, every feast scoring at least 50 is
      returned instead of only the best tier ;
    - langue : language used ; only 'fr' is accepted ;
    - exit : a bool forwarded to ``erreur`` ; it defines whether the system
      has to exit or not in case of error.

    Returns a list sorted by descending score. Without ``plus`` it holds
    the feasts scoring 100 or more if there are any, otherwise those
    scoring from 70 up to (but excluding) 100, otherwise every feast
    scoring at least 50. When nothing matches, or when ``langue`` is not
    'fr', the list holds a single ``erreur(...)`` result.
    """
    if langue != 'fr':
        # NOTE(review): looks like a leftover debug print -- confirm before
        # removing, it writes to stdout.
        print(exit)
        return [erreur('01', langue, exit=exit)]

    # Normalise the query into a flat list of words, each one run through
    # sans_accent.
    if isinstance(mots_bruts, list):
        bruts = [sans_accent(mot) for mot in mots_bruts]
    else:
        bruts = sans_accent(mots_bruts).split()
    mots = []
    for brut in bruts:
        mots.extend(brut.split() if ' ' in brut else [brut])
    mots = modification(mots, langue)
    mots_str = ''.join(mots)

    matching_machine = matcher.Matcher(mots, 'fr')

    # A single date means "search the whole year".
    jour = debut
    if jour == fin:
        annee = jour.year
        jour = datetime.date(annee, 1, 1)
        fin = datetime.date(annee, 12, 31)

    un_jour = datetime.timedelta(1)
    retenus = []
    while jour <= fin:
        try:
            for fete in Annee[jour]:
                if fete.__dict__.get('tokens_', False):
                    # WARNING: tokens_ is used because fete.tokens are not
                    # ready; switch to fete.tokens once they are.
                    fete.valeur = matching_machine.fuzzer(fete.tokens_, False)
                    # Hack, to delete once every feast uses tokens_ instead
                    # of regex_.
                    fete.valeur *= 60
                else:
                    fete.valeur = fete.Correspondance(mots_str, mots, plus)
                if fete.valeur >= 50:
                    retenus.append(fete)
        except KeyError:
            # Any KeyError raised while handling this day is ignored.
            pass
        jour += un_jour

    retenus = sorted(retenus, key=lambda fete: fete.valeur, reverse=True)
    superieurs = [fete for fete in retenus if 70 <= fete.valeur < 100]
    elite = [fete for fete in retenus if fete.valeur >= 100]

    if plus:
        return retenus
    if elite:
        return elite
    if superieurs:
        return superieurs
    if retenus:
        return retenus
    return [erreur(20, langue, exit=exit)]
def test_reset_scores():
    """Check that after reset_scores() the highest stored best score is 0."""
    subject = matcher.Matcher(tokens, 'en')
    # Start from non-zero scores so the reset has something to undo.
    subject._best_score = dict.fromkeys(subject._best_score, 0.8)

    subject.reset_scores()

    highest = max(subject._best_score.values())
    assert highest == 0
def __init__(self, max_results=10, min_score=0.0):
    """Initialise this matcher and build its three sub-matchers.

    ``max_results`` and ``min_score`` are forwarded unchanged, positionally,
    to ``matcher.Matcher.__init__`` and to the constructor of each
    sub-matcher.
    """
    # Explicit call to matcher.Matcher.__init__ on this instance
    # (presumably the base class -- TODO confirm against the class header).
    matcher.Matcher.__init__(self, max_results, min_score)
    # One sub-matcher per strategy, all sharing the same limits.
    self._exact_matcher = matcher.Matcher(max_results, min_score)
    self._re_matcher = RegexApproxMatcher(max_results, min_score)
    self._ed_matcher = EditDistanceMatcher(max_results, min_score)