def text_resolve(reference, returned_format):
    """
    Resolve a free-text reference and return it in the requested format.

    A hit from cache_resolved_get is returned straight away. Otherwise the
    reference is parsed with text_parser and handed to solve_reference.
    References in which RE_NUMERIC_VALUE finds no match are rejected before
    parsing (the error message treats them as lacking year and volume).

    NoSolution, Incomplete and ValueError are logged and turned into a
    placeholder result ('0.0 ' followed by 19 dots); any other exception is
    logged and re-raised.

    :param reference: the reference string to resolve
    :param returned_format: passed through to format_resolved_reference
    :return: the value produced by format_resolved_reference
    """
    try:
        cached = cache_resolved_get(reference)
        if cached:
            return format_resolved_reference(returned_format,
                                             resolved=cached,
                                             reference=reference)

        if not bool(RE_NUMERIC_VALUE.search(reference)):
            raise ValueError(
                'Reference with no year and volume cannot be resolved.')

        parsed = text_parser(reference)
        if not parsed:
            raise NoSolution("NotParsed")

        solution = solve_reference(Hypotheses(parsed))
        return format_resolved_reference(returned_format,
                                         resolved=str(solution),
                                         reference=reference)
    except (NoSolution, Incomplete, ValueError) as err:
        # expected failures: report them and answer with the placeholder
        current_app.logger.error('Exception: {error}'.format(error=str(err)))
        placeholder = '0.0 %s' % (19 * '.')
        return format_resolved_reference(returned_format,
                                         resolved=placeholder,
                                         reference=reference)
    except Exception as err:
        # anything else is logged here and left to the caller
        current_app.logger.error('Exception: {error}'.format(error=str(err)))
        raise
def solve_reference(ref):
    """
    Return a solution for the record presumably meant by ref.

    Every hypothesis produced by Hypotheses.iter_hypotheses(ref) is tried in
    turn and the result of the first one that solve_for_fields accepts is
    returned. Hypotheses that end in Undecidable contribute their
    considered_solutions to a stash. When all hypotheses are exhausted the
    stash is reduced to the best entry per bibcode, and the candidate whose
    score is strictly higher than every other one (or the only candidate)
    is returned as a Solution.

    :param ref: the reference to resolve; it is handed to enough_to_proceed
        and Hypotheses.iter_hypotheses
    :return: the result of solve_for_fields for the first successful
        hypothesis, or a Solution built from the stashed candidates
    :raises Incomplete: if enough_to_proceed(ref) is false
    :raises NoSolution: if no hypothesis succeeds and the stash holds no
        unique best candidate
    :raises Solr, KeyboardInterrupt: re-raised unchanged
    """
    if not enough_to_proceed(ref):
        current_app.logger.error(
            "Not enough information to resolve the record")
        raise Incomplete("Not enough information to resolve the record.",
                         str(ref))

    possible_solutions = []
    for hypothesis in Hypotheses.iter_hypotheses(ref):
        try:
            return solve_for_fields(hypothesis)
        except Undecidable as ex:
            # keep the half-credible candidates for the fallback below
            possible_solutions.extend(ex.considered_solutions)
        except (NoSolution, OverflowOrNone) as ex:
            current_app.logger.debug("(%s)" % ex.__class__.__name__)
        except (Solr, KeyboardInterrupt):
            raise
        except Exception as ex:
            # an unexpected failure only kills this hypothesis, not the
            # whole resolution attempt
            current_app.logger.error(
                "Unhandled exception of type {0} occurred with arguments:{1!r}, thus killing a single hypothesis."
                .format(type(ex).__name__, ex.args))
            current_app.logger.error(traceback.format_exc())

    # if we have collected possible solutions for which we didn't want
    # to decide the first time around, now see if any one is better than
    # all others and accept that
    if possible_solutions:
        current_app.logger.debug("Considering stashed ties: %s" %
                                 (possible_solutions))

        # keep the highest-scored (score, bibcode) pair for every bibcode
        best_by_bibcode = {}
        for score, bibcode in possible_solutions:
            known = best_by_bibcode.get(bibcode)
            if known is None or score > known[0]:
                best_by_bibcode[bibcode] = (score, bibcode)

        # ascending, so the best candidate is the last element
        ranked = sorted(best_by_bibcode.values())
        best = ranked[-1]

        # NOTE(review): as before, Solution receives the (score, bibcode)
        # pair as its second argument -- confirm that is what it expects.
        if len(ranked) == 1:
            return Solution(best[1], best, "only remaining of tied solutions")
        # Compare the scores only: the pairs also carry the (distinct)
        # bibcode, so comparing whole pairs could never detect a tie.
        if best[0] > ranked[-2][0]:
            return Solution(best[1], best, "best tied solution")
        current_app.logger.debug("Remaining ties, giving up")

    raise NoSolution("Hypotheses exhausted", str(ref))
def choose_solution(candidates, query_string, hypothesis):
    """
    Pick the preferred solution from among candidates.

    Candidates whose evidences reach MIN_SCORE_FIRST_ROUND (from the app
    config) times the number of evidences are kept. A single survivor is
    accepted; several survivors are narrowed down to those sharing the
    highest get_score(), and accepted only if exactly one remains. When
    nothing survives, the candidates are handed to
    inspect_doubtful_solutions.

    :param candidates: sequence of (evidences, solr_result) pairs
    :param query_string: forwarded to inspect_doubtful_solutions
    :param hypothesis: forwarded to inspect_doubtful_solutions
    :return: the chosen (evidences, solr_result) pair
    :raises NoSolution: if there are no candidates at all
    :raises Undecidable: if several candidates share the best score
    """
    threshold = current_app.config['MIN_SCORE_FIRST_ROUND']
    acceptable = [(evidences, record)
                  for evidences, record in candidates
                  if evidences >= threshold * len(evidences)]

    # nothing reached the threshold: fall back to the doubtful ones, if any
    if not acceptable:
        if not candidates:
            raise NoSolution("Not even a doubtful solution")
        current_app.logger.debug(
            "No score above minimal score, inspecting doubtful solutions.")
        return inspect_doubtful_solutions(candidates, query_string,
                                          hypothesis)

    if len(acceptable) == 1:
        current_app.logger.debug("Accepting single unique solution")
        evidences, record = acceptable[0]
        return evidences, record

    current_app.logger.debug(
        "Trying to disentangle multiple equal-scored solutions")
    # narrow down to the matches that share the highest score
    top_score = max(evidences.get_score() for evidences, _ in acceptable)
    leaders = [(evidences, record)
               for evidences, record in acceptable
               if evidences.get_score() == top_score]

    if len(leaders) != 1:
        current_app.logger.debug("...impossible")
        raise Undecidable("%s solutions with equal (good) score." %
                          len(leaders))

    evidences, record = leaders[0]
    return evidences, record
def inspect_doubtful_solutions(scored_solutions, query_string, hypothesis):
    """
    Raise Undecidable carrying a halfway credible candidate, or NoSolution.

    This function never returns normally. The Undecidable it raises carries
    a (score, bibcode) pair in considered_solutions so that solve_reference
    can stash it and look at it again once everything else has failed.

    :param scored_solutions: sequence of (evidences, solr_result) pairs
    :param query_string: reported as ref in the NoSolution exception
    :param hypothesis: queried for its "input_fields" detail
    :raises Undecidable: if exactly one candidate has no veto, or if a
        candidate's only veto comes from "page" and no input page was given
    :raises NoSolution: in every other case
    """
    unvetoed = []
    for evidences, record in scored_solutions:
        if not evidences.has_veto():
            unvetoed.append((evidences, record))

    if len(unvetoed) == 1:
        (evidences, record), = unvetoed
        stash = [(evidences.get_score(), record["bibcode"])]
        raise Undecidable("Try again if desperate",
                          considered_solutions=stash)

    # Some of the following rules only make sense for fielded
    # hypotheses. Always be aware that input_fields might be None
    input_fields = hypothesis.get_detail("input_fields")
    rescannable = scored_solutions if input_fields is not None else ()

    # Some publications are cited without a page number, but have
    # a page number of 1 in ADS (e.g., IBVS). So, without an input
    # page, we still accept a response page of 1
    # we should base this on the result bibstem, I guess.
    for evidences, record in rescannable:
        if evidences.single_veto_from("page") and not input_fields.get("page"):
            stash = [(evidences.get_score(), record["bibcode"])]
            raise Undecidable("Try again if desperate",
                              considered_solutions=stash)

    raise NoSolution(reason="No unique non-vetoed doubtful solution",
                     ref=query_string)
except (NoSolution, Overflow), ex: current_app.logger.debug("(%s)" % ex.__class__.__name__) except KeyboardInterrupt: raise except: current_app.logger.error( "Unhandled exception killing a single hypothesis.") # if we have collected possible solutions for which we didn't want # to decide the first time around, now see if any one is better than # all others and accept that if possible_solutions: current_app.logger.debug("Considering stashed ties: %s" % (possible_solutions)) cands = {} for score, sol in possible_solutions: cands.setdefault(sol, []).append((score, sol)) for bibcode in cands: cands[bibcode] = max(cands[bibcode]) scored = sorted(zip(cands.values(), cands.keys())) if len(scored) == 1: return Solution(scored[0][1], scored[0][0], "only remaining of tied solutions") elif scored[-1][0] > scored[-2][0]: return Solution(scored[0][1], scored[0][0], "best tied solution") else: current_app.logger.debug("Remaining ties, giving up") raise NoSolution("Hypotheses exhausted", ref)