def __init__(self, name, expression, mathml=None, initRetrievalTime='undefined'):
    """Set up a query from an SLT string or, when given, from MathML.

    :param name: identifier stored as ``self.name``.
    :param expression: SLT string; parsed only when ``mathml`` is None.
    :param mathml: optional MathML source. When present the tree is built
        from it and ``self.expression`` is taken from the tree's
        ``tostring()`` instead of the ``expression`` argument.
    :param initRetrievalTime: stored unchanged as ``self.initRetrievalTime``.
    """
    self.name = name
    self.mathml = mathml
    self.results, self.documents = {}, {}

    if mathml is None:
        # no MathML available: the SLT string is the only source
        self.tree = SymbolTree.parse_from_slt(expression)
        self.expression = expression
    else:
        # MathML available: build the tree from it (additional information
        # is extracted) and derive the expression string from that tree
        self.tree = MathExtractor.convert_and_link_mathml(mathml)
        self.expression = self.tree.tostring()

    # every query starts out with the default constraints for its tree
    self.constraints = Query.create_default_constraints(self.tree)

    # sorted views start out unset
    self.sorted_results = None
    self.sorted_result_index = None
    self.sorted_abs_ranks = None
    self.sorted_documents = None
    self.sorted_document_index = None

    self.elapsed_time = 0.0

    # RZ: tuple-based retrieval time and other measures
    self.initRetrievalTime = initRetrievalTime
    self.postings = None
    self.matchedFormulae = None
    self.matchedDocs = None

    # cache
    self.html_queryblock = {}
def __init__(self, query, expression, original_ranking, original_score, mathml=None):
    """Create a result entry for ``query``.

    :param query: owning query object; its ``name`` is used in the error
        message printed on a failed SLT round-trip.
    :param expression: SLT string; parsed only when ``mathml`` is None.
    :param original_ranking: stored as ``self.original_ranking``.
    :param original_score: stored as ``self.original_score``.
    :param mathml: optional MathML source. When present the tree is built
        from it and ``self.expression`` is taken from the tree's
        ``tostring()``.
    :raises SystemExit: with code 1 when the tree parsed from ``expression``
        does not serialize back to the same string.
    """
    self.query = query
    self.original_ranking = original_ranking
    self.original_score = original_score
    self.mathml = mathml

    self.new_scores = [0.0]

    if mathml is not None:
        # parse from mathml (additional information extracted)
        self.tree = MathExtractor.convert_and_link_mathml(mathml)
        self.expression = self.tree.tostring()

        # Debug dump of the converted tree; the file is overwritten on every
        # call. The context manager closes the handle even if write() fails.
        with open("probando.txt", 'w', encoding='utf-8') as out_file:
            out_file.write(self.expression)
    else:
        # parse from SLT string (no mathml information available)
        self.tree = SymbolTree.parse_from_slt(expression)
        self.expression = expression

        if self.tree.tostring() != expression:
            print("Bad conversion for result for query " + query.name + ": " + expression + " -> " + self.tree.tostring())
            # The ``exit`` helper is injected by the ``site`` module and is
            # not guaranteed to exist; raising SystemExit is the portable
            # way to terminate with the same exit code.
            raise SystemExit(1)

    self.locations = []
    self.matched_elements = []
    self.unified_elements = {}
    self.wildcard_matches = {}
    self.all_unified = []

    self.times_rendered = 0
def create_default_constraints(query_tree, default_value="U"):
    """Return a new SymbolTree of default constraints for ``query_tree``.

    The structure under ``query_tree.root`` is duplicated with
    ``default_value`` passed to ``Query.duplicate_structure``, the copy is
    converted in place into constraint nodes, and the result is wrapped in
    a ``SymbolTree``.

    :param query_tree: tree whose ``root`` is duplicated.
    :param default_value: value handed to ``Query.duplicate_structure``.
    :return: ``SymbolTree`` built around the converted copy.
    """
    # step 1: copy the structure of the query
    constraint_root = Query.duplicate_structure(query_tree.root, default_value)

    # step 2: turn the copied nodes into constraint nodes (in place)
    Query.convert_to_constraint_tree(constraint_root)

    # step 3: wrap the root in a symbol tree
    return SymbolTree(constraint_root)
def eval_similarity(query_data):
    """Score one chunk of candidate expressions against a query.

    :param query_data: tuple ``(query, start_idx, expressions)`` where
        ``query`` is ``(query_name, query_expression)``, ``start_idx`` is the
        offset added to each local index, and ``expressions`` is a sequence
        of tab-separated lines read as ``expression, doc_id, location``.
    :return: list of ``(scores, start_idx + idx)`` tuples. Candidates for
        which ``SIM_FUNCTION`` fails are reported on stdout and left out,
        which is why the index is returned with each score.
    """
    query, start_idx, expressions = query_data

    csv_reader = csv.reader(expressions, delimiter='\t', lineterminator='\n',
                            quoting=csv.QUOTE_NONE, escapechar="\\")

    end_idx = start_idx + len(expressions) - 1

    # create query slt
    query_name, query_expression = query
    query_tree = SymbolTree.parse_from_slt(query_expression)
    query_constraints = Query.create_default_constraints(query_tree)

    results = []
    for idx, parts in enumerate(csv_reader):
        expression = parts[0]
        doc_id = parts[1]
        location = parts[2]

        candidate_tree = SymbolTree.parse_from_slt(expression)

        try:
            data = SIM_FUNCTION(query_tree, candidate_tree, query_constraints)
            scores = data[0]
        except Exception:
            # Only ordinary errors are skipped; KeyboardInterrupt and
            # SystemExit must propagate so the worker can be stopped.
            print("Error processing: ")
            print(query_expression, flush=True)
            print(expression, flush=True)
            print("Doc: " + doc_id, flush=True)
            print("Loc: " + location, flush=True)
            continue

        # the index is only returned because some expressions might be
        # absent in case of errors
        results.append((scores, start_idx + idx))

    print("Processed: " + str(start_idx) + " to " + str(end_idx) + " finished", flush=True)

    return results
def set_constraints(self, slt_string):
    """Replace the query constraints with a tree parsed from ``slt_string``.

    The string is parsed as an SLT tree with its original text labels, and
    those labels are then converted in place into constraints. The new tree
    is accepted only if ``Query.equal_subtree_structure`` reports that it
    matches the query tree; otherwise a warning is printed and the current
    constraints are left untouched.

    :param slt_string: SLT string describing the constraint tree.
    """
    candidate = SymbolTree.parse_from_slt(slt_string)
    Query.convert_to_constraint_tree(candidate.root)

    if Query.equal_subtree_structure(self.tree.root, candidate.root):
        self.constraints = candidate
    else:
        print("Warning: Invalid constraint tree specified for " + self.name)
def eval_similarity(query_data):
    """Score one chunk of candidate expressions against a query.

    :param query_data: tuple ``(query, start_idx, expressions)`` where
        ``query`` is ``(query_name, query_expression)``, ``start_idx`` is the
        offset added to each local index, and ``expressions`` is a sequence
        of tab-separated strings read as ``expression, doc_id, location``.
    :return: list of ``(scores, start_idx + idx)`` tuples. Candidates for
        which ``similarity_v04`` fails are reported on stdout and left out,
        which is why the index is returned with each score.
    """
    query, start_idx, expressions = query_data

    end_idx = start_idx + len(expressions) - 1

    # create query slt
    query_name, query_expression = query
    query_tree = SymbolTree.parse_from_slt(query_expression)
    query_constraints = Query.create_default_constraints(query_tree)

    results = []
    for idx, expression_info in enumerate(expressions):
        parts = expression_info.strip().split("\t")
        expression = parts[0]
        doc_id = parts[1]
        location = parts[2]

        candidate_tree = SymbolTree.parse_from_slt(expression)

        try:
            # Only the scores are used here. The 4-way unpack is kept so a
            # result of unexpected arity is still treated as a failure.
            scores, _matched_q, _matched_c, _unified_c = similarity_v04(
                query_tree, candidate_tree, query_constraints)
        except Exception:
            # Only ordinary errors are skipped; KeyboardInterrupt and
            # SystemExit must propagate so the worker can be stopped.
            print("Error processing: ")
            print(query_expression, flush=True)
            print(expression, flush=True)
            print("Doc: " + doc_id, flush=True)
            print("Loc: " + location, flush=True)
            continue

        # the index is only returned because some expressions might be
        # absent in case of errors
        results.append((scores, start_idx + idx))

    print("Processed: " + str(start_idx) + " to " + str(end_idx) + " finished", flush=True)

    return results
def __init__(self, name, expression, mathml=None, initRetrievalTime='undefined', max_results=0):
    """Set up a query from an SLT string or, when given, from MathML.

    :param name: identifier stored as ``self.name``.
    :param expression: SLT string; parsed only when ``mathml`` is None.
    :param mathml: optional MathML source. When present the tree is built
        from it and ``self.expression`` is taken from the tree's
        ``tostring()`` instead of the ``expression`` argument.
    :param initRetrievalTime: stored unchanged as ``self.initRetrievalTime``.
    :param max_results: stored as ``self.max_results`` (re-rank at most K
        results).
    """
    self.name = name
    self.mathml = mathml
    self.results, self.documents = {}, {}

    if mathml is None:
        # no MathML available: the SLT string is the only source
        self.tree = SymbolTree.parse_from_slt(expression)
        self.expression = expression
    else:
        # MathML available: build the tree from it (additional information
        # is extracted) and derive the expression string from that tree
        self.tree = MathExtractor.convert_and_link_mathml(mathml)
        self.expression = self.tree.tostring()

    # every query starts out with the default constraints for its tree
    self.constraints = Query.create_default_constraints(self.tree)

    # sorted views start out unset
    self.sorted_results = None
    self.sorted_result_index = None
    self.sorted_abs_ranks = None
    self.sorted_documents = None
    self.sorted_document_index = None

    self.elapsed_time = 0.0

    # RZ: tuple-based retrieval time and other measures
    self.initRetrievalTime = initRetrievalTime
    self.postings = None
    self.matchedFormulae = None
    self.matchedDocs = None

    # re-rank at most K results
    self.max_results = max_results

    # cache
    self.html_queryblock = {}
def __init__(self, query, expression, original_ranking, original_score, mathml=None):
    """Create a result entry for ``query``.

    :param query: stored as ``self.query``.
    :param expression: SLT string; parsed only when ``mathml`` is None.
    :param original_ranking: stored as ``self.original_ranking``.
    :param original_score: stored as ``self.original_score``.
    :param mathml: optional MathML source. When present the tree is built
        from it and ``self.expression`` is taken from the tree's
        ``tostring()``.
    """
    self.query = query
    self.original_ranking, self.original_score = original_ranking, original_score
    self.mathml = mathml

    self.new_scores = [0.0]

    if mathml is None:
        # no MathML available: the SLT string is the only source
        self.tree = SymbolTree.parse_from_slt(expression)
        self.expression = expression
    else:
        # MathML available: build the tree from it (additional information
        # is extracted) and derive the expression string from that tree
        self.tree = MathExtractor.convert_and_link_mathml(mathml)
        self.expression = self.tree.tostring()

    # match bookkeeping, empty until filled elsewhere
    self.locations = []
    self.matched_elements = []
    self.unified_elements = []

    self.times_rendered = 0