def search(self, query_norms, verbose=False):
    """ Boolean search by query.

    ``query_norms`` is a flat sequence of normalized words mixed with the
    operator tokens "AND", "OR" and "NOT". It is evaluated strictly left to
    right, with no precedence and no grouping:

    * an operator token is remembered and applied to every following word
      until another operator token replaces it;
    * the first word found in the index seeds the result, whatever operator
      precedes it;
    * a word that has no usable ``self.word_index[word]["ids"]`` entry is
      skipped (and reported through ``utils.print_utf`` when ``verbose``);
    * a further word that arrives while no operator has been seen stops the
      evaluation; the result accumulated so far is returned.

    Returns a list (in arbitrary order) of the ids that satisfy the query.
    """
    operators = ("AND", "OR", "NOT")
    oper = ""
    # None means "no word applied yet". It must stay distinct from an empty
    # set, otherwise a legitimately empty intermediate result (for example
    # ``a AND b`` with nothing in common) would be overwritten by the next
    # word's postings instead of being combined with them.
    query_index = None
    with open(self.bin_name, "rb") as f_backward:
        for norm in query_norms:
            if norm in operators:
                oper = norm
                continue

            try:
                offset, size = self.word_index[norm]["ids"]
            except (KeyError, TypeError, ValueError):
                # Unknown word or malformed index entry: best effort, skip it.
                if verbose:
                    utils.print_utf("--- " + norm)
                continue

            f_backward.seek(offset)
            deltas = self.archiver.decode(f_backward.read(size))

            # The decoded values are gaps between consecutive ids; a running
            # sum restores the ids themselves. Accumulating into a new set
            # avoids mutating whatever object the archiver returned.
            ids = set()
            current = 0
            for delta in deltas:
                current += delta
                ids.add(current)

            if query_index is None:
                query_index = ids
            elif oper == "AND":
                query_index &= ids
            elif oper == "OR":
                query_index |= ids
            elif oper == "NOT":
                query_index -= ids
            else:
                # Second word with no operator in effect: stop evaluating.
                break

    if query_index is None:
        return []
    return list(query_index)
def extract(self, query_norms, up=None, verbose=False):
    """ Extract all index data for the query words.

    For every word in ``query_norms``:

    * if it is in ``self.cache``, the cached ``"index"`` value is reused and
      the entry's ``"time"`` stamp is refreshed;
    * otherwise, if it is in ``self.word_index``, its data is read through
      ``self.read_and_decode_word_index`` and stored via
      ``self.cache_insert``;
    * otherwise it is skipped (and reported through ``utils.print_utf``
      when ``verbose``).

    ``up`` is the list of field names forwarded to
    ``read_and_decode_word_index``; when omitted it defaults to
    ``["ids", "lens", "posits", "hashes"]``.

    Returns a dict mapping each found word to its extracted data.

    NOTE(review): a cache hit is returned regardless of ``up`` -- confirm
    that cached entries always hold every field callers may ask for.
    """
    if up is None:
        # Fresh list per call: a list literal as the default value would be
        # one shared object that any callee mutation leaks into later calls.
        up = ["ids", "lens", "posits", "hashes"]

    query_index = {}
    with open(self.bin_name, "rb") as f_backward:
        for norm in query_norms:
            if norm in self.cache:
                cached = self.cache[norm]
                query_index[norm] = cached["index"]
                cached["time"] = time.time()
                continue

            if norm not in self.word_index:
                if verbose:
                    utils.print_utf("--- " + norm)
                continue

            word_data = self.read_and_decode_word_index(
                f_backward, norm, self.word_index[norm], up=up)
            query_index[norm] = word_data
            self.cache_insert(norm, word_data)
    return query_index