"""Index-build entry script.

Clears any previously built index files, then prepares the shared working
collections and the list of index types to build (driven by settings/ENV).
The per-index build loop follows this prologue.
"""
import os
import sys

import tabulate as tab
import numpy as np

import settings as ENV

# Make project-local packages importable before the local imports below.
# NOTE: `sys` and `os` were used here without being imported — fixed.
sys.path.insert(0, 'src')

import utils as util
from indexing import docProcessor as dp
from indexing import indexing as idx
from indexing import tripleBuilder as tb
from object_definitions import document as d

# Working collections populated while each index is built.
termList = []
dfList = []
tripleList = []
documentList = []

# Stop terms are shared by every index type.
stopTerms = util.extractStopTerms()

# Build every supported index type, or only the configured one.
if ENV.BUILD_ALL_INDEXES:
    indexTypes = ["INVERTED", "POSITIONAL", "STEM", "PHRASE"]
else:
    indexTypes = [ENV.INDEX_TYPE]

# Per-run bookkeeping (sizes / timings), keyed by index type.
runStats = {}
timeStats = {}

# Empty our index folder so stale files never mix with the new build.
# os.path.join tolerates a trailing separator in INDEX_LOCATION, unlike
# the previous raw string concatenation.
indexFiles = os.listdir(ENV.INDEX_LOCATION)
for f in indexFiles:
    os.remove(os.path.join(ENV.INDEX_LOCATION, f))

# For every index type we wish to create...
"""Query-processing entry script prologue.

Records the start time, loads stop terms, and loads whichever index (or
indexes) the configured query-processing method requires.
"""
# NOTE: `datetime` and `settings as ENV` were used below without being
# imported — fixed (ENV follows the project's `import settings as ENV`
# convention used by the build script).
import datetime

import settings as ENV
import utils as util
from query import queryProcessor as qp
from query import index as i
from query import vectorSpace as vsm
from query import bm25
from query import languageModel as lang
from query import queryReducer as q_red
from query import queryExpander as q_exp
from indexing import indexing as idx
from object_definitions import document as d
from object_definitions.query import Query

# Wall-clock start of the whole query run.
start_time = datetime.datetime.now()

ENV.STOP_TERMS = util.extractStopTerms()
# We always want to extract phrases for the lexicon.
ENV.EXTRACT_PHRASES = True

'''
LOAD NECESSARY INDEXES
'''
if ENV.QUERY_PROCESSING_METHOD == "STANDARD":
    # One fixed index, chosen by configuration.
    lexicon_path = ENV.INDEX_LOCATION + ENV.QUERY_PROCESSING_INDEX.lower() + "Lexicon.txt"
    doc_list_path = ENV.INDEX_LOCATION + ENV.QUERY_PROCESSING_INDEX.lower() + ENV.DOC_FILE_NAME + ".txt"
    posting_list_path = ENV.INDEX_LOCATION + ENV.QUERY_PROCESSING_INDEX.lower() + ENV.POSTING_LIST_NAME + ".txt"
    ENV.primary_index = i.Index(lexicon_path, posting_list_path, doc_list_path)
# if the index we use is query dependent
elif ENV.QUERY_PROCESSING_METHOD == "CONDITIONAL":
    # The phrase index is loaded first; further loading continues below.
    ENV.QUERY_PROCESSING_INDEX = "PHRASE"
    lexicon_path = ENV.INDEX_LOCATION + ENV.QUERY_PROCESSING_INDEX.lower() + "Lexicon.txt"
    doc_list_path = ENV.INDEX_LOCATION + ENV.QUERY_PROCESSING_INDEX.lower() + ENV.DOC_FILE_NAME + ".txt"