def start(self):
    """Print the start-up banners and open the four index databases.

    The handles are stored on ``self`` as ``reviewsDB`` ('rw.idx'),
    ``ptermsDB`` ('pt.idx'), ``rtermsDB`` ('rt.idx') and ``scoresDB``
    ('sc.idx'), in that order, and the exit hint is printed last.
    """
    rule = "######################################################"
    titles = (
        "############# PHASE 3 INITIALIZING QUERY #############",
        "############# REVIEW LOOKUP SYSTEM #############",
    )
    # Each banner is a title framed by two rules, followed by a blank line.
    for title in titles:
        print(rule)
        print(title)
        print(rule + '\n')

    index_files = (
        ("reviewsDB", 'rw.idx'),
        ("ptermsDB", 'pt.idx'),
        ("rtermsDB", 'rt.idx'),
        ("scoresDB", 'sc.idx'),
    )
    for attribute, filename in index_files:
        setattr(self, attribute, IndexDB(filename))

    print("Type 'q!' to exit")
class Phase3:
    """Interactive review lookup on top of the four index files.

    ``start`` opens the indexes, ``main`` runs the prompt loop, and each
    query goes through ``queryParser`` -> ``getReviews`` -> ``displayReviews``.
    """

    # Index handles; all four are opened by start() and closed by main().
    reviewsDB = None  # review key -> review record ('rw.idx')
    ptermsDB = None   # term -> review keys ('pt.idx')
    rtermsDB = None   # term -> review keys ('rt.idx')
    scoresDB = None   # opened from 'sc.idx'; not read by the methods below
    rgx = None        # rgxHandler used to split a stored review into lines
    # False until the first filter of the current query has been applied;
    # see ourIntersect().
    firstIntersectFlag = False

    # Line index, within the list returned by rgx.putLineTitlesBack(), of the
    # field each comparator name refers to.
    _COMPARATOR_LINES = {"pprice": 2, "rscore": 6, "rdate": 7}

    def __init__(self):
        self.rgx = rgxHandler()

    def start(self):
        """Print the start-up banners and open the four index databases."""
        print("######################################################")
        print("############# PHASE 3 INITIALIZING QUERY #############")
        print("######################################################" + '\n')

        print("######################################################")
        print("############# REVIEW LOOKUP SYSTEM #############")
        print("######################################################" + '\n')

        self.reviewsDB = IndexDB('rw.idx')
        self.ptermsDB = IndexDB('pt.idx')
        self.rtermsDB = IndexDB('rt.idx')
        self.scoresDB = IndexDB('sc.idx')
        print("Type 'q!' to exit")

    def main(self):
        """Prompt for queries forever; 'q!' closes the indexes and exits."""
        while True:
            query = input("Please provide a Query: ")
            print("")
            if query == "q!":
                self.reviewsDB.close()
                self.ptermsDB.close()
                self.rtermsDB.close()
                self.scoresDB.close()
                exit()

            parsedQuery = self.queryParser(query)
            listOfReviews = self.getReviews(parsedQuery)
            self.displayReviews(listOfReviews)

    def displayReviews(self, listOfReviews):
        """Print every review in ``listOfReviews`` under a numbered banner.

        ``listOfReviews`` holds review keys. The 'review/time' line is stored
        as a Unix timestamp and is printed as e.g. 'Feb 05 2004'; every other
        line is printed as stored.
        """
        for number, reviewKey in enumerate(listOfReviews, start=1):
            reviewValue = self.reviewsDB.get(reviewKey)[0]
            print("######################################################")
            print("################# REVIEW " + str(number) + " #################")
            print("######################################################" + '\n')

            for line in self.rgx.putLineTitlesBack(reviewValue):
                if "review/time" in line:
                    stamp = datetime.fromtimestamp(float(line.split(":")[1].strip()))
                    print("review/time: " + stamp.strftime("%b %d %Y") + "\n")
                else:
                    # Stored lines already carry their own newline.
                    print(line, end='')
            print('\n')

    def getReviews(self, parsedQuery):
        """
        Return the sorted review keys that satisfy every filter in parsedQuery.

        parsedQuery is the tuple produced by queryParser:
        ([words], [wilds], [selectors], [comparators]). Each individual
        filter yields a set of review keys and the sets are intersected.

        Prices and scores are compared numerically; a review whose compared
        field is not a number (e.g. a price of "unknown") never matches that
        comparator. An invalid date, value, field or operator prints a
        message and returns [].

        >>> p3 = Phase3()
        >>> p3.start()
        ######################################################
        ############# PHASE 3 INITIALIZING QUERY #############
        ######################################################
        <BLANKLINE>
        ######################################################
        ############# REVIEW LOOKUP SYSTEM #############
        ######################################################
        <BLANKLINE>
        Type 'q!' to exit
        >>> parsedQuery = ([], [], [], [])
        >>> p3.getReviews(parsedQuery)
        []
        >>> parsedQuery = ([], [], [('r', 'ago')], [])
        >>> p3.getReviews(parsedQuery)
        ['9']
        >>> parsedQuery = (['ago'], [], [], [])
        >>> p3.getReviews(parsedQuery)
        ['9']
        >>> parsedQuery = (['again'], [], [], [])
        >>> p3.getReviews(parsedQuery)
        ['8', '10']
        >>> parsedQuery = (['again', 'used'], [], [], [])
        >>> p3.getReviews(parsedQuery)
        ['10']
        >>> parsedQuery = ([], ['ag'], [], [])
        >>> p3.getReviews(parsedQuery)
        ['8', '9', '10']
        >>> parsedQuery = (['again'], ['ag'], [], [])
        >>> p3.getReviews(parsedQuery)
        ['8', '10']
        >>> parsedQuery = ([], [], [], [('rdate', '<', '2000/01/01')])
        >>> p3.getReviews(parsedQuery)
        ['4', '5']
        >>> parsedQuery = ([], [], [], [('rdate', '<', '2000/01/01'), ('pprice', '<', '17')])
        >>> p3.getReviews(parsedQuery)
        ['5']
        >>> parsedQuery = (['cross'], [], [], [])
        >>> p3.getReviews(parsedQuery)
        ['5', '7', '8', '9', '10']
        >>> parsedQuery = ([], [], [('r', 'cross')], [])
        >>> p3.getReviews(parsedQuery)
        ['5', '7', '8', '10']
        >>> parsedQuery = ([], [], [('p', 'cross')], [])
        >>> p3.getReviews(parsedQuery)
        ['7', '8', '9', '10']
        >>> parsedQuery = ([], ['not'], [], [])
        >>> p3.getReviews(parsedQuery)
        ['1', '2', '8', '9']
        >>> parsedQuery = ([], ['not'], [('r', 'cross')], [])
        >>> p3.getReviews(parsedQuery)
        ['8']
        >>> parsedQuery = ([], [], [], [('rscore', '<', '5')])
        >>> p3.getReviews(parsedQuery)
        ['1', '3', '4']
        >>> parsedQuery = ([], [], [], [('rscore', '>', '4')])
        >>> p3.getReviews(parsedQuery)
        ['2', '5', '6', '7', '8', '9', '10']
        >>> parsedQuery = (['find'], [], [], [('rscore', '<', '5')])
        >>> p3.getReviews(parsedQuery)
        ['1', '4']
        >>> parsedQuery = ([], [], [], [('pprice', '<', '16')])
        >>> p3.getReviews(parsedQuery)
        ['5', '6']
        >>> parsedQuery = (['old'], [], [], [('pprice', '<', '16')])
        >>> p3.getReviews(parsedQuery)
        ['6']
        >>> parsedQuery = ([], [], [], [('rdate', '<', '2000/01/01')])
        >>> p3.getReviews(parsedQuery)
        ['4', '5']
        >>> parsedQuery = (['find'], [], [], [('rdate', '<', '2000/01/01')])
        >>> p3.getReviews(parsedQuery)
        ['4']
        >>> parsedQuery = ([], [], [], [('rdate', '>', '2000/01/01')])
        >>> p3.getReviews(parsedQuery)
        ['1', '2', '3', '6', '7', '8', '9', '10']
        >>> parsedQuery = ([], [], [], [('rdate', '>', '2009/01/01'), ('pprice', '>', '16'), ('pprice', '<', '18')])
        >>> p3.getReviews(parsedQuery)
        ['2']
        >>> parsedQuery = (['shazam'], [], [], [('rdate', '>', '2009/01/01'), ('pprice', '>', '16'), ('pprice', '<', '18')])
        >>> p3.getReviews(parsedQuery)
        []
        """
        # Reset so the first filter of this query seeds the result set.
        self.firstIntersectFlag = False
        reviewList = []

        # Select by selections, selector = (selector, searchTerm)
        for selector, term in parsedQuery[2]:
            if selector == "r":
                matches = list(self.rtermsDB.get(term))
            elif selector == "p":
                matches = list(self.ptermsDB.get(term))
            else:
                matches = []
            reviewList = self.ourIntersect(reviewList, matches)

        # Select by words, word = (searchTerm): a hit in either term index counts.
        for term in parsedQuery[0]:
            matches = list(self.rtermsDB.get(term)) + list(self.ptermsDB.get(term))
            reviewList = self.ourIntersect(reviewList, matches)

        # Select by wilds, wild = (searchTerm): same as words, via getWild().
        for term in parsedQuery[1]:
            matches = (list(self.rtermsDB.getWild(term))
                       + list(self.ptermsDB.getWild(term)))
            reviewList = self.ourIntersect(reviewList, matches)

        # Select by comparator, comparator = (comparator, operator, value)
        #   pprice < 20
        #   rdate > 2007/06/20
        #   rscore < 3
        # Stored lines look like:
        #   product/price: unknown
        #   review/score: 5.0
        #   review/time: 1075939200
        ops = {"<": operator.lt, ">": operator.gt}
        for field, oper, rawValue in parsedQuery[3]:
            if field not in self._COMPARATOR_LINES or oper not in ops:
                print("Invalid Comparison Provided. No Results Found.")
                return []
            try:
                target = self._comparisonTarget(field, rawValue)
            except (ValueError, OverflowError):
                if field == "rdate":
                    print("Invalid Date Provided. No Results Found.")
                else:
                    print("Invalid Value Provided. No Results Found.")
                return []

            compare = ops[oper]
            matches = []
            for key in self.reviewsDB.getAllReviewKeys():
                item = self.rgx.putLineTitlesBack(self.reviewsDB.get(key)[0])
                try:
                    actual = self._reviewFieldValue(item, field)
                except ValueError:
                    # e.g. "product/price: unknown" -- cannot satisfy a
                    # numeric comparison, so the review is skipped.
                    continue
                if compare(actual, target):
                    matches.append(key)
            reviewList = self.ourIntersect(reviewList, matches)

        return sorted(reviewList, key=float)

    def _comparisonTarget(self, field, rawValue):
        """Convert a query value to a datetime ('rdate') or a float.

        Raises ValueError (or OverflowError for absurd date parts) when the
        value is not a valid YYYY/MM/DD date or number.
        """
        if field == "rdate":
            year, month, day = rawValue.split("/")
            return datetime(int(year), int(month), int(day))
        return float(rawValue)

    def _reviewFieldValue(self, item, field):
        """Read ``field`` from a review's lines as a float or datetime.

        Raises ValueError when the stored text is not numeric.
        """
        rawText = item[self._COMPARATOR_LINES[field]].split(":")[1].strip()
        number = float(rawText)
        if field == "rdate":
            return datetime.fromtimestamp(number)
        return number

    def ourIntersect(self, b1, b2):
        """Fold one filter's matches (``b2``) into the running result (``b1``).

        The first call after ``firstIntersectFlag`` is reset returns the
        de-duplicated ``b2`` and sets the flag; later calls return the
        intersection of ``b1`` and ``b2``. The order of the result is
        unspecified.
        """
        if not self.firstIntersectFlag:
            self.firstIntersectFlag = True
            return list(set(b2))
        return list(set(b1).intersection(b2))

    def queryParser(self, query):
        """
        Parser returns a tuple of 4 lists:
        ([words], [wilds], [selectors], [comparators])

        word = (searchTerm)
        wild = (searchTerm)
        selector = (selector, searchTerm)
        comparator = (comparator, operator, value)

        The query is lower-cased first. On every pass the parser looks for a
        comparator anywhere, then a selector at the start of what is left,
        then a wildcard, then a plain word; only the matched span is removed.

        >>> p3 = Phase3()
        >>> p3.queryParser("")
        ([], [], [], [])
        >>> result = p3.queryParser("P:caMeRa")
        >>> result[2]
        [('p', 'camera')]
        >>> result = p3.queryParser("r:grEaT")
        >>> result[2]
        [('r', 'great')]
        >>> result =p3.queryParser("cAmeRa")
        >>> result[0]
        ['camera']
        >>> result = p3.queryParser("cam%")
        >>> result[1]
        ['cam']
        >>> result = p3.queryParser("r:great cam%")
        >>> result[1]
        ['cam']
        >>> result[2]
        [('r', 'great')]
        >>> result = p3.queryParser("rscore > 4")
        >>> result[3] == [('rscore', '>', '4')]
        True
        >>> result = p3.queryParser("camera rscore < 3")
        >>> result[0]
        ['camera']
        >>> result[3] == [('rscore', '<', '3')]
        True
        >>> result = p3.queryParser("pprice < 60 camera")
        >>> result[0]
        ['camera']
        >>> result[3] == [('pprice', '<', '60')]
        True
        >>> result = p3.queryParser("camera rdate > 2007/06/20")
        >>> result[0]
        ['camera']
        >>> result[3] == [('rdate', '>', '2007/06/20')]
        True
        >>> result = p3.queryParser("camera rdate > 2007/06/20 pprice > 20 pprice < 60")
        >>> result[0]
        ['camera']
        >>> result[3] == [('rdate', '>', '2007/06/20'), ('pprice', '>', '20'), ('pprice', '<', '60')]
        True
        >>> p3.queryParser("cam camera")
        (['cam', 'camera'], [], [], [])
        >>> p3.queryParser("camera r:great")
        (['camera'], [], [('r', 'great')], [])
        """
        query = query.strip().lower()

        searchTerms = []      # (searchTerm)
        wildCardTerms = []    # (searchTerm)
        selectors = []        # (selector, searchTerm)
        comparators = []      # (comparator, operator, value)

        selector = re.compile(r"(r:|p:)[a-z]*")
        wild = re.compile(r"[a-z]*%")
        comparator = re.compile(r"\w*\s(<|>)\s[\w/]*")
        word = re.compile(r"[a-z]+")

        while query:
            # Try the token kinds in priority order; selectors only count at
            # the very start of the remaining text.
            match, kind = comparator.search(query), "comparator"
            if match is None:
                match, kind = selector.match(query), "selector"
            if match is None:
                match, kind = wild.search(query), "wild"
            if match is None:
                match, kind = word.search(query), "word"
            if match is None:
                break  # nothing recognisable is left (digits, punctuation...)

            found = match.group(0)
            if kind == "comparator":
                oper = match.group(1)
                field, _, value = found.partition(oper)
                comparators.append((field.strip(), oper, value.strip()))
            elif kind == "selector":
                prefix, term = found.split(":")
                selectors.append((prefix, term))
            elif kind == "wild":
                wildCardTerms.append(found.strip("%"))
            else:
                searchTerms.append(found)

            # Cut out exactly the matched span (not every occurrence of its
            # text) and re-strip so a following selector sits at the start.
            query = (query[:match.start()] + query[match.end():]).strip()

        return (searchTerms, wildCardTerms, selectors, comparators)