def _execAssignmentLine(line, name):
    """Execute a ``name=<expression>`` input line and return the bound value.

    The assignment is executed into a private namespace instead of the
    caller's local scope. Relying on exec() to create function locals only
    works on Python 2; on Python 3 the name would never be bound.

    Raises KeyError if the executed line did not assign ``name``.
    """
    # SECURITY: exec() runs whatever code the input file contains. Only feed
    # this parser files from a trusted source. If the data lines are plain
    # Python literals, ast.literal_eval on the text after '=' would be a
    # safer replacement -- TODO confirm the file format before switching.
    scope = {}
    # Module globals stay visible so expressions in the data can still
    # resolve module-level names, as they could with the bare exec.
    exec(line, globals(), scope)
    return scope[name]


def _addReviews(rawReviews, bnss, recommended, userIdToUserDict, reviewDict):
    """Register the users and reviews of one ``R=``/``NR=`` line.

    Each entry of ``rawReviews`` is indexed as follows: [0] and [3], [4] are
    passed straight to ``review``; [1] is a 5-tuple (username, imgSrc,
    userLocation, userFriendCount, userReviewCount); [2] is passed to
    ``user`` as its second argument.

    ``recommended`` is forwarded as the last argument of ``review``.
    Both dictionaries are updated in place.

    Raises ValueError if there are reviews to add but no business was parsed
    yet (a review line appeared before any ``B=`` line).
    """
    if rawReviews and bnss is None:
        raise ValueError('review line encountered before any B= line')
    for rawReview in rawReviews:
        # Unpack all five fields so a malformed user tuple fails loudly,
        # even though only the first three take part in the user id.
        (username, imgSrc, userLocation,
         userFriendCount, userReviewCount) = rawReview[1]
        usr = user((username, imgSrc, userLocation), rawReview[2])

        # Reuse the first user object seen for this id.
        knownUsr = userIdToUserDict.get(usr.getId())
        if knownUsr is None:
            userIdToUserDict[usr.getId()] = usr
            knownUsr = usr

        revw = review(rawReview[0], knownUsr.getId(), bnss.getId(),
                      rawReview[3], rawReview[4], '', recommended)
        revwKey = (revw.getUserId(), revw.getBusinessID())
        # Keep only the first review per (user, business) pair.
        if revwKey not in reviewDict:
            reviewDict[revwKey] = revw


def parseAndCreateObjects(inputFileName):
    """Parse a dump file into user, business and review dictionaries.

    The file is read line by line. Lines starting with ``B=`` define a
    business (fields 0, 1, 2 and 4 are passed to ``business``). Lines
    starting with ``R=`` / ``NR=`` carry lists of reviews for the most
    recently parsed business, created with the last ``review`` argument set
    to True / False respectively. All other lines are ignored.

    When a ``B=`` line repeats a business id that was already seen, the
    review lines that follow it are skipped. The stored business entry is
    nevertheless replaced by the newly parsed one (behavior kept from the
    original implementation).

    Returns a tuple ``(userIdToUser, businessIdToBusiness, reviews)`` where
    ``reviews`` is keyed by ``(userId, businessId)``.
    """
    parentUserIdToUserDict = dict()
    parentBusinessIdToBusinessDict = dict()
    parent_reviews = dict()
    isBusinessAlreadyPresent = False
    bnss = None  # most recently parsed business, if any
    with open(inputFileName) as f:
        for line in f:
            if line.startswith('B='):
                B = _execAssignmentLine(line, 'B')
                isBusinessAlreadyPresent = B[0] in parentBusinessIdToBusinessDict
                bnss = business(B[0], B[1], B[2], B[4])
                parentBusinessIdToBusinessDict[bnss.getId()] = bnss
            elif line.startswith('R='):
                R = _execAssignmentLine(line, 'R')
                if not isBusinessAlreadyPresent:
                    _addReviews(R, bnss, True,
                                parentUserIdToUserDict, parent_reviews)
            elif line.startswith('NR='):
                NR = _execAssignmentLine(line, 'NR')
                if not isBusinessAlreadyPresent:
                    _addReviews(NR, bnss, False,
                                parentUserIdToUserDict, parent_reviews)
    return (parentUserIdToUserDict, parentBusinessIdToBusinessDict, parent_reviews)
# Example no. 2
# 0
    def readDataForBnss(self, inputDirName, fileName):
        """Load one business file and register its business, users and reviews.

        The first line of ``inputDirName/fileName`` is executed as the
        right-hand side of ``data=...`` and the resulting mapping is read
        through the module-level key constants (BNSS_NAME, ADDRESS, URL,
        RECOMMENDED, NOT_RECOMMENDED, ...).

        The business id is the tuple ``(url, address)``. If that id is
        already in ``self.bnssIdToBnssDict`` the file is ignored; otherwise
        the business is stored and its recommended and not-recommended
        reviews are added to ``self.usrIdToUsrDict`` and
        ``self.reviewIdToReviewDict``.
        """
        # Only the first line is needed, so the file is closed right away.
        with open(join(inputDirName, fileName), mode='r') as f:
            content = 'data=' + f.readline()

        # SECURITY: exec() runs whatever code the data file contains. Only
        # load files from a trusted source. If the line is a plain Python
        # literal, ast.literal_eval would be a safer replacement -- TODO
        # confirm the file format before switching.
        # The assignment goes into an explicit namespace because exec()
        # cannot rebind a function's local variables on Python 3; module
        # globals stay visible to the executed expression.
        scope = {}
        exec(content, globals(), scope)
        data = scope['data']

        bnssName = data[BNSS_NAME]
        bnssAddress = data[ADDRESS]
        bnssUrl = data[URL]
        bnssId = (bnssUrl, bnssAddress)

        if bnssId in self.bnssIdToBnssDict:
            # Business already loaded: skip it together with its reviews.
            return

        bnss = business(bnssId, bnssName, url=bnssUrl)
        self.bnssIdToBnssDict[bnss.getId()] = bnss

        nrReviews = data[NOT_RECOMMENDED]
        rReviews = data[RECOMMENDED]

        self._addReviewsForBnss(rReviews, bnss, True)
        self._addReviewsForBnss(nrReviews, bnss, False)

    def _addReviewsForBnss(self, rawReviews, bnss, recommended):
        """Register the users and reviews in ``rawReviews`` for ``bnss``.

        Entries whose user name is empty or missing a value are skipped
        (after printing "Continue"). Users are looked up in, or added to,
        ``self.usrIdToUsrDict`` by the entry's 'usrId' value. Each review is
        stored in ``self.reviewIdToReviewDict`` under the id returned by
        ``getId()``; a later review with the same id replaces an earlier
        one. ``recommended`` is forwarded as the last argument of ``review``.
        """
        for rawReview in rawReviews:
            review_rating = rawReview[RATING]
            review_text = rawReview[REVIEW_TEXT]
            # Keep only the text that precedes an 'Updated review' marker.
            review_date = rawReview[REVIEW_DATE].split('Updated review')[0]

            usr_location = rawReview[USR_LOCATION]
            usr_name = rawReview[NAME]
            usr_review_count = rawReview[REVIEW_COUNT]
            usr_friend_count = rawReview[FRIEND_COUNT]
            if not usr_name:
                # Parenthesised form prints the same text on Python 2 and 3.
                print("Continue")
                continue
            usrId = rawReview['usrId']

            if usrId in self.usrIdToUsrDict:
                usr = self.usrIdToUsrDict[usrId]
            else:
                usrExtra = (usr_location, usr_review_count, usr_friend_count)
                usr = user(usrId, usr_name, usrExtra)
                self.usrIdToUsrDict[usr.getId()] = usr

            review_id = (usr.getId(), bnss.getId())
            revw = review(review_id, usr.getId(), bnss.getId(),
                          float(review_rating), review_date, review_text,
                          recommended)
            self.reviewIdToReviewDict[revw.getId()] = revw