def __extractReview(self, stringList, prod_id, cursor):
    """Parse a product's review lines into a bulk-insert value string.

    The first element of ``stringList`` is skipped (presumably a summary
    header -- confirm against the caller). Every remaining line is sliced on
    the literal markers ``cutomer:``, ``rating:``, ``votes:`` and
    ``helpful:`` (the first marker is spelled exactly like that in the text
    being searched). A field whose marker is missing is passed on as ``-1``.

    Each customer id found is stripped and added to ``self.customerSet``.

    Args:
        stringList: Text lines of the product's review section.
        prod_id: Identifier of the product the reviews belong to.
        cursor: Not used by this method; kept so existing callers still work.

    Returns:
        The ``getValuesString()`` of every parsed review, joined with commas
        (the string used for the bulk insert). Empty string when
        ``stringList`` has at most one element.
    """
    reviewList = []

    if len(stringList) > 1:
        for string in stringList[1:]:
            # Reset on every line: previously a line without "helpful:"
            # silently reused the value parsed from the preceding line.
            helpful = -1

            # The date is the text between column 4 and the customer marker.
            date = string.find("cutomer:")
            if date != -1:
                date = string[4:date - 1]

            # Each field runs from the end of its own marker up to the start
            # of the next marker.
            costumer = string.find("cutomer:")
            aux = string.find("rating:")
            if costumer != -1:
                costumer = string[costumer + 9:aux]

            rating = string.find("rating:")
            aux = string.find("votes:")
            if rating != -1:
                rating = string[rating + 8:aux]

            votes = string.find("votes:")
            aux = string.find("helpful:")
            if votes != -1:
                votes = string[votes + 6:aux]

            # "helpful:" is the last field, so it extends to the end of line.
            if aux != -1:
                helpful = string[aux + 8:]

            if costumer != -1:
                costumer = str(costumer).strip()
                self.customerSet.add(costumer)

            r = Review(date, votes, rating, helpful, prod_id, costumer)
            reviewList.append(r.getValuesString())

    # Returns the string for the bulk insert.
    return ",".join(reviewList)
def createReview(rec):
    """Attach a review built from ``rec`` to a room, authored by a random renter.

    The room is looked up by ``rec['listing_id']``; when no such room exists
    the review is attached to a randomly chosen room instead. The author is a
    random user that is neither host nor admin. Nothing is committed here.

    Args:
        rec: Mapping with the keys ``listing_id``, ``scores``,
            ``reviewer_name`` and ``comments``.

    Raises:
        ValueError: If there are no renters, or the fallback is needed and
            there are no rooms (``random.randrange`` on an empty range).
    """
    room = Room.query.filter_by(listingid=rec['listing_id']).first()
    renters = User.query.filter_by(isHost=0, isAdmin=0).all()

    # Choose the author before branching. The fallback path used to read an
    # index that was only assigned on the other path, so it always crashed.
    user_id = renters[random.randrange(len(renters))].id

    if not room:
        # Unknown listing: fall back to any existing room.
        rooms = Room.query.filter_by().all()
        room = rooms[random.randrange(len(rooms))]

    room.reviews.append(
        Review(int(rec['scores']),
               'Review from user {}'.format(rec['reviewer_name']),
               rec['comments'], user_id))
def get_random_data():
    """Fill the database with 100 randomly generated rooms.

    Each room receives ten images, ten reviews, ten back-to-back rented
    reservations and one trailing open-ended ``not_available`` reservation.
    Rooms are added to ``db.session`` one by one and committed once at the
    end.

    NOTE(review): the loop nesting was reconstructed from a flattened source
    line -- confirm that reviews belong inside the image loop and that the
    commit happens once after all rooms.
    """
    def coin():
        # One random boolean flag.
        return bool(random.getrandbits(1))

    rooms_number = 100
    for i in range(rooms_number):
        # Arguments are positional; their order must match Room's
        # constructor exactly.
        room = Room(
            random.choice(list(RoomTypes)),
            random.randint(1, 5),
            random.randint(1, 5),
            random.randint(1, 10),
            coin(),
            "desc",
            coin(), coin(), coin(), coin(), coin(),
            coin(), coin(), coin(), coin(), coin(),
            37.9754983 + random.uniform(-1, 1),
            23.7356671 + random.uniform(-1, 1),
            "address",
            "info",
            random.randint(1, 5),
            random.randint(23, 300),
            random.uniform(10, 70),
            random_sentence(3),
            random.randint(6, 20),
            random.randint(1, 3),
            7,
        )

        for y in range(10):
            picture_id = i * 10 + y
            room.images.append(
                Image('https://picsum.photos/id/' + str(picture_id) + '/400/400'))
            # The trailing 4 is hard-coded; presumably a user id -- confirm.
            room.reviews.append(
                Review(random.uniform(1, 5), random_sentence(3),
                       random_sentence(10), 4))

        # Chain ten rented periods, each followed by a random gap.
        period_start = datetime.now()
        for _ in range(10):
            period_end = period_start + timedelta(days=random.randint(2, 30))
            room.reservations.append(
                Reservation(period_start, period_end, Status.rented))
            period_start = period_end + timedelta(days=random.randint(20, 40))

        # Final reservation has no end date.
        room.reservations.append(
            Reservation(period_start, None, Status.not_available))

        db.session.add(room)

    db.session.commit()
def put_new_review(entity_id):
    """Store the current user's review for an entity.

    The review body is read from ``g.data``: ``rating`` is required, ``text``
    is optional. When text is present its language is detected and stored as
    the review's locale.

    Args:
        entity_id: Id of the entity being reviewed.

    Returns:
        The string ``'ok'`` once the review has been committed.

    Aborts with 404 when the entity does not exist and with 400 when the
    current user already has a review for it.
    """
    session = get_session()
    # try/finally guarantees the session is closed on every exit path,
    # including abort() and unexpected errors; previously it leaked on any
    # exception raised after get_session().
    try:
        if session.query(Entity).get(entity_id) is None:
            abort(404, 'Entity not found')
            # Presumably unreachable (abort is expected to raise); kept in
            # case this project's abort() returns instead.
            return

        current_user_id = get_jwt_identity()
        existing = session.query(Review).filter(
            Review.user_id == current_user_id,
            Review.entity_id == entity_id
        ).first()
        if existing is not None:
            abort(400, "User with id = %s already has a review for entity with id = %s" % (current_user_id, entity_id))
            return

        content = g.data
        text = None
        language = None
        if 'text' in content:
            text = content['text']
            language = translation_init.translate.detect(text)

        review = Review(
            user_id=current_user_id,
            entity_id=entity_id,
            rating=content['rating'],
            time=datetime.now(),
            text=text,
            locale=language
        )
        session.add(review)
        session.commit()
        return 'ok'
    finally:
        session.close()
def new_review(room_id):
    """Add a review from the request's JSON body to a room.

    The body is expected to hold a ``review`` object with the keys
    ``user_public_id``, ``rating``, ``title`` and ``description``.

    Args:
        room_id: Id of the room being reviewed.

    Returns:
        A JSON response ``{'message': 'SUCCESS'}`` after the commit, or
        ``{'message': 'ERROR'}`` when the room or the reviewing user does
        not exist.
    """
    data = request.get_json()
    room = Room.query.filter_by(id=room_id).first()
    if room is None:
        return jsonify({'message': 'ERROR'})
    print(data)

    review = data['review']
    user = User.query.filter_by(public_id=review['user_public_id']).first()
    # An unknown public id used to crash on ``user.id``; report it the same
    # way as a missing room.
    if user is None:
        return jsonify({'message': 'ERROR'})
    user_id = user.id
    print(user_id)

    room.reviews.append(
        Review(review['rating'], review['title'], review['description'],
               user_id))
    db.session.commit()
    return jsonify({'message': 'SUCCESS'})
def scrape_listing_detail(driver, listing_url, city, country):
    """Scrape one listing page into an ``Accommodation``.

    Every section of the page is scraped independently and best-effort: when
    a lookup fails, a message is printed and a default value is used, so a
    page with missing sections still produces a result. Only ``Exception``
    subclasses are swallowed, so Ctrl-C and interpreter shutdown still
    propagate.

    Args:
        driver: Web driver used to load the page; when ``None`` a new one is
            obtained from ``prepare_driver()``.
        listing_url: URL of the listing page.
        city: City name; appended to the address and used for geocoding.
        country: Country name; appended to the address and used for geocoding.

    Returns:
        An ``Accommodation`` with the scraped name, address, rating data,
        description, image URLs, reviews, review breakdown and facilities,
        plus the coordinates geocoded from ``city`` and ``country`` (``None``
        when geocoding fails). The type is always ``"Hostel"``.
    """
    if driver is None:
        driver = prepare_driver()
    driver.get(listing_url)
    # Fixed pause after navigation -- presumably to let the page finish
    # rendering; confirm before shortening.
    time.sleep(3)
    acc_type = "Hostel"

    try:
        name = driver.find_element_by_xpath(
            "//*[@id='pagebody']/div[1]/div[1]/div[2]/div[8]/section[1]/div/div[2]/div/div/h1"
        ).text.strip()
    except Exception:
        print("Name is None.")
        name = ""

    try:
        address = driver.find_element_by_xpath(
            "//*[@id='pagebody']/div[1]/div[1]/div[2]/div[8]/section[1]/div/div[2]/div/div/div/span/a[1]"
        ).text.strip()
        address = address + ", " + city + ", " + country
    except Exception:
        print("Address is None.")
        address = ""

    # Rating score: None when the text is present but not numeric, 0 when the
    # element itself cannot be found.
    try:
        rating_score = driver.find_element_by_xpath(
            "//*[@id='pagebody']/div[1]/div[1]/div[2]/div[8]/section[4]/div/section[1]/div/div[1]/div[1]"
        ).text.strip()
        try:
            rating_score = float(rating_score)
        except (TypeError, ValueError):
            rating_score = None
    except Exception:
        print("Rating Score is None.")
        rating_score = 0

    try:
        rating_text = driver.find_element_by_xpath(
            "//*[@id='pagebody']/div[1]/div[1]/div[2]/div[8]/section[4]/div/section[1]/div/div[1]/div[2]/p"
        ).text.strip()
    except Exception:
        print("Rating Text is None.")
        rating_text = ""

    try:
        total_number_of_ratings = driver.find_element_by_xpath(
            "//*[@id='pagebody']/div[1]/div[1]/div[2]/div[8]/section[4]/div/section[1]/div/div[1]/div[2]/a/span"
        ).text.strip().replace('Total Reviews', '').strip()
        try:
            total_number_of_ratings = int(total_number_of_ratings)
        except (TypeError, ValueError):
            total_number_of_ratings = 0
    except Exception:
        print("Total ratings is None.")
        total_number_of_ratings = 0

    try:
        description = driver.find_element_by_xpath(
            "//*[@id='pagebody']/div[1]/div[1]/div[2]/div[8]/section[4]/div/section[2]/div/div/div"
        ).text
    except Exception:
        print("Description is None.")
        description = ""

    try:
        image_urls_class = driver.find_element_by_name(
            "ms-gallery").find_element_by_class_name(
                "row").find_element_by_class_name(
                    "small-12").find_element_by_class_name("gallery")
        image_urls = []
        for image in image_urls_class.find_elements_by_class_name(
                "gallery-item"):
            img = image.find_element_by_tag_name("img").get_attribute("src")
            image_urls.append(img)
        print(len(image_urls), "Image urls")
    except Exception:
        # Any failure discards the URLs collected so far.
        print("Image urls is None.")
        image_urls = []

    try:
        reviews = []
        reviews_class = driver.find_element_by_name("ms-latest-reviews")
        reviews_ul = reviews_class.find_element_by_tag_name("ul")
        reviews_li = reviews_ul.find_elements_by_tag_name("li")
        for review in reviews_li:
            r = review.find_element_by_class_name(
                "property-review").find_element_by_class_name("review-info")
            r_country = r.find_element_by_class_name(
                'details-bottom').text.strip()
            r_text = r.find_element_by_class_name(
                "notes").find_element_by_class_name(
                    "truncate-container").find_element_by_class_name(
                        "text").text.strip()
            rev = Review(text=r_text, review_country=r_country)
            # Stored as plain dicts rather than Review objects.
            reviews.append(rev.__dict__)
    except Exception:
        print("Reviews is None.")
        reviews = []

    try:
        reviews_breakdown_list = []
        reviews_class = driver.find_element_by_name("ms-reviews-and-ratings")
        reviews_breakdown_ul = reviews_class.find_element_by_tag_name("ul")
        reviews_breakdown_li = reviews_breakdown_ul.find_elements_by_tag_name(
            "li")
        for breakdown in reviews_breakdown_li:
            b_text = breakdown.find_element_by_class_name(
                'rating-label').text.strip()
            b_value = breakdown.find_element_by_class_name(
                'rating-label').find_element_by_class_name(
                    'pull-right').text.strip()
            try:
                b_value = int(b_value)
            except (TypeError, ValueError):
                b_value = None
            rev_b = ReviewBreakdown(type=b_text, value=b_value)
            reviews_breakdown_list.append(rev_b.__dict__)
    except Exception:
        # Unlike the other sections, entries gathered before the failure are
        # kept rather than reset.
        print("Reviews Breakdown missing")

    try:
        facilities = []
        facilities_class = driver.find_element_by_name(
            "ms-facilities").find_element_by_class_name(
                "row").find_element_by_class_name(
                    "small-12").find_element_by_class_name("pb-3")
        groups = facilities_class.find_elements_by_class_name("facility-group")
        for group in groups:
            facilities_ul = group.find_element_by_tag_name("ul")
            facilities_li = facilities_ul.find_elements_by_tag_name("li")
            for f in facilities_li:
                facilities.append(str(f.text.strip()))
    except Exception:
        print("Facilities is None.")
        facilities = []

    # Coordinates come from the city/country pair, not from the address.
    g = geocoder.geonames(city + ", " + country, key='developer005')
    if g.ok is True:
        latitude = g.lat
        longitude = g.lng
    else:
        latitude = None
        longitude = None

    acc = Accommodation(name=str(name),
                        country=str(country),
                        city=str(city),
                        address=str(address),
                        type=str(acc_type),
                        description=str(description),
                        image_urls=image_urls,
                        latitude=latitude,
                        longitude=longitude,
                        rating_text=str(rating_text),
                        rating_score=rating_score,
                        total_number_of_ratings=total_number_of_ratings,
                        facilities=facilities,
                        scrape_url=str(listing_url),
                        reviews=reviews,
                        review_breakdown=reviews_breakdown_list)
    return acc
def _parseItem(self, item):
    """Build a ``Review`` from one review element of the store's XML.

    Extracts text, author (``u"Anonymous"`` when absent), star rating,
    review identifier (the ``userReviewId`` query parameter of the
    report-a-concern URL), title, app version and date. Version and date are
    parsed from the text that follows the author link. Three date layouts
    are recognised: ``day.month.year`` with numeric month, ``Month day,
    year`` and ``day month year``; month names are resolved through the
    ``months`` lookup.

    When no date can be parsed, the review inherits ``self.lastDate``;
    otherwise ``self.lastDate`` is updated to the parsed date.

    NOTE(review): branch nesting was reconstructed from a flattened source
    line -- confirm against the original layout.

    Args:
        item: XML element holding a single review.

    Returns:
        The populated ``Review``.
    """
    def path(*tags):
        # Every tag lives in the same XML namespace.
        return "/".join("{http://www.apple.com/itms/}" + tag for tag in tags)

    def reportUnparsedDate(tail):
        # Diagnostic only: print whatever follows the version so that an
        # unrecognised date layout shows up in the output.
        leftover = re.search(r".*?Version\s.*?[\s\-]+?([^\s\-].+)", tail)
        if leftover:
            print(leftover.group(1))

    review = Review()

    textNode = item.find(path("TextView", "SetFontStyle"))
    if textNode is not None:
        review.text = textNode.text

    authorNode = item.find(
        path("HBoxView", "TextView", "SetFontStyle", "GotoURL", "b"))
    if authorNode is not None:
        review.author = authorNode.text.strip()
    else:
        review.author = u"Anonymous"

    ratingNode = item.find(path("HBoxView", "HBoxView", "HBoxView"))
    try:
        review.rating = int(ratingNode.attrib['alt'].strip(' stars'))
    except (AttributeError, KeyError, ValueError):
        # Missing node, missing 'alt' attribute or non-numeric text all mean
        # "no usable rating". Only the missing attribute was handled before.
        review.rating = 0

    reportConcernNode = item.find(
        path("HBoxView", "HBoxView", "HBoxView", "VBoxView", "GotoURL"))
    if reportConcernNode is not None and reportConcernNode.attrib is not None and "url" in reportConcernNode.attrib:
        parseResult = urlparse.urlparse(reportConcernNode.attrib["url"])
        queryResult = urlparse.parse_qs(parseResult.query)
        if queryResult is not None and "userReviewId" in queryResult:
            review.identifier = queryResult["userReviewId"][0]

    titleNode = item.find(
        path("HBoxView", "TextView", "SetFontStyle", "b"))
    if titleNode is not None:
        review.title = titleNode.text

    versionAndDateNode = item.find(
        path("HBoxView", "TextView", "SetFontStyle", "GotoURL"))
    if versionAndDateNode is not None:
        # ``tail`` is None when nothing follows the element; treat that as
        # empty text instead of crashing inside re.search.
        tail = versionAndDateNode.tail or ""

        versionMatch = re.search("Version ([^\n^\\ ]+)", tail)
        if versionMatch:
            review.version = versionMatch.group(1)

        dateMatch = re.search(
            r"(((?P<day1>\d{1,2})\.(?P<month1>\d{1,2})\.)|((?P<month2>\w+) (?P<day2>\d{1,2})[ ,]+)|((?P<day3>\d{1,2})[ \-](?P<month3>.+?)\.?[ \-]))(?P<year>\d{4})",
            tail)
        if dateMatch:
            dateObject = None
            parts = dateMatch.groupdict()
            if parts["day1"] is not None:
                # Fully numeric date: day.month.year
                dateObject = datetime(int(parts["year"]), int(parts["month1"]), int(parts["day1"]), 0, 0, 0)
            else:
                # Named month, either "Month day" or "day Month".
                if parts["day2"] is not None:
                    monthName, day = parts["month2"], parts["day2"]
                else:
                    monthName, day = parts["month3"], parts["day3"]
                k = monthName.lower()
                if k in months:
                    dateObject = datetime(int(parts["year"]), int(months[k]), int(day), 0, 0, 0)
                else:
                    reportUnparsedDate(tail)
            review.date = dateObject
        else:
            reportUnparsedDate(tail)

    if review.date is None:
        review.date = self.lastDate
    else:
        self.lastDate = review.date
    return review
def _getReviews(self, conditions = None, limit = 0):
    """Load stored reviews as ``Review`` objects, newest first.

    Documents are read from ``self.reviewsCollection`` and sorted by
    ``date`` and then ``order``, both descending.

    Args:
        conditions: Query passed to ``find`` as ``spec``.
        limit: Value passed to ``find`` as ``limit``.

    Returns:
        A list of ``Review`` objects, one per document returned.
    """
    # (attribute set on Review, key read from the stored document)
    fieldMap = (
        ("rowId", "_id"),
        ("identifier", "identifier"),
        ("author", "author"),
        ("appId", "appId"),
        ("title", "title"),
        ("text", "text"),
        ("version", "version"),
        ("rating", "rating"),
        ("date", "date"),
        ("appStoreId", "appStoreId"),
    )

    found = self.reviewsCollection.find(spec = conditions, limit = limit)
    ordered = found.sort([(u"date", pymongo.DESCENDING), (u"order", pymongo.DESCENDING)])

    reviews = []
    for rawReview in ordered:
        review = Review()
        for attribute, key in fieldMap:
            setattr(review, attribute, rawReview[key])
        reviews.append(review)
    return reviews
def convert_numbers_r(self, review):
    """Return a new ``Review`` with ``convert_numbers`` applied to the content.

    The polarity label is carried over unchanged; ``review`` itself is not
    modified here.
    """
    return Review(self.convert_numbers(review.content), review.polarity)
def fold_cases_r(self, review):
    """Return a new ``Review`` with ``fold_cases`` applied to the content.

    The polarity label is carried over unchanged; ``review`` itself is not
    modified here.
    """
    return Review(self.fold_cases(review.content), review.polarity)
def remove_punctuations_r(self, review):
    """Return a new ``Review`` with ``remove_punctuations`` applied to the content.

    The polarity label is carried over unchanged; ``review`` itself is not
    modified here.
    """
    return Review(self.remove_punctuations(review.content), review.polarity)