def __init__(self, screen_size='', max_screen_resolution='', chipset_brand='',
             card_description='', brand='', item_weight='', operating_system='',
             computer_memory_type='', batteries='', date_first_available=''):
    self.screen_size = screen_size
    self.max_screen_resolution = max_screen_resolution
    self.brand = chipset_brand
    self.card_description = card_description
    self.brand_name = brand
    self.item_weight = item_weight
    self.operating_system = operating_system
    self.computer_memory_type = computer_memory_type
    self.batteries = batteries
    self.date = date_first_available
    # Check that the scrape returned at least one non-empty field.
    # If valid stays 0, the record is re-scraped later (see valid_features).
    fields = (self.screen_size, self.max_screen_resolution, self.brand,
              self.card_description, self.brand_name, self.item_weight,
              self.operating_system, self.computer_memory_type, self.batteries)
    self.valid = 1 if any(field != '' for field in fields) else 0
    self.con = connect_to_db()
    self.cur = self.con.cursor()
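# connect_to_db() is called throughout this section but never defined in it.
# A minimal sketch of what it might look like, assuming a MySQL backend via
# pymysql with credentials kept in config.py (HOST, USER, PASSWORD and
# DATABASE are hypothetical names, not confirmed by the source):
import pymysql
import config

def connect_to_db():
    """Open a connection to the project's scraping database."""
    return pymysql.connect(host=config.HOST,
                           user=config.USER,
                           password=config.PASSWORD,
                           database=config.DATABASE)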
def get_reviews_content():
    """Select the id and content of every review from the reviews table."""
    con = connect_to_db()
    cur = con.cursor()
    cur.execute("SELECT Review_id, Content FROM reviews")
    db_output = list(cur.fetchall())
    con.close()
    return db_output
def profile():
    """Select the distinct profile links of reviewers from the reviews table."""
    con = connect_to_db()
    cur = con.cursor()
    cur.execute("SELECT DISTINCT Profile_link FROM reviews")
    db_output = list(cur.fetchall())
    con.close()
    return db_output
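# Note that both getters above return raw cursor rows, so every element is a
# tuple: get_reviews_content() yields (Review_id, Content) pairs, while
# profile() yields one-element (Profile_link,) tuples. A hypothetical caller
# therefore has to unpack the rows before using them:
profile_links = [row[0] for row in profile()]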
def add_sentiment_to_db(review_id, polarity, subjectivity, polarity_conf, subjectivity_conf):
    """Write the sentiment scores of a single review back to the reviews table."""
    con = connect_to_db()
    cur = con.cursor()
    cur.execute("""UPDATE reviews
                   SET Polarity=%s, Subjectivity=%s,
                       Polarity_confidence=%s, Subjectivity_confidence=%s
                   WHERE Review_id=%s""",
                (polarity, subjectivity, polarity_conf, subjectivity_conf, review_id))
    con.commit()
    con.close()
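# Hypothetical end-to-end usage tying get_reviews_content() to
# add_sentiment_to_db(). TextBlob's default analyzer reports polarity and
# subjectivity directly; the two *_confidence columns have no TextBlob
# counterpart, so 1.0 placeholders are stored here. Substitute real
# confidence scores if your classifier provides them.
from textblob import TextBlob

def analyse_reviews():
    for review_id, content in get_reviews_content():
        sentiment = TextBlob(content or '').sentiment
        add_sentiment_to_db(review_id,
                            sentiment.polarity,
                            sentiment.subjectivity,
                            1.0,   # placeholder polarity confidence
                            1.0)   # placeholder subjectivity confidence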
def __init__(self, user_id, username, location, date, rank, profile, cont):
    self.user_id = user_id
    self.username = username
    self.location = location
    self.date = date
    self.rank = rank
    self.profile = profile
    self.content = cont
    self.con = connect_to_db()
    self.cur = self.con.cursor()
def valid_features():
    """Check the validity of the features added to the laptop_features table,
    and re-scrape and update the corresponding records where Valid is 0."""
    con = connect_to_db()
    cur = con.cursor()
    cur.execute("SELECT Link, Laptop_id FROM laptop_features WHERE Valid=0")
    db_output = list(cur.fetchall())
    con.close()
    for link, laptop_id in db_output:
        feat = scraper_class.Parameters(config.AMAZON + link)
        laptop = feat.get_param()
        laptop.update_db(laptop_id)
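# valid_features() expects the object returned by get_param() to expose an
# update_db() method, which is not shown in this section. A sketch of what it
# might look like on the features class above, with the column names assumed
# from the constructor's parameter names rather than taken from the source:
def update_db(self, laptop_id):
    """Overwrite a previously invalid laptop_features record."""
    self.cur.execute("""UPDATE laptop_features
                        SET Screen_size=%s, Max_screen_resolution=%s,
                            Chipset_brand=%s, Card_description=%s, Brand=%s,
                            Item_weight=%s, Operating_system=%s,
                            Computer_memory_type=%s, Batteries=%s,
                            Date_first_available=%s, Valid=%s
                        WHERE Laptop_id=%s""",
                     (self.screen_size, self.max_screen_resolution, self.brand,
                      self.card_description, self.brand_name, self.item_weight,
                      self.operating_system, self.computer_memory_type,
                      self.batteries, self.date, self.valid, laptop_id))
    self.con.commit()
    self.con.close()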
def __init__(self, user_id, ranking='', review='', votes=''):
    self.username = user_id
    self.ranking = ranking
    self.review = review
    self.votes = votes
    # Check that the scrape returned at least one non-empty field.
    # If valid stays 0, the record is re-scraped later.
    if self.review == '' and self.ranking == '' and self.votes == '':
        self.valid = 0
    else:
        self.valid = 1
    self.con = connect_to_db()
    self.cur = self.con.cursor()
def __init__(self, name, price, rating, reviews, link):
    # Normalise the scraped name, dropping any characters that
    # cannot survive a UTF-8 round trip.
    self.name = str(name.encode('utf-8', errors='ignore').decode('utf-8'))
    self.price = price
    self.rating = rating
    self.reviews = reviews
    self.link = link
    # Check that the scrape returned at least one real value
    # (the sentinels implied by this check: price/reviews 0, rating -1).
    # If valid is 0, the scrape is retried.
    if self.price != 0 or self.rating != -1 or self.reviews != 0:
        self.valid = 1
    else:
        self.valid = 0
    self.con = connect_to_db()
    self.cur = self.con.cursor()