Exemplo n.º 1
0
def reset_postgres():
    sql = SqlHelper(constants.SQL_DATABASE_NAME)

    sql.create_session()
    sql_entities = sql.fetch_entity_where('ZipCode', True)
    for sql_entity in sql_entities:
        sql_entity.requested = False

    sql.commit_session()
    sql.close_session()
Exemplo n.º 2
0
 def run(self):
     db = SqlHelper(constants.SQL_DATABASE_NAME)
     yelp_helper = YelpHelper()
     db.create_session()
     if self.city_name is None:
         cities = db.fetch_all(constants.SQL_TABLE_CITY)
     else:
         cities = db.fetch_entity_where('City', True, False, name=self.city_name)
     try:
         for city in cities:
             name = city.name
             for zip_code in city.zip_codes:
                 if not zip_code.requested:
                     zip_completed = True
                     self.location = str(zip_code.zip_code) + ', ' + str(name) + ', Deutschland'
                     self.offset = 0
                     content, status_code = yelp_helper.get_search(self.location, self.offset)
                     if 'error' not in content and not self.test_mode:
                         total = content['total']
                         entity_id = str(self.current_path) + str(self.location) + str(self.offset)
                         datastore_entity = self._create_datastore_entity(content)
                         save_success = self._save(entity_id, datastore_entity)
                         if save_success is False:
                             zip_completed = False
                         self.logger.info(u'Found {0} Entries...'.format(total))
                         while self.offset < total \
                                 and (self.offset + constants.YELP_SEARCH_LIMIT <= 1000) \
                                 and save_success is True:
                             content = yelp_helper.get_search(self.location, self.offset)
                             self.offset += constants.YELP_SEARCH_LIMIT + 1
                             if 'error' not in content:
                                 entity_id = str(self.current_path) + str(self.location) + str(self.offset)
                                 datastore_entity = self._create_datastore_entity(content)
                                 save_success = self._save(entity_id, datastore_entity)
                                 if save_success is False:
                                     zip_completed = False
                             else:
                                 raise YelpError(content['error']['code'], content['error']['description'])
                     else:
                         raise YelpError(content['error']['code'], content['error']['description'])
                     if zip_completed is True:
                         zip_code.requested = True
                         db.commit_session()
     except HTTPError as error:
         self.logger.exception('Encountered HTTP error %s on %s:\nAbort program.', error.code, error.url)
     except YelpError as err:
         self.logger.exception(err)
     finally:
         db.close_session()
Exemplo n.º 3
0
def fill_price_range_calculated_table():
    util.setup_logging()
    city_mode_list = {}

    db = SqlHelper(constants.SQL_DATABASE_NAME)
    db.create_session()

    restaurants_without_price = db.fetch_entity_where('Restaurant', True, price_range=None)
    logger.info('Found {0} restaurants without Price'.format(len(restaurants_without_price)))

    for restaurant_without_price in restaurants_without_price:
        price_range_calculated = PriceRangeCalculated()

        restaurant_id = restaurant_without_price.id
        price_range_calculated.restaurant_id = restaurant_id

        city_name = restaurant_without_price.city
        if city_name:
            if city_name not in city_mode_list:
                logger.info('Calculating mode for {0}'.format(city_name))
                price_range_list = []
                restaurants_of_city = db.fetch_entity_where('Restaurant', True, city=city_name)
                logger.info('Found {0} restaurants for {1}'.format(len(restaurants_of_city), city_name))
                for restaurant_of_city in restaurants_of_city:
                    price_range = restaurant_of_city.price_range
                    if price_range:
                        price_range_list.append(price_range)
                if len(price_range_list) > 0:
                    try:
                        price_range_mode = mode(price_range_list)
                        logger.info('Mode for {0}: {1}'.format(city_name, price_range_mode))
                    except StatisticsError:
                        price_range_mode = '-1'
                        logger.info('Multiple modes found for {0}'.format(city_name))
                else:
                    price_range_mode = '-2'
                    logger.info('No price_range attribute found in {0}'.format(city_name))
                city_mode_list[city_name] = price_range_mode
            else:
                price_range_mode = city_mode_list[city_name]
                logger.info('Found price_range {0} for {1}'.format(price_range_mode, city_name))
            price_range_calculated.price_range = price_range_mode
            db.insert(price_range_calculated)

    logger.info('Calculated {0} price_range mode(s)'.format(len(city_mode_list)))

    db.commit_session()
    db.close_session()
Exemplo n.º 4
0
def fill_buying_power_calculated_table():
    util.setup_logging()

    db = SqlHelper(constants.SQL_DATABASE_NAME)
    db.create_session()

    cities_without_buying_power = db.fetch_entity_where('City', True, buying_power=None)
    logger.info('Found {0} cities without Buying Power'.format(len(cities_without_buying_power)))

    buying_power_average = get_germany_buying_power_average()
    logger.info('Buying Power Germany: {0}'.format(buying_power_average))

    for city_without_buying_power in cities_without_buying_power:
        buying_power_calculated = BuyingPowerCalculated()

        city_id = city_without_buying_power.id
        buying_power_calculated.city_id = city_id
        buying_power_calculated.buying_power = buying_power_average

        db.insert(buying_power_calculated)

    db.commit_session()
    db.close_session()
Exemplo n.º 5
0
def main():
    util.setup_logging()

    db = SqlHelper(constants.SQL_DATABASE_NAME)
    yelp = YelpHelper()
    db.create_session()

    result = db.fetch_entity_where('Restaurant', True, False, review_count=0)
    logger.info('Found {0} Restaurants with 0 Review Count'.format(
        len(result)))

    for restaurant in result:
        logger.info('Old Review Count Value: {0}'.format(
            restaurant.review_count))
        name = restaurant.name
        business_id = restaurant.id
        logger.info(name)
        result, status_code = yelp.get_business(business_id, 0)
        status_codes = [403, 404]
        if status_code not in status_codes:
            if 'error' not in result:
                review_count = result['review_count']
                if review_count is not None:
                    restaurant.review_count = review_count
                    logger.info('New Review Count Value: {0}'.format(
                        restaurant.review_count))
            else:
                logger.error('{0}: {1}'.format(result['error']['code'],
                                               result['error']['description']))
                break
        else:
            restaurant.review_count = 0
            logger.info('New Review Count Value: {0}'.format(
                restaurant.review_count))

    db.commit_session()
    db.close_session()
Exemplo n.º 6
0
def main():
    rents = []
    util.setup_logging()

    db = SqlHelper(constants.SQL_DATABASE_NAME)
    db.create_session()

    result = db.fetch_entity_where('City', True, True, rent_avg=None)

    for city in result:
        rents.append(city.rent_avg)

    rent_avg = int(numpy.mean(rents))

    result = db.fetch_entity_where('City', True, False, rent_avg=None)

    for city in result:
        rent_avg_calculated = RentAvgCalculated()
        rent_avg_calculated.city_id = city.id
        rent_avg_calculated.rent_avg = rent_avg
        db.insert(rent_avg_calculated)

    db.commit_session()
    db.close_session()
Exemplo n.º 7
0
def check_price_range_availability_and_update():
    yelp_helper = YelpHelper()
    restaurants = []
    util.setup_logging()

    not_available_count = 0

    db = SqlHelper(constants.SQL_DATABASE_NAME)
    db.create_session()

    result = db.fetch_entity_where('Restaurant', True, price_range=None)
    logger.info('Found {0} restaurants'.format(str(len(result))))

    try:
        for restaurant in result:
            restaurant_id = restaurant.id
            business, status_code = yelp_helper.get_business(restaurant_id)
            if 'error' not in business:
                if 'price' in business:
                    price_range = business['price']
                    if price_range:
                        restaurant.price_range = price_range
                    else:
                        logger.info('Price Range is null')
                    db.insert(restaurant)
                else:
                    not_available_count += 1
            else:
                raise YelpError(business['error']['code'], business['error']['description'])
            logger.info(not_available_count)
    except YelpError as error:
        logger.exception(error)
        logger.info('Adding {0} updated restaurants to DB...'.format(len(restaurants)))
    finally:
        db.commit_session()
        db.close_session()
    def run(self):
        result = Result()
        result.set_success(True)

        sql = SqlHelper(constants.SQL_DATABASE_NAME)
        sql.create_session()

        if self.current_city:
            self.cities.append(self.current_city)
        elif self.top_how_much:
            self.cities = self._fetch_top_city_from(self.top_how_much,
                                                    'top_cities')

        restaurants = self._fetch_all_restaurants()
        locale_list = ['en_US', 'de_DE']
        if restaurants:
            for restaurant in restaurants:
                self.current_restaurant_id = restaurant.id
                # if there is a change in zip codes;
                # all reviews from current zip code are successfully collected into db
                # set it to collected
                self.logger.debug('Restaurant Zip: {0}'.format(
                    restaurant.zip_code))
                if self.current_zip_code and restaurant.zip_code is not self.current_zip_code:
                    sql.update_entity('ZipCode', 'zip_code',
                                      str(self.current_zip_code),
                                      'review_collected', True)
                self.current_zip_code = restaurant.zip_code
                self.current_city = restaurant.city
                for locale in locale_list:
                    self.current_locale = locale
                    yelp_entity, status_code = self.yelp.get_reviews(
                        self.current_restaurant_id, locale)
                    if not status_code == 403:
                        if 'error' not in yelp_entity:
                            reviews = yelp_entity['reviews']
                            if len(reviews) > 0:
                                datastore_entity = self._create_datastore_entity(
                                    yelp_entity)
                                entity_id = self.current_city + '@' + \
                                            str(self.current_zip_code) + '@' + \
                                            str(self.current_restaurant_id) + '@' + \
                                            locale
                                if not self.test_mode:
                                    success = self._save(
                                        entity_id, datastore_entity)
                                    if success:
                                        result.set_success(success)
                                        sql.commit_session()
                            else:
                                self.logger.debug(
                                    'No Reviews found for restaurant {0} in {1}'
                                    .format(restaurant.name,
                                            self.current_city))
                        else:
                            message = yelp_entity['error']['description']
                            result.set_success(False)
                            result.set_message(message)
                            raise YelpError(yelp_entity['error']['code'],
                                            message)
                    else:
                        self.logger.debug(
                            'No Reviews found for restaurant {0} in {1}'.
                            format(restaurant.name, self.current_city))
            else:
                result.set_success(False)
                result.set_message(
                    'Failure when saving Review Entity to Datastore')
        else:
            result.set_success(True)
            result.set_message(
                'No Restaurants left to collect reviews from! Nice Job')

        sql.close_session()
        self.logger.info(result)
        return result
Exemplo n.º 9
0
class Transporter(ABC, threading.Thread):

    database = None
    source_entity = None
    target_entity = None
    source_db = None
    test_mode = None
    source_entity_id = None
    city_name = None
    zip_codes = []
    top_how_much = None

    logger = logging.getLogger(__name__)

    def __init__(self, database, source_entity, test_mode, city_name):
        super(Transporter, self).__init__()
        self.logger.info(
            'Creating Transporter for Datastore Entity: {0}'.format(
                source_entity))
        self.database = database
        self.source_entity = source_entity
        self.source_db = DatastoreHelper()
        self.target_db = SqlHelper(self.database)
        self.test_mode = test_mode
        self.city_name = city_name

        if self.city_name:
            self._fetch_zip_codes_from_database()

    def run(self):
        results = []
        self.logger.info('Starting transport...')
        self.target_db.create_session()
        total = self._get_entities(None, None, True)
        self.logger.info('Found a total of %s entries in Google Datastore',
                         str(total))
        offset = 0
        while offset < total:
            result = self._transport(offset)
            results.append(result)
            offset += constants.GCP_FETCH_LIMIT
            # i dont know why but google datastore doesn't allow a offset greater than 2500
            if offset == 2500:
                self.logger.info('Resetting offset...')
                offset = 0
                total = self._get_entities(None, None, True)
                self.logger.info(
                    'Found a total of %s entries in Google Datastore',
                    str(total))
        for result in results:
            self.logger.info(result)
        self.logger.info('Done transporting Restaurants...')

    def _transport(self, offset):
        result = Result()
        limit = constants.GCP_FETCH_LIMIT
        source_entities = self._get_entities(limit, offset, False)
        if source_entities:
            for datastore_entity in source_entities:
                self.logger.info('Starting mapping...')
                entities = self.map(datastore_entity)
                entity_length = len(entities)
                self.logger.info('Mapped {0} entities...'.format(
                    str(entity_length)))
                if not self.test_mode:
                    if entity_length > 0:
                        try:
                            for entity in entities:
                                if entity:
                                    self.logger.info('Saving in database...')
                                    self.target_db.insert(entity)
                            self.logger.info('Commiting DB entries')
                            self.target_db.commit_session()
                            result.set_success(True)
                            result.set_message(
                                'Fetched entries from offset {0} with limit {1}'
                                .format(str(offset), str(limit)))
                        except SQLAlchemyError as err:
                            result.set_success(False)
                            result.set_message(err.code)
                            self.logger.exception('An SQLAlchemyError occured')
                        finally:
                            self.target_db.close_session()
                    else:
                        result.set_success(True)
                        result.set_message(
                            'There are no mapped entities that can be saved in database'
                        )
                    self.source_db.set_transported(datastore_entity, True)
                else:
                    result.set_success(True)
                    result.set_message('Test Mode active')
        else:
            result.set_success(False)
            result.set_message(self.source_entity +
                               ' could not be found in Google Datastore')
        return result

    def _fetch_zip_codes_from_database(self):
        sql = SqlHelper(constants.SQL_DATABASE_NAME)

        sql.create_session()
        city_from_db = sql.fetch_city_by_name(self.city_name)
        # get zip codes and close session afterwards
        zip_codes = city_from_db.zip_codes
        sql.close_session()

        for zip_code_obj in zip_codes:
            self.zip_codes.append(zip_code_obj.zip_code)

    def _fetch_entities_by_zip_code(self, entity_name, limit, offset,
                                    only_keys):
        result_all = []

        for zip_code in self.zip_codes:
            result = self.source_db.fetch_entity(entity_name,
                                                 limit=limit,
                                                 offset=offset,
                                                 only_keys=only_keys,
                                                 operator='=',
                                                 zip_code=str(zip_code),
                                                 transported=False)
            result_all += result
        return result_all

    def _get_entities(self, limit, offset, only_total):
        if not self.zip_codes:
            content = self.source_db.fetch_entity(self.source_entity,
                                                  limit,
                                                  offset,
                                                  only_total,
                                                  '=',
                                                  transported=False)
        else:
            content = self._fetch_entities_by_zip_code(self.source_entity,
                                                       limit, offset,
                                                       only_total)

        if only_total:
            result = len(content)
        else:
            result = content

        return result

    # maps target and source structure and returns a list of entities to save in db
    @abstractmethod
    def map(self, datastore_entity) -> List:
        pass
Exemplo n.º 10
0
# cities = cities.values.tolist()

# city_objects = sql.fetch_entity_where('TopCities')
# cities = [[city.state, city.city] for city in city_objects]
# print(cities[1])
# print(cities[0])

analyzer = TextAnalyzer('english', True, False, '../../data/tree_tagger')

review = 'Yes, the 5 stars are deserved: here you can drink and buy the best coffee in Bochum (and maybe in the Ruhr area?). I usually only take the roasted coffee'

text = analyzer.text_process(review)

print(text)
#
# menu_item_improved = util.convert_list_to_string(analyzer.text_process(text))
# if menu_item_improved:
#     print('Item: ')
#     print(menu_item_improved)
# else:
#     print('Value is None')

# result = datastore.fetch_entity('THD_Speisekarte', None, None, True, '=', transported=False, zip_code=44866)
# print(result)

# sql._update_entity('ZipCode', 'zip_code', '10115', 'review_collected', False)

sql.commit_session()

sql.close_session()