def __init__(self):
     """Create the collaborators used by the Dubizzle data extractor.

     Prints the class's ``DOMAIN`` and then instantiates the two loggers,
     the request manager, the source-code manager, the generator and the
     database manager, storing each on ``self``.
     """
     # Parenthesised single-argument form prints the same text on
     # Python 2 and is valid syntax on Python 3.
     print(self.DOMAIN)
     self.logger = Logger(name='dubizzle_data_log')
     self.err_logger = Logger(name='err_dubizzle_data_log')
     self.request_manager = RequestManager()
     self.source_code_manager = SourceCodeManager()
     self.generator = Generator()
     self.db = DatabaseManager()
Exemplo n.º 2
0
def chart(request):
    """Render the scatter-chart page for a fixed reference car.

    Queries the listings similar to a 2007 BMW 3-Series 320i and the list
    of all makes, then renders ``car_chart/scatter_chart.html`` with the
    context keys ``values``, ``urls`` and ``makes``.
    """
    reference_car = {
        'make': 'BMW',
        'model': '3-Series',
        'trim': '320i',
        'year': 2007,
    }
    similar = DatabaseManager().get_similar_cars_for_graph(**reference_car)
    all_makes = DatabaseManager().get_all_make()

    points, links = similar['data'], similar['urls']
    print('Length:', len(points))

    context = {'values': points, 'urls': links, 'makes': all_makes}
    return render(request, 'car_chart/scatter_chart.html', context)
Exemplo n.º 3
0
    def __init__(self):
        """Create the database handle and the deal-quality manager.

        Only ``self.db`` and ``self.deal_quality`` are set. The worker
        pool, the RabbitMQ queues and the per-site extractors listed below
        are disabled and are NOT available as attributes.
        """
        # Disabled collaborators (kept for reference):
        #   self.pool = Pool(5)
        #   self.rmq_extract = RabbitMQ.RabbitMQManager.RabbitMQManager('uae_extract_urls')
        #   self.rmq_update = RabbitMQ.RabbitMQManager.RabbitMQManager('uae_update_urls')
        #   self.dubizzle_data_extractor = DubizzleDataExtractor()
        #   self.dubizzle_links_extractor = DubizzleLinksExtractor()
        #   self.dubicars_data_extractor = DubicarsDataExtractor()
        #   self.dubicars_links_extractor = DubicarsLinksExtractor()
        self.db = DatabaseManager()
        self.deal_quality = DealQualityManager()
Exemplo n.º 4
0
def handle_ImageMessage(event):
    """Handle an incoming image message.

    Downloads the message content, uploads it through ``ImageUtil``,
    stores the resulting URL on ``record``, pushes a follow-up text prompt
    to the sending user and finally saves ``record``.
    """
    message_content = line_bot_api.get_message_content(event.message.id)
    uploaded_url = ImageUtil().upload(message_content.content)
    # NOTE(review): `record` is not defined in this function; it is
    # presumably a module-level object shared between handlers - confirm.
    record.image = uploaded_url

    prompt = TextSendMessage(
        text="You can send 'compensation' to choose the type of compensation.")
    line_bot_api.push_message(event.source.user_id, prompt)

    DatabaseManager().save_record(record)
Exemplo n.º 5
0
 def activate(self, member):
     """Mark the ``users`` row of ``member`` as active.

     :param member: object whose ``id`` attribute is the ``user_id``.
     :return: None. Any exception is caught and reported with ``print``.
     """
     try:
         with DatabaseManager() as db:
             sql = "UPDATE `users` SET `active`=%s WHERE `user_id`=%s"
             db.execute(sql, (1, member.id))
             # Commit explicitly, as the INSERT paths of this data layer
             # do; without it the UPDATE may be discarded on close.
             db.commit()
         print("Activated account for user %s" % (member.id))
     except Exception as e:
         print("Error activating user %s.\n%s" % (member.id, e))
Exemplo n.º 6
0
 def create(self, member, details):
     """Insert a ``profiles`` row for ``member``.

     :param member: object whose ``id`` attribute is the ``user_id``.
     :param details: mapping with the keys ``battlenet``, ``name``,
         ``race``, ``primary_profession`` and ``secondary_profession``.
     :return: None. Any exception is caught and reported with ``print``.
     """
     insert_sql = "INSERT INTO `profiles` (`user_id`, `battlenet_tag`, `name`, `race`, `primary_profession`, `secondary_profession`) VALUES (%s, %s, %s, %s, %s, %s)"
     detail_keys = ('battlenet', 'name', 'race', 'primary_profession',
                    'secondary_profession')
     try:
         with DatabaseManager() as db:
             # Column order of the tuple must follow the INSERT statement.
             row = (member.id,) + tuple(details[key] for key in detail_keys)
             db.execute(insert_sql, row)
             db.commit()
             print("Added profile for user %s" % member.id)
     except Exception as e:
         print("Error creating profile %s.\n%s" % (member.id, e))
Exemplo n.º 7
0
def get_models_for_make(request):
    """Return the models of a make as a JSON document.

    Reads the ``make`` query-string parameter, looks the models up through
    ``DatabaseManager.get_all_models`` and returns them JSON-encoded.

    :param request: Django request; ``request.GET['make']`` is required.
    :return: ``HttpResponse`` with the JSON text, or a 400 response when
        the ``make`` parameter is missing.
    """
    print('get_models_for_make WAS RUNNED')
    make = request.GET.get('make')
    if make is None:
        # The view used to fall through and return None (or raise on the
        # missing key), which Django reports as a server error.
        return HttpResponse('Missing required query parameter: make',
                            content_type="text/html",
                            status=400)

    print(make)
    models = json.dumps(DatabaseManager().get_all_models(make))
    print(models)
    # json.dumps never yields an empty string, so the response is always sent.
    return HttpResponse(models, content_type="text/html")
Exemplo n.º 8
0
    def read(self, member):
        """Fetch the active ``users`` row whose ``user_id`` is ``member``.

        :param member: value compared against the ``user_id`` column.
        :return: the first matching row, or None when nothing matches or
            an exception occurs (both cases are reported with ``print``).
        """
        query = "SELECT * from `users` WHERE `user_id`=%s AND `active`=%s"
        try:
            with DatabaseManager() as db:
                rows = db.query(query, (member, 1))
                if rows:
                    return rows[0]
                print("User does not exist: %s" % member)
        except Exception as e:
            print("Error fetching user id %s.\n%s" % (member, e))
Exemplo n.º 9
0
    def fetch(self):
        """Return the five active users with the most credits.

        :return: the query result (rows ordered by ``credits`` descending),
            or None when the result is empty or an exception occurs (both
            cases are reported with ``print``).
        """
        try:
            with DatabaseManager() as db:
                sql = "SELECT * from `users` where `active`=%s ORDER BY `credits` DESC LIMIT 5"
                # Parameters must be a sequence: ``(1)`` is just the int 1,
                # ``(1,)`` is the one-element tuple the placeholder needs.
                result = db.query(sql, (1,))

                if not result:
                    print("Error fetching leaderboard")
                else:
                    return result
        except Exception as e:
            print("Error fetching leaderboard.\n%s" % (e))
Exemplo n.º 10
0
    def read(self, member):
        """Fetch the ``profiles`` row belonging to ``member``.

        :param member: object whose ``id`` attribute is the ``user_id``.
        :return: the first matching row, or None when no row exists or an
            exception occurs (both cases are reported with ``print``).
        """
        try:
            with DatabaseManager() as db:
                sql = "SELECT * FROM `profiles` WHERE `user_id`=%s"
                # Pass the parameters as a one-element tuple rather than a
                # bare scalar, matching the other queries of this layer.
                result = db.query(sql, (member.id,))

                if not result:
                    print("User does not exist: %s" % member.id)
                    return None
                else:
                    return result[0]
        except Exception as e:
            print("Error fetching profile for user %s.\n%s" % (member.id, e))
Exemplo n.º 11
0
def get_trim_for_model(request):
    """Return the trims of a make/model pair as a JSON document.

    :param request: Django request; the query string must carry ``make``
        and ``model``.
    :return: ``HttpResponse`` with the JSON text, or a 400 response when
        a required parameter is missing.
    """
    print('get_trim_for_model WAS RUNNED')
    make = request.GET.get('make')
    model = request.GET.get('model')
    if make is None or model is None:
        # The view used to fall through and return None (or raise on the
        # missing key), which Django reports as a server error.
        return HttpResponse('Missing required query parameter: make, model',
                            content_type="text/html",
                            status=400)

    print(make)
    print(model)
    models = json.dumps(DatabaseManager().get_all_trim(make=make,
                                                       model=model))
    print(models)
    # json.dumps never yields an empty string, so the response is always sent.
    return HttpResponse(models, content_type="text/html")
Exemplo n.º 12
0
    def create(self, member):
        """Register ``member`` in the ``users`` table if needed.

        Does nothing for bots and for members that already have an active
        row. A member with an inactive row is re-activated instead of being
        inserted again. Otherwise a new active row is inserted.

        :param member: object providing ``bot``, ``id``, ``name``,
            ``guild.id`` and ``joined_at``.
        :return: None. Insert failures are caught and reported with
            ``print``.
        """
        if member.bot == 1 or self.read(member, 1):
            return

        if self.read(member, 0):
            self.activate(member)
            return

        try:
            with DatabaseManager() as db:
                sql = "INSERT INTO `users` (`user_id`, `name`, `guild`, `joined_at`, `active`, `received_credits`) VALUES (%s, %s, %s, %s, %s, %s)"
                # joined_at is stored twice: as the join date and as the
                # initial `received_credits` timestamp.
                db.execute(sql, (member.id, member.name, member.guild.id,
                                 member.joined_at, 1, member.joined_at))
                db.commit()
        except Exception as e:
            # This path inserts a row; the message used to say "fetching".
            print("Error creating user %s.\n%s" % (member.id, e))
Exemplo n.º 13
0
    def read(self, member, active=1):
        """Fetch the ``users`` row of ``member`` with the given active state.

        :param member: object whose ``id`` attribute is the ``user_id``.
        :param active: ``1`` selects active rows; any other value selects
            inactive rows (``active`` = 0).
        :return: the first matching row, or None when no row matches or an
            exception occurs (both cases are reported with ``print``).
        """
        # One parameterised statement replaces the two near-identical
        # literals; any value other than 1 still means "inactive".
        sql = "SELECT * from `users` WHERE `user_id`=%s AND `active`=%s"
        active_flag = 1 if active == 1 else 0
        try:
            with DatabaseManager() as db:
                # Parameters are passed as a real tuple; ``(member.id)``
                # was just the scalar id.
                result = db.query(sql, (member.id, active_flag))

                if not result:
                    print("User does not exist or is inactive: %s" % member.id)
                    return None
                else:
                    return result[0]
        except Exception as e:
            print("Error fetching user id %s.\n%s" % (member.id, e))
Exemplo n.º 14
0
def get_year_for_trim(request):
    """Return the years of a make/model/trim triple as a JSON document.

    :param request: Django request; the query string must carry ``make``,
        ``model`` and ``trim``.
    :return: ``HttpResponse`` with the JSON text, or a 400 response when
        a required parameter is missing.
    """
    print('get_year_for_trim WAS RUNNED')
    make = request.GET.get('make')
    model = request.GET.get('model')
    trim = request.GET.get('trim')
    if make is None or model is None or trim is None:
        # The view used to fall through and return None (or raise on the
        # missing key), which Django reports as a server error.
        return HttpResponse(
            'Missing required query parameter: make, model, trim',
            content_type="text/html",
            status=400)

    models = json.dumps(DatabaseManager().get_all_year(make=make,
                                                       model=model,
                                                       trim=trim))
    print(models)
    # json.dumps never yields an empty string, so the response is always sent.
    return HttpResponse(models, content_type="text/html")
Exemplo n.º 15
0
    def update(self, member, amount, operation="add"):
        """Add to or subtract from a user's credits.

        :param member: value passed to ``self.read`` and used as the
            ``user_id`` in the UPDATE statement.
        :param amount: number of credits to add or remove.
        :param operation: ``"minus"`` subtracts ``amount``; any other
            value adds it.
        :return: None. Failures are reported with ``print``.
        """
        user = self.read(member)
        if not user:
            # read() yields None when the lookup fails; subscripting it
            # would raise TypeError outside the try block below.
            print("Error when updating credits for %s.\n%s" %
                  (member, "user not found"))
            return

        if operation == "minus":
            new_credits = user['credits'] - amount
        else:
            new_credits = user['credits'] + amount

        date = datetime.now()

        try:
            with DatabaseManager() as db:
                sql = "UPDATE `users` SET `credits`=%s, `received_credits`=%s WHERE `user_id`=%s"
                db.execute(sql, (new_credits, date, member))
                # Commit explicitly, as the INSERT paths of this data
                # layer do; without it the UPDATE may be discarded.
                db.commit()
            print("Updated %s's credits from %s to %s" %
                  (user['name'], user['credits'], new_credits))
        except Exception as e:
            print("Error when updating credits for %s.\n%s" % (member, e))
Exemplo n.º 16
0
def get_graph(request):
    """Return the chart data of cars similar to the requested one as JSON.

    :param request: Django request; the query string must carry ``make``,
        ``model``, ``trim`` and an integer ``year``.
    :return: ``HttpResponse`` with the JSON-encoded result of
        ``get_similar_cars_for_graph``; a 400 response when a parameter is
        missing or ``year`` is not an integer; a 404 response when the
        lookup returns nothing.
    """
    print('get_graph WAS RUNNED')
    make = request.GET.get('make')
    model = request.GET.get('model')
    trim = request.GET.get('trim')
    try:
        year = int(request.GET['year'])
    except (KeyError, ValueError):
        # Missing key or non-numeric text.
        year = None

    if make is None or model is None or trim is None or year is None:
        # The view used to return None or raise here, which Django
        # reports as a server error.
        return HttpResponse(
            'Missing or invalid query parameter: make, model, trim, year',
            content_type="text/html",
            status=400)

    list_data = DatabaseManager().get_similar_cars_for_graph(make=make,
                                                             model=model,
                                                             trim=trim,
                                                             year=year)
    if not list_data:
        # An empty result used to make the view return None.
        return HttpResponse('No data for the requested car',
                            content_type="text/html",
                            status=404)

    return HttpResponse(json.dumps(list_data), content_type="text/html")
Exemplo n.º 17
0
class ProxieManager():
    """Build proxy URLs from the proxy records stored in the database."""

    def __init__(self):
        """Create the database handle used to load proxy records."""
        self.db = DatabaseManager()

    def check_existing_proxie(self):
        """Build the URL of every stored proxy and hand it to the checker.

        The checker (``__check_proxie``) is currently a no-op, so this
        method has no observable effect beyond reading the records.
        """
        for proxie_data in self.db.get_proxies():
            proxie = self.__generate_proxie(proxie_data)
            # The built URL used to be discarded; route it to the checker.
            self.__check_proxie(proxie)

    def __check_proxie(self, proxie):
        """Placeholder for the proxy liveness check (not implemented)."""
        pass

    @staticmethod
    def __generate_proxie(proxie_data):
        """Return ``protocol://host:port`` for one proxy record.

        :param proxie_data: mapping with the keys ``protocol``, ``host``
            and ``port``.
        :return: the proxy URL as a string.
        """
        # %-formatting also accepts a numeric port, where "+" raised
        # TypeError.
        return "%s://%s:%s" % (proxie_data['protocol'],
                               proxie_data['host'],
                               proxie_data['port'])
Exemplo n.º 18
0
                price = price.replace('AED', "").replace(" ", ""). \
                    replace(",", ""). \
                    replace(".", ""). \
                    replace("-", "")
                return int(price)
            except:
                return 0

    def __find_tag_by_text(self, code, text):
        """Return the text of the element following the label ``text``.

        Finds the node whose text equals ``text``, moves to its parent and
        returns the ``.text`` of that parent's next sibling. No error is
        handled here: a missing label or sibling raises (AttributeError on
        ``None``).

        :param code: parsed page exposing ``find(text=...)``.
        :param text: exact label text to search for.
        """
        label_node = code.find(text=text)
        value_tag = label_node.parent.find_next_sibling()
        return value_tag.text


if __name__ == '__main__':
    # Manual smoke test: run the extractor against one known listing.
    extractor = DataExtractor()
    db = DatabaseManager()

    # print extractor.extract_data(db.get_all_urls(extractor.DOMAIN))
    # list = db.get_all_urls(extractor.DOMAIN)[200:250]
    # for url in list:
    #     extractor.extract_data(url)

    extractor.extract_data({
        'url':
        'https://www.dubicars.com/2006-toyota-land-cruiser-vxr-v8-152617.html',
        'listing_id': '152617',
        # Plain int literal: the Python 2 ``L`` suffix is a syntax error
        # on Python 3 and unnecessary on Python 2.
        'id': 221717
    })
Exemplo n.º 19
0
class DataExtractor:
    """Extract car-listing data from dubicars.com pages.

    ``extract_data`` handles the first extraction of a listing and
    ``update_data`` refreshes the price / sold state of a known listing.
    The private ``__find_*`` helpers each read one field from the parsed
    page and fall back to a neutral value (``''`` or ``0``) when the field
    cannot be read.
    """
    DOMAIN = 'dubicars.com'
    PROJECT_ID = 13

    PATH = 'phones/'

    def __init__(self):
        """Create loggers, HTTP/parsing helpers, the DB handle and load
        the list of known trims from the database."""
        print(self.DOMAIN)
        self.logger = Logger(name='dubicars_data_log')
        self.err_logger = Logger(name='err_dubicars_data_log')
        self.request_manager = RequestManager()
        self.source_code_manager = SourceCodeManager()
        self.generator = Generator()
        self.db = DatabaseManager()
        self.trim_list = self.db.get_trim_list()

    def extract_data(self, url_data):
        """Download one listing page, parse it and insert it into the DB.

        :param url_data: mapping with the keys ``id``, ``url`` and
            ``listing_id``.
        :return: None. Sold / 404 pages mark the URL inactive; parse
            failures are logged to ``err_logger`` and mark the URL as
            processed.
        """
        print(url_data)
        url_id = url_data['id']
        url = url_data['url']
        listing_id = url_data['listing_id']

        data = {}

        response = self.request_manager.take_get_request(url)
        source_code = response['source_code']
        parsed_code = self.source_code_manager.parse_code(source_code)

        # A "sold" badge or a 404 status both retire the URL.
        expired = parsed_code.find('img', {'class': 'sold'})
        if expired is not None:
            self.db.set_url_inactive(url_id)
            self.err_logger.error("EXPIRED " + str(url_data))
            return
        if response['status_code'] == 404:
            self.db.set_url_inactive(url_id)
            self.err_logger.error("404 " + str(url_data))
            return

        try:
            marka = self.__find_make(parsed_code)

            year = self.__find_year(parsed_code)
            kilometres = self.__find_km(parsed_code)
            color = self.__find_color(parsed_code)
            specs = self.__find_specs(parsed_code)
            price = self.__find_price(parsed_code)
            model = self.__find_model(parsed_code, make=marka)
            trim = self.__find_trim(parsed_code, marka=marka, model=model)
            if trim == 'Other':
                self.db.set_url_processed(url_id)
                self.db.set_url_inactive(url_id)
                return
            phone = self.__find_phone(parsed_code)
        except Exception as exc:
            self.err_logger.error(str(exc) + str(url_data))

            self.db.set_url_processed(url_id)
            return

        try:
            data['year'] = int(year)
            data['price'] = int(price)
            data['kilometres'] = int(kilometres)
            data['color'] = color
            data['specs'] = specs
            data['trim'] = trim
            data['model'] = model
            data['make'] = marka
            data['phone'] = phone
            print(data)
        except Exception as exc:
            # url_data is a dict: it must be stringified before being
            # concatenated, otherwise this handler itself raised TypeError
            # and the URL was never marked processed/inactive.
            self.err_logger.error(str(exc) + str(url_data))

            self.db.set_url_processed(url_id)
            self.db.set_url_inactive(url_id)
            return

        self.db.insert_data(data=data,
                            listing_id=listing_id,
                            url=url,
                            source=self.DOMAIN)
        self.db.set_url_processed(url_id)

    def update_data(self, url_data):
        """Refresh the price and market age of an already stored listing.

        :param url_data: mapping with the keys ``id``, ``listing_id``,
            ``url`` and ``timestamp`` (a datetime).
        :return: None. Sold / 404 pages set the sold status and mark the
            URL inactive; otherwise the listing row is updated.
        """
        timestamp = generate_timestamp()
        url_id = url_data['id']
        listing_id = url_data['listing_id']
        print(listing_id)
        url = url_data['url']
        first_timestamp = url_data['timestamp']
        # NOTE(review): this is "first seen" minus the generated timestamp;
        # if generate_timestamp() is the current time the day count is
        # negative - confirm the intended sign.
        time_dif = first_timestamp - datetime.strptime(timestamp,
                                                       "%Y.%m.%d:%H:%M:%S")
        time_dif = time_dif.days

        response = self.request_manager.take_get_request(url)
        source_code = response['source_code']
        parsed_code = self.source_code_manager.parse_code(source_code)
        expired = parsed_code.find('img', {'class': 'sold'})
        if expired is not None:
            self.db.set_sold_status(listing_id=listing_id,
                                    days_for_selling=time_dif)
            self.db.set_url_inactive(url_id)
            return

        if response['status_code'] == 404:
            # Same output as the Python 2 statement ``print 404, listing_id``.
            print("%s %s" % (404, listing_id))
            self.db.set_sold_status(listing_id=listing_id,
                                    days_for_selling=time_dif)
            self.db.set_url_inactive(url_id)
            return

        try:
            price = self.__find_price(parsed_code)
        except Exception:
            price = 0

        self.db.update_listing(listing_id=listing_id,
                               price=int(price),
                               days_on_market=time_dif)
        self.db.set_updated(listing_id=listing_id)

    def __find_make(self, code):
        """Return the text next to the 'Make:' label, or '' on failure."""
        try:
            return self.__find_tag_by_text(code, text='Make:')
        except Exception:
            return ''

    def __find_year(self, code):
        """Return the first integer token next to the 'Year:' label.

        Returns '' when the label is missing or no token is an integer.
        """
        try:
            year_text = self.__find_tag_by_text(code, text='Year:')
            for token in year_text.split():
                try:
                    return int(token)
                except ValueError:
                    continue
        except Exception:
            return ''
        # No numeric token: same "not found" value as the error path
        # (this case used to fall through and return None).
        return ''

    def __find_km(self, code):
        """Return the mileage next to 'Kilometers:' as int, or 0."""
        try:
            km = self.__find_tag_by_text(code, text='Kilometers:')
            km = km.replace(",", "").replace(".", "").replace(" ", "")
            return int(km)
        except Exception:
            return 0

    def __find_color(self, code):
        """Return the stripped text next to 'Color:', or '' on failure."""
        try:
            return self.__find_tag_by_text(code, text='Color:').strip()
        except Exception:
            return ''

    def __find_specs(self, code):
        """Return the stripped text next to 'Specs:', or '' on failure."""
        try:
            return self.__find_tag_by_text(code, text='Specs:').strip()
        except Exception:
            return ''

    # ============= TRIM ===============
    # =====
    def __generateEditedTrims(self, marka, trim):
        """Match hyphenated known trims against ``trim`` with spaces.

        For every known trim of ``marka`` that is longer than three
        characters and contains '-', the hyphens are replaced by spaces and
        the result (as is, then title-cased) is searched in ``trim``.

        :return: the original hyphenated known trim of the first match,
            or '' when nothing matches.
        """
        for example_trim in self.trim_list:
            try:
                known = example_trim['trim']
                if len(known) <= 3:
                    continue
            except Exception:
                # Entries without a usable trim value are skipped.
                continue

            if '-' not in known or example_trim['make'] != marka:
                continue

            spaced = known.replace('-', ' ')
            if spaced in trim or spaced.title() in trim:
                print(known)
                return known
        return ''

    def __find_trim(self, code, marka, model):
        """Work out the trim from the text next to the 'Model:' label.

        The model name is removed from the label text; the remainder is
        matched against the known trims of ``marka`` and the longest match
        wins. Known trims of one or two characters only count when they
        appear as a whole space-delimited word.

        :return: the matched trim; the unmodified label text when nothing
            remains after removing the model; the remainder itself when it
            is at most two words and nothing matched; '' on any error.
        """
        try:
            to_return_trim = ''
            not_edited_trim = self.__find_tag_by_text(code,
                                                      text='Model:').strip()
            trim = not_edited_trim.replace(model, '').strip()

            if len(trim.split()) == 0:
                print("%s there is no Trim!!!!" % (not_edited_trim,))
                return not_edited_trim.strip()

            padded_trim = ' ' + trim + ' '
            for example_trim in self.trim_list:
                if example_trim['make'] != marka:
                    continue
                known = example_trim['trim']
                if known not in trim:
                    continue
                # Very short codes must match a whole word, not a fragment.
                if len(known) <= 2 and ' ' + known + ' ' not in padded_trim:
                    continue
                if len(known) > len(to_return_trim):
                    print(known)
                    to_return_trim = known

            edited_trim = self.__generateEditedTrims(marka=marka, trim=trim)
            if len(edited_trim) > len(to_return_trim):
                return edited_trim
            if to_return_trim:
                return to_return_trim
            # Nothing matched; ``trim`` is known to be non-empty here.
            if len(trim.split()) <= 2:
                return trim
            # NOTE(review): an unmatched remainder of three or more words
            # yields None (kept as is) - confirm this is intended.
            return None
        except Exception:
            return ''

    # =====
    # ============= TRIM ===============

    def __find_model(self, code, make):
        """Return the last breadcrumb's text without the leading make words.

        :return: the remaining words joined by spaces, or '' on failure
            (the exception is printed).
        """
        try:
            breadcrumbs = code.findAll('span', {'typeof': 'v:Breadcrumb'})
            name = breadcrumbs[-1].text
            # Drop as many leading words as the make consists of.
            model_words = name.split()[len(make.split()):]
            return ' '.join(model_words).strip()
        except Exception as exc:
            print(exc)
            return ''

    def __find_phone(self, code):
        """Return the ``data-reveal`` value of the contact link.

        Quotes, spaces and square brackets are removed. Returns '' on
        failure (the exception is printed).
        """
        try:
            phone = code.find('p', {
                'id': 'contact-buttons'
            }).find('a')['data-reveal']
            for junk in ('"', ' ', '[', ']'):
                phone = phone.replace(junk, '')
            return phone.strip()
        except Exception as exc:
            print(exc)
            return ''

    def __find_price(self, code):
        """Return the listing price as int, or 0 when it cannot be read.

        The regular price element (class ``money``) is tried first, then
        the reduced-price element (class ``money reduced``).
        """
        for css_class in ('money', 'money reduced'):
            try:
                price_text = code.find('strong', {'class': css_class}).text
                return self.__clean_price(price_text)
            except Exception:
                continue
        return 0

    @staticmethod
    def __clean_price(price_text):
        """Strip the currency label and separators and convert to int."""
        for junk in ('AED', ' ', ',', '.', '-'):
            price_text = price_text.replace(junk, '')
        return int(price_text)

    def __find_tag_by_text(self, code, text):
        """Return the text of the element following the label ``text``.

        Raises (AttributeError on ``None``) when the label or its sibling
        is missing; callers convert that into their fallback value.
        """
        tag_with_text = code.find(text=text)
        needed_tag = tag_with_text.parent.find_next_sibling()
        return needed_tag.text
Exemplo n.º 20
0
class DealQualityManager:
    """Rate each listing against the average of its comparable listings."""

    def __init__(self):
        """Create the database handle used to store the ratings."""
        self.db = DatabaseManager()

    def group_cars(self, object_list):
        """Split rows into runs of consecutive rows with equal fields 1..4.

        Rows are compared on the slice ``row[1:5]``; a new group starts
        whenever that slice differs from the one that opened the current
        group, so the input is expected to arrive already ordered.

        :param object_list: sequence of sliceable rows.
        :return: list of groups (lists of rows); ``[]`` for empty input
            (this used to raise IndexError).
        """
        grouped_list = []
        group_key = None
        for row in object_list:
            row_key = row[1:5]
            if grouped_list and row_key == group_key:
                grouped_list[-1].append(row)
            else:
                group_key = row_key
                grouped_list.append([row])
        return grouped_list

    def dealRating(self, list_cars):
        """Store a deal-quality status for every car of one group.

        Each car is compared with the group's average mileage and price.
        The mileage difference is expressed in steps of 1/17 of the average
        mileage and the price difference in steps of 1/20 of the average
        price; the status is the sum of both step counts as a string.

        :param list_cars: rows providing ``id``, ``kilometres`` and
            ``price`` by key; an empty group is ignored.
        :return: None. Results are written with ``db.set_deal_quality``.
        """
        if not list_cars:
            # Nothing to average; avoids dividing by a zero group size.
            return

        summ_km = sum(car['kilometres'] for car in list_cars)
        summ_price = sum(car['price'] for car in list_cars)

        average_km = (summ_km / len(list_cars))
        average_price = (summ_price / len(list_cars))
        print('Average km: ', average_km)
        print('Average price: ', average_price)

        average_km_degree = average_km / 17
        average_price_degree = average_price / 20

        for car in list_cars:
            listing_id = car['id']
            different_km = average_km - car['kilometres']
            different_price = average_price - car['price']
            # A zero average makes the step size zero; treat that
            # component as "no difference".
            try:
                result_km = int(different_km / average_km_degree)
            except ZeroDivisionError:
                result_km = 0
            try:
                result_price = int(different_price / average_price_degree)
            except ZeroDivisionError:
                result_price = 0
            status = str(result_km + result_price)
            self.db.set_deal_quality(status=status,
                                     listing_id=listing_id,
                                     price_difference=different_price)

    def main(self, car_list):
        """Group ``car_list`` and rate every group.

        NOTE(review): rows must support both slicing (``group_cars``) and
        key access (``dealRating``) - confirm the row type returned by the
        database layer.
        """
        for grouped_cars in self.group_cars(car_list):
            self.dealRating(list(grouped_cars))
Exemplo n.º 21
0
 def __init__(self):
     """Create the database handle and keep it as ``self.db``."""
     database = DatabaseManager()
     self.db = database
Exemplo n.º 22
0
class Runner():
    """Entry points for extracting, updating and rating car listings.

    NOTE(review): ``extract_data_dubizzle``, ``extract_data_dubicars`` and
    ``update_db`` use ``self.rmq_extract``, ``self.rmq_update`` and
    ``self.dubicars_links_extractor``, which ``__init__`` no longer creates
    (the assignments are commented out). Calling those methods raises
    AttributeError until the collaborators are restored.
    """
    TEMPLATE_DUBIZZLE = 'https://uae.dubizzle.com/motors/used-cars/?page={}&seller_type=OW&is_search=1&is_basic_search_widget=0&places__id__in=--&ot=desc&o=2'
    TEMPLATE_DUBICARS = 'https://www.dubicars.com/search?ajax=true&view=&o=&l=&ma=&mo=0&c=new-and-used&pf=&pt=&yf=&yt=&kf=10000&kt=&b=&co=&ci=&s=&gi=&page={}'

    DUBIZZLE_DOMAIN = 'dubai.dubizzle.com'
    DUBICARS_DOMAIN = 'dubicars.com'

    def __init__(self):
        """Create the database handle and the deal-quality manager."""
        # self.pool = Pool(5)

        self.db = DatabaseManager()
        # self.rmq_extract = RabbitMQ.RabbitMQManager.RabbitMQManager('uae_extract_urls')
        # self.rmq_update = RabbitMQ.RabbitMQManager.RabbitMQManager('uae_update_urls')

        # ======== Dubizzle =======
        #self.dubizzle_data_extractor = DubizzleDataExtractor()
        # self.dubizzle_links_extractor = DubizzleLinksExtractor()

        # ======== Dubicars =======
        #self.dubicars_data_extractor = DubicarsDataExtractor()
        # self.dubicars_links_extractor = DubicarsLinksExtractor()

        self.deal_quality = DealQualityManager()

    def main(self, update_mode=False, deal_update=False, extract_data=False):
        """Recompute the deal quality of all grouped listings.

        The three flags are accepted for compatibility but are currently
        ignored: the mode handling below is commented out.
        """

        # if update_mode:
        #     self.update_db()
        #     self.deal_quality.main(self.db.get_grouped_listings())
        #     print "Updated!"
        #     self.db.reset_updates()
        #     return
        # if deal_update:
        #     self.deal_quality.main(self.db.get_grouped_listings())
        #     return
        # if extract_data: # Not work
        #     new_urls = self.db.get_urls(source=None)
        #     print len(new_urls)
        #     self.pool.map(self.dubizzle_data_extractor.extract_data, new_urls)
        #     return

        self.deal_quality.main(self.db.get_grouped_listings())

    def extract_data_dubizzle(self):
        """Queue the new Dubizzle URLs for extraction."""
        # ======== Dubizzle =======
        # self.dubizzle_links_extractor.extract_urls()
        # print self.DUBIZZLE_DOMAIN

        new_urls = self.db.get_urls(source=self.DUBIZZLE_DOMAIN)  # Dubizzle
        print(len(new_urls))
        self.rmq_extract.load_urls(
            self.generate_urls_to_queue(activity='extract',
                                        source='dubizzle',
                                        urls=new_urls))

    def extract_data_dubicars(self):
        """Collect Dubicars links, then queue the new URLs for extraction."""
        # ======== Dubicars =======
        self.dubicars_links_extractor.main(self.TEMPLATE_DUBICARS)

        new_urls = self.db.get_urls(source=self.DUBICARS_DOMAIN)  # Dubicars
        print(len(new_urls))
        self.rmq_extract.load_urls(
            self.generate_urls_to_queue(activity='extract',
                                        source='dubicars',
                                        urls=new_urls))

    def update_db(self):
        """Queue every known URL of both sources for an update pass."""
        sources = (
            ('dubizzle', self.DUBIZZLE_DOMAIN),
            ('dubicars', self.DUBICARS_DOMAIN),
        )
        for source_name, domain in sources:
            urls_data = self.db.get_all_urls(source=domain)
            urls_to_queue = self.generate_urls_to_queue(source=source_name,
                                                        activity='update',
                                                        urls=urls_data)
            self.rmq_update.load_urls(urls=urls_to_queue)

    def generate_urls_to_queue(self, source, activity, urls):
        """Tag every URL record with ``activity`` and ``source``.

        The records are modified in place and the same ``urls`` object is
        returned.
        """
        for url in urls:
            url['activity'] = activity
            url['source'] = source

        return urls

    def deal_quality_func(self):
        """Recompute the deal quality of all grouped listings."""
        self.deal_quality.main(self.db.get_grouped_listings())
Exemplo n.º 23
0
        for car in list_cars:
            id = car['id']
            different_km = average_km - car['kilometres']
            different_price = average_price - car['price']
            try:
                result_km = int(different_km / average_km_degree)
            except ZeroDivisionError:
                result_km = 0
            try:
                result_price = int(different_price / average_price_degree)
            except ZeroDivisionError:
                result_price = 0
            status = str(result_km + result_price)
            self.db.set_deal_quality(status=status,
                                     listing_id=id,
                                     price_difference=different_price)

    def main(self, car_list):
        """Group ``car_list`` with ``group_cars`` and rate every group.

        Each group is copied into a fresh list before being handed to
        ``dealRating``.
        """
        for group in self.group_cars(car_list):
            self.dealRating(list(group))


if __name__ == '__main__':
    # Script entry point: load the grouped listings from the database and
    # compute the deal quality for all of them.
    db, DQM = DatabaseManager(), DealQualityManager()
    object_list = db.get_grouped_listings()
    DQM.main(object_list)
Exemplo n.º 24
0
 def __init__(self):
     """Create the database manager with the 'api' / 'password' arguments
     and keep it as ``self.dbmanager``."""
     manager = DatabaseManager('api', 'password')
     self.dbmanager = manager
Exemplo n.º 25
0
class databaseinterface:
    """Thin facade over ``DatabaseManager`` for user and channel records."""

    def __init__(self):
        """Create the underlying database manager."""
        self.dbmanager = DatabaseManager('api', 'password')

    def getChannels(self):
        """
        :return: List of all channels
        """
        return self.dbmanager.lookupChannels()

    def getUsers(self):
        """
        :return: List of all users
        """
        return self.dbmanager.lookupUsers()

    def newUser(self, user):
        """
        Adds a user to the database
        :param user: User object (``id``, ``alias`` and ``password`` are stored)
        :return: None
        """
        self.dbmanager.createUser((user.id, user.alias, user.password))

    def changeUser(self, userid, user):
        """
        Updates the user stored under ``userid`` with the data of ``user``
        :param userid: Id of user being updated
        :param user: User object
        :return: None
        """
        # NOTE(review): newUser stores ``user.alias`` while this passes
        # ``user.name`` - confirm which attribute the User type provides.
        self.dbmanager.updateUser(userid, user.name, user.password)

    def getUser(self, userid):
        """
        Returns the information needed to create a user object from what
        was stored in the database
        :param userid: Id of user being looked up
        :return: Tuple ``(alias, id, None, None)``, or None if not found
        """
        row = self.dbmanager.lookupUser(userid)
        if not row:
            # Same "not found" contract as getUserAlias; unpacking an
            # empty result used to raise TypeError here.
            return None
        (user_id, alias, password) = row
        return (alias, user_id, None, None)

    def getUserAlias(self, alias):
        """
        Returns the user with alias==alias, None if not found
        :param alias: Alias of user being looked up
        :return: Tuple ``(alias, id, None, None)``, or None if not found
        """
        row = self.dbmanager.lookupUser(alias, "alias")
        if not row:
            return None
        (user_id, alias, password) = row
        return (alias, user_id, None, None)

    def newChannel(self, channel):
        """
        Adds a channel object to the database
        :param channel: Channel object (``id``, ``name`` and ``permisions``
            are stored)
        :return: None
        """
        self.dbmanager.createChannel(
            (channel.id, channel.name, channel.permisions))

    def getChannel(self, channel):
        """
        Retrieves the information needed to create a channel object
        :param channel: Value passed to ``lookupChannel``
        :return: Tuple ``(name, permissions, id, blocked_users)``
        """
        (channel_id, name, permissions,
         blocked_users) = self.dbmanager.lookupChannel(channel)
        return (name, permissions, channel_id, blocked_users)

    def change_default_permisions(self, channel, permissions):
        """
        Stores new default permissions for a channel
        :param channel: Channel object
        :param permissions: String of exactly three characters, each
            '0' or '1'; any other value is ignored
        :return: None
        """
        # \Z anchors the end: an unanchored match accepted any string that
        # merely started with three bits (e.g. '1010' or '111abc').
        if re.match(r'[01]{3}\Z', permissions):
            self.dbmanager.updateChannelPermissions(channel.id, permissions)

    def blockUser(self, channel, user):
        """
        Blocks a given user from a given channel
        :param channel: Channel object
        :param user: User object
        :return: None
        """
        self.dbmanager.appendBlockedChannel(channel.id, user.id)

    def UnblockUser(self, channel, user):
        """
        Unblocks a given user from a given channel
        :param channel: Channel object
        :param user: User object
        :return: None
        """
        self.dbmanager.removeBlockedUser(channel.id, user.id)
class DataExtractor:
    """Extracts and refreshes car listings scraped from dubai.dubizzle.com.

    Pages are fetched with ``RequestManager``, parsed with
    ``SourceCodeManager`` and the results are written with
    ``DatabaseManager``.
    """

    DOMAIN = 'dubai.dubizzle.com'
    PROJECT_ID = 13

    # Directory where phone-number images are written temporarily for OCR.
    PATH = 'phones/'

    def __init__(self):
        """Create the loggers and helper objects the extractor relies on."""
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(self.DOMAIN)
        self.logger = Logger(name='dubizzle_data_log')
        self.err_logger = Logger(name='err_dubizzle_data_log')
        self.request_manager = RequestManager()
        self.source_code_manager = SourceCodeManager()
        self.generator = Generator()
        self.db = DatabaseManager()

    def _field_text(self, parsed_code, label, levels=1):
        """Return the text shown beside the icon whose ``alt`` is *label*.

        The text is read from the icon's ancestor *levels* steps up, with the
        label itself removed and surrounding whitespace stripped.
        """
        node = parsed_code.find('img', attrs={'alt': label})
        for _ in range(levels):
            node = node.parent
        return node.text.replace(label, '').strip()

    def _price_text(self, parsed_code):
        """Return the text of the ``actualprice`` span without ',' and '.'."""
        price_node = parsed_code.find('span', {'id': 'actualprice'})
        return price_node.text.replace(',', '').replace('.', '')

    def _mark_sold(self, listing_id, url_id, days_for_selling):
        """Flag a listing as sold and deactivate its URL."""
        self.db.set_sold_status(listing_id=listing_id,
                                days_for_selling=days_for_selling)
        self.db.set_url_inactive(url_id)
        print("updated")

    def extract_data(self, url_data):
        """Scrape one listing page and insert its data into the database.

        Expired and 404 pages deactivate the URL. Pages that cannot be parsed
        (missing elements, non-numeric year/price/mileage, phone OCR failure)
        are logged to the error logger and the URL is marked processed, so
        one bad page does not abort the caller. Listings whose trim is
        'Other' are marked processed without being stored.

        :param url_data: mapping with the keys 'id', 'url' and 'listing_id'
        :return: None
        """
        print(url_data)
        url_id = url_data['id']
        url = url_data['url']
        listing_id = url_data['listing_id']

        response = self.request_manager.take_get_request(url)
        parsed_code = self.source_code_manager.parse_code(
            response['source_code'])

        expired = parsed_code.find('div', {'id': 'expired-ad-message'})
        if expired is not None:
            self.db.set_url_inactive(url_id)
            self.err_logger.error("EXPIRED " + str(url_data))
            return
        if response['status_code'] == 404:
            self.err_logger.error("404 " + str(url_data))
            self.db.set_url_inactive(url_id)
            return

        try:
            # The breadcrumb lookup and the int() conversions live inside the
            # try block: a page without a breadcrumb or with a non-numeric
            # field is a parse failure like any other, not a crash.
            bread = parsed_code.find('span', {'id': 'browse_in_breadcrumb'})
            items = bread.findAll('div')

            year = self._field_text(parsed_code, 'Year')
            kilometres = self._field_text(
                parsed_code, 'Kilometers').replace(',', '').replace('.', '')
            color = self._field_text(parsed_code, 'Color')
            specs = self._field_text(parsed_code, 'Specs')
            # The trim text sits one level higher than the other fields.
            trim = self._field_text(parsed_code, 'Trim', levels=2)
            if trim == 'Other':
                self.db.set_url_processed(url_id)
                return
            price = self._price_text(parsed_code)
            # The last two breadcrumb entries are stored as model and make.
            model = items[-1].find('a').text.strip()
            marka = items[-2].find('a').text.strip()
            phone = self.extract_phone(parsed_code, id=url_id)

            data = {}
            data['year'] = int(year)
            data['price'] = int(price)
            data['kilometres'] = int(kilometres)
            data['color'] = color
            data['specs'] = specs
            data['trim'] = trim
            data['model'] = model
            data['make'] = marka
            data['phone'] = phone
        except Exception as exc:
            self.err_logger.error(str(exc) + str(url_data))
            self.db.set_url_processed(url_id)
            return

        self.db.insert_data(data=data,
                            listing_id=listing_id,
                            url=url,
                            source=self.DOMAIN)
        self.db.set_url_processed(url_id)

    def update_data(self, url_data):
        """Re-visit a stored listing and refresh its price / sold status.

        Expired or 404 pages mark the listing as sold and deactivate the URL;
        otherwise the current price and the elapsed days are written back.
        When the price cannot be read it is stored as 0 and the problem is
        logged to the error logger.

        :param url_data: mapping with the keys 'id', 'listing_id', 'url' and
            'timestamp' (a datetime)
        :return: None
        """
        timestamp = generate_timestamp()
        url_id = url_data['id']
        listing_id = url_data['listing_id']
        print(listing_id)
        url = url_data['url']
        first_timestamp = url_data['timestamp']
        # NOTE(review): this is stored-timestamp minus the generated
        # timestamp. If generate_timestamp() is "now", the day count is
        # zero or negative; confirm the intended sign before changing it,
        # because stored values depend on it.
        time_dif = (first_timestamp - datetime.strptime(
            timestamp, "%Y.%m.%d:%H:%M:%S")).days

        response = self.request_manager.take_get_request(url)
        parsed_code = self.source_code_manager.parse_code(
            response['source_code'])

        expired = parsed_code.find('div', {'id': 'expired-ad-message'})
        if expired is not None:
            self._mark_sold(listing_id, url_id, time_dif)
            return
        if response['status_code'] == 404:
            # Same output as the Python 2 statement ``print 404, listing_id``.
            print('%s %s' % (404, listing_id))
            self._mark_sold(listing_id, url_id, time_dif)
            return

        try:
            price = int(self._price_text(parsed_code))
        except Exception:
            # Best effort, as before: an unreadable price is stored as 0,
            # but it is now logged and no longer hides KeyboardInterrupt.
            self.err_logger.error("PRICE " + str(url_data))
            price = 0

        self.db.update_listing(listing_id=listing_id,
                               price=price,
                               days_on_market=time_dif)
        self.db.set_updated(listing_id=listing_id)
        print("updated")

    def extract_phone(self, code, id):
        """Read the seller's phone number from the inline phone image.

        The base64 image embedded in the page is written to a temporary file
        under ``PATH``, passed to ``textract`` and removed again, even when
        decoding or text extraction fails.

        :param code: parsed page exposing ``find``
        :param id: value used to name the temporary image file
        :return: extracted text without spaces, prefixed with '+971' when
            that prefix is not already present
        """
        img = code.find('img', {'class': 'phone-num-img'})['src']

        # A data URI looks like "data:image/<ext>;base64,<payload>".
        ext = img.partition('data:image/')[2].split(';')[0]
        image_path = self.PATH + str(id) + '.' + ext
        try:
            with open(image_path, 'wb') as f:
                f.write(ba.a2b_base64(img.partition('base64,')[2]))
            text = textract.process(image_path).replace(' ', '')
        finally:
            # Always clean up so failed extractions do not pile up images.
            if os.path.exists(image_path):
                os.remove(image_path)

        if '+971' not in text:
            text = '+971' + text

        return text.strip()
 def __init__(self):
     """Create the request, source-code, generator and database helpers."""
     self.request_manager = RequestManager()
     self.source_code_manager = SourceCodeManager()
     self.generator = Generator()
     self.db = DatabaseManager()
class DataExtractor:
    """Synchronises carswitch.com listings with the local database.

    The full car list is fetched in one request from the site's Algolia
    search endpoint; new cars are inserted and known ones are updated or
    marked as sold.
    """

    DOMAIN = 'carswitch.com'
    PROJECT_ID = 15

    # Labels for the numeric 'GCCspecs' codes, indexed by code.
    _SPECS_LABELS = ('American', 'GCC', 'European', 'Japanese', 'Canadian')

    def __init__(self):
        """Create the helper objects used by the extractor."""
        self.request_manager = RequestManager()
        self.source_code_manager = SourceCodeManager()
        self.generator = Generator()
        self.db = DatabaseManager()

    def update_data(self, db_list_listings):
        """Refresh stored listings against the cars currently on the site.

        Stored listings that are missing from the fetched list are marked as
        sold; the others get their price and days-on-market updated. Nothing
        is changed when the fetch returns no cars, so a failed request does
        not mark every listing as sold.

        :param db_list_listings: iterable of stored listing ids
        :return: None
        """
        list_cars = self.take_js_request()
        if not list_cars:
            print('Error Carswitch, len of list cars is 0')
            return

        # Index the fetched cars once by id instead of rescanning the whole
        # list for every stored listing. setdefault keeps the first car seen
        # for an id, which is the one a linear scan would have found.
        cars_by_id = {}
        for car in list_cars:
            cars_by_id.setdefault(str(car['inspectionID']), car)

        list_expired_listing_id = []
        list_active_cars = []
        for listing in db_list_listings:
            car = cars_by_id.get(str(listing))
            if car is None:
                list_expired_listing_id.append(listing)
            else:
                list_active_cars.append({
                    'listing_id': car['inspectionID'],
                    'price': car['salePrice'],
                })

        for expired_id in list_expired_listing_id:
            self.db.set_sold_status(listing_id=expired_id, days_for_selling=0)
            # '%s' formatting prints the same text on Python 2 and 3.
            print('%s Not Active' % (expired_id,))

        for active_car in list_active_cars:
            timestamp = self.get_info_about_car(
                active_car['listing_id']).timestamp
            days_on_market = self.calculate_days_on_market(
                first_timestamp=timestamp)
            self.db.update_listing(listing_id=active_car['listing_id'],
                                   price=active_car['price'],
                                   days_on_market=days_on_market)
            print('%s Active' % (active_car,))

    def take_js_request(self):
        """Fetch the current car list from the Algolia search endpoint.

        :return: the 'hits' list of the first result set, or an empty list
            when the request or the decoding fails (the error is printed)
        """
        # urllib2 responses are not context managers on Python 2, so wrap
        # them with closing() to guarantee the connection is released.
        from contextlib import closing

        payload = ('{"requests": [{"indexName": "All_Carswitch_Cars","params": "query=&'
                   'numericFilters=%2CinspectionStatus!%3D9%2Cpromoted!%3D1%2C(new!%3D1)&facetFilters=&page=&hitsPerPage=1200"}]}')

        # NOTE(review): the API key is embedded in the URL; consider moving
        # it to configuration.
        url = 'http://ih3kc909gb-dsn.algolia.net/1/indexes/*/queries?x-algolia-agent=Algolia%20for%20AngularJS%203.15.1&x-algolia-application-id=IH3KC909GB&x-algolia-api-key=0a4fcd3b57535f88c86172d5646d6787'

        try:
            with closing(urllib2.urlopen(url, data=payload)) as response:
                decoded = json.loads(response.read())
            return decoded['results'][0]['hits']
        except Exception as e:
            print('Error: ' + str(e))
            return []

    def indicate_specs(self, specs_index):
        """Translate a numeric specs code into its label.

        :param specs_index: code taken from the car's 'GCCspecs' field
        :return: the matching label, or None for an unknown code
        """
        for index, label in enumerate(self._SPECS_LABELS):
            if specs_index == index:
                return label
        return None

    def extract_data(self, db_list_listings):
        """Insert every fetched car that is not stored yet.

        :param db_list_listings: iterable of stored listing ids
        :return: None
        """
        list_cars = self.take_js_request()

        if not list_cars:
            print('Error Carswitch, len of list cars is 0')
            return

        print(len(list_cars))

        # Normalise stored ids to str, exactly as update_data does; a plain
        # ``str(id) in db_list_listings`` never matches when the stored ids
        # are not strings, which would re-insert every car on each run.
        known_ids = set(str(listing) for listing in db_list_listings)

        for car in list_cars:
            listing_id = car['inspectionID']
            car_id = car['carID']

            if str(listing_id) in known_ids:
                print('%s Exist!' % (listing_id,))
                continue

            data = {}
            data['make'] = car['make']
            data['model'] = car['model']
            data['trim'] = car['displayTrim']
            data['year'] = car['year']
            data['kilometres'] = car['mileage']
            data['color'] = car['_highlightResult']['colorPaint']['value']
            data['specs'] = self.indicate_specs(car['GCCspecs'])
            data['price'] = car['salePrice']
            # No phone number is collected for this source.
            data['phone'] = ''

            url = 'http://carswitch.com/uae/used-car/{0}/{1}/{2}/{3}-{4}'\
                .format(data['make'], data['model'], data['year'],
                        listing_id, car_id)

            print(data)

            self.db.insert_data(data=data, listing_id=listing_id, url=url,
                                source=self.DOMAIN)

    def get_info_about_car(self, listing_id):
        """Return the stored record for *listing_id* from the database."""
        return self.db.get_car_data(listing_id=listing_id)

    def calculate_days_on_market(self, first_timestamp):
        """Return the whole-day difference between a stored timestamp and the
        freshly generated one.

        :param first_timestamp: datetime stored for the listing
        :return: ``.days`` of (first_timestamp - generated timestamp)
        """
        timestamp = generate_timestamp()
        # NOTE(review): stored timestamp minus the generated one; if
        # generate_timestamp() is "now" the result is zero or negative.
        # Confirm the intended sign before changing it.
        time_dif = first_timestamp - datetime.strptime(timestamp,
                                                       "%Y.%m.%d:%H:%M:%S")
        return time_dif.days

    def main(self):
        """Insert new cars, then refresh the listings that were already
        stored when this run started."""
        list_listings = self.db.get_all_cars_listings(self.DOMAIN)
        self.extract_data(list_listings)
        self.update_data(list_listings)
Exemplo n.º 29
0
class DealQualityManager:
    """Rates car listings against comparable cars and stores the rating.

    Cars are handled as indexable rows: index 0 is the listing id and the
    slice [1:5] is the grouping key. ``dealRating`` reads index 5 and
    index 6 as mileage and price.
    NOTE(review): ``calculate_middle_price`` and ``calculate_deal_quality``
    read index 5 as the price instead; confirm which row layout they expect.
    """

    def __init__(self):
        """Create the database manager used to store the ratings."""
        self.db = DatabaseManager()

    def group_cars(self, object_list):
        """Split rows into runs of consecutive rows sharing ``row[1:5]``.

        Only adjacent rows are grouped, so the input is expected to be
        ordered by those columns already.

        :param object_list: sequence of car rows
        :return: list of lists of rows; an empty list for empty input
        """
        from itertools import groupby

        return [list(run)
                for _, run in groupby(object_list, key=lambda car: car[1:5])]

    def calculate_middle_price(self, car_list):
        """Return the floor of the average of ``int(row[5])`` over the rows.

        Rows whose value is missing or not convertible are left out of both
        the sum and the count.

        :param car_list: sequence of car rows
        :return: the average as an integer, or 0 when no row is usable
        """
        total = 0
        usable = 0
        for car in car_list:
            try:
                total += int(car[5])
            except (TypeError, ValueError, IndexError, KeyError):
                continue
            usable += 1

        if not usable:
            return 0
        # Floor division matches Python 2's int / int and keeps the result
        # an integer on Python 3 as well.
        return total // usable

    def dealRating(self, list_cars):
        """Score each car of a group against the group's averages.

        The score is the sum of two terms: how many twentieths of the
        average mileage, and of the average price, the car lies below the
        average (negative when above). It is stored as a string together
        with the price difference. An empty group is ignored.

        :param list_cars: rows of one group (index 5 mileage, index 6 price)
        :return: None
        """
        if not list_cars:
            # Nothing to rate; also avoids dividing by a zero length.
            return

        count = len(list_cars)
        # NOTE(review): '/' is integer division for ints on Python 2 only.
        average_km = sum(car[5] for car in list_cars) / count
        average_price = sum(car[6] for car in list_cars) / count
        # Two spaces reproduce ``print 'Average km: ', value`` on Python 2.
        print('Average km:  %s' % (average_km,))
        print('Average price:  %s' % (average_price,))

        # One "degree" is a twentieth (5 %) of the group average.
        average_km_degree = average_km / 20
        average_price_degree = average_price / 20

        for car in list_cars:
            listing_id = car[0]
            different_km = average_km - car[5]
            different_price = average_price - car[6]
            try:
                result_km = int(different_km / average_km_degree)
            except ZeroDivisionError:
                result_km = 0
            try:
                result_price = int(different_price / average_price_degree)
            except ZeroDivisionError:
                result_price = 0
            status = str(result_km + result_price)
            self.db.set_deal_quality(status=status, listing_id=listing_id,
                                     price_difference=different_price)

    def calculate_deal_quality(self, car_list):
        """Label each car by how far its price is below the group average.

        More than 3000 below is "Excellent", more than 1500 below is "Good",
        more than 1500 above is "Poor", anything else is "Fair".

        :param car_list: rows of one group (index 5 is read as the price)
        :return: None
        """
        middle_price = self.calculate_middle_price(car_list)
        for car in car_list:
            listing_id = car[0]
            difference = middle_price - int(car[5])
            print('%s %s %s' % (car, middle_price, difference))
            if difference > 3000:
                status = "Excellent"
            elif difference > 1500:
                status = "Good"
            elif difference < -1500:
                status = "Poor"
            else:
                status = "Fair"
            self.db.set_deal_quality(status=status, listing_id=listing_id,
                                     price_difference=difference)

    def main(self, car_list):
        """Group the rows and rate every group with ``dealRating``."""
        for grouped_cars in self.group_cars(car_list):
            self.dealRating(list(grouped_cars))
Exemplo n.º 30
0
from sqlalchemy import Table
from sqlalchemy.orm import sessionmaker
from database.DatabaseManager import DatabaseManager

# Manager for the inverted-index database. It is given the database file and
# a .sql file (presumably the schema script; confirm in DatabaseManager).
databaseManagerII = DatabaseManager('database/inverted-index.db',
                                    'database/inverted-index.sql')
# Declarative base, metadata and engine belonging to the inverted-index
# database; the "II" suffix marks everything tied to it.
BaseII = databaseManagerII.get_base()
metadataII = databaseManagerII.get_metadata()
engineII = databaseManagerII.get_engine()

# Same setup for the documents database; the "D" suffix marks its objects.
databaseManagerD = DatabaseManager('database/documents.db',
                                   'database/documents.sql')
BaseD = databaseManagerD.get_base()
metadataD = databaseManagerD.get_metadata()
engineD = databaseManagerD.get_engine()


# Reflect each database table using metadata
class IndexWord(BaseII):
    """Mapped class for the 'IndexWord' table of the inverted-index database."""
    # autoload=True asks SQLAlchemy to reflect the columns through metadataII.
    # NOTE(review): newer SQLAlchemy releases expect autoload_with=<engine>
    # instead; confirm the targeted version.
    __table__ = Table('IndexWord', metadataII, autoload=True)


class Posting(BaseII):
    """Mapped class for the 'Posting' table of the inverted-index database."""
    # autoload=True asks SQLAlchemy to reflect the columns through metadataII.
    # NOTE(review): newer SQLAlchemy releases expect autoload_with=<engine>
    # instead; confirm the targeted version.
    __table__ = Table('Posting', metadataII, autoload=True)


class Document(BaseD):
    """Mapped class for the 'Document' table of the documents database."""
    # autoload=True asks SQLAlchemy to reflect the columns through metadataD.
    # NOTE(review): newer SQLAlchemy releases expect autoload_with=<engine>
    # instead; confirm the targeted version.
    __table__ = Table('Document', metadataD, autoload=True)


# Create a sessionmaker function for each Database to use the tables