示例#1
0
class CleanFlights(object):
    """Apply clean-up rules to every flight stored in the ``vuelos`` collection.

    Each rule receives one flight document and returns a dict of counters;
    the counters of all rules are accumulated into a single result.
    """

    mongodbaccess = None
    logger = None

    def __init__(self, mongo_db_access, level_log):
        """Keep the database accessor and build the logger of this class.

        Args:
            mongo_db_access: object exposing ``find``, ``insert`` and
                ``delete_one`` (the calls made by the methods below).
            level_log: log level forwarded to ``Logger``.
        """
        self.logger = Logger(self.__class__.__name__, level_log).get()
        self.mongodbaccess = mongo_db_access
        self.logger.info("Inicio: %s", datetime.datetime.now())

    def clean(self):
        """Run every rule over every flight found in ``vuelos``.

        Returns:
            dict: ``total`` (flights inspected) plus whatever counters
            the rules produced.
        """
        self.logger.info("++INFO-- CLEAN FASE I")
        result = {"total": 0}
        vuelos = self.mongodbaccess.find("vuelos", {})
        if vuelos is None:
            # MongoDBAccess.find returns None when the server is not
            # reachable; iterating None would raise TypeError.
            self.logger.error("No flights could be read, nothing cleaned")
            return result
        for vuelo in vuelos:
            result = self.analize_each_flight(result, vuelo)
        return result

    def analize_each_flight(self, result, vuelo):
        """Apply every rule to one flight and count the flight as processed.

        Args:
            result: accumulator dict, must contain the key ``total``.
            vuelo: flight document handed to each rule.

        Returns:
            dict: the same ``result`` object.
        """
        # A plain loop replaces ``apply(func, rules)``: that call unpacked
        # the rule list as positional arguments, so it only worked with
        # exactly one rule, and ``apply`` no longer exists in Python 3.
        for rule in self.create_all_rules():
            # NOTE(review): accumulate_dic is defined elsewhere; it is
            # assumed to merge the counters into ``result`` in place, as
            # its return value was never used here - confirm.
            accumulate_dic(result, rule(vuelo))
        result["total"] += 1
        return result

    def create_all_rules(self):
        """Return the list of rule callables to run on each flight."""
        return [self.rule_older_than_15days]

    def rule_older_than_15days(self, elemento):
        """Move a flight older than 15 days from ``vuelos`` to ``vuelosOld``.

        The flight is deleted from ``vuelos`` even when the backup insert
        fails; that case is reported through the error log.

        Args:
            elemento: flight document; ``dateDirect`` is expected to be a
                ``datetime.datetime``.

        Returns:
            dict: ``{"deleted": 0|1, "inserted_old": 0|1}``.
        """
        counters = {"deleted": 0, "inserted_old": 0}
        flight_date = elemento.get("dateDirect")
        if not isinstance(flight_date, datetime.datetime):
            # Without a usable date the age cannot be decided; leave the
            # flight alone instead of comparing unrelated types.
            self.logger.error("Flight without a valid dateDirect, skipped: %s", elemento)
            return counters

        limit = datetime.datetime.now() - datetime.timedelta(days=15)
        if flight_date >= limit:
            return counters

        if self.mongodbaccess.insert("vuelosOld", elemento) is not None:
            counters["inserted_old"] = 1
        else:
            # Only the failed backup is an error worth reporting.
            self.logger.error("Error vuelo not insert backup but delete %s", elemento)
        self.mongodbaccess.delete_one("vuelos", {"_id": elemento.get("_id")})
        counters["deleted"] = 1
        return counters
示例#2
0
class MongoDBAccess(object):
    """Wrap a pymongo client and guard every operation with ``status()``.

    Every operation method logs an error and returns ``None`` when
    ``status()`` is False, instead of touching the database.
    """

    db_access = None
    _client = None

    def __init__(self, config, levelLog):
        """Connect using ``config["url"]`` and select ``config["nameDB"]``.

        The pymongo errors handled below are logged, not raised; in that
        case no client is kept and ``status()`` reports False.

        Args:
            config: dict read for the keys ``url`` and ``nameDB``.
            levelLog: level forwarded to ``Logger``.
        """
        self.logger = Logger(self.__class__.__name__, levelLog).get()
        self.logger.setLevel('INFO')

        db_name = config.get("nameDB")
        if not db_name:
            # Selecting a database with a missing name raises from
            # pymongo; report it like any other configuration problem.
            self.logger.error("ConfigurationErr")
            return
        try:
            self.logger.debug(config.get("url", ""))
            client = MongoClient(config.get("url", ""))
            self.db_access = client[db_name]
            # Keep the client only once the database handle exists, so
            # status() can never be True while db_access is None.
            self._client = client
            self.logger.info("-- INFO -- DATA BASE CONECT OK")
        except ConfigurationError:
            self.logger.error("ConfigurationErr")
        except ConnectionFailure:
            self.logger.error("ConnectionFailure")
        except OperationFailure:
            self.logger.error("Authentication failure")

    def status(self):
        """Return True when the server answers ``server_info()``, else False."""
        if self._client is None:
            return False
        try:
            self.logger.debug(self._client.server_info())
            return True
        except ConnectionFailure:
            self.logger.error("ConnectionFailure")
            return False
        except OperationFailure:
            self.logger.error("Authentication failure")
            return False

    def _collection(self, collection, error_message):
        """Return the collection handle, or None (logging ``error_message``)."""
        if self.status():
            return self.db_access[collection]
        self.logger.error(error_message)
        return None

    @staticmethod
    def _sort_spec(sort):
        """Turn a ``{field: direction}`` dict into a list of pairs.

        ``None`` stays ``None``; a sequence of pairs is copied as is.
        A list is built explicitly because ``dict.items()`` is a view,
        not a list, on Python 3.
        """
        if sort is None:
            return None
        if hasattr(sort, "items"):
            return list(sort.items())
        return list(sort)

    def find_one(self, collection, query, sort=None):
        """Return the first document matching ``query``, or None.

        Args:
            collection: collection name.
            query: filter document.
            sort: optional ``{field: direction}`` dict.
        """
        # Handles are compared with ``is None`` throughout this class:
        # pymongo collections do not support truth-value testing.
        target = self._collection(collection, "Database Not INIT Find_one")
        if target is None:
            return None
        self.logger.info("Access to collection: %s, query %s", collection,
                         query)
        return target.find_one(query, sort=self._sort_spec(sort))

    def find(self, collection, query, sort=None, limite=None):
        """Return a cursor over the documents matching ``query``, or None.

        Args:
            collection: collection name.
            query: filter document.
            sort: optional ``{field: direction}`` dict.
            limite: maximum number of documents; ``None`` is sent as 0.
        """
        target = self._collection(collection, "Database Not INIT Find")
        if target is None:
            return None
        self.logger.info("Access to collection Multi: %s, query: %s, sort: %s, limit: %s",
                         collection, query, sort, limite)
        return target.find(query,
                           sort=self._sort_spec(sort),
                           limit=0 if limite is None else limite)

    def update_one(self, collection, query, change, is_set="set"):
        """Apply ``{"$<is_set>": change}`` to one matching document.

        Returns:
            The value returned by ``Collection.update_one``, or None when
            not connected.
        """
        target = self._collection(collection, "Database Not INIT Update_one")
        if target is None:
            return None
        self.logger.info("Modify collection: %s, query: %s, modify: %s, set: %s",
                         collection, query, change, is_set)
        return target.update_one(query, {"$" + is_set: change})

    def update_many(self, collection, query, change, is_set="set"):
        """Apply ``{"$<is_set>": change}`` to every matching document.

        Returns:
            The value returned by ``Collection.update_many``, or None
            when not connected.
        """
        target = self._collection(collection, "Database Not INIT Update_many")
        if target is None:
            return None
        self.logger.info("Modify Many collection: %s, query: %s, modify: %s, set: %s",
                         collection, query, change, is_set)
        return target.update_many(query, {"$" + is_set: change})

    def insert(self, collection, element):
        """Insert ``element`` and return what ``Collection.insert`` returns.

        Returns:
            None when not connected or when the insert raises
            ``DuplicateKeyError``.
        """
        target = self._collection(collection, "Database Not INIT Insert")
        if target is None:
            return None
        self.logger.debug("Insert collection: %s, data: %s", collection,
                          element)
        try:
            # NOTE(review): Collection.insert is deprecated in pymongo 3
            # and gone in pymongo 4; it is kept because callers rely on
            # its return value and on DuplicateKeyError being raised.
            return target.insert(element)
        except DuplicateKeyError:
            return None

    def delete_one(self, collection, element):
        """Delete one document matching the filter ``element``.

        Returns:
            The value returned by ``Collection.delete_one``, or None when
            not connected.
        """
        target = self._collection(collection, "Database Not INIT Delete_one")
        if target is None:
            return None
        self.logger.info("Remove collection: %s, data: %s", collection,
                         element)
        return target.delete_one(element)

    def delete_many(self, collection, element):
        """Delete every document matching the filter ``element``.

        Returns:
            The value returned by ``Collection.delete_many``, or None
            when not connected.
        """
        target = self._collection(collection, "Database Not INIT Delete_many")
        if target is None:
            return None
        self.logger.info("Remove collection: %s, data: %s", collection,
                         element)
        return target.delete_many(element)

    def aggregate(self, collection, element):
        """Run the aggregation pipeline ``element`` on ``collection``.

        Returns:
            The value returned by ``Collection.aggregate``, or None when
            not connected.
        """
        target = self._collection(collection, "Database Not INIT Aggregate")
        if target is None:
            return None
        self.logger.info("Aggregate collection: %s, data: %s", collection,
                         element)
        return target.aggregate(element)

    def drop(self, collection):
        """Drop ``collection``.

        Returns:
            The value returned by ``Collection.drop``, or None when not
            connected.
        """
        target = self._collection(collection, "Database Not INIT Drop")
        if target is None:
            return None
        self.logger.info("Drop collection: %s", collection)
        return target.drop()
示例#3
0
class FindFlights(object):
    """Read flight results from result pages with Selenium and store them.

    Every processed URL yields one document inserted into ``vuelos``;
    the URL is then removed from ``urls``.
    """

    seleniumaccess = None
    mongodbaccess = None
    logger = None
    holidays = None

    def __init__(self, config, mongo_db_access, level_log):
        """Build the logger, the Selenium accessor and the holidays helper.

        Args:
            config: configuration forwarded to ``SeleniumAccess``.
            mongo_db_access: object exposing ``insert`` and ``delete_one``.
            level_log: log level forwarded to the helpers.
        """
        self.logger = Logger(self.__class__.__name__, level_log).get()
        self.mongodbaccess = mongo_db_access
        self.seleniumaccess = SeleniumAccess(config, level_log)
        self.holidays = Holidays(level_log)
        self.logger.info("Inicio: %s", datetime.datetime.now())

    def get_flights(self, urls):
        """Process every URL document and return the accumulated counters.

        Args:
            urls: iterable of URL documents (dicts); ``None`` is treated
                as "nothing to do".

        Returns:
            dict: counters ``save``, ``warn`` and ``error``.
        """
        self.logger.info("Process each url")
        result = {"save": 0, "warn": 0, "error": 0}
        if urls is None:
            # MongoDBAccess.find returns None when the server is down.
            self.logger.error("No urls available, nothing processed")
            return result

        self.seleniumaccess.open_selenium()
        try:
            driver = self.seleniumaccess.driver
            time.sleep(1)
            driver.get("http://www.google.com")
            time.sleep(1)

            for url in urls:
                # NOTE(review): accumulate_dic is defined elsewhere; it is
                # assumed to add the counters into ``result`` in place.
                accumulate_dic(result, self.url_to_flight(url, driver))
        finally:
            # Always release the browser, even when a page blows up.
            self.seleniumaccess.close_selenium()
        return result

    def url_to_flight(self, url, driver):
        """Load one URL, store the flight found there and report the outcome.

        Args:
            url: dict read for ``url``, ``type``, ``from``, ``to``,
                ``dateDirect`` and ``dateReturn``.
            driver: Selenium driver used to load and inspect the page.

        Returns:
            dict: exactly one of ``save``, ``warn`` or ``error`` set to 1.
        """
        try:
            # Page loading sits inside the try so a TimeoutException
            # raised by it is counted instead of aborting the whole run.
            driver.get(url.get("url", "http://google.es"))
            url_insert = self._build_flight(url, driver)
        except StaleElementReferenceException as error_ref:
            self._report_failure(url, error_ref)
            time.sleep(1)
            return {"save": 0, "warn": 0, "error": 1}
        except (NoSuchElementException, ValueError) as error_no_such:
            # ValueError: the price text could not be turned into a number.
            self._report_failure(url, error_no_such)
            time.sleep(1)
            return {"save": 0, "warn": 1, "error": 0}
        except TimeoutException as error_time_out:
            print("-- ERROR -- TimeOut *****************")
            self._report_failure(url, error_time_out)
            return {"save": 0, "warn": 0, "error": 1}

        self.logger.debug("Insert url elemento: %s", url_insert)
        self.mongodbaccess.insert("vuelos", url_insert)
        self.mongodbaccess.delete_one("urls", {"url": url.get("url", "")})
        print("from: {0}, to: {1}, dateDirect: {2}, dateReturn: {3}, price: {4}".format(
            url_insert["from"], url_insert["to"],
            self._format_day(url_insert["dateDirect"]),
            self._format_day(url_insert["dateReturn"]),
            url_insert["precio"]))
        return {"save": 1, "warn": 0, "error": 0}

    def _build_flight(self, url, driver):
        """Read the result page currently loaded and build the flight document."""
        def text_of(class_name):
            return driver.find_element_by_class_name(class_name).text

        precio_string = text_of("gws-flights-results__price")
        # NOTE(review): "o" presumably marks a one-way search, whose type
        # label lives in a different element - confirm.
        if url.get("type", "") == "o":
            type_flight = text_of("gws-flights-form__menu-label")
        else:
            type_flight = text_of("gws-flights-results__price-annotation")

        return {
            "dBusqueda": datetime.datetime.now(),
            "precio": self._parse_price(precio_string),
            "type": type_flight,
            "horaS": text_of("gws-flights-results__times"),
            "horaLl": "",
            "company": text_of("gws-flights-results__carriers"),
            "duracion": text_of("gws-flights-results__duration"),
            "escalas": text_of("gws-flights-results__itinerary-stops"),
            "from": url.get("from", "XXX"),
            "to": url.get("to", "XXX"),
            "dateDirect": url.get("dateDirect", "XXX"),
            "dateReturn": url.get("dateReturn", "YYY"),
            "holidays": self.holidays.get_number_holidays(
                url.get("dateDirect", "XXX"), url.get("dateReturn", "YYY")),
        }

    @staticmethod
    def _parse_price(precio_string):
        """Convert a price text into a float.

        The first character is dropped (assumed to be a one-character
        currency symbol), "." is removed as thousands separator and ","
        becomes the decimal point, e.g. "X1.234,56" gives 1234.56.

        Raises:
            ValueError: when the remaining text is not a number.
        """
        return float(precio_string[1:].replace(".", "").replace(",", "."))

    @staticmethod
    def _format_day(value):
        """Format a date as YYYY-MM-DD; return placeholders unchanged."""
        if hasattr(value, "strftime"):
            return value.strftime("%Y-%m-%d")
        # Missing dates are stored as the strings "XXX"/"YYY".
        return value

    @staticmethod
    def _report_failure(url, error):
        """Print the URL document and the exception that stopped it."""
        print("****************************")
        print(url)
        print(error)
示例#4
0
class Vuelos(object):
    """Entry point of the load process: decide what to run, then fetch flights."""

    level_log = None
    config = None
    mongodbaccess = None
    logger = None

    def __init__(self, file_config, level_log):
        """Load the JSON configuration and open the database accessor.

        An unreadable or unparsable file is logged and replaced by an
        empty configuration.

        Args:
            file_config: path of the JSON configuration file.
            level_log: log level forwarded to the helpers.
        """
        self.level_log = level_log
        self.logger = Logger(self.__class__.__name__, level_log).get()
        try:
            # ``with`` closes the handle, which the bare open() never did.
            with open(file_config, "r") as handle:
                self.config = json.load(handle)
        except (IOError, ValueError):
            self.logger.error("File Error: %s", file_config)
            self.config = {}
        self.mongodbaccess = MongoDBAccess(self.config, level_log)
        self.logger.info("Inicio: %s", datetime.datetime.now())

    def ejecutar(self, nivel):
        """Run the load process.

        Args:
            nivel: ``"1"`` wipes today's flights and rebuilds every URL;
                any other value only rebuilds URLs when needed and then
                continues with the pending ones.
        """
        print("++ INFO ++ MODULO PRINCIPAL MODO DE EJECUCION: {0}".format(
            nivel))
        if not self.mongodbaccess.status():
            # Every query below would return None and fail with an
            # AttributeError; stop with a clear message instead.
            self.logger.error("Database not available, nothing executed")
            return
        if nivel == "1":
            self._run_hard()
        else:
            self._run_soft()
        result = FindFlights(self.config, self.mongodbaccess, self.level_log)\
                   .get_flights(self.return_urls())
        print("++ INFO -- TOTAL PROCESO, Save: {0}".format(
            result.get("save", 0)))
        print("++ INFO -- TOTAL PROCESO, errores sin Informacion: {0}".format(
            result.get("warn", 0)))
        print("++ INFO -- TOTAL PROCESO, errores NO ENCONTRADO: {0}".format(
            result.get("error", 0)))

    def _run_hard(self):
        """Hard mode: delete today's flights and rebuild all URLs."""
        print("-- INFO -- MODO 1 duro ejecuta y limpia los datos del dia")
        print("++ INFO ++ Vaciamos informacion del dia")
        print("-- INFO -- dia: {0}".format(today()))
        borrados = self.vaciar_dia()
        print("-- INFO -- vaciamos informacion -- Vuelos borrados del dia: {0}"
              .format(borrados.deleted_count))
        self._build_urls()

    def _run_soft(self):
        """Soft mode: rebuild URLs only when the last run is older than today."""
        print("-- INFO -- MODO 0 suave solo si hay datos que ejecutar")
        pending = self.return_urls().count()
        # NOTE(review): today() is defined elsewhere; it is assumed to
        # return a datetime comparable with the stored dBusqueda values.
        outdated = self.find_last_day() < today()
        if pending == 0:
            if outdated:
                # Nothing pending and the last search is not from today.
                print("++ WARN ++  1.1 PRIMERA VEZ DEL DIA creamos las URLS y seguimos")
                self._build_urls()
            else:
                # Nothing pending and today was already processed.
                print("++ WARN ++  1.2 SE HA PROCESADO TODO Y NO HAY NADA QUE HACER")
        elif outdated:
            # URLs left over from a previous day: restart from scratch.
            print("** ERROR **  2.1 AYER NO SE EJECUTARON TODOS LOS VUELOS")
            print("** ERROR **  vuelos pendientes {0}".format(pending))
            self.logger.error("AYER no se ejecutaron todos los vuelos")
            self._build_urls()
        else:
            # Today's run was interrupted: continue with what is left.
            print("++ WARN ++  2.2 HA HABIDO UNA CANCELACION y el "
                  "SISTEMA SIGUE DESDE ESE PUNTO")
            print("++ WARN ++  vuelos pendientes {0}".format(pending))
            self.logger.error(
                "Ha habido una cancelacion y se sigue desde ese punto")

    def _build_urls(self):
        """Rebuild the URLs through ``BuildUrls`` and print its result."""
        urls = BuildUrls(self.mongodbaccess, self.level_log).build_urls()
        print("-- INFO -- construir urls -- numero de URLS: {0}".format(
            urls))

    def vaciar_dia(self):
        """Delete the flights whose ``dBusqueda`` is later than ``today()``.

        Returns:
            The value returned by ``delete_many`` of the accessor.
        """
        return self.mongodbaccess.delete_many("vuelos",
                                              {"dBusqueda": {
                                                  "$gt": today()
                                              }})

    def return_urls(self):
        """Return every document of the ``urls`` collection."""
        return self.mongodbaccess.find("urls", {})

    def find_last_day(self):
        """Return the most recent ``dBusqueda`` stored in ``vuelos``.

        Returns:
            datetime.datetime: 2000-01-01 when the collection is empty or
            the newest document has no ``dBusqueda``.
        """
        print("++ INFO ++ find_last_day")
        oldest = datetime.datetime(2000, 1, 1)
        # One query is enough; the result is reused for both outcomes.
        last = self.mongodbaccess.find_one("vuelos", {},
                                           sort={"dBusqueda": -1})
        if last is None:
            return oldest
        return last.get("dBusqueda", oldest)