def _prices_csv_path(arrival, departure, date, suffix):
    """Return the output file path for one search; ``suffix`` encodes the outcome."""
    return path_prices + "/" + arrival + "_" + departure + "_" + date + suffix


def _load_iata_codes(airlines_path):
    """Read the airlines JSON file and map each airline name to its IATA code."""
    # The context manager closes the file even when parsing fails.
    with open(airlines_path) as handle:
        airlines = json.load(handle).get("airlines")
    return {
        str(entry.get("name")): str(entry.get("iata")) for entry in airlines
    }


def _flight_to_iata(label, iata_by_name):
    """Rewrite a scraped flight label as "<IATA code> <flight number>".

    The airline name is rebuilt from the alphabetic words of ``label`` and
    the flight number is its first run of digits. Names that are not in
    ``iata_by_name`` get the placeholder code "XXX".

    Raises:
        IndexError: if ``label`` contains no digits.
    """
    airline = " ".join(re.findall(r"[A-Za-zÀ-ÿ]+", label))
    number = re.findall(r"[0-9]+", label)[0]
    return iata_by_name.get(airline, "XXX") + " " + number


def _expand_codeshares(flights, columns):
    """Return one row per ordered pair of flights for rows with alternatives.

    Rows whose "Vuelos alternativos" list is empty are copied unchanged.
    ``flights`` must have a 0..n-1 index and its "Vuelos" column must already
    contain the operating flights followed by the alternative ones.
    """
    rows = []
    for k in range(len(flights)):
        base = {col: flights[col][k] for col in columns}
        if len(flights["Vuelos alternativos"][k]) > 0:
            for pair in itertools.permutations(flights["Vuelos"][k], 2):
                rows.append(dict(base, Vuelos=list(pair)))
        else:
            rows.append(base)
    # Build the frame once instead of concatenating inside the loop.
    return pd.DataFrame(rows, columns=columns)


def getPrices(arrival, departure, date):
    """Scrape the flights for one search and write them to a ";"-separated CSV.

    The output file is ``<path_prices>/<arrival>_<departure>_<date>`` plus:

    * ``.csv.noconnex``  - the scraper returned 1 (flights could not be
      reached); the file only holds the column headers.
    * ``.csv.noflights`` - the scraper returned no usable rows.
    * ``.csv``           - the processed flights, where the flight list is
      replaced by ``Code_v1``/``flightnumber_v1`` and
      ``Code_v2``/``flightnumber_v2`` ("null" when there is no second leg).

    Args:
        arrival: passed to the scraper and used in the output file name.
        departure: passed to the scraper and used in the output file name.
        date: passed to the scraper and used in the output file name.

    Returns:
        None. The result is the file written under ``path_prices``.
    """
    data = Scraper.Scrap(logger, arrival, departure, date)

    # The scraper signals "could not reach the flights" by returning 1:
    # write a header-only CSV and stop.
    if isinstance(data, int) and data == 1:
        empty_columns = [
            "A.Salida", "A.Llegada", "Duracion", "Paradas", "Vuelos",
            "Vuelos alternativos", "precio", "Escala", "hSalida_v1",
            "hLlegada_v1", "hSalida_v2", "hLlegada_v2", "Enlaces",
        ]
        pd.DataFrame({col: [] for col in empty_columns}).to_csv(
            _prices_csv_path(arrival, departure, date, ".csv.noconnex"),
            sep=";")
        return

    started = t.time()

    if len(data) > 0:
        # Drop header rows (flight/train banner): rows in which every
        # departure/arrival time is "null". The original test repeated
        # hSalida_v1 and never looked at hLlegada_v2.
        time_cols = ["hSalida_v1", "hLlegada_v1", "hSalida_v2", "hLlegada_v2"]
        has_schedule = (data[time_cols] != "null").any(axis=1)
        data = data[has_schedule].reset_index(drop=True)
        logger.info("Making flghts available...")

    if len(data) == 0:
        data.to_csv(
            _prices_csv_path(arrival, departure, date, ".csv.noflights"),
            sep=";")
        timing = t.time() - started
        logger.info(
            "CSV written successfully but with no flights in it and it took {} seconds"
            .format(timing))
        return

    # Replace airline names by IATA codes. The lists stored in the cells are
    # updated in place, so no chained assignment on the frame is needed.
    iata_by_name = _load_iata_codes(path_airlines)
    for flights in data["Vuelos"]:
        for pos, label in enumerate(flights):
            flights[pos] = _flight_to_iata(label, iata_by_name)
    for alternatives in data["Vuelos alternativos"]:
        for pos, label in enumerate(alternatives):
            alternatives[pos] = label.strip()

    # Direct flights with a single codeshare are kept as they are; the
    # pairing algorithm does not apply to them.
    direct_rows = [
        k for k in range(len(data))
        if len(data["Vuelos"][k]) == 1
        and len(data["Vuelos alternativos"][k]) == 1
    ]
    data_directs = data.loc[direct_rows]
    data = data.drop(index=direct_rows).reset_index(drop=True)
    data["Vuelos"] = data["Vuelos"] + data["Vuelos alternativos"]

    # Pairing algorithm: every ordered pair of flights for codeshare rows.
    cols = [
        "Vuelos", "A.Llegada", "A.Salida", "Duracion", "Enlaces", "Escala",
        "hSalida_v1", "hLlegada_v1", "hSalida_v2", "hLlegada_v2", "Paradas",
        "Vuelos alternativos", "precio",
    ]
    expanded = _expand_codeshares(data, cols)

    # Direct flights are written even when no other row is left; previously
    # they were discarded in that case and an empty file was produced.
    frames = [data_directs]
    if len(expanded) > 0:
        frames.append(expanded)
    dfclean = pd.concat(frames, sort=False).reset_index(drop=True)

    # Split each flight list into first/second leg and each leg into
    # airline code and flight number.
    code_v1, number_v1, code_v2, number_v2 = [], [], [], []
    for flights in dfclean["Vuelos"]:
        first_parts = flights[0].split(" ")
        code_v1.append(first_parts[0])
        number_v1.append(first_parts[1])
        second = flights[1] if len(flights) > 1 else "null"
        if second != "null":
            second_parts = second.split(" ")
            code_v2.append(second_parts[0])
            number_v2.append(second_parts[1])
        else:
            code_v2.append("null")
            number_v2.append("null")

    dfclean["Code_v1"] = code_v1
    dfclean["flightnumber_v1"] = number_v1
    dfclean["Code_v2"] = code_v2
    dfclean["flightnumber_v2"] = number_v2
    dfclean = dfclean.drop(columns=["Vuelos"])

    # Write the CSV.
    dfclean.to_csv(
        _prices_csv_path(arrival, departure, date, ".csv"), sep=";")
    timing = t.time() - started
    logger.info(
        "CSV written successfully, transformations took {} seconds".format(
            timing))