예제 #1
0
def zip_dir(months, rm=False, base_path=None):
    """Zip the .csv files belonging to the month *months* months ago.

    The archive is written to the current working directory as
    ``<year>-<month>.zip``.

    months: how many whole months to step back from today.
    rm: when True, delete the archived .csv files and prune empty
        directories afterwards.
    base_path: directory tree to scan; defaults to the module-level
        ``path`` for backward compatibility.
    """
    if base_path is None:
        base_path = path  # original behavior: module-level global

    current = datetime.datetime.now()
    # Step back *months* whole months with proper year carry.  The original
    # `month = 12 + month` only handled a single year of underflow and
    # produced invalid months for months > 12.
    total_months = current.year * 12 + (current.month - 1) - months
    year, month_index = divmod(total_months, 12)
    month = month_index + 1

    zip_file = f'{year}-{month}.zip'
    rm_list = []

    # Context manager guarantees the archive is closed even if os.walk or
    # ZipFile.write raises (the original leaked the handle on error).
    with zp(zip_file, 'w', ZIP_DEFLATED) as zipper:
        for root, dirs, files in os.walk(base_path):
            for file in files:
                if file.endswith('.csv'):
                    # NOTE(review): assumes Windows-style '\\' separators in
                    # walk results and a <base>\\?\\<year>\\<month> layout --
                    # confirm the intended platform/layout.
                    dir_list = root.replace(base_path, '').split('\\')

                    if int(dir_list[2]) == year and int(dir_list[3]) == month:
                        file_path = root + '/' + file
                        zipper.write(file_path)
                        rm_list.append((file_path, root))

    if rm:
        for file_path, root in rm_list:
            os.remove(file_path)

        rm_empty_dir(base_path)
예제 #2
0
 def ExtractSpecificFile(self, name, zip):
     """Extract a single member from a zip archive into the current directory.

     name: the name of the member you want to extract from the archive.
     zip: path of the archive file you want to extract from.
     """
     with zp(zip) as myzip:
         with myzip.open(name) as myfile:
             # "wb" (binary write): ZipFile.open yields bytes; the original
             # text-append mode "a" raised TypeError on the bytes payload.
             with open(name, "wb") as savefile:
                 savefile.write(myfile.read())
예제 #3
0
    def CreateZipFile(self, name=None, path=None):
        """Create a zip archive containing the entries of a directory.

        name: file name for the archive; a random name is generated when
            omitted.
        path: directory whose entries are archived; defaults to the current
            working directory.
        """
        if name is None:
            name = self.RandomName()
        if path is None:
            path = os.getcwd()
        assert os.path.isdir(path), "You need to pass in a directory"

        dirfile = os.listdir(path)
        with zp(name, "a") as zippyfile:
            for file in dirfile:
                # Join with *path* so entries resolve even when *path* is not
                # the CWD (the original passed the bare name and failed with
                # FileNotFoundError for any other directory); arcname keeps
                # the stored name identical to before.
                zippyfile.write(os.path.join(path, file), arcname=file)
예제 #4
0
 def ExtractAllFileInADir(self, path, pathtoextractto=None):
     """Extract every member of the archive at *path*.

     path: path of the zip archive to read.
     pathtoextractto: destination directory; defaults to the current
         working directory.
     """
     if pathtoextractto is None:
         pathtoextractto = os.getcwd()
     with zp(path) as zippyextract:
         # Bug fix: extract to the requested destination -- the original
         # passed path=path, silently ignoring *pathtoextractto*.
         zippyextract.extractall(path=pathtoextractto)
예제 #5
0
def airtraffic(year, state, min_pax):
    """Download, merge, and clean BTS T-100 market air-traffic data for one year.

    year: calendar year to request from transtats.bts.gov (interpolated
        into the form payloads below).
    state: two-letter state abbreviation used to filter destination admin1,
        or '-' to keep all states.
    min_pax: rows with fewer passengers than this are dropped.

    Returns a pandas DataFrame of aggregated origin/destination passenger
    flows enriched with city/state/county/FIPS geography.

    NOTE(review): depends on module-level helpers (download_lookups,
    timestamp, abrv_it, fips_it, uncap_it, admin1_origin, admin2_it,
    cleanup) and on lookup files placed in the working directory -- confirm
    those exist before calling.
    """
    wrkdir = os.getcwd()
    download_lookups(wrkdir)
    # Browser-like headers: the BTS download form is a regular HTML form POST.
    headers = {
        "Content-Type":
        "application/x-www-form-urlencoded",
        "User-Agent":
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36",
        "Accept":
        "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
    }
    # f-string for 'year' selected above
    raw_data_dom = f"UserTableName=T_100_Domestic_Market__U.S._Carriers&DBShortName=Air_Carriers&RawDataTable=T_T100D_MARKET_US_CARRIER_ONLY&sqlstr=+SELECT+PASSENGERS%2CFREIGHT%2CMAIL%2CDISTANCE%2CUNIQUE_CARRIER%2CAIRLINE_ID%2CUNIQUE_CARRIER_NAME%2CUNIQUE_CARRIER_ENTITY%2CREGION%2CCARRIER%2CCARRIER_NAME%2CCARRIER_GROUP%2CCARRIER_GROUP_NEW%2CORIGIN_AIRPORT_ID%2CORIGIN_AIRPORT_SEQ_ID%2CORIGIN_CITY_MARKET_ID%2CORIGIN%2CORIGIN_CITY_NAME%2CORIGIN_STATE_ABR%2CORIGIN_STATE_FIPS%2CORIGIN_STATE_NM%2CORIGIN_WAC%2CDEST_AIRPORT_ID%2CDEST_AIRPORT_SEQ_ID%2CDEST_CITY_MARKET_ID%2CDEST%2CDEST_CITY_NAME%2CDEST_STATE_ABR%2CDEST_STATE_FIPS%2CDEST_STATE_NM%2CDEST_WAC%2CYEAR%2CQUARTER%2CMONTH%2CDISTANCE_GROUP%2CCLASS+FROM++T_T100D_MARKET_US_CARRIER_ONLY+WHERE+YEAR%3D{year}&varlist=PASSENGERS%2CFREIGHT%2CMAIL%2CDISTANCE%2CUNIQUE_CARRIER%2CAIRLINE_ID%2CUNIQUE_CARRIER_NAME%2CUNIQUE_CARRIER_ENTITY%2CREGION%2CCARRIER%2CCARRIER_NAME%2CCARRIER_GROUP%2CCARRIER_GROUP_NEW%2CORIGIN_AIRPORT_ID%2CORIGIN_AIRPORT_SEQ_ID%2CORIGIN_CITY_MARKET_ID%2CORIGIN%2CORIGIN_CITY_NAME%2CORIGIN_STATE_ABR%2CORIGIN_STATE_FIPS%2CORIGIN_STATE_NM%2CORIGIN_WAC%2CDEST_AIRPORT_ID%2CDEST_AIRPORT_SEQ_ID%2CDEST_CITY_MARKET_ID%2CDEST%2CDEST_CITY_NAME%2CDEST_STATE_ABR%2CDEST_STATE_FIPS%2CDEST_STATE_NM%2CDEST_WAC%2CYEAR%2CQUARTER%2CMONTH%2CDISTANCE_GROUP%2CCLASS&grouplist=&suml=&sumRegion=&filter1=title%3D&filter2=title%3D&geo=All%A0&time=All%A0Months&timename=Month&GEOGRAPHY=All&XYEAR={year}&FREQUENCY=All&AllVars=All&VarName=PASSENGERS&VarDesc=Passengers&VarType=Num&VarName=FREIGHT&VarDesc=Freight&VarType=Num&VarName=MAIL&VarDesc=Mail&VarType=Num&VarName=DISTANCE&VarDesc=Distance&VarType=Num&VarName=UNIQUE_CARRIER&VarDesc=UniqueCarrier&VarType=Char&VarName=AIRLINE_ID&VarDesc=AirlineID&VarType=Num&VarName=UNIQUE_CARRIER_NAME&VarDesc=UniqueCarrierName&VarType=Char&VarName=UNIQUE_CARRIER_ENTITY&VarDesc=UniqCarrierEntity&VarType=Char&VarName=REGION&VarDesc=CarrierRegion&VarType=Char&VarName=CARRIER&VarDesc=Carrier&VarType=Char&VarName=CARRIER_NAME&VarDesc=CarrierName&VarType=Char&VarName=CARRIER_GROUP&VarDesc=CarrierGroup&VarType=Num&VarName=CARRIER_GROUP_NEW&VarDesc=CarrierGroupNew&VarType=Num&VarName=ORIGIN_AIRPORT_ID&VarDesc=OriginAirportID&VarType=Num&VarName=ORIGIN_AIRPORT_SEQ_ID&VarDesc=OriginAirportSeqID&VarType=Num&VarName=ORIGIN_CITY_MARKET_ID&VarDesc=OriginCityMarketID&VarType=Num&VarName=ORIGIN&VarDesc=Origin&VarType=Char&VarName=ORIGIN_CITY_NAME&VarDesc=OriginCityName&VarType=Char&VarName=ORIGIN_STATE_ABR&VarDesc=OriginState&VarType=Char&VarName=ORIGIN_STATE_FIPS&VarDesc=OriginStateFips&VarType=Char&VarName=ORIGIN_STATE_NM&VarDesc=OriginStateName&VarType=Char&VarName=ORIGIN_WAC&VarDesc=OriginWac&VarType=Num&VarName=DEST_AIRPORT_ID&VarDesc=DestAirportID&VarType=Num&VarName=DEST_AIRPORT_SEQ_ID&VarDesc=DestAirportSeqID&VarType=Num&VarName=DEST_CITY_MARKET_ID&VarDesc=DestCityMarketID&VarType=Num&VarName=DEST&VarDesc=Dest&VarType=Char&VarName=DEST_CITY_NAME&VarDesc=DestCityName&VarType=Char&VarName=DEST_STATE_ABR&VarDesc=DestState&VarType=Char&VarName=DEST_STATE_FIPS&VarDesc=DestStateFips&VarType=Char&VarName=DEST_STATE_NM&VarDesc=DestStateName&VarType=Char&VarName=DEST_WAC&VarDesc=DestWac&VarType=Num&VarName=YEAR&VarDesc=Year&VarType=Num&VarName=QUARTER&VarDesc=Quarter&VarType=Num&VarName=MONTH&VarDesc=Month&VarType=Num&VarName=DISTANCE_GROUP&VarDesc=DistanceGroup&VarType=Num&VarName=CLASS&VarDesc=Class&VarType=Char"
    # f-string for 'year' selected above
    raw_data_inter = f"UserTableName=T_100_International_Market__All_Carriers&DBShortName=Air_Carriers&RawDataTable=T_T100I_MARKET_ALL_CARRIER&sqlstr=+SELECT+PASSENGERS%2CFREIGHT%2CMAIL%2CDISTANCE%2CUNIQUE_CARRIER%2CAIRLINE_ID%2CUNIQUE_CARRIER_NAME%2CUNIQUE_CARRIER_ENTITY%2CREGION%2CCARRIER%2CCARRIER_NAME%2CCARRIER_GROUP%2CCARRIER_GROUP_NEW%2CORIGIN_AIRPORT_ID%2CORIGIN_AIRPORT_SEQ_ID%2CORIGIN_CITY_MARKET_ID%2CORIGIN%2CORIGIN_CITY_NAME%2CORIGIN_COUNTRY%2CORIGIN_COUNTRY_NAME%2CORIGIN_WAC%2CDEST_AIRPORT_ID%2CDEST_AIRPORT_SEQ_ID%2CDEST_CITY_MARKET_ID%2CDEST%2CDEST_CITY_NAME%2CDEST_COUNTRY%2CDEST_COUNTRY_NAME%2CDEST_WAC%2CYEAR%2CQUARTER%2CMONTH%2CDISTANCE_GROUP%2CCLASS+FROM++T_T100I_MARKET_ALL_CARRIER+WHERE+YEAR%3D{year}&varlist=PASSENGERS%2CFREIGHT%2CMAIL%2CDISTANCE%2CUNIQUE_CARRIER%2CAIRLINE_ID%2CUNIQUE_CARRIER_NAME%2CUNIQUE_CARRIER_ENTITY%2CREGION%2CCARRIER%2CCARRIER_NAME%2CCARRIER_GROUP%2CCARRIER_GROUP_NEW%2CORIGIN_AIRPORT_ID%2CORIGIN_AIRPORT_SEQ_ID%2CORIGIN_CITY_MARKET_ID%2CORIGIN%2CORIGIN_CITY_NAME%2CORIGIN_COUNTRY%2CORIGIN_COUNTRY_NAME%2CORIGIN_WAC%2CDEST_AIRPORT_ID%2CDEST_AIRPORT_SEQ_ID%2CDEST_CITY_MARKET_ID%2CDEST%2CDEST_CITY_NAME%2CDEST_COUNTRY%2CDEST_COUNTRY_NAME%2CDEST_WAC%2CYEAR%2CQUARTER%2CMONTH%2CDISTANCE_GROUP%2CCLASS&grouplist=&suml=&sumRegion=&filter1=title%3D&filter2=title%3D&geo=All%A0&time=All%A0Months&timename=Month&GEOGRAPHY=All&XYEAR={year}&FREQUENCY=All&AllVars=All&VarName=PASSENGERS&VarDesc=Passengers&VarType=Num&VarName=FREIGHT&VarDesc=Freight&VarType=Num&VarName=MAIL&VarDesc=Mail&VarType=Num&VarName=DISTANCE&VarDesc=Distance&VarType=Num&VarName=UNIQUE_CARRIER&VarDesc=UniqueCarrier&VarType=Char&VarName=AIRLINE_ID&VarDesc=AirlineID&VarType=Num&VarName=UNIQUE_CARRIER_NAME&VarDesc=UniqueCarrierName&VarType=Char&VarName=UNIQUE_CARRIER_ENTITY&VarDesc=UniqCarrierEntity&VarType=Char&VarName=REGION&VarDesc=CarrierRegion&VarType=Char&VarName=CARRIER&VarDesc=Carrier&VarType=Char&VarName=CARRIER_NAME&VarDesc=CarrierName&VarType=Char&VarName=CARRIER_GROUP&VarDesc=CarrierGroup&VarType=Num&VarName=CARRIER_GROUP_NEW&VarDesc=CarrierGroupNew&VarType=Num&VarName=ORIGIN_AIRPORT_ID&VarDesc=OriginAirportID&VarType=Num&VarName=ORIGIN_AIRPORT_SEQ_ID&VarDesc=OriginAirportSeqID&VarType=Num&VarName=ORIGIN_CITY_MARKET_ID&VarDesc=OriginCityMarketID&VarType=Num&VarName=ORIGIN&VarDesc=Origin&VarType=Char&VarName=ORIGIN_CITY_NAME&VarDesc=OriginCityName&VarType=Char&VarName=ORIGIN_COUNTRY&VarDesc=OriginCountry&VarType=Char&VarName=ORIGIN_COUNTRY_NAME&VarDesc=OriginCountryName&VarType=Char&VarName=ORIGIN_WAC&VarDesc=OriginWac&VarType=Num&VarName=DEST_AIRPORT_ID&VarDesc=DestAirportID&VarType=Num&VarName=DEST_AIRPORT_SEQ_ID&VarDesc=DestAirportSeqID&VarType=Num&VarName=DEST_CITY_MARKET_ID&VarDesc=DestCityMarketID&VarType=Num&VarName=DEST&VarDesc=Dest&VarType=Char&VarName=DEST_CITY_NAME&VarDesc=DestCityName&VarType=Char&VarName=DEST_COUNTRY&VarDesc=DestCountry&VarType=Char&VarName=DEST_COUNTRY_NAME&VarDesc=DestCountryName&VarType=Char&VarName=DEST_WAC&VarDesc=DestWac&VarType=Num&VarName=YEAR&VarDesc=Year&VarType=Num&VarName=QUARTER&VarDesc=Quarter&VarType=Num&VarName=MONTH&VarDesc=Month&VarType=Num&VarName=DISTANCE_GROUP&VarDesc=DistanceGroup&VarType=Num&VarName=CLASS&VarDesc=Class&VarType=Char"
    # DOMESTIC Get file name of zip file to be downloaded
    url_dom = "https://www.transtats.bts.gov/DownLoad_Table.asp?Table_ID=258&Has_Group=3&Is_Zipped=0"
    response = requests.post(url_dom, headers=headers, data=raw_data_dom)
    # The POST redirects to the generated zip; response.url is that final URL.
    url_dom = response.url
    # International Get file name of zip file to be downloaded
    url_inter = "https://www.transtats.bts.gov/DownLoad_Table.asp?Table_ID=260&Has_Group=3&Is_Zipped=0"
    response = requests.post(url_inter, headers=headers, data=raw_data_inter)
    url_inter = response.url

    # Download and unzip the zip file

    # Files should look like these, but with different starting number string
    # url_dom = "https://transtats.bts.gov/ftproot/TranStatsData/982825318_T_T100D_MARKET_US_CARRIER_ONLY.zip"
    # url_inter = "https://transtats.bts.gov/ftproot/TranStatsData/982825318_T_T100I_MARKET_ALL_CARRIER.zip"

    urls = [url_dom, url_inter]

    files_to_unzip = []
    for url in urls:

        # Local file name: the URL's basename without its ".zip" extension.
        zip_file = url.split("/")[-1].split(".")[0]
        files_to_unzip.append(zip_file)

        # Retry the download; abort the whole process after the second failure.
        downloaded = False
        tries = 0
        while not downloaded:
            try:
                remote = urllib.request.urlopen(url)
                data = remote.read()
                remote.close()
                downloaded = True
            except:
                # NOTE(review): bare except also swallows KeyboardInterrupt /
                # SystemExit -- consider narrowing to urllib.error.URLError.
                print(
                    "WARNING: failed to download air traffic data. Retrying..."
                )
                tries += 1
                time.sleep(60)
            if tries == 2:
                sys.exit(
                    "ERROR: failed to download air traffic data from BTS.gov. Please try again later."
                )

        # NOTE(review): not closed via `with`; the handle leaks if write raises.
        local = open(zip_file, "wb")
        local.write(data)
        local.close()

    # Unzip the downloaded aviation data
    for file_to_unzip in files_to_unzip:

        # specifying the zip file name
        file_name = file_to_unzip

        # opening the zip file in READ mode
        # with ZipFile(file_name, 'r') as zip:
        with zp(file_name, "r") as zip_:
            # printing all the contents of the zip file
            zip_.printdir()

            # extracting all the files
            print("Extracting all the files now...")
            zip_.extractall()
            print("Done!")

    # Domestic Only: Take off and land in US

    dom_fn = urls[0].split("/")[-1].split(".")[0] + ".csv"
    dom_dir = f"{wrkdir}/{dom_fn}"
    # Converter coerces passenger counts like "12.0" into ints at parse time.
    df_domestic = pd.read_csv(
        dom_dir,
        sep=",",
        converters={"PASSENGERS": lambda x: int(float(x))},
        engine="python",
    )

    # Add country name (headers from International data)
    df_domestic["ORIGIN_COUNTRY_NAME"] = "United States"
    df_domestic["ORIGIN_COUNTRY"] = "US"
    df_domestic["DEST_COUNTRY_NAME"] = "United States"
    df_domestic["DEST_COUNTRY"] = "US"

    # delete phantom column:
    df_domestic = df_domestic[df_domestic.columns.drop(
        list(df_domestic.filter(regex="Unnamed")))]

    # add marker for domestic
    df_domestic["dataset"] = "domestic"

    dft = df_domestic.copy()

    # lower case all columns
    col_up = dft.columns

    col_low = [x.lower() for x in col_up]
    dft.columns = [x.lower() for x in dft.columns]
    dft = dft[col_low]

    # Delete zero pax
    dft = dft[dft["passengers"] != 0]

    # Delete rows with < min_pax
    dft = dft[dft["passengers"] >= min_pax]

    # Sort by month
    dft = dft.sort_values("month").reset_index(drop=True)

    # delete Saipan
    dft = dft[dft["origin_state_abr"] != "TT"]
    dft["timestamp"] = dft.month.apply(lambda x: timestamp(x, year))
    # Split to just city name
    dft["origin_city"] = dft.origin_city_name.apply(lambda x: x.split(",")[0])
    dft["dest_city"] = dft.dest_city_name.apply(lambda x: x.split(",")[0])
    # Rename columns for schema:
    dft = dft.rename(
        columns={
            "origin": "origin_airport_code",
            "dest": "dest_airport_code",
            "origin_country_name": "origin_admin0",
            "dest_country_name": "dest_admin0",
            "origin_country": "origin_iso2",
            "dest_country": "dest_iso2",
        })
    df = dft.copy()

    # Add admin1 "Ohio"
    conv_state_fn = f"{wrkdir}/abv_to_state.txt"

    # Lookup table: state abbreviation -> full state name.
    df_conv_state = pd.read_csv(conv_state_fn, sep="\t")
    state_conv_d = df_conv_state.set_index("Code").to_dict()
    state_conv_d = state_conv_d["Description"]

    df["origin_admin1"] = df.origin_state_abr.apply(
        lambda x: state_conv_d.get(x, "None"))
    df["dest_admin1"] = df.dest_state_abr.apply(
        lambda x: state_conv_d.get(x, "None"))

    # ADD COUNTY NAME

    # Read in Airport Facilities Directory data to get county name
    afd_fn = f"{wrkdir}/airportFD.txt"
    df_afd = pd.read_csv(afd_fn, sep="\t")

    # Build county NAME dictionary
    df_afd_county = pd.DataFrame(df_afd, columns=["LocationID", "County"])
    county_dict = df_afd_county.set_index("LocationID").to_dict()
    county_d = county_dict["County"]

    df["origin_admin2"] = df.origin_airport_code.apply(
        lambda x: county_d.get(x, np.NaN))
    df["dest_admin2"] = df.dest_airport_code.apply(
        lambda x: county_d.get(x, np.NaN))

    # Replace #NAME? with NaN (for Puerto Rico)
    df.replace("#NAME?", "None", inplace=True)

    # ADD FIPS CODE

    # Add county FIPS from county NAME
    fips_fn = f"{wrkdir}/county_to_fips.csv"
    # Keep FIPS as strings so leading zeros survive.
    df_fips = pd.read_csv(
        fips_fn,
        sep=",",
        converters={"FIPS County Code": lambda x: str(x)},
        engine="python",
    )

    # Lookup dict
    # Per-state dict: {state: [{county name: FIPS code}]}.
    fips_d = (df_fips.groupby("State").apply(
        lambda x: [dict(zip(x["County Name"], x["FIPS County Code"]))]).
              to_dict())

    # Pack (state, county) pairs into list columns for the fips_it helper.
    df["munger_origin"] = df[["origin_state_abr",
                              "origin_admin2"]].values.tolist()
    df["munger_dest"] = df[["dest_state_abr", "dest_admin2"]].values.tolist()

    df["origin_fips"] = df.munger_origin.apply(lambda x: fips_it(x, fips_d))
    df["dest_fips"] = df.munger_dest.apply(lambda x: fips_it(x, fips_d))

    # Format proper capitalization for County NAMEs
    df["origin_admin2"] = df["origin_admin2"].apply(lambda x: uncap_it(x))
    df["dest_admin2"] = df["dest_admin2"].apply(lambda x: uncap_it(x))

    # Get the columns we need
    df_domestic_final = df.copy()

    keepers = [
        "timestamp",
        "origin_airport_code",
        "origin_city",
        "origin_state_abr",
        "origin_admin2",
        "origin_fips",
        "origin_admin1",
        "origin_admin0",
        "origin_iso2",
        "dest_airport_code",
        "dest_city",
        "dest_state_abr",
        "dest_admin2",
        "dest_fips",
        "dest_admin1",
        "dest_admin0",
        "dest_iso2",
        "distance",
        "passengers",
        "month",
    ]
    df_domestic_final = df[keepers]

    # International: One (and only one) non-US location
    # for fn in os.listdir(wrkdir):
    #    if "T100I_MARKET_ALL_CARRIER.csv" in fn:
    #        file = fn

    inter_fn = urls[1].split("/")[-1].split(".")[0] + ".csv"
    inter_dir = f"{wrkdir}/{inter_fn}"
    df_international = pd.read_csv(
        inter_dir,
        sep=",",
        converters={"PASSENGERS": lambda x: int(float(x))},
        engine="python",
    )

    df = df_international.copy()

    # add marker for international flights
    df["dataset"] = "inter"

    # delete phantom column:
    df = df[df.columns.drop(list(df.filter(regex="Unnamed")))]

    # lower case all columns
    col_up = df.columns
    col_low = [x.lower() for x in col_up]
    df.columns = [x.lower() for x in df.columns]
    df = df[col_low]

    # Delete zero pax
    df = df[df["passengers"] != 0]

    # Delete rows with < min_pax
    df = df[df["passengers"] >= min_pax]

    # Sort by month
    df = df.sort_values("month").reset_index(drop=True)

    df["timestamp"] = df.month.apply(lambda x: timestamp(x, year))

    # Split to just city name
    df["origin_city"] = df.origin_city_name.apply(lambda x: x.split(",")[0])
    df["dest_city"] = df.dest_city_name.apply(lambda x: x.split(",")[0])

    # Rename columns for schema:
    df = df.rename(
        columns={
            "origin": "origin_airport_code",
            "dest": "dest_airport_code",
            "origin_country_name": "origin_admin0",
            "dest_country_name": "dest_admin0",
            "origin_country": "origin_iso2",
            "dest_country": "dest_iso2",
        })

    df["origin_state_abr"] = df.origin_city_name.apply(lambda x: abrv_it(x))
    df["dest_state_abr"] = df.dest_city_name.apply(lambda x: abrv_it(x))

    # Add admin1 (state/province)
    conv_fn = f"{wrkdir}/worldcities.csv"
    df_conv = pd.read_csv(conv_fn, sep=",")
    df_conv["munger"] = df_conv[["iso2", "city_ascii",
                                 "admin_name"]].values.tolist()

    # Per-country dict: {country: [{city: admin1 name}]}.
    conv_d = (df_conv.groupby("country").apply(
        lambda x: [dict(zip(x.city_ascii, x.admin_name))]).to_dict())

    df["origin_admin1_temp"] = df.origin_state_abr.apply(
        lambda x: state_conv_d.get(x, "None"))
    df["dest_admin1_temp"] = df.dest_state_abr.apply(
        lambda x: state_conv_d.get(x, "None"))

    df["origin_munger"] = df[[
        "origin_admin0", "origin_city", "origin_admin1_temp"
    ]].values.tolist()
    df["dest_munger"] = df[["dest_admin0", "dest_city",
                            "dest_admin1_temp"]].values.tolist()

    df["origin_admin1"] = df.origin_munger.apply(
        lambda x: admin1_origin(x, conv_d))
    df["dest_admin1"] = df.dest_munger.apply(
        lambda x: admin1_origin(x, conv_d))

    df["admin2org"] = df[["origin_airport_code",
                          "origin_admin0"]].values.tolist()
    df["admin2dest"] = df[["dest_airport_code", "dest_admin0"]].values.tolist()

    df["origin_admin2"] = df.admin2org.apply(lambda x: admin2_it(x, county_d))
    df["dest_admin2"] = df.admin2dest.apply(lambda x: admin2_it(x, county_d))

    # Get fips for US airports

    df["munger_origin_fips"] = df[["origin_state_abr",
                                   "origin_admin2"]].values.tolist()
    df["munger_dest_fips"] = df[["dest_state_abr",
                                 "dest_admin2"]].values.tolist()

    df["origin_fips"] = df.munger_origin_fips.apply(
        lambda x: fips_it(x, fips_d))
    df["dest_fips"] = df.munger_dest_fips.apply(lambda x: fips_it(x, fips_d))

    # Format proper capitalization for County NAMEs
    df["origin_admin2"] = df["origin_admin2"].apply(lambda x: uncap_it(x))
    df["dest_admin2"] = df["dest_admin2"].apply(lambda x: uncap_it(x))

    # Get the columns we need
    df_inter_final = df.copy()

    # NOTE: rebinding `keepers` here; the later df_agg[keepers] slice below
    # therefore uses this (international) column list, not the domestic one.
    keepers = [
        "timestamp",
        "origin_airport_code",
        "origin_city",
        "origin_fips",
        "origin_admin1",
        "origin_admin0",
        "dest_airport_code",
        "dest_city",
        "dest_fips",
        "dest_admin1",
        "dest_admin0",
        "distance",
        "passengers",
        "month",
    ]
    df_inter_final = df_inter_final[keepers]

    # Combine dataframes
    df_final = pd.concat([df_domestic_final, df_inter_final],
                         ignore_index=True)

    df_final["distance"] = df_final.distance.apply(lambda x: int(x))

    # Aggregate passengers per (origin, dest, timestamp) route, then re-attach
    # the descriptive columns from a deduplicated copy.
    cols = list(df_final.columns)
    cols.remove("passengers")
    grpr = ['origin_airport_code', 'dest_airport_code', 'timestamp']
    df_agg_ = df_final.groupby(grpr, as_index=False)[["passengers"]].sum()
    df_final_dedupe = df_final[cols].drop_duplicates()
    df_agg = pd.merge(
        df_agg_,
        df_final_dedupe,
        how='left',
        left_on=['origin_airport_code', 'dest_airport_code', 'timestamp'],
        right_on=['origin_airport_code', 'dest_airport_code', 'timestamp'])

    # Final cleanup
    df_agg.replace("#name?", np.nan, inplace=True)
    df_agg.replace("None", np.nan, inplace=True)

    df_agg = df_agg.sort_values("month").reset_index(drop=True)

    # Optionally filter by destination state ('-' means no filter).
    if state != '-':
        df_agg = df_agg[df_agg['dest_admin1'] == state_conv_d[state]]

    df_agg = df_agg[keepers]
    del df_agg['month']

    df_agg = df_agg.rename(
        columns={
            "timestamp": "ts",
            "origin_airport_code": "origin_code",
            "dest_airport_code": "dest_code",
        })

    # Remove the downloaded archives / extracted files from the workdir.
    cleanup(wrkdir, files_to_unzip)
    return df_agg
예제 #6
0
# Bug fix: the original line `from zipfile import as ZipFile as zp` was a
# SyntaxError (stray `as`).
from zipfile import ZipFile as zp

file_path = ""  # Define the path of the zipfile over here

with zp(file_path, "r") as zip_:
    zip_.printdir()    # to print all the content
    zip_.extractall()  # to extract the zip file


예제 #7
0
def extract(path_to_zip, pswd, dest="01-07-2021/zipFiles"):
    """Extract a (possibly password-protected) zip archive.

    path_to_zip: path to the zip archive to read.
    pswd: password string; encoded to bytes as ZipFile requires.
    dest: destination directory. Generalized from the previously hard-coded
        path, which is kept as the default for backward compatibility.
    """
    with zp(path_to_zip, "r") as zipObj:
        zipObj.extractall(dest, pwd=pswd.encode())
예제 #8
0
def checkZipInfo(path_to_zip):
    """Return the list of ZipInfo records for every member of the archive."""
    archive = zp(path_to_zip, "r")
    with archive:
        return archive.infolist()