def FAOFBS(data_dir=r'C:\Users\Public\Pythonfiles'):
    """Pivot the FAO Food Balance Sheets into one summed column per series.

    Reads ``FoodBalanceSheets_E_All_Data_(Normalized).csv`` and
    ``Aggregation for crop type.csv`` from ``data_dir``, left-joins them on
    ``Item Code`` == ``Code no``, labels every row with the concatenation
    ``Code Name + Element`` (stored as ``Series_Name``), discards rows with
    any missing value and sums ``Value`` per (Area, Year, Series_Name).

    Args:
        data_dir: Directory that contains the two CSV input files.

    Returns:
        A pandas pivot table indexed by (Area, Year) whose columns are keyed
        by aggregation name (``sum``), value column (``Value``) and
        ``Series_Name``.
    """
    import os

    import dask.dataframe as dd
    import pandas as pd

    balance_path = os.path.join(
        data_dir, 'FoodBalanceSheets_E_All_Data_(Normalized).csv')
    crop_path = os.path.join(data_dir, 'Aggregation for crop type.csv')

    data = dd.read_csv(balance_path, encoding="ISO-8859-1")
    concord_table = dd.read_csv(crop_path)

    # Left join keeps every balance-sheet row; items without a crop-type
    # match get NaN in 'Code Name' and are removed by the dropna below.
    data = dd.merge(data,
                    concord_table,
                    how="left",
                    left_on="Item Code",
                    right_on='Code no')

    data['Series_Name'] = data['Code Name'] + data['Element']

    # Keep only the columns the pivot needs (plus whatever else the
    # concordance table contributes).
    data = data.drop([
        'Area Code', 'Item Code', 'Flag', 'Unit', 'Year Code', 'Element',
        'Element Code', 'Code Name', 'Item', 'Code no'
    ],
                     axis=1)
    print(data.head())
    data = data.dropna(how='any')
    print(data.head())

    # Materialise the lazy dask frame; pivot_table needs a pandas DataFrame.
    datapanda = data.compute()

    # The string "sum" selects the same aggregation as np.sum and yields the
    # same 'sum' column label, without the pandas 2.x callable deprecation.
    return pd.pivot_table(datapanda,
                          index=["Area", 'Year'],
                          values=['Value'],
                          columns=["Series_Name"],
                          aggfunc=['sum'])
def IMFGFSRevenueData(data_dir=r'C:\Users\Public\Pythonfiles'):
    """Pivot IMF GFS revenue data (percent of GDP) into one column per series.

    Reads ``GFSRevenue.csv`` and ``CountryConcordanceIMF.xlsx`` from
    ``data_dir``, left-joins them on ``Country Name``, keeps only rows whose
    ``Unit Name`` is ``Percent of GDP`` and sums ``Value`` per
    (Country name in IFs, Unit Name, Time Period) for every ``FuncSector``,
    where ``FuncSector`` is ``Sector Name + Classification Name``.

    Args:
        data_dir: Directory that contains the CSV and Excel input files.

    Returns:
        A pandas pivot table indexed by (Country name in IFs, Unit Name,
        Time Period) whose columns are keyed by aggregation name (``sum``),
        value column (``Value``) and ``FuncSector``.
    """
    import os

    import dask.dataframe as dd
    import pandas as pd

    data = dd.read_csv(os.path.join(data_dir, 'GFSRevenue.csv'))

    data['FuncSector'] = data['Sector Name'] + data['Classification Name']

    concord_table = pd.read_excel(
        os.path.join(data_dir, 'CountryConcordanceIMF.xlsx'))

    data = data.merge(concord_table, on="Country Name", how='left')
    data = data.loc[data['Unit Name'] == 'Percent of GDP']
    print(data.head())
    data = data.drop([
        'Country Code', 'Country Name', 'Classification Code', 'Sector Code',
        'Unit Code', 'Status', 'Valuation', 'Bases of recording (Gross/Net)',
        'Nature of data'
    ],
                     axis=1)

    # Materialise the lazy dask frame; pivot_table needs a pandas DataFrame.
    # No reset_index is required: the pivot only reads the named key and
    # value columns, never the row index.
    data = data.compute()

    # The string "sum" selects the same aggregation as np.sum and yields the
    # same 'sum' column label, without the pandas 2.x callable deprecation.
    return pd.pivot_table(
        data,
        index=["Country name in IFs", "Unit Name", 'Time Period'],
        values=['Value'],
        columns=['FuncSector'],
        aggfunc=['sum'])
Пример #3
0
 def preProcessing(self, fileName):
     """Load the leading rows of a CSV file and save them as a smaller CSV.

     The file is opened lazily with dask using ``self.dtypes`` as the column
     dtypes and treating empty strings as missing values. Up to 10,000,000
     leading rows are materialised, written to ``output_frame.csv`` in the
     current working directory, and returned.

     Args:
         fileName: Path of the CSV file to read.

     Returns:
         The materialised leading rows (the object returned by dask's
         ``head``), at most 10,000,000 of them.
     """
     # remove too much data for now.
     print("procssinfg the file: {}".format(fileName))
     import dask.dataframe as dd
     data = dd.read_csv(fileName, dtype=self.dtypes, na_values=[''])
     # dask's head() only inspects the first partition by default and returns
     # fewer rows (with a warning) when that partition is too small;
     # npartitions=-1 lets it draw from every partition until the limit is met.
     topRows = data.head(10000000, npartitions=-1)
     print("got the first milllion")
     outputfile = ".\\output_frame.csv"
     topRows.to_csv(outputfile)
     print("dfone")
     return topRows
Пример #4
0
import dask as ds
import dask.dataframe as dd

# Lazily read the pipe-delimited file, keeping columns 1-5 and parsing
# column 1 as dates. read_csv is provided by dask.dataframe; the top-level
# dask package has no read_csv attribute.
# NOTE(review): infer_datetime_format is forwarded to pandas, where it is
# deprecated from 2.0 on — confirm the pinned pandas version before removing.
df = dd.read_csv('update_m.txt',
                 sep='|',
                 usecols=[1, 2, 3, 4, 5],
                 parse_dates=[1],
                 infer_datetime_format=True)