def FAOFBS():
    """Load the FAO Food Balance Sheets, attach crop-type aggregates and return a country/year pivot table."""
    import numpy as np
    import pandas as pd
    import dask.dataframe as dd

    data = dd.read_csv(
        r'C:\Users\Public\Pythonfiles\FoodBalanceSheets_E_All_Data_(Normalized).csv',
        encoding="ISO-8859-1")
    # Combined element/item code (kept for parity with the original logic; dropped below).
    data['Code'] = data['Element Code'] + data['Item Code']

    # Map FAO item codes onto crop-type aggregates.
    concord_table = dd.read_csv(
        r'C:\Users\Public\Pythonfiles\Aggregation for crop type.csv')
    data = dd.merge(data, concord_table, how="left",
                    left_on="Item Code", right_on="Code no")
    data['Series_Name'] = data['Code Name'] + data['Element']

    # Series concordance is loaded for reference but not used yet.
    series_concord_table = dd.read_csv(
        r'C:\Users\Public\Pythonfiles\FAOSeriesConcordance.csv')

    data = data.drop([
        'Area Code', 'Item Code', 'Flag', 'Unit', 'Year Code', 'Element',
        'Element Code', 'Code', 'Code Name', 'Item', 'Code no'
    ], axis=1)
    print(data.head())
    data = data.dropna(how='any')
    print(data.head())

    # Materialise the dask frame before pivoting with pandas.
    datapanda = data.compute()
    p = pd.pivot_table(datapanda, index=["Area", "Year"], values=["Value"],
                       columns=["Series_Name"], aggfunc=[np.sum])
    return p
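# A small export sketch (not part of the original module): the pivot returned
# by FAOFBS() is an ordinary pandas DataFrame, so it can be written straight
# to Excel. The output path and sheet name below are assumptions used only
# for illustration.
def exportFAOFBS(outputPath=r'C:\Users\Public\Pythonfiles\FAOFBS_pivot.xlsx'):
    p = FAOFBS()
    p.to_excel(outputPath, sheet_name='FAOFBS')
    return outputPath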
def IMFGFSRevenueData():
    """Load IMF GFS revenue data, keep the percent-of-GDP series and return a country/period pivot table."""
    import numpy as np
    import pandas as pd
    import dask.dataframe as dd

    data = dd.read_csv(r'C:\Users\Public\Pythonfiles\GFSRevenue.csv')
    # Combine sector and classification names into a single series label.
    data['FuncSector'] = data['Sector Name'] + data['Classification Name']

    # Map IMF country names onto IFs country names.
    concord_table = pd.read_excel(
        r'C:\Users\Public\Pythonfiles\CountryConcordanceIMF.xlsx')
    data = data.merge(concord_table, on="Country Name", how='left')

    # Keep only the percent-of-GDP observations.
    data = data.loc[data['Unit Name'] == 'Percent of GDP']
    print(data.head())

    data = data.drop([
        'Country Code', 'Country Name', 'Classification Code', 'Sector Code',
        'Unit Code', 'Status', 'Valuation', 'Bases of recording (Gross/Net)',
        'Nature of data'
    ], axis=1)

    # Materialise the dask frame before pivoting with pandas.
    data = data.compute()
    data = data.reset_index()
    p = pd.pivot_table(
        data, index=["Country name in IFs", "Unit Name", 'Time Period'],
        values=['Value'], columns=['FuncSector'], aggfunc=[np.sum])
    return p
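# Design note (an assumption about intent): dask's own pivot_table is limited
# (single index column, categorical 'columns'), so both loaders above
# materialise the dask frame with .compute() and then build the pivot with
# pandas. A usage sketch, with an assumed output path:
#
#   revenue = IMFGFSRevenueData()
#   print(revenue.shape)
#   revenue.to_excel(r'C:\Users\Public\Pythonfiles\GFSRevenue_pivot.xlsx')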
def preProcessing(self, fileName):
    """Trim the raw file down to a manageable sample for now."""
    import dask.dataframe as dd

    print("processing the file: {}".format(fileName))
    data = dd.read_csv(fileName, dtype=self.dtypes, na_values=[''])

    # Keep only the first 10,000,000 rows for now; the full file is too large.
    topRows = data.head(10000000)
    print("got the first 10,000,000 rows")

    outputfile = ".\\output_frame.csv"
    topRows.to_csv(outputfile)
    print("done")
    return topRows
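# preProcessing above is written as a method and reads a dtype map from
# self.dtypes. A minimal stand-alone wrapper (an assumption, not from the
# original code) that supplies that attribute:
from types import SimpleNamespace

def runPreProcessing(fileName, dtypes=None):
    # dtypes is forwarded to dask.dataframe.read_csv; None lets dask infer
    # column types on its own.
    owner = SimpleNamespace(dtypes=dtypes)
    return preProcessing(owner, fileName)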
import dask.dataframe as dd

# read_csv lives in dask.dataframe, not in the top-level dask package.
df = dd.read_csv('update_m.txt', sep='|', usecols=[1, 2, 3, 4, 5],
                 parse_dates=[1], infer_datetime_format=True)
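# dask's read_csv is lazy: the keyword arguments above (sep, usecols,
# parse_dates, infer_datetime_format) are forwarded to pandas.read_csv, and no
# rows are read until a result is materialised. A hedged example, assuming
# update_m.txt exists alongside this script:
if __name__ == "__main__":
    print(df.head())   # first rows, returned eagerly as a pandas DataFrame
    print(len(df))     # forces a full pass over the file to count rows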