Пример #1
0
from datetime import date
import pandas as pd
import os

from xls_read import read_sheet, read_by_definition, yearmon
from regions import Regions

# Module-level shortcuts to the project's ``Regions`` helper.
# ``filter_region_name`` is a plain alias of the attribute (nothing is called
# here); the four names after it hold the results of calls that are evaluated
# once, when this module is imported.
filter_region_name = Regions.filter_region_name
# Read by get_dataframe() to select the columns of the pivoted frame.
reference_region_names = Regions.names()
# NOTE(review): presumably the label for the country-level total -- confirm
# against the Regions class.
rf_name = Regions.rf_name()
district_names = Regions.district_names()
summable_regions = Regions.summable_regions()

def get_dataframe(datapoints_stream):
    """Build a wide, date-indexed dataframe from a stream of datapoints.

    Every datapoint is read positionally: item 0 is the value, item 1 the
    region and item 2 the date.  The resulting long table is pivoted so that
    dates become the index and regions become the columns.  Columns are then
    selected, in order, by the module-level ``reference_region_names`` and
    the index is converted to a ``pd.DatetimeIndex``.
    """
    records = []
    for point in datapoints_stream:
        records.append({'val': point[0], 'region': point[1], 'dates': point[2]})

    long_frame = pd.DataFrame(records)
    wide_frame = long_frame.pivot(index='dates', columns='region', values='val')

    # Keep only the reference regions, in the reference order.
    result = wide_frame[reference_region_names]
    result.index = pd.DatetimeIndex(result.index)
    return result

def get_dataframe_by_definition(def_dict):
    """Return dataframe corresponding to definition dict."""   
    file_path = os.path.join(def_dict['folder'], def_dict['filename'])
    if 'anchor' in def_dict.keys():
        gen = read_sheet(file_path, def_dict['sheet'], def_dict['anchor']) 
    else:
        gen = read_sheet(file_path, def_dict['sheet'])
    try:
       df = get_dataframe(gen)[Regions.names()]
    except:
Пример #2
0
from datetime import date
import pandas as pd
import os

from xls_read import read_sheet, read_by_definition, yearmon
from regions import Regions

# Module-level shortcuts to the project's ``Regions`` helper.
# ``filter_region_name`` is a plain alias of the attribute (nothing is called
# here); the four names after it hold the results of calls that are evaluated
# once, when this module is imported.
filter_region_name = Regions.filter_region_name
# Read by get_dataframe() to select the columns of the pivoted frame.
reference_region_names = Regions.names()
# NOTE(review): presumably the label for the country-level total -- confirm
# against the Regions class.
rf_name = Regions.rf_name()
district_names = Regions.district_names()
summable_regions = Regions.summable_regions()


def get_dataframe(datapoints_stream):
    """Convert a stream of datapoints into a region-by-date dataframe.

    Each item of *datapoints_stream* is indexed positionally as
    ``(value, region, date)``.  The rows are pivoted into a table with dates
    on the index and one column per region; the columns are then restricted
    to, and ordered by, the module-level ``reference_region_names``.  The
    index of the returned frame is a ``pd.DatetimeIndex``.
    """
    rows = [
        dict(val=item[0], region=item[1], dates=item[2])
        for item in datapoints_stream
    ]
    pivoted = pd.DataFrame(rows).pivot(
        index='dates', columns='region', values='val')

    # Column selection by list yields a new frame, so re-indexing it below
    # does not touch ``pivoted``.
    selected = pivoted[reference_region_names]
    selected.index = pd.DatetimeIndex(selected.index)
    return selected


def get_dataframe_by_definition(def_dict):
    """Return dataframe corresponding to definition dict."""
    file_path = os.path.join(def_dict['folder'], def_dict['filename'])
 dfs = import_xl_data()
 
 # this import is faster, but series is alphabetic
 # dfs = import_csv_data()
 
 # output 1: save all dataframes to xls by sheet - one df per sheet    
 if '1' in jobs:
     to_xl_book(dfs, tag = 'by_sheet')    
 
 # output 2: concat all dataframes to one xls sheet
 if '2' in jobs:
     r = pd.concat(dfs)
     to_xl_sheet(r, tag = 'one_page', sheet = "regions")
 
 # output 3: make Russia file (1 sheet)    
 rf = Regions.rf_name()
 if '3' in jobs:               
     # note: must have pandas 17 or higher for 'rename'
     df_rf = pd.concat([d[rf].rename(d['varname'][0]) for d in dfs], axis = 1)
     to_xl_sheet(df_rf, tag = 'rf', sheet = 'rf')          
     
 # output 4: make fed districts file (num_var sheets)
 if '4' in jobs:
     cols = ['varname'] + [Regions.rf_name()] + Regions.district_names() 
     dfs2 = [d.reindex(columns=cols) for d in dfs]
     to_xl_book(dfs2, tag = 'districts')        
 
 # output 5: make regions only file (num_var sheets)  
 if '5' in jobs:    
     cols = ['varname'] + [Regions.rf_name()] + Regions.summable_regions()
     dfs3 = [d.reindex(columns=cols) for d in dfs] 
Пример #4
0
    dfs = import_xl_data()

    # this import is faster, but series is alphabetic
    # dfs = import_csv_data()

    # output 1: save all dataframes to xls by sheet - one df per sheet
    if '1' in jobs:
        to_xl_book(dfs, tag='by_sheet')

    # output 2: concat all dataframes to one xls sheet
    if '2' in jobs:
        r = pd.concat(dfs)
        to_xl_sheet(r, tag='one_page', sheet="regions")

    # output 3: make Russia file (1 sheet)
    rf = Regions.rf_name()
    if '3' in jobs:
        # note: must have pandas 17 or higher for 'rename'
        df_rf = pd.concat([d[rf].rename(d['varname'][0]) for d in dfs], axis=1)
        to_xl_sheet(df_rf, tag='rf', sheet='rf')

    # output 4: make fed districts file (num_var sheets)
    if '4' in jobs:
        cols = ['varname'] + [Regions.rf_name()] + Regions.district_names()
        dfs2 = [d.reindex(columns=cols) for d in dfs]
        to_xl_book(dfs2, tag='districts')

    # output 5: make regions only file (num_var sheets)
    if '5' in jobs:
        cols = ['varname'] + [Regions.rf_name()] + Regions.summable_regions()
        dfs3 = [d.reindex(columns=cols) for d in dfs]