def final_check(year=2006):
    """Print which expected variables are missing from the 2006 test survey.

    Opens ``test.h5`` and ``survey.h5`` under ``DATA_SOURCES_DIR``, loads the
    ``survey_2006`` table from each, prints the expected variable names that
    are absent from the test table's columns, then prints the identifier
    columns of the rows whose ``idfoy`` is 603018901.

    :param year: currently unused; the table key is hard-coded to
        ``survey_2006``.
    """
    test_filename = os.path.join(DATA_SOURCES_DIR, "test.h5")
    survey_filename = os.path.join(DATA_SOURCES_DIR, "survey.h5")

    # Close both stores even if a table is missing or unreadable.
    store = HDFStore(test_filename)
    try:
        survey = HDFStore(survey_filename)
        try:
            final2 = store.get('survey_2006')
            print(survey)
            # Not used below; kept so that a missing table in survey.h5
            # still raises here as it did before.
            finalT = survey.get('survey_2006')
        finally:
            survey.close()
    finally:
        store.close()

    varlist = [
        'adeben',
        'adfdap',
        'amois',
        'ancchom',
        'ancentr',
        'anciatm',
        'ancrech',
        'anref',
        'contra',
        'datant',
        'dimtyp',
        'ident',
        'idfoy',  # a missing comma here used to fuse this with 'noi'
        'noi',
        'nondic',
        'rabs',
        'RABSP',
        'RAISTP',
        'raistp',
        'rdem',
        'retrai',
        'sitant',
        'sp10',
        'sp11',
        'stc',
        'TXTPPB',
        ]
    # sp00 .. sp09
    varlist.extend('sp0' + str(i) for i in range(0, 10))

    missing = set(varlist).difference(set(final2.columns))
    print(missing)
    print(final2.loc[
        final2.idfoy == 603018901,
        ['idfoy', 'quifoy', 'idfam', 'quifam', 'idmen', 'quimen', 'noi']
        ].to_string())

    return
def final_check(year=2006):
    """Report expected variables that the 2006 test survey table lacks.

    Loads ``survey_2006`` from ``test.h5`` and from ``survey.h5`` (both under
    ``DATA_SOURCES_DIR``), prints the set of expected variable names that are
    not columns of the test table, and prints the identifier columns for the
    rows whose ``idfoy`` equals 603018901.

    :param year: currently unused; the table key is hard-coded to
        ``survey_2006``.
    """
    test_filename = os.path.join(DATA_SOURCES_DIR, "test.h5")
    survey_filename = os.path.join(DATA_SOURCES_DIR, "survey.h5")

    store = HDFStore(test_filename)
    try:
        survey = HDFStore(survey_filename)
        try:
            final2 = store.get('survey_2006')
            print(survey)
            # Loaded but not used below; a missing table still raises here.
            finalT = survey.get('survey_2006')
        finally:
            survey.close()
    finally:
        # The stores used to stay open for the life of the process.
        store.close()

    varlist = ['anref', 'sitant', 'adeben', 'stc', 'retrai', 'contra', 'datant', 'rabs', 'nondic', 'TXTPPB',
               'ancrech', 'RAISTP', 'amois', 'adfdap', 'ancentr', 'anciatm', 'ancchom', 'ident', 'noi', 'dimtyp',
               'RABSP', 'raistp', 'rdem', 'sp10', 'sp11', 'idfoy']
    # sp00 .. sp09
    varlist += ['sp0' + str(i) for i in range(0, 10)]

    expected = set(varlist)
    columns = set(final2.columns)

    print(expected.difference(columns))
    print(final2.loc[final2.idfoy == 603018901,
                     ['idfoy', 'quifoy', 'idfam', 'quifam', 'idmen', 'quimen', 'noi']].to_string())

    return
Exemplo n.º 3
0
def test():
    '''
    Validate check_consistency

    Loads the 'test12' and 'test_simu' tables from fichiertest.h5 (located in
    <SRC_PATH>/countries/france/data) and prints the result of
    check_consistency(test_simu, datatable).
    '''
    # Same location the original reached via dirname(join(..., 'data', 'erf')).
    filename = os.path.join(SRC_PATH, 'countries', 'france', 'data', 'fichiertest.h5')
    store = HDFStore(filename)
    try:
        datatable = store.get('test12')
        test_simu = store.get('test_simu')
    finally:
        # Release the file handle even when a table is missing.
        store.close()
    print(check_consistency(test_simu, datatable))
Exemplo n.º 4
0
def load_df(path, default=None):
    """Load the 'logs' table from the HDF5 store at *path*.

    Prints the keys found in the store before reading. Returns *default* when
    the store cannot be opened or the table cannot be read. The store is
    always closed once it has been opened.
    """
    try:
        store = HDFStore(path)
    except Exception:
        return default
    try:
        print(store.keys())
        return store.get('logs')
    except Exception:
        # Best-effort loader: any read failure yields the caller's default.
        # KeyboardInterrupt / SystemExit are deliberately not swallowed.
        return default
    finally:
        store.close()
Exemplo n.º 5
0
def build_comparison():
    """Print the relative difference between the 'openfisca' and 'insee' tables.

    Both tables are read from the HDF5 file ``H5_FILENAME`` located next to
    this module. The row labelled 0 is dropped from the openfisca table and
    its index is reset (moving the old index into a column) before comparing.
    Prints both tables, the relative difference of the column sums, and the
    element-wise relative difference.
    """
    directory = os.path.dirname(__file__)
    fname = os.path.join(directory, H5_FILENAME)
    store = HDFStore(fname)
    try:
        openfisca = store.get("openfisca")
        insee = store.get("insee")
    finally:
        # The tables are in memory now; do not keep the file open.
        store.close()

    print(openfisca)
    print(insee)
    print(openfisca.head())
    openfisca.drop(0, axis=0, inplace=True)
    openfisca.reset_index(inplace=True)
    print((openfisca.sum() - insee.sum()) / insee.sum())

    df = (openfisca - insee) / insee
    print(df)
    print(df.to_string())
Exemplo n.º 6
0
Arquivo: hdf.py Projeto: Itay4/pandas
class HDFStoreDataFrame(BaseIO):
    """Timing benchmarks for DataFrame reads, writes and queries on an HDFStore."""

    def setup(self):
        """Create the sample frames and pre-populate the store used by the timers."""
        n_rows = 25000
        string_index = tm.makeStringIndex(n_rows)

        self.df = DataFrame(
            {'float1': np.random.randn(n_rows),
             'float2': np.random.randn(n_rows)},
            index=string_index)
        self.df_mixed = DataFrame(
            {'float1': np.random.randn(n_rows),
             'float2': np.random.randn(n_rows),
             'string1': ['foo'] * n_rows,
             'bool1': [True] * n_rows,
             'int1': np.random.randint(0, n_rows, size=n_rows)},
            index=string_index)

        self.df_wide = DataFrame(np.random.randn(n_rows, 100))
        wide_index = self.df_wide.index
        self.start_wide = wide_index[10000]
        self.stop_wide = wide_index[15000]

        self.df2 = DataFrame(
            {'float1': np.random.randn(n_rows),
             'float2': np.random.randn(n_rows)},
            index=date_range('1/1/2000', periods=n_rows))
        dated_index = self.df2.index
        self.start = dated_index[10000]
        self.stop = dated_index[15000]

        self.df_wide2 = DataFrame(
            np.random.randn(n_rows, 100),
            index=date_range('1/1/2000', periods=n_rows))
        self.df_dc = DataFrame(
            np.random.randn(n_rows, 10),
            columns=['C%03d' % col for col in range(10)])

        self.fname = '__test__.h5'
        self.store = HDFStore(self.fname)

        # put() writes the 'fixed*' keys, append() the 'table*' keys.
        for key, frame in (('fixed', self.df),
                           ('fixed_mixed', self.df_mixed)):
            self.store.put(key, frame)
        for key, frame in (('table', self.df2),
                           ('table_mixed', self.df_mixed),
                           ('table_wide', self.df_wide),
                           ('table_wide2', self.df_wide2)):
            self.store.append(key, frame)

    def teardown(self):
        """Close the store and remove its backing file."""
        self.store.close()
        self.remove(self.fname)

    # -- reads ---------------------------------------------------------------

    def time_read_store(self):
        self.store.get('fixed')

    def time_read_store_mixed(self):
        self.store.get('fixed_mixed')

    def time_read_store_table(self):
        self.store.select('table')

    def time_read_store_table_mixed(self):
        self.store.select('table_mixed')

    def time_read_store_table_wide(self):
        self.store.select('table_wide')

    # -- writes --------------------------------------------------------------

    def time_write_store(self):
        self.store.put('fixed_write', self.df)

    def time_write_store_mixed(self):
        self.store.put('fixed_mixed_write', self.df_mixed)

    def time_write_store_table(self):
        self.store.append('table_write', self.df)

    def time_write_store_table_mixed(self):
        self.store.append('table_mixed_write', self.df_mixed)

    def time_write_store_table_wide(self):
        self.store.append('table_wide_write', self.df_wide)

    def time_write_store_table_dc(self):
        self.store.append('table_dc_write', self.df_dc, data_columns=True)

    # -- queries -------------------------------------------------------------

    def time_query_store_table_wide(self):
        self.store.select(
            'table_wide',
            where="index > self.start_wide and index < self.stop_wide")

    def time_query_store_table(self):
        self.store.select(
            'table',
            where="index > self.start and index < self.stop")

    # -- store introspection -------------------------------------------------

    def time_store_repr(self):
        repr(self.store)

    def time_store_str(self):
        str(self.store)

    def time_store_info(self):
        self.store.info()
# Columns used by every duration-vs-penetration scatter plot.
_DURATION_COLUMNS = ['tIslDur', 'PrcntPen']


def _split_by_penetration(frame):
    """Return (near, away): rows with 0.95 < PrcntPen < 1.05, and rows outside that band."""
    penetration = frame['PrcntPen']
    near = frame[(penetration > 0.95) & (penetration < 1.05)]
    away = frame[(penetration < 0.95) | (penetration > 1.05)]
    return near, away


def _plot_duration_group(ax, frame, fmt, label, near_first=True):
    """Scatter island duration against penetration for one group of tests on *ax*."""
    near, away = _split_by_penetration(frame)
    # Points in the near-unity band are shifted so their mean penetration is
    # 1.0; they carry an empty label so only one legend entry is given per group.
    series = [
        (near['PrcntPen'] - near['PrcntPen'].mean() + 1.0, near['tIslDur'], ''),
        (away['PrcntPen'], away['tIslDur'], label),
    ]
    if not near_first:
        series.reverse()
    for x_values, y_values, text in series:
        ax.plot(x_values, y_values, fmt, markersize=4,
                markeredgecolor='none', label=text)


def _decorate_duration_axes(ax, ylim=None):
    """Apply the grid, axis labels, optional y-limits and small-font legend."""
    ax.grid(True, which='both')
    ax.set_xlabel('PV Penetration (pu)')
    ax.set_ylabel('Island Duration (sec)')
    if ylim is not None:
        ax.set_ylim(ylim)
    legend = ax.legend(loc='upper left', shadow=False)
    for text in legend.get_texts():
        text.set_fontsize('small')


def PlotTestLogsAll(myfile):
    """ Reads the specified TestLogsAll.HDF5 file and calculates and plots relationships.

    Input: File name with the results, e.g.: "aLabView2\\TestLogsAll.h5"

    Output: Plots in a PDF next to the input file: the input name with its
            ".h5" suffix replaced by ".pdf" (or ".pdf" appended when the name
            does not end in ".h5").
    """
    import numpy as np  # python numerical library
    import matplotlib.pyplot as plt
    import matplotlib.backends.backend_pdf as dpdf
    from pandas import HDFStore

    CONFIG = {'PlotIslDurHist': True,   # Island durations histogram
              'PlotCorDur2Pen': True,   # Correlation of duration to penetration
              'PlotCorDur2fStd': True,  # Not read below: no plot implemented
              }

    # Replace only the trailing extension; dots elsewhere in the path
    # (e.g. "run.v2/TestLogsAll.h5") must survive.
    if myfile.endswith(".h5"):
        mypdffile = myfile[:-len(".h5")] + '.pdf'
    else:
        mypdffile = myfile + '.pdf'

    print("Opening: " + myfile)
    h5store = HDFStore(myfile)
    try:
        TestLog = h5store.get('TestLogsAll')
    finally:
        # The table is fully loaded; release the file right away.
        h5store.close()

    print("Opening: " + mypdffile)
    pltPdf = dpdf.PdfPages(mypdffile)

    def new_axes(title):
        """Start a new single-axes 8x6 figure with the given title."""
        fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(8, 6))
        ax.set_title(title)
        return ax

    def finish_page():
        """Append the current figure to the PDF and free it."""
        pltPdf.savefig()
        plt.close()

    def histogram_page(values, title, xlabel):
        """Write one 20-bin histogram page for *values*."""
        ax = new_axes(title)
        values.plot(kind='hist', bins=20, ax=ax, alpha=0.5)
        ax.grid(True, which='both')
        ax.set_xlabel(xlabel)
        ax.set_ylabel('Number of observations')
        finish_page()

    def seasonal_page(title, band):
        """Write one summer-vs-winter scatter page for the PF band *band*."""
        ax = new_axes(title)
        summer = df1s[_DURATION_COLUMNS][band(df1s)]
        winter = df1w[_DURATION_COLUMNS][band(df1w)]
        print("Summer: " + str(summer['tIslDur'].count()))
        print("Winter: " + str(winter['tIslDur'].count()))
        _plot_duration_group(ax, winter, 'bo', 'winter')
        _plot_duration_group(ax, summer, 'ro', 'summer')
        _decorate_duration_axes(ax, ylim=[0, 0.6])
        finish_page()

    try:
        islanded = TestLog['tIslDur'] > 0.
        normal = TestLog['NrmlFlg'] == 'y'
        is_motor = TestLog['FileName'] == 'TestLogMotorBr.xlsx'
        not_motor = TestLog['FileName'] != 'TestLogMotorBr.xlsx'

        # Filtering TestLog into df1. copy() makes df1 independent of TestLog
        # so the column assignments below are well defined.
        df1 = TestLog[islanded & normal & not_motor &
                      (TestLog['FileName'] != 'TestLogSummer01.xlsx')].copy()
        # Adding details to df1: QCload0 is QCload with negatives set to 0.
        df1['QCload0'] = df1['QCload']
        df1.loc[df1['QCload0'] < 0, 'QCload0'] = 0.0
        # PFact = P / sqrt(P^2 + (QCload0 - GEAmpQ)^2)
        reactive = df1['QCload0'] - df1['GEAmpQ']
        df1['PFact'] = df1['LabViewP'] / np.sqrt(df1['LabViewP']**2 + reactive**2)
        df1['PFactsign'] = 'ind'
        df1.loc[df1['QCload0'] > df1['GEAmpQ'], 'PFactsign'] = 'cap'
        df1s = df1[df1['FileName'].str.contains('Summer')]
        df1w = df1[df1['FileName'].str.contains('Winter')]

        if CONFIG['PlotIslDurHist']:
            # Fig: Island duration histogram (all tests with a positive duration)
            histogram_page(TestLog['tIslDur'][islanded],
                           'Island Duration Histogram',
                           'Island duration (sec)')
            # Fig: PF actual histogram
            histogram_page(df1['PFact'], 'Load PF actual', 'PF actual')

        if CONFIG['PlotCorDur2Pen']:
            # Fig: duration to penetration, motor tests in a separate colour
            ax0 = new_axes('Island Duration vs. Penetration')
            cmpld = TestLog[_DURATION_COLUMNS][islanded & normal & not_motor]
            motor = TestLog[_DURATION_COLUMNS][islanded & normal & is_motor]
            _plot_duration_group(ax0, cmpld, 'bo', 'CMPLDs', near_first=False)
            ax0.plot(motor['PrcntPen'], motor['tIslDur'], 'ro', markersize=4,
                     markeredgecolor='none', label='MotorB')
            _decorate_duration_axes(ax0)
            finish_page()

            # Fig: duration to penetration with PF as a parameter
            ax0 = new_axes('Island Duration vs. Penetration')
            inductive = df1['PFactsign'] == 'ind'
            capacitive = df1['PFactsign'] == 'cap'
            pf = df1['PFact']
            groups = [
                (df1[_DURATION_COLUMNS][(pf < 0.97) & inductive],
                 'bo', 'PF ~ 0.95ind'),
                (df1[_DURATION_COLUMNS][(pf < 0.99) & (pf >= 0.97) & inductive],
                 'go', 'PF ~ 0.98ind'),
                (df1[_DURATION_COLUMNS][pf > 0.99],
                 'ro', 'PF ~ 1.0'),
                (df1[_DURATION_COLUMNS][(pf < 0.99) & capacitive],
                 'ko', 'PF ~ 0.98cap'),
            ]
            for frame, _fmt, _label in groups:
                print(frame['tIslDur'].count())
            # Drawn in reverse so the 0.95ind group ends up on top.
            for frame, fmt, label in reversed(groups):
                _plot_duration_group(ax0, frame, fmt, label)
            _decorate_duration_axes(ax0, ylim=[0, 0.6])
            finish_page()

            # Figs: duration to penetration per PF band, season as parameter
            seasonal_page(
                'Island Duration vs. Penetration at PF ~ 0.95ind',
                lambda f: (f['PFact'] < 0.97) & (f['PFactsign'] == 'ind'))
            seasonal_page(
                'Island Duration vs. Penetration at PF ~ 0.98ind',
                lambda f: ((f['PFact'] > 0.97) & (f['PFact'] < 0.99) &
                           (f['PFactsign'] == 'ind')))
            seasonal_page(
                'Island Duration vs. Penetration at PF ~ 1.0',
                lambda f: f['PFact'] > 0.99)
            seasonal_page(
                'Island Duration vs. Penetration at PF ~ 0.98cap',
                lambda f: ((f['PFact'] > 0.97) & (f['PFact'] < 0.99) &
                           (f['PFactsign'] == 'cap')))
    finally:
        # Always finalize the PDF so pages written so far are not lost.
        print("Closing: " + mypdffile)
        pltPdf.close()

    return
Exemplo n.º 8
0
    from pandas import HDFStore
    store = HDFStore('store.h5', complevel=9)
    fmap = wrap_monitor(wrap_write(partial(fetch_safe, rse=args.rse), store, overwrite=args.overwrite),
                        monitor)
    p.map(fmap, datelist)
    monitor.close()
    logging.info("closing file")
    store.close()

    logging.info("trying to open output")
    store = HDFStore('store.h5')
    data = []

    for k in store.keys():
        try:
            d = store.get(k)
            d['timestamp'] = pd.to_datetime(k.split("_")[1], format='%d%m%Y')
            data.append(d)
        except Exception as e:
            print "Problem reading", k
            print e
    store.close()
    data = pd.concat(data)
    data = data.set_index(['timestamp', 'owner'])

    data_to_plot = data['size'].unstack().fillna(0)
    dataplot = data_to_plot.iplot(kind='area', fill=True, asFigure=True)
    for d in dataplot['data']:
        d['hoverinfo'] = 'text+x+name'
        d['text'] = ["%.2f Gb" % xx for xx in data_to_plot[d['name']].tolist()]
    data.iplot(data=dataplot['data'])
Exemplo n.º 9
0
class LogSaver:
    """Convert a list of server.log files into one DataFrame stored in HDF5.

        self.directory : Directory structure for temp and saved files
        self.log_list : List of server.log files to process
        self.extra : True if log messages and thread ids are to be saved too
        self.history_path : History of server.log conversions saved here
        self.progress_store_path : HDF5 file that holds one DataFrame for each server.log file
        self.store_path : Final DataFrame of all server.log entries saved here
        self.history : History of server.log conversions
        self.saved : True once save_all_logs() has written the final store
    """

    FINAL = 'logs'
    PROGRESS = 'progress'
    HISTORY = 'history'

    @staticmethod
    def normalize(name):
        """Return *name* with every non-alphanumeric character replaced by '_'."""
        return re.sub(r'[^a-zA-Z0-9]', '_', name)

    @staticmethod
    def make_name(base_name, extra):
        """Return *base_name*, with '.extra' appended when *extra* is truthy."""
        if extra:
            return base_name + '.extra'
        return base_name

    def __init__(self, store_path, log_list, extra):
        self.directory = ObjectDirectory(store_path)
        self.log_list = tuple(sorted(log_list))
        self.extra = extra

        self.history_path = self.directory.get_path(LogSaver.HISTORY, temp=True)
        self.progress_store_path = self.directory.get_path(LogSaver.PROGRESS, temp=True, is_df=True)
        self.store_path = self.directory.get_path(LogSaver.make_name(LogSaver.FINAL, extra),
                            is_df=True)
        self.history = ObjectDirectory.load_object(self.history_path, {})
        self.saved = False

    def __repr__(self):
        return '\n'.join('%s: %s' % (k, v) for k, v in self.__dict__.items())

    def __str__(self):
        return '\n'.join([repr(self), '%d log files' % len(self.log_list)])

    def _load_progress_table(self, path):
        """Return the per-log DataFrame saved for *path* in the progress store."""
        try:
            return self.progress_store.get(LogSaver.normalize(path))
        except Exception:
            # Name the offending log, then re-raise with the original traceback.
            print('')
            print(path)
            raise

    def save_all_logs(self, force=False):
        """Convert every log in self.log_list and write the combined DataFrame.

        Does nothing except print the existing store when self.store_path
        already exists. Unless *force* is true, raises AssertionError when a
        history file exists without a final store (a conversion appears to be
        in progress).
        """
        if os.path.exists(self.store_path):
            final_store = HDFStore(self.store_path)
            try:
                print('Keys: %s' % final_store)
            finally:
                final_store.close()
            return
        if not force and os.path.exists(self.history_path):
            # Raised explicitly so the check also runs under ``python -O``.
            raise AssertionError('''
                %s exists but %s does not.
                There appears to be a conversion in progress.
                -f forces conversion to complete.
            ''' % (self.history_path, self.store_path))

        self.directory.make_dir_if_necessary(self.progress_store_path)
        self.progress_store = HDFStore(self.progress_store_path)
        try:
            for path in self.log_list:
                self.save_log(path)

            self.check()
            print('--------')
            print('All tables in %s' % self.progress_store_path)
            print(self.progress_store.keys())
            print('--------')

            df_list = [self._load_progress_table(path) for path in self.log_list]
        finally:
            # Never leave the progress file open, even on failure.
            self.progress_store.close()
        print('Closed %s' % self.progress_store_path)

        df_all = pd.concat(df_list)
        print('Final list has %d entries' % len(df_all))
        final_store = HDFStore(self.store_path)
        try:
            final_store.put(LogSaver.FINAL, df_all)
            print('Keys: %s' % final_store)
        finally:
            final_store.close()
        print('Closed %s' % self.store_path)

        # Save the history in a corresponding file
        self.directory.save('history', self.history)
        print('Saved history')

        self.saved = True

    def test_store(self):
        """Print the keys of the final store and basic facts about its '/logs' table."""
        final_store = HDFStore(self.store_path)
        try:
            print('----')
            print(final_store.keys())
            print('-' * 80)
            logs = final_store['/logs']
            print(type(logs))
            print(len(logs))
            print(logs.columns)
        finally:
            final_store.close()

    def cleanup(self):
        """Delete the temporary progress store and history file."""
        os.remove(self.progress_store_path)
        os.remove(self.history_path)

    def delete(self):
        """Delete the final store."""
        os.remove(self.store_path)

    def save_log(self, path):
        """Load the log file *path* and save its DataFrame in the progress store.

        Skips paths already recorded in self.history. On success, records the
        first and last index values, load time, row count and header in
        self.history and persists the history. Returns None.
        """
        import sys

        if path in self.history:
            return

        # Announce the file before the (possibly slow) load; the rest of the
        # line is printed once the load has finished.
        sys.stdout.write('Processing %s ' % path)
        sys.stdout.flush()
        start = time.time()
        header, df = load_log(path, extra=self.extra)
        if df is None:
            print('Could not process %s' % path)
            return
        self.progress_store.put(LogSaver.normalize(path), df)
        load_time = time.time() - start

        self.history[path] = {
            'start': df.index[0],
            'end': df.index[-1],
            'load_time': int(load_time),
            'num': len(df),
            'header': header
        }
        ObjectDirectory.save_object(self.history_path, self.history)
        summary = dict((k, v) for k, v in self.history[path].items() if k != 'header')
        print('%s %d of %d' % (summary, len(self.history), len(self.log_list)))

    def check(self):
        """Print each log's time range and verify that the logs do not overlap.

        Logs are ordered by their first timestamp; each log must end strictly
        before the next one starts. Raises AssertionError naming the first
        overlapping pair. An empty history passes trivially.
        """
        history = ObjectDirectory.load_object(self.history_path, {})
        sorted_keys = sorted(history, key=lambda k: history[k]['start'])
        print('-' * 80)
        print('Time range by log file')
        for i, path in enumerate(sorted_keys):
            hist = history[path]
            print('%2d: %s  ---  %s : %s' % (i, hist['start'], hist['end'], path))

        # Compare each log with its successor (not only with the first log),
        # so an overlap between any two neighbours is detected.
        for path0, path1 in zip(sorted_keys, sorted_keys[1:]):
            hist0, hist1 = history[path0], history[path1]
            if not hist0['end'] < hist1['start']:
                raise AssertionError('''
            -----------
            %s %s
            start: %s
            end  : %s
            -----------
            %s %s
            start: %s
            end  : %s
            ''' % (
                    path0, hist0, hist0['start'], hist0['end'],
                    path1, hist1, hist1['start'], hist1['end']))