def combine_files_setup(self, outformat='pkl.gz'):
        """Combine multiple files (for a given site) into a single file data set.  Will also use PVLib get_clearsky
        method and fill in Clearsky GHI pvlib column.

        Returns
        -------
        None
        """
        time_cols = ['Year', 'Month', 'Day', 'Hour', 'Minute']
        for id, file_set in self.files_df.groupby(self.files_df['id']):
            files = [os.path.join(self.path_to_read_dir, f) for f in file_set.index]
            header = pd.read_csv(files[0], nrows=2)  # read header to get time zone, latitude, longitude, elevation
            # 'Etc/GMT' zones use an inverted sign convention (UTC-7 is 'Etc/GMT+7'), so flip the header's negative sign
            tz = 'Etc/GMT' + header['Time Zone'][0].replace('-', '+')
            df = pd.concat([pd.read_csv(f, skiprows=2) for f in files])
            df.index = pd.to_datetime(df[time_cols])
            df.index = df.index.tz_localize(tz)
            df = df.drop(time_cols, axis=1)
            latitude = float(header['Latitude'][0])
            longitude = float(header['Longitude'][0])
            elevation = float(header['Elevation'][0])
            # add 'Is clear NSRDB' status column and 'Clearsky GHI pvlib' column,
            # then scale 'Clearsky GHI pvlib' to match measured GHI during periods NSRDB labels clear
            detection = cs_detection.ClearskyDetection(df, copy=False, set_ghi_status=True)
            detection.set_nsrdb_sky_status(label='Is clear NSRDB')
            detection.generate_pvlib_clearsky(latitude, longitude, elevation, tz=tz)
            detection.scale_model('GHI', 'Clearsky GHI pvlib', 'Is clear NSRDB')
            df = detection.df
            out_base = os.path.join(self.path_to_write_dir, str(int(id)))
            if outformat in ('pkl', 'pkl.gz'):
                pd.to_pickle(df, out_base + '.' + outformat)
            elif outformat == 'csv':
                df.to_csv(out_base + '.csv')
        print('Files successfully written to {}'.format(self.path_to_write_dir))
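
# Illustrative sketch (separate from the method above) of the pvlib clear-sky step the
# docstring refers to: build a pvlib Location and request clear-sky GHI on the data's own
# index. The coordinates, altitude, time zone, and model choice below are placeholder
# assumptions, not values from this repository.
import pandas as pd
from pvlib.location import Location

times = pd.date_range('2015-07-01', periods=24 * 60, freq='1min', tz='Etc/GMT+7')
site = Location(35.05, -106.54, tz='Etc/GMT+7', altitude=1600)
clearsky = site.get_clearsky(times, model='ineichen')  # DataFrame with 'ghi', 'dni', 'dhi'
ghi_cs = clearsky['ghi']  # the kind of series stored as 'Clearsky GHI pvlib'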
Example #2
len(nsrdb.df)

# # Investigate input data

# ## ABQ

# In[43]:

nsrdb = cs_detection.ClearskyDetection.read_pickle('abq_nsrdb_1.pkl.gz')
nsrdb.df.index = nsrdb.df.index.tz_convert('MST')
nsrdb.time_from_solar_noon('Clearsky GHI pvlib', 'tfn')

# In[44]:

train = cs_detection.ClearskyDetection(nsrdb.df, scale_col=None)
train.trim_dates('01-01-2013', '01-01-2015')
test = cs_detection.ClearskyDetection(nsrdb.df, scale_col=None)
test.trim_dates('01-01-2015', None)

# In[45]:

clf = ensemble.RandomForestClassifier(random_state=42)

# In[46]:

feature_cols = [
    'tfn', 'abs_ideal_ratio_diff grad', 'abs_ideal_ratio_diff grad mean',
    'abs_ideal_ratio_diff grad std', 'abs_ideal_ratio_diff grad second',
    'abs_ideal_ratio_diff grad second mean',
    'abs_ideal_ratio_diff grad second std',
]

pred = test.iter_predict_daily(feature_cols,
                               'GHI',
                               'Clearsky GHI pvlib',
                               clf,
                               3,
                               by_day=True,
                               multiproc=True)
pred = pred.astype(bool)

# In[127]:

vis = visualize.Visualizer()

# In[128]:

srrl_tmp = cs_detection.ClearskyDetection(nsrdb_srrl.df)
srrl_tmp.intersection(ground.df.index)
vis.add_line_ser(test.df['GHI'], 'GHI')
vis.add_line_ser(test.df['Clearsky GHI pvlib'], 'GHI_cs')
vis.add_circle_ser(test.df[(srrl_tmp.df['sky_status'] == 0) & (pred)]['GHI'],
                   'ML clear only')
vis.add_circle_ser(test.df[(srrl_tmp.df['sky_status'] == 1) & (~pred)]['GHI'],
                   'NSRDB clear only')
vis.add_circle_ser(test.df[(srrl_tmp.df['sky_status'] == 1) & (pred)]['GHI'],
                   'ML+NSRDB clear')
# vis.add_line_ser(test.df['abs_ideal_ratio_diff'] * 100)

# In[129]:

vis.show()

feature_cols = [
    'GHI Clearsky GHI pvlib gradient second ratio min',
    'GHI Clearsky GHI pvlib gradient second ratio max',
    'GHI Clearsky GHI pvlib line length ratio',
    'GHI Clearsky GHI pvlib line length ratio gradient',
    'GHI Clearsky GHI pvlib line length ratio gradient second',
]

target_cols = ['sky_status']


# # Train/test on NSRDB data to find optimal parameters

# In[66]:


train = cs_detection.ClearskyDetection(nsrdb.df)
train.trim_dates('01-01-2010', '01-01-2015')
test = cs_detection.ClearskyDetection(nsrdb.df)
test.trim_dates('01-01-2015', None)


# In[67]:


train.scale_model('GHI', 'Clearsky GHI pvlib', 'sky_status')


# In[68]:


utils.calc_all_window_metrics(train.df, 3, meas_col='GHI', model_col='Clearsky GHI pvlib', overwrite=True)
# In[5]:


ground.df.index[0], ground.df.index[-1]


# In[6]:


nsrdb.df.index[0], nsrdb.df.index[-1]


# In[7]:


ground2 = cs_detection.ClearskyDetection(ground.df, 'GHI', 'Clearsky GHI pvlib', solar_noon_col='abs(t-tnoon)')


# In[8]:


ground2.trim_dates('01-01-2002', '01-01-2015')
ground2.df = ground2.df[ground2.df.index.minute % 30 == 0]  # keep only on-the-hour and half-hour samples (30-minute cadence)


# In[9]:


nsrdb2 = cs_detection.ClearskyDetection(nsrdb.df, 'GHI', 'Clearsky GHI pvlib', 'sky_status', solar_noon_col='abs(t-tnoon)')

Example #6
def split_df_by_date(obj, start, mid, end):
    """Split a ClearskyDetection object into train (start to mid) and test (mid to end) sets."""
    train = cs_detection.ClearskyDetection(obj.df)
    train.trim_dates(start, mid)
    test = cs_detection.ClearskyDetection(obj.df)
    test.trim_dates(mid, end)
    return train, test
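
# Usage sketch (illustrative; assumes the `nsrdb` object loaded earlier in this notebook):
# reproduce the pre-2015 / 2015-onward split that the cells below build by hand.
train, test = split_df_by_date(nsrdb, None, '01-01-2015', None)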

# In[3]:


len(nsrdb.df)


# # Train/test on NSRDB data to find optimal parameters

# ## Default classifier

# In[4]:


train = cs_detection.ClearskyDetection(nsrdb.df, scale_col=None)
train.trim_dates(None, '01-01-2015')
test = cs_detection.ClearskyDetection(nsrdb.df, scale_col=None)
test.trim_dates('01-01-2015', None)


# In[5]:


train.scale_model('GHI', 'Clearsky GHI pvlib', 'sky_status')


# In[6]:


clf = ensemble.RandomForestClassifier(random_state=42)
import pygal


# # Train on default data

# In[2]:


detect_obj = cs_detection.ClearskyDetection.read_pickle('abq_nsrdb_1.pkl.gz', 'GHI', 'Clearsky GHI pvlib', 'sky_status')
detect_obj.df.index = detect_obj.df.index.tz_convert('MST')


# In[3]:


train_obj = cs_detection.ClearskyDetection(detect_obj.df, 'GHI', 'Clearsky GHI pvlib', 'sky_status')
train_obj.trim_dates(None, '01-01-2015')
test_obj = cs_detection.ClearskyDetection(detect_obj.df, 'GHI', 'Clearsky GHI pvlib', 'sky_status')
test_obj.trim_dates('01-01-2015', None)


# In[4]:


clf = ensemble.RandomForestClassifier(n_jobs=-1, n_estimators=32, random_state=42)


# In[5]:


clf = train_obj.fit_model(clf)
Example #9

# In[8]:


clf = nsrdb.fit_model(feature_cols, target_cols, clf)


# Training against the clear-sky model included in NSRDB is quite accurate.  I don't want to rely on that clear-sky curve, though, since it isn't available for ground-based measurements.
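
# A sketch of the alternative this points toward (assuming the `utils` module imported in the
# other examples): compute the window features against the pvlib clear-sky curve, which can be
# generated for ground-based timestamps, instead of NSRDB's own clear-sky column.
utils.calc_all_window_metrics(nsrdb.df, 3, meas_col='GHI', model_col='Clearsky GHI pvlib', overwrite=True)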

# ### Visualize

# In[9]:


train = cs_detection.ClearskyDetection(nsrdb.df)
train.trim_dates(None, '01-01-2015')
test = cs_detection.ClearskyDetection(nsrdb.df)
test.trim_dates('01-01-2015', None)


# In[10]:


clf.fit(train.df[feature_cols], train.df[target_cols].values.ravel())


# In[11]:


pred = clf.predict(test.df[feature_cols]).flatten()

# In[3]:


len(nsrdb.df)


# # Train/test on NSRDB data to find optimal parameters

# ## Default classifier

# In[4]:


train = cs_detection.ClearskyDetection(nsrdb.df, scale_col=None)
train.trim_dates(None, '01-01-2015')
test = cs_detection.ClearskyDetection(nsrdb.df, scale_col=None)
test.trim_dates('11-01-2015', '01-07-2016')


# In[5]:


train.scale_model('GHI', 'Clearsky GHI pvlib', 'sky_status')


# In[6]:


clf = ensemble.RandomForestClassifier(random_state=42)
Example #11
target_cols = ['sky_status']

# # Align date ranges

# In[6]:

ground.df.index[0], ground.df.index[-1]

# In[7]:

nsrdb.df.index[0], nsrdb.df.index[-1]

# In[8]:

ground2 = cs_detection.ClearskyDetection(ground.df)

# In[9]:

ground2.trim_dates('01-01-2008', '01-01-2012')
ground2.df = ground2.df[ground2.df.index.minute % 30 == 0]

# In[10]:

nsrdb2 = cs_detection.ClearskyDetection(nsrdb.df)

# In[11]:

nsrdb2.trim_dates('01-01-2008', '01-01-2012')
nsrdb2.df = nsrdb2.df[nsrdb2.df.index.minute % 30 == 0]
Example #12
import matplotlib
import pv_clf
import numpy as np

get_ipython().magic('matplotlib notebook')

get_ipython().magic('load_ext autoreload')
get_ipython().magic('autoreload 2')


# In[507]:


nsrdb = cs_detection.ClearskyDetection.read_pickle('abq_nsrdb_1.pkl.gz')
nsrdb.df.index = nsrdb.df.index.tz_convert('MST')
train = cs_detection.ClearskyDetection(nsrdb.df)
train.trim_dates(None, '01-01-2015')
test = cs_detection.ClearskyDetection(nsrdb.df)
test.trim_dates('01-01-2015', None)


# In[508]:


# stack timestamps, measured GHI, and pvlib clear-sky GHI as the columns of X
X = np.asarray([train.df.index.values, train.df['GHI'].values, train.df['Clearsky GHI pvlib'].values]).T


# In[509]:


X.shape
nsrdb = cs_detection.ClearskyDetection.read_pickle('ornl_nsrdb_1.pkl.gz')
nsrdb.df.index = nsrdb.df.index.tz_convert('EST')
nsrdb.time_from_solar_noon('Clearsky GHI pvlib', 'tfn')

# In[3]:

len(nsrdb.df)

# # Train/test on NSRDB data to find optimal parameters

# ## Default classifier

# In[4]:

train = cs_detection.ClearskyDetection(nsrdb.df, scale_col=None)
train.trim_dates('01-01-2010', '01-01-2015')
test = cs_detection.ClearskyDetection(nsrdb.df, scale_col=None)
test.trim_dates('01-01-2015', None)

# In[5]:

train.scale_model('GHI', 'Clearsky GHI pvlib', 'sky_status')

# In[6]:

clf = ensemble.RandomForestClassifier(random_state=42)

# In[7]:

utils.calc_all_window_metrics(train.df, 3, meas_col='GHI', model_col='Clearsky GHI pvlib', overwrite=True)
Example #14
nsrdb.to_pickle('srrl_nsrdb_cloudy.pkl', overwrite=True)


# In[14]:


ground.to_pickle('srrl_ground_cloudy.pkl', overwrite=True)


# # Science

# In[16]:


ground_small = cs_detection.ClearskyDetection(ground.df)


# In[17]:


ground_small.trim_dates('07-01-2006', '07-08-2006')


# In[18]:


vis = Visualizer()
vis.add_line_ser(ground_small.df['GHI'], 'GHI')
vis.add_line_ser(ground_small.df['Clearsky GHI pvlib'], 'GHIcs')
vis.add_line_ser(ground_small.df['Total Cloud Cover [%]'], 'TCC')
np.set_printoptions(precision=4)
get_ipython().magic('matplotlib inline')

get_ipython().magic("config InlineBackend.figure_format = 'retina'")

matplotlib.rcParams.update({'font.size': 16})

import warnings
warnings.filterwarnings(action='ignore')


plt.close('all')

# # Train on default data

# nsrdb = pd.read_pickle('abq_nsrdb_1.pkl.gz')
detect_obj = cs_detection.ClearskyDetection.read_pickle('abq_nsrdb_1.pkl.gz', 'GHI', 'Clearsky GHI pvlib', 'sky_status')
detect_obj.df.index = detect_obj.df.index.tz_convert('MST')

train_obj = cs_detection.ClearskyDetection(detect_obj.df, 'GHI', 'Clearsky GHI pvlib', 'sky_status')
train_obj.trim_dates(None, '01-01-2015')
test_obj = cs_detection.ClearskyDetection(detect_obj.df, 'GHI', 'Clearsky GHI pvlib', 'sky_status')
test_obj.trim_dates('01-01-2015', None)

clf = ensemble.RandomForestClassifier(n_jobs=-1, n_estimators=32, max_depth=10, random_state=42)

clf = train_obj.fit_model(clf)

pred = test_obj.predict(clf)

print(metrics.accuracy_score(test_obj.df['sky_status'], pred))
print(metrics.recall_score(test_obj.df['sky_status'], pred))

cm = metrics.confusion_matrix(test_obj.df['sky_status'], pred)
visualize.plot_confusion_matrix2(cm, ('cloudy', 'clear'))

fig, ax = plt.subplots(figsize=(12, 8))

_ = ax.bar(range(len(clf.feature_importances_)), clf.feature_importances_)
_ = ax.set_xticks(range(len(clf.feature_importances_)))
_ = ax.set_xticklabels(test_obj.features_, rotation=45)

_ = ax.set_ylabel('Importance')
_ = ax.set_xlabel('Feature')

_ = fig.tight_layout()

fig, ax = plt.subplots(figsize=(12, 8))

nsrdb_mask = test_obj.df['sky_status'].values