示例#1
0
    def fit_model(self, clf, scale=True, clean=False, sample_weight=None, *args, **kwargs):
        """Prepare the training frame and fit an sklearn-style estimator on it.

        The window metrics are always recomputed and the label mask is always
        regenerated (filter_labels is called with overwrite=True) before the
        estimator is fitted on the masked rows.

        Parameters
        ----------
        clf: estimator object
            Must have fit method.
        scale: bool
            If True, call scale_model() before the window metrics are computed.
        clean: bool
            Accepted for interface compatibility; currently not used.
        sample_weight: optional
            Accepted for interface compatibility; currently not used (no
            weights are handed to clf.fit).
        *args
            Currently not used.
        **kwargs
            Forwarded to filter_labels (ratio_mean_val, diff_mean_val,
            mask_label).  Must not contain overwrite, which is always
            passed as True.

        Returns
        -------
        clf: estimator object
            The estimator that was passed in, after its fit method has run.

        """
        if scale:
            self.scale_model()
        utils.calc_all_window_metrics(self.df, self.window, self.meas_col, self.model_col, overwrite=True)

        # Rebuild the label mask on every fit; thresholds (if any) arrive through kwargs.
        self.filter_labels(overwrite=True, **kwargs)

        # NOTE(review): get_masked_df presumably returns only the rows that pass the
        # registered masks - confirm against its definition.
        masked = self.get_masked_df()
        feature_matrix = masked[self.features_].values
        # The target column is cast to int before being handed to the estimator.
        labels = masked[self.target_col].astype(int).values
        clf.fit(feature_matrix, labels)
        return clf
示例#2
0
    def filter_labels(self, ratio_mean_val=None, diff_mean_val=None, mask_label='quality_mask', overwrite=False):
        """Create a boolean mask column that drops suspiciously labeled training points.

        Every row starts out as True in the mask.  When both thresholds are
        supplied, rows that are labeled not-clear (sky_status is False) are set to
        False if their 'GHI/GHIcs mean' lies within ratio_mean_val of 1 or the
        absolute value of their 'GHI-GHIcs mean' is at most diff_mean_val, i.e.
        cloudy points that 'look' clear.  Rows labeled clear are never masked out.

        If either threshold is None (the default), no filtering is done and the
        mask stays all True.

        Parameters
        ----------
        ratio_mean_val: float, optional
            Tolerance around 1 for the 'GHI/GHIcs mean' column.
        diff_mean_val: float, optional
            Tolerance around 0 for the 'GHI-GHIcs mean' column.
        mask_label: str
            Name of the mask column written to self.df and registered in self.masks_.
        overwrite: bool
            Allow replacing a mask that is already registered under mask_label.

        Raises
        ------
        RuntimeError
            If mask_label is already registered in self.masks_ and overwrite is False.
        """
        if not overwrite and mask_label in self.masks_:
            raise RuntimeError('Label already exists as mask.  Change name or allow overwrite.')

        # The thresholds below read metric columns, so build them if any feature is missing.
        feature_missing = any(name not in self.df.keys() for name in self.features_)
        if feature_missing:
            self.scale_model()
            utils.calc_all_window_metrics(self.df, self.window, self.meas_col, self.model_col, overwrite=True)

        self.df[mask_label] = True

        thresholds_given = not (ratio_mean_val is None or diff_mean_val is None)
        if thresholds_given:
            labeled_cloudy = ~self.df['sky_status']
            ratio_near_one = np.abs(1 - self.df['GHI/GHIcs mean']) <= ratio_mean_val
            diff_near_zero = np.abs(self.df['GHI-GHIcs mean']) <= diff_mean_val
            looks_clear = ratio_near_one | diff_near_zero
            self.df.loc[labeled_cloudy & looks_clear, mask_label] = False

        if mask_label not in self.masks_:
            self.masks_.append(mask_label)
示例#3
0
from sklearn import neighbors

# In[52]:

# k-nearest-neighbours classifier; each prediction is a vote among the 8 closest training samples.
nn = neighbors.KNeighborsClassifier(n_neighbors=8)

# In[53]:

# NOTE(review): the arguments look like (measured column, clear-sky model column, status column);
# scale_model presumably rescales the model column to the measurements - confirm against its definition.
train.scale_model('GHI', 'Clearsky GHI pvlib', 'sky_status')

# In[54]:

# Recompute the window metrics for train.df with a window argument of 3.
# The return value is discarded, so the results are presumably written onto train.df itself - TODO confirm.
utils.calc_all_window_metrics(train.df,
                              3,
                              meas_col='GHI',
                              model_col='Clearsky GHI pvlib',
                              overwrite=True)

# In[55]:

from sklearn import preprocessing

# In[56]:

# Standardise the feature columns; the scaler statistics are fitted on the training frame itself.
# feature_cols is defined in an earlier cell that is not shown here.
ss = preprocessing.StandardScaler()
X_std = ss.fit_transform(train.df[feature_cols])

# In[57]:

# Fit on the standardised features; .values.ravel() flattens the selected target column(s) to 1-D.
nn.fit(X_std, train.df[target_cols].values.ravel())
# Workflow for the nsrdb_* data sets:
# 1. Scale model clearsky (PVLib)
# 2. Calculate training metrics
# 3. Train model

# In[13]:

# Step 1: apply the same scale_model call to each of the three data sets.
# NOTE(review): the arguments look like (measured column, clear-sky model column, status column) - confirm.
nsrdb_srrl.scale_model('GHI', 'Clearsky GHI pvlib', 'sky_status')
nsrdb_abq.scale_model('GHI', 'Clearsky GHI pvlib', 'sky_status')
nsrdb_ornl.scale_model('GHI', 'Clearsky GHI pvlib', 'sky_status')

# In[14]:

# Step 2: recompute the window metrics for each frame with identical settings (window argument of 3).
# The return values are discarded, so the results are presumably stored on each frame - TODO confirm.
utils.calc_all_window_metrics(nsrdb_srrl.df,
                              3,
                              meas_col='GHI',
                              model_col='Clearsky GHI pvlib',
                              overwrite=True)
utils.calc_all_window_metrics(nsrdb_abq.df,
                              3,
                              meas_col='GHI',
                              model_col='Clearsky GHI pvlib',
                              overwrite=True)
utils.calc_all_window_metrics(nsrdb_ornl.df,
                              3,
                              meas_col='GHI',
                              model_col='Clearsky GHI pvlib',
                              overwrite=True)

# In[20]:
# Build a second detection object from ground2's frame and trim it to a date range.
# NOTE(review): unclear from here whether ClearskyDetection copies the frame or shares it with ground2 - confirm.
# NOTE(review): the date strings look like MM-DD-YYYY (1 to 15 July 2011) - confirm how trim_dates parses them.
ground3 = cs_detection.ClearskyDetection(ground2.df)
ground3.trim_dates('07-01-2011', '07-15-2011')

# In[25]:

# Plot measured GHI and the clear-sky model as lines, and mark the rows where sky_status is True as 'Clear'.
vis = visualize.Visualizer()
vis.add_line_ser(ground3.df['GHI'], 'Grnd GHI')
vis.add_line_ser(ground3.df['Clearsky GHI pvlib'], 'Grnd GHIcs')
vis.add_circle_ser(ground3.df[ground3.df['sky_status']]['GHI'], 'Clear')
vis.show()

# In[26]:

# Recompute the window metrics for ground2.df, this time with a window argument of 11.
utils.calc_all_window_metrics(ground2.df,
                              11,
                              'GHI',
                              'Clearsky GHI pvlib',
                              overwrite=True)

# In[27]:

# Keep only the rows whose index minute is a multiple of 30 (i.e. :00 and :30).
ground2_train = ground2.df[ground2.df.index.minute % 30 == 0]

# In[28]:

# Keep only the rows whose index is earlier than '07-01-2011', the same string used as the
# start of the ground3 date range above.
ground2_train = ground2_train[ground2_train.index < '07-01-2011']

# In[29]:

from sklearn import tree
from sklearn import ensemble
示例#6
0
 def calc_all_metrics(self):
     """Recompute the feature columns used when investigating features.

     Calls utils.calc_all_window_metrics on self.df with this object's window,
     measured column and model column (overwrite=True), then calls
     time_from_solar_noon().
     """
     utils.calc_all_window_metrics(
         self.df,
         self.window,
         meas_col=self.meas_col,
         model_col=self.model_col,
         overwrite=True,
     )
     self.time_from_solar_noon()
示例#7
0
# In[13]:


# NOTE(review): the arguments look like (measured column, clear-sky model column, status column);
# scale_model presumably rescales the model column to the measurements - confirm against its definition.
train.scale_model('GHI', 'Clearsky GHI pvlib', 'sky_status')


# In[14]:


# Random forest with a fixed seed so repeated runs build the same trees.
clf = ensemble.RandomForestClassifier(random_state=42)


# In[15]:


# Recompute the window metrics for train.df with a window argument of 3.
utils.calc_all_window_metrics(train.df, 3, meas_col='GHI', model_col='Clearsky GHI pvlib', overwrite=True)

# Optional line-by-line profiling of the call above.  These are IPython magics, which are
# not valid Python in a plain script, so they are kept here as comments.  In a notebook,
# run them as two separate lines (the original had them fused into one):
# %load_ext line_profiler
# %lprun -f utils.calc_all_window_metrics utils.calc_all_window_metrics(train.df, 3, meas_col='GHI', model_col='Clearsky GHI pvlib', overwrite=True)

# In[16]:


# Notebook display of the columns now available on the training frame.
train.df.keys()


# In[17]:


feature_cols = [
    'tfn',
#     'ghi_status',
#     'abs_ideal_ratio_diff',