def __init__(
    self,
    normalize=False,
    balance=False,
    tweet_threshold=0,
    score=False,
    dump_model=True,
):
    """Import a previously saved model or, failing that, train a new one.

    A fresh ``RegressionModel`` is created and asked to load itself. Only
    when ``load()`` returns a falsy value are the datasets loaded and a
    model trained; ``ViralityPrediction.CLASSIFICATION`` selects between
    the classifier path and the regression path.

    Args:
        normalize: Forwarded as ``normalize=`` to ``trainClassifier`` or
            ``trainRegression``.
        balance: Forwarded as ``balance=`` to
            ``RegressionModel.load_datasets``.
        tweet_threshold: Forwarded as ``viral_threshold=`` to
            ``RegressionModel.load_datasets``.
        score: When true, the freshly trained model is scored against the
            testing set.
        dump_model: When true, the freshly trained model is dumped with
            ``self.model.dump()``.
    """
    self.model = RegressionModel()

    # A successfully loaded model needs no training, scoring or dumping.
    if self.model.load():
        return

    training_set, testing_set = RegressionModel.load_datasets(
        balance=balance, viral_threshold=tweet_threshold)

    # Plain truthiness test instead of ``== True`` (PEP 8).
    if ViralityPrediction.CLASSIFICATION:
        # Only the classification path normalises the feature sets first.
        training_set = self.model.normaliseFeats(training_set)
        testing_set = self.model.normaliseFeats(testing_set)
        self.model.trainClassifier(training_set, normalize=normalize)
        if score:
            self.model.scoreClassifier(testing_set)
    else:
        self.model.trainRegression(training_set, normalize=normalize)
        if score:
            self.model.scoreRegression(testing_set)

    # NOTE(review): the original indentation was lost; dumping is assumed to
    # apply only to a newly trained model -- confirm against the original.
    if dump_model:
        self.model.dump()
import pandas as pd
from regression import RegressionModel

# Build a RegressionModel and print two predictions: one with the default
# arguments and one with ``use_macro=False``.
model = RegressionModel()

prediction_default = model.predict()
print(prediction_default)

prediction_no_macro = model.predict(use_macro=False)
print(prediction_no_macro)

# NOTE(review): the lines below are disabled code kept for reference only.
# They use names (`row`, `years`, `self.history_df`, `pred_df`) that are not
# defined anywhere in this script, so they cannot be re-enabled as-is.
#
# year_prior = row['year']-years
# home_match = self.history_df.loc[self.history_df['home_id'] == row['home_id']]
# row_prior = home_match.loc[home_match['year'] == year_prior]
# # Break if not found
# if len(home_match)<2 or len(row_prior) < 1:
#     break
# # Create new row with home data, prev_year, prev_value, prev_value
# row_prior = row_prior.drop("home_id", axis=1)
# row_prior = row_prior.drop("year", axis=1)
# row_prior = row_prior.values.tolist()[0]
# row_prior = [row['year'], row['value']] + row_prior
# # Add row to DataFrame
# row_series = pd.Series(row_prior, index=pred_df.columns)
# pred_df = pred_df.append(row_series, ignore_index=True)