def arima_ts(df):
    """Fit an ARIMA(0, 1, 0) model on the 2013-2014 sales window and plot/score it.

    Splits ``df`` (a Spark DataFrame with 'date' and 'sales' columns) into a
    training window (2013-01-01 .. 2014-11-01) and a test window
    (2014-11-01 .. 2015-05-01), fits a spark-ts ARIMA model on the training
    sales series, forecasts over the test horizon, plots the forecast against
    the test window and against the full series, and returns the R^2 score of
    the forecast on the test window.

    :param df: Spark DataFrame with at least 'date' and 'sales' columns.
    :return: float R^2 score of the forecast against the test sales.
    """
    sc = SparkContext.getOrCreate()
    # NOTE(review): DataFrame.between is inclusive on both ends, so
    # '2014-11-01' lands in BOTH train and test — confirm this overlap
    # is intended.
    train = df.filter(df['date'].between('2013-01-01', '2014-11-01'))
    test = df.filter(df['date'].between('2014-11-01', '2015-05-01'))
    tr = numpy.array(train.select("sales").collect()).flatten()
    te = numpy.array(test.select("sales").collect()).flatten()

    model = fit_model(p=0, d=1, q=0, ts=Vectors.dense(tr), sc=sc)
    # forecast() returns the in-sample series followed by the forecast
    # horizon; keep only the trailing len(te) forecast values.
    prev = model.forecast(Vectors.dense(tr), len(te))
    pred = _java2py(sc, prev)[len(tr):]

    # Score on the 1-D prediction array, BEFORE wrapping it in a DataFrame
    # (the original scored the (n, 1) DataFrame form, which relied on
    # implicit reshaping inside r2_score).
    score = r2_score(te, pred)

    # Use distinct names for the pandas views instead of rebinding the
    # Spark DataFrames 'df'/'test' to a different type mid-function.
    test_pd = test.toPandas().set_index('date')
    full_pd = df.toPandas().set_index('date')
    pred_df = pd.DataFrame(pred, index=test_pd.index, columns=['prediction'])
    pd.concat([test_pd, pred_df], axis=1).plot()
    pd.concat([full_pd, pred_df], axis=1).plot()

    return score
def test_compare_with_r_with_userparams(self):
    """Fitting with user-supplied initial params should converge near R's estimates."""
    series = data_file_as_nparray('resources/R_ARIMA_DataSet1.csv')
    fitted = fit_model(1, 0, 1, series, userInitParams=[0.0, 0.2, 1.0], sc=self.sc)
    # coefficients come back as (intercept, AR, MA); intercept is unchecked here
    _, ar_coef, ma_coef = fitted.coefficients
    self.assertAlmostEqual(ar_coef, 0.55, delta=0.01)
    self.assertAlmostEqual(ma_coef, 1.03, delta=0.01)
def test_remodel_sample_data(self):
    """Data sampled from a given model should result in a similar model if fit again."""
    original = ARIMAModel(2, 1, 2, [8.2, 0.2, 0.5, 0.3, 0.1], sc=self.sc)
    sampled = original.sample(1000)
    refit = fit_model(2, 1, 2, sampled, sc=self.sc)
    # per-coefficient tolerances: the intercept is allowed a much looser
    # match than the AR/MA terms
    tolerances = (1, 0.1, 0.1, 0.1, 0.1)
    for expected, actual, tol in zip(original.coefficients,
                                     refit.coefficients,
                                     tolerances):
        self.assertAlmostEqual(expected, actual, delta=tol)
def test_compare_with_r(self):
    """An ARIMA(1, 0, 1) fit on the reference dataset should match R's estimates."""
    series = data_file_as_nparray('resources/R_ARIMA_DataSet1.csv')
    fitted = fit_model(1, 0, 1, series, sc=self.sc)
    # coefficients come back as (intercept, AR, MA); intercept is unchecked here
    _, ar_coef, ma_coef = fitted.coefficients
    self.assertAlmostEqual(ar_coef, 0.3, delta=0.01)
    self.assertAlmostEqual(ma_coef, 0.7, delta=0.01)