示例#1
0
class TestDataHandler(unittest.TestCase):
    def __init__(self, *args, **kwargs):
        super(TestDataHandler, self).__init__(*args, **kwargs)
        self.dh = DataHandler(data_path=root.joinpath("data"))

    def test_get_data(self):
        all_data = self.dh.get_data()
        self.assertIsInstance(all_data, dict)
        for proj, datasets in all_data.items():
            self.assertIsInstance(proj, str)
            self.assertIsInstance(datasets, dict)
            for key, value in datasets.items():
                self.assertIsInstance(key, str)
                self.assertIsInstance(value, pd.core.frame.DataFrame)
示例#2
0
    def bug_count(self):
        """Forecast one step ahead per window for every project and save it.

        The data returned by ``DataHandler(data_path=<root>/data).get_data()``
        is iterated project by project.  Each project's data is walked with
        ``self.moving_window(data, frame=24)``, which yields ``(train, test)``
        pairs.  For every pair one row is recorded:

        * ``Date``   -- first index entry of ``test`` formatted ``%Y-%m-%d``
        * ``Actual`` -- first value of ``test``
        * ``ARIMA``  -- ``int(abs(...))`` of the one-step ARIMA(4, 1, 4)
          forecast fitted on ``train``; when the ARIMA fit or forecast fails,
          a linear regression over the positions ``0..len(train)-1`` is
          extrapolated to position ``len(train)`` instead
        * ``NAIVE``  -- the previous window's ``Actual`` (``0`` for the first
          window)

        The rows of each project are written to ``<root>/results/<proj>.csv``.

        Returns:
            dict: project name -> results ``DataFrame`` indexed by ``Date``.
        """
        data_handler = DataHandler(data_path=root.joinpath('data'))
        files = data_handler.get_data()
        col_name = ['Date', 'Actual', 'ARIMA', 'NAIVE']
        p, d, q = 4, 1, 4
        all_results = dict()

        for proj, data in files.items():
            results = []
            actual = 0
            for train, test in self.moving_window(data, frame=24):
                # Shift the actuals exactly once per window, outside the
                # try-block: a failure inside the ARIMA branch must not make
                # the fallback shift them a second time (that would turn the
                # naive forecast into the current actual).
                prev_actual = actual
                actual = test.values.ravel()[0]
                forecast_naive = prev_actual
                date = test.index.strftime("%Y-%m-%d").values[0]

                # NOTE: detrending of non-stationary windows
                # (self.is_stationary / self.detrend_series) was disabled in
                # the original code and is intentionally not applied here.
                try:
                    arima = ARIMA(train, order=(p, d, q), freq='W-MON')
                    arima_fit = arima.fit(disp=0)
                    forecast_arima = int(abs(arima_fit.forecast()[0]))
                except Exception:
                    # Best-effort fallback; ``Exception`` (not a bare
                    # ``except``) so KeyboardInterrupt/SystemExit propagate.
                    X = np.arange(len(train.values) + 1)
                    X = np.reshape(X, (len(X), 1))
                    y = train.values
                    model = LinearRegression()
                    # Fit on the observed positions, predict the next one.
                    model.fit(X[:-1], y)
                    forecast_arima = int(
                        abs(model.predict(X[-1].reshape(1, -1))[0]))

                results.append(
                    [date, actual, forecast_arima, forecast_naive])

            results = pd.DataFrame(results, columns=col_name).set_index('Date')
            results.to_csv(root.joinpath('results', proj + ".csv"))
            # Previously the returned dict was never filled in.
            all_results[proj] = results

        return all_results
    root = root.parent

# ``sys.path`` holds strings and the import system ignores entries of any
# other type, so compare and append the string form of ``root``.
if str(root) not in sys.path:
    sys.path.append(str(root))

from metrics.abcd import ABCD
from data.data_handler import DataHandler
from prediction.model import PredictionModel

import warnings

warnings.filterwarnings("ignore")

if __name__ == "__main__":
    # Script entry point: load one dataset, reduce it with PCA and start
    # plotting the transformed data.
    dh = DataHandler()
    data = dh.get_data(top_k=1)
    # Rebind ``data`` to the last value of the returned mapping (the loop
    # iterates the original mapping's items, so rebinding is safe here).
    for _, val in data.items():
        data = val

    # X: every column except the last; y: the last column.
    X = data[data.columns[:-1]]
    y = data[data.columns[-1]]
    # Disabled alternative: L1-penalised LinearSVC feature selection.
    # lsvc = LinearSVC(C=0.01, penalty="l1", dual=False).fit(X, y)
    # model = SelectFromModel(lsvc, prefit=True)
    # X = model.transform(X)
    # Fit PCA with 3 components on X and replace X by its transformed form.
    pca = PCA(n_components=3)
    pca.fit(X)
    X = pca.transform(X)
    colors = ['navy', 'darkorange']

    # One figure per (transformed data, title) pair; only PCA is listed.
    for X_transformed, title in [(X, "PCA")]:
        plt.figure(figsize=(8, 8))
示例#4
0
    root = root.parent

# ``sys.path`` holds strings and the import system ignores entries of any
# other type, so compare and append the string form of ``root``.
if str(root) not in sys.path:
    sys.path.append(str(root))

from metrics.abcd import ABCD
from data.data_handler import DataHandler
from prediction.model import PredictionModel

import warnings
warnings.filterwarnings("ignore")

if __name__ == "__main__":
    # Script entry point: set up the data handler, the prediction model and
    # the table that the results are reported in.
    dh = DataHandler()
    mdl = PredictionModel()
    data = dh.get_data(top_k=25)

    # Create a table that can be pretty-printed
    # -----------------------------------------
    # Column names carry leading spaces (presumably to pad them to a common
    # width -- confirm); the same padded strings are the keys used below.
    results = PrettyTable()
    results.field_names = ["Project", "    G", "   Pd", "   Pf", "   F1",
                           " Prec", "  IFA", "PCI20"]

    # Align data
    # ----------
    # "Project" is left-aligned, the metric columns right-aligned.  No
    # alignment is set for " Prec" or "PCI20" in the lines visible here.
    results.align["Project"] = "l"
    results.align["    G"] = "r"
    results.align["   Pd"] = "r"
    results.align["   Pf"] = "r"
    results.align["   F1"] = "r"
    results.align["  IFA"] = "r"
示例#5
0
from pathlib import Path

# Absolute path of the ``src`` directory: the part of the current working
# directory before the first occurrence of "src", joined with "src".  When
# the working directory does not contain "src", this is ``<cwd>/src``.
root = Path(os.path.abspath(os.path.join(os.getcwd().split("src")[0], 'src')))

# ``sys.path`` holds strings, so a ``Path`` never compares equal to an entry;
# test the string form to avoid appending a duplicate on every execution.
if str(root) not in sys.path:
    sys.path.append(str(root))

from metrics.abcd import ABCD
from data.data_handler import DataHandler
from prediction.model import PredictionModel

if __name__ == "__main__":
    dh = DataHandler()
    mdl = PredictionModel()
    data = dh.get_data()

    "Create a Table than can pretty printed"
    results = PrettyTable()
    results.field_names = ["Train", "Test ", "   Pd", "   Pf", "   F1"]

    "Align Data"
    results.align["Train"] = "l"
    results.align["Test "] = "l"
    results.align["   Pd"] = "r"
    results.align["   Pf"] = "r"
    results.align["   F1"] = "r"

    for proj, dataset in data.items():
        dataset_keys = sorted(dataset.keys())
        for trn, tst in zip(dataset_keys[:-1], dataset_keys[1:]):