Пример #1
0
def day_offset_sampler(window: int = 1):
    """Group observations by day and pair each x with a y-density over the next days.

    Assuming observations from ``dataset_sampler`` are (x, y) pairs whose x
    begins with a date field (x[0]), this generator groups observations by
    day and, for each completed day, yields its x's paired with the mean of
    the y's over the following ``window`` day(s).

    Args:
        window (int): how many subsequent days' y-values are averaged into
            the density.

    Yields:
        list: [(x1, expected), (x2, expected), ...] where all x_i belong to
        the same day and ``expected`` is ``np.mean`` (axis=0) of the y's of
        the next ``window`` day(s).
    """
    # Deque of day-groups; each entry is a list of observations sharing a date.
    buffer = deque()

    sampler = dataset_sampler(x_format='OneHot', y_format='Ordinal')

    def add_day(day):
        # indexed: [newest day][first observation][first element of train pair][date index]
        # Extend the newest group when the date matches, otherwise open a new group.
        if len(buffer) and buffer[-1][0][0][0] == day[0][0]:
            buffer[-1].append(day)
        else:
            buffer.append([day])

    # Prime the buffer until window + 2 day-groups exist, so there is a
    # complete "current" day plus a full lookahead window before yielding.
    while len(buffer) <= window + 1:
        add_day(next(sampler))

    for observation in sampler:
        add_day(observation)

        if len(buffer) > window + 1:
            current_day = buffer.popleft()
            temp = buffer.pop(
            )  # the last day only has one element outside the window
            # Mean over every y in the remaining (complete) window days.
            expected = np.mean([j[1] for i in buffer for j in i], axis=0)
            buffer.append(temp)
            yield [(record[0], expected) for record in current_day]
Пример #2
0
def run_simple():
    """Train and evaluate the simple torch model on the ordinal dataset."""
    from models.torch_simple.train import train_simple, test_simple

    # A factory (not a live generator) so train/test each get a fresh stream.
    # PEP 8 (E731): use def rather than assigning a lambda to a name.
    def sampler():
        return dataset_sampler(x_format='OneHot', y_format='Ordinal')

    params = {'input_size': 25, 'output_size': 4}
    train_simple(sampler, params)
    evaluate(*test_simple(sampler, params))
Пример #3
0
def run_ann():
    """Train and evaluate the torch ANN model on the ordinal dataset."""
    from models.torch_ann.train import train_ann, test_ann

    # A factory (not a live generator) so train/test each get a fresh stream.
    # PEP 8 (E731): use def rather than assigning a lambda to a name.
    def sampler():
        return dataset_sampler(x_format='OneHot', y_format='Ordinal')

    params = {'input_size': 25, 'output_size': 4, 'layer_sizes': (100, 20)}
    train_ann(sampler, params)
    evaluate(*test_ann(sampler, params))
Пример #4
0
def day_sampler(sampler=None):
    """Group (x, y) observations into per-day lists.

    Assuming each observation is an (x, y) pair whose x begins with a date
    field (observation[0][0]), yield each run of consecutive observations
    sharing the same date as one list.

    Args:
        sampler (iterable | None): source of observations. Defaults to
            ``dataset_sampler(x_format='OneHot', y_format='Ordinal')``,
            preserving the original call signature ``day_sampler()``.

    Yields:
        list: all observations of one day, in arrival order.
    """
    if sampler is None:
        sampler = dataset_sampler(x_format='OneHot', y_format='Ordinal')
    else:
        sampler = iter(sampler)

    try:
        buffer = [next(sampler)]
    except StopIteration:
        # Empty source: yield nothing instead of raising (PEP 479 would turn
        # the escaping StopIteration into a RuntimeError).
        return

    for observation in sampler:
        if buffer[0][0][0] != observation[0][0]:
            yield buffer
            buffer = [observation]
        else:
            # Bug fix: the append used to run unconditionally, duplicating
            # the first observation of every day after the first.
            buffer.append(observation)
    yield buffer
Пример #5
0
def run_lstm():
    """Train and evaluate the torch LSTM model on the ordinal dataset."""
    from models.torch_lstm.train import train_lstm, test_lstm

    # A factory (not a live generator) so train/test each get a fresh stream.
    # PEP 8 (E731): use def rather than assigning a lambda to a name.
    def sampler():
        return dataset_sampler(x_format='OneHot', y_format='Ordinal')

    params = {
        'input_size': 25,
        'output_size': 4,
        'lstm_hidden_dim': 20,
        'lstm_layers': 2,
        'batch_size': 1
    }
    train_lstm(sampler, params)
    evaluate(*test_lstm(sampler, params))
Пример #6
0
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import BaggingClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import GradientBoostingClassifier

model_specifications = [{
    "name":
    "Decision Tree",
    "class":
    DecisionTreeClassifier,
    "datasource":
    lambda: dataset_sampler(x_format='OneHot', y_format='Ordinal'),
    "hyperparameters": {
        "criterion": ["gini", "entropy"],
        "splitter": ["best", "random"],
        "min_samples_split": [2, 3],
        "min_samples_leaf": [1, 5]
    }
}, {
    "name":
    "Neural Network PCA 10pc",
    "class":
    MLPClassifier,
    "datasource":
    lambda: dataset_sampler(
        x_format='OneHot', y_format='Ordinal', components=10),
    "hyperparameters": {
Пример #7
0
def run_keras():
    """Train and evaluate the keras ANN model on the one-hot dataset."""
    from models.keras_ann.network import train_keras, test_keras

    # A factory (not a live generator) so train/test each get a fresh stream.
    # PEP 8 (E731): use def rather than assigning a lambda to a name.
    def sampler():
        return dataset_sampler(x_format='OneHot', y_format='OneHot')

    trainparams = {'epochs': 200, 'batch_size': 10}
    train_keras(sampler, trainparams)
    test_keras(sampler)