Exemplo n.º 1
0
def read_data_sets_cf(user=None,
                      need_labels=False,
                      train_size=80,
                      validation_size=10,
                      test_size=10):
    """
    Build the train / validation / test data sets for ``user``.

    1. Fetch the events of ``user`` with ``CalendarDBClient.list_all_events``
    2. Fetch the ``Training`` objects filtered by ``user_id=user``
    3. Call ``parse_dataset_cf`` to obtain ``data`` and ``labels``
    4. Split them into three consecutive, non-overlapping slices

    ``train_size``, ``validation_size`` and ``test_size`` are percentages
    and must add up to 100. The train and validation slices are rounded
    down to whole items; the test slice receives whatever remains, so
    ``test_size`` itself is only used by the sanity check.

    When ``need_labels`` is false every ``DataSet`` receives an empty list
    in place of its labels.

    Returns the tuple ``(data_sets, len(users))``.
    """
    assert (train_size + validation_size + test_size) == 100
    # Get the historical meetings of the user
    db_client = CalendarDBClient()
    events = db_client.list_all_events(user)
    # Get the training objects saved using the system
    training_objs = Training.objects.filter(user_id=user)
    # Obtain a representation of the data
    data, labels = parse_dataset_cf(events, training_objs)

    total = len(data)
    # Floor division keeps the bounds integral: slice indices must be
    # integers, and true division would produce floats.
    t_max = total * train_size // 100
    # The validation slice starts where the training slice ends, so its
    # upper bound is offset by t_max rather than measured from zero.
    v_max = t_max + total * validation_size // 100

    data_sets = DataSets()
    data_sets.train = DataSet(data[:t_max],
                              labels[:t_max] if need_labels else [],
                              need_labels)
    data_sets.validation = DataSet(data[t_max:v_max],
                                   labels[t_max:v_max] if need_labels else [],
                                   need_labels)
    data_sets.test = DataSet(data[v_max:],
                             labels[v_max:] if need_labels else [],
                             need_labels)
    # NOTE(review): `users` is not assigned anywhere in this function; unless
    # it is a module-level name this line raises NameError. Confirm which
    # count is meant to be returned here.
    return data_sets, len(users)
Exemplo n.º 2
0
def read_data_sets(user,
                   need_labels=False,
                   train_size=100,
                   validation_size=0,
                   test_size=0):
    """
    Build the train / validation / test data sets for ``user``.

    1. Fetch the events of ``user`` with ``CalendarDBClient.list_all_events``
    2. Fetch the ``Training`` objects filtered by ``user_id=user``
    3. Call ``parse_dataset`` to obtain ``data`` and ``labels``
    4. Split them into three consecutive, non-overlapping slices

    ``train_size``, ``validation_size`` and ``test_size`` are percentages
    and must add up to 100. The train and validation slices are rounded
    down to whole items; the test slice receives whatever remains, so
    ``test_size`` itself is only used by the sanity check. With the
    default 100/0/0 split all items go to the training set.

    When ``need_labels`` is false every ``DataSet`` receives an empty list
    in place of its labels.

    Returns the populated ``DataSets`` object.
    """
    assert (train_size + validation_size + test_size) == 100
    # Get the historical meetings of the user
    db_client = CalendarDBClient()
    events = db_client.list_all_events(user)
    # Get the training objects saved using the system
    training_objs = Training.objects.filter(user_id=user)
    # Obtain a representation of the data
    data, labels = parse_dataset(events, training_objs, need_labels)

    total = len(data)
    # Floor division keeps the bounds integral: slice indices must be
    # integers, and true division would produce floats.
    t_max = total * train_size // 100
    # The validation slice starts where the training slice ends, so its
    # upper bound is offset by t_max rather than measured from zero.
    v_max = t_max + total * validation_size // 100

    data_sets = DataSets()
    data_sets.train = DataSet(data[:t_max],
                              labels[:t_max] if need_labels else [],
                              need_labels)
    data_sets.validation = DataSet(data[t_max:v_max],
                                   labels[t_max:v_max] if need_labels else [],
                                   need_labels)
    data_sets.test = DataSet(data[v_max:],
                             labels[v_max:] if need_labels else [],
                             need_labels)
    return data_sets