Exemplo n.º 1
0
def construct_input_fns(dataset, data_dir, batch_size=16, repeat=1):
  """Build the train/eval input functions and the model column function.

  When `_check_buffers(data_dir, dataset)` is truthy, no dataframe is loaded
  and `None` is handed to `_df_to_input_fn` for both splits. Otherwise the
  joint dataframe is read via `movielens.csv_to_joint_dataframe`, genres are
  integerized, the title column is dropped, and the rows are split 80/20
  with a fixed seed (`random_state=0`).

  Args:
    dataset: Dataset identifier; also used as the key into
      `movielens.NUM_RATINGS` for the train `shuffle` argument.
    data_dir: Directory forwarded to the buffer check, the dataframe loader
      and `_df_to_input_fn`.
    batch_size: Forwarded unchanged to `_df_to_input_fn`.
    repeat: Forwarded unchanged to `_df_to_input_fn`.

  Returns:
    A `(train_input_fn, eval_input_fn, model_column_fn)` tuple, where
    `model_column_fn` is `build_model_columns` with `dataset` pre-bound.
  """
  train_df = eval_df = None
  if not _check_buffers(data_dir, dataset):
    raw = movielens.csv_to_joint_dataframe(dataset=dataset, data_dir=data_dir)
    features = movielens.integerize_genres(dataframe=raw).drop(
        columns=[movielens.TITLE_COLUMN])

    # The fixed seed keeps the 80/20 split reproducible; the eval split is
    # whatever rows were not sampled into the train split.
    train_rows = features.sample(frac=0.8, random_state=0)
    eval_rows = features.drop(train_rows.index)

    train_df = train_rows.reset_index(drop=True)
    eval_df = eval_rows.reset_index(drop=True)

  # Keyword arguments common to both splits.
  shared_kwargs = dict(
      dataset=dataset, data_dir=data_dir, batch_size=batch_size, repeat=repeat)

  train_input_fn = _df_to_input_fn(
      df=train_df, name="train", shuffle=movielens.NUM_RATINGS[dataset],
      **shared_kwargs)
  eval_input_fn = _df_to_input_fn(
      df=eval_df, name="eval", shuffle=None, **shared_kwargs)
  model_column_fn = functools.partial(build_model_columns, dataset=dataset)

  # NOTE(review): the train input function is invoked once here and its
  # result discarded; presumably kept for a side effect or as a sanity
  # check -- confirm before removing.
  train_input_fn()
  return train_input_fn, eval_input_fn, model_column_fn
Exemplo n.º 2
0
def construct_input_fns(dataset, data_dir, batch_size=16, repeat=1):
    """Create the train and eval input functions and the column function.

    If `_check_buffers(data_dir, dataset)` is truthy, both dataframes stay
    `None` and are passed as such to `_df_to_input_fn`. Otherwise the joint
    dataframe is loaded with `movielens.csv_to_joint_dataframe`, genres are
    integerized, the title column is removed, and the rows are split 80/20
    using a fixed seed (`random_state=0`).

    Args:
        dataset: Dataset identifier; also the key used to look up
            `movielens.NUM_RATINGS` for the train `shuffle` argument.
        data_dir: Directory passed to the buffer check, the dataframe
            loader and `_df_to_input_fn`.
        batch_size: Passed through to `_df_to_input_fn`.
        repeat: Passed through to `_df_to_input_fn`.

    Returns:
        Tuple `(train_input_fn, eval_input_fn, model_column_fn)`;
        `model_column_fn` is `build_model_columns` with `dataset` bound.
    """
    buffers_present = _check_buffers(data_dir, dataset)
    if buffers_present:
        train_df = None
        eval_df = None
    else:
        frame = movielens.csv_to_joint_dataframe(
            dataset=dataset,
            data_dir=data_dir,
        )
        frame = movielens.integerize_genres(dataframe=frame)
        frame = frame.drop(columns=[movielens.TITLE_COLUMN])

        # Seeded sample gives a repeatable 80% train split; the remaining
        # rows (by index) form the eval split.
        sampled = frame.sample(frac=0.8, random_state=0)
        held_out = frame.drop(sampled.index)

        train_df = sampled.reset_index(drop=True)
        eval_df = held_out.reset_index(drop=True)

    # Bind the arguments both splits share so each call only states what
    # differs (dataframe, split name, shuffle setting).
    make_input_fn = functools.partial(
        _df_to_input_fn,
        dataset=dataset,
        data_dir=data_dir,
        batch_size=batch_size,
        repeat=repeat,
    )

    train_input_fn = make_input_fn(
        df=train_df,
        name="train",
        shuffle=movielens.NUM_RATINGS[dataset],
    )
    eval_input_fn = make_input_fn(df=eval_df, name="eval", shuffle=None)
    model_column_fn = functools.partial(build_model_columns, dataset=dataset)

    # NOTE(review): the train input function is called once and the result
    # is thrown away; presumably intentional (side effect or smoke check)
    # -- confirm before removing.
    train_input_fn()
    return train_input_fn, eval_input_fn, model_column_fn