示例#1
0
from preprocessing.reader import DatasetReader
from utils.fileprovider import FileProvider

# Make the root logger emit records of level INFO and above.
logging.getLogger().setLevel(logging.INFO)

# Script entry point: parses --workdir, opens the file at ``files.evalita``
# and records every user id found in it. The listing visible here stops
# inside the reading loop, so later processing is not described.
if __name__ == '__main__':
    """##### Parameter parsing"""

    parser = argparse.ArgumentParser(
        description=
        'A baseline based on returning the most common emoji given the user')
    # The working directory is the only option and it is mandatory.
    parser.add_argument('--workdir', required=True, help='Work path')

    args = parser.parse_args()
    # NOTE(review): FileProvider presumably maps the work directory to the
    # dataset paths (e.g. ``files.evalita``) -- confirm in utils.fileprovider.
    files = FileProvider(args.workdir)

    # Accumulators, all empty at this point; only ``users`` is filled by the
    # part of the loop that is visible here.
    Y = []
    X = []
    users = {}
    dictionary = {}

    # Each line of the file is parsed as one JSON object from which the
    # "uid" and "label" keys are read.
    with open(files.evalita, 'r', encoding="utf-8") as reader:
        for line in reader:
            line = line.rstrip()
            sample = json.loads(line)
            uid = sample["uid"]
            label = sample["label"]

            # First occurrence of this user: start with an empty dict.
            if uid not in users:
                users[uid] = {}
示例#2
0
                        choices=["train", "userdata"],
                        help="Use user history to assist prediction")
    # The help text previously duplicated the description of another option
    # (copy-paste); it now describes this flag.
    parser.add_argument("--n-folds",
                        type=int,
                        default=10,
                        help="Number of folds")
    parser.add_argument("--gpu",
                        type=int,
                        default=0,
                        help="GPU ID to be used [0, 1, -1]")

    args = parser.parse_args()

    # Expose only the requested GPU to CUDA.
    # NOTE(review): presumably this has to run before the deep-learning
    # framework initialises CUDA -- confirm against the import order.
    os.environ["CUDA_VISIBLE_DEVICES"] = str(args.gpu)

    files = FileProvider(args.workdir)

    logging.info("Starting training with parameters: {0}".format(vars(args)))

    # Explicit check instead of ``assert``: assertions are removed when Python
    # runs with -O, which would silently skip this validation.
    if not path.exists(files.evalita):
        raise FileNotFoundError("Unable to find {}".format(files.evalita))

    # Hold out 10% of the training file as a local test split; the fixed seed
    # is passed to ``split`` so the same seed is used on every run.
    raw_train = EvalitaDatasetReader(files.evalita)
    random_state = 42
    raw_train, raw_test = raw_train.split(test_size=0.1,
                                          random_state=random_state)
    raw_real_test = EvalitaDatasetReader(files.evalita_real_test)

    logging.info("Populating user history")
    # Stays None unless the branch that follows assigns it.
    user_data = None
    if args.use_history: