from preprocessing.reader import DatasetReader
from utils.fileprovider import FileProvider

# Set the root logger's level to INFO.
logging.getLogger().setLevel(logging.INFO)

if __name__ == '__main__':
    # The bare string below is a no-op expression statement used as a
    # section marker; it is not a docstring.
    """##### Parameter parsing"""
    parser = argparse.ArgumentParser(
        description=
        'A baseline based on returning the most common emoji given the user')
    # --workdir is the only option and it is mandatory.
    parser.add_argument('--workdir', required=True, help='Work path')
    args = parser.parse_args()

    # FileProvider is project-local; presumably it resolves dataset paths
    # relative to the work directory -- confirm against utils.fileprovider.
    files = FileProvider(args.workdir)

    # Accumulators populated further down in the script.
    Y = []
    X = []
    users = {}  # uid -> per-user dict, created on first sight of the uid
    dictionary = {}

    # The file is read line by line and every line is parsed as its own JSON
    # document; each document must provide the keys "uid" and "label".
    with open(files.evalita, 'r', encoding="utf-8") as reader:
        for line in reader:
            # Drop the trailing newline / whitespace before parsing.
            line = line.rstrip()
            sample = json.loads(line)
            uid = sample["uid"]
            label = sample["label"]
            # First occurrence of this user: start with an empty dict.
            if uid not in users:
                users[uid] = {}
choices=["train", "userdata"], help="Use user history to assist prediction") parser.add_argument("--n-folds", type=int, default=10, help="Use user history to assist prediction") parser.add_argument("--gpu", type=int, default=0, help="GPU ID to be used [0, 1, -1]") args = parser.parse_args() os.environ["CUDA_VISIBLE_DEVICES"] = "{}".format(args.gpu) files = FileProvider(args.workdir) logging.info("Starting training with parameters: {0}".format(vars(args))) assert path.exists(files.evalita), "Unable to find {}".format( files.evalita) raw_train = EvalitaDatasetReader(files.evalita) random_state = 42 raw_train, raw_test = raw_train.split(test_size=0.1, random_state=random_state) raw_real_test = EvalitaDatasetReader(files.evalita_real_test) logging.info("Populating user history") user_data = None if args.use_history: