async def setup_learner():
    await download_file(pretrained_link, path / modelname)
    # await download_file(vocablink, path / vocab)
    # await download_file(sptokenlink, path / sptoken)
    # await download_file(tokenlink, path / token)
    # await download_file(configlink, path / config)
    # await download_file(l2link, path / l2)
    try:
        data_bunch = BertDataBunch(path, path,
                                   tokenizer=path,
                                   train_file=None,
                                   val_file=None,
                                   label_file='l2.csv',
                                   batch_size_per_gpu=120,
                                   max_seq_length=40,
                                   multi_gpu=False,
                                   multi_label=False,
                                   model_type='bert')
        learner = BertLearner.from_pretrained_model(data_bunch,
                                                    pretrained_path=path,
                                                    metrics=[],
                                                    device='cpu',
                                                    logger=None,
                                                    output_dir=None,
                                                    is_fp16=False)
        return learner
    except RuntimeError as e:
        if len(e.args) > 0 and 'CPU-only machine' in e.args[0]:
            print(e)
            message = ("\n\nThis model was trained with an old version of fastai "
                       "and will not work in a CPU environment.\n\nPlease update "
                       "the fastai library in your training environment and export "
                       "your model again.\n\nSee instructions for 'Returning to "
                       "work' at https://course.fast.ai.")
            raise RuntimeError(message)
        else:
            raise

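# A minimal usage sketch (not from the original): run the async setup and query
# the learner. The asyncio wiring and the sample text are assumptions; fast-bert's
# predict_batch returns one list of (label, probability) pairs per input text.
import asyncio

learner = asyncio.run(setup_learner())
predictions = learner.predict_batch(["sample input text"])
print(predictions[0])
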
def train(path_to_directory, model):
    DATA_PATH = BASE / path_to_directory
    OUTPUT_DIR = DATA_PATH / 'output' / model
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

    if model == "biobert":
        tokenizer = BertTokenizer.from_pretrained(BIOBERT_PATH, do_lower_case=True)
        pretrained_path = BIOBERT_PATH
    elif model == "bert":
        tokenizer = "bert-base-uncased"
        pretrained_path = "bert-base-uncased"
    else:
        print("Model parameter must be either 'bert' or 'biobert'")
        return

    databunch = BertDataBunch(DATA_PATH, LABEL_PATH,
                              tokenizer=tokenizer,
                              train_file='train.csv',
                              val_file='val.csv',
                              text_col='text',
                              label_file='labels.csv',
                              label_col=labels,
                              batch_size_per_gpu=10,
                              max_seq_length=512,
                              multi_gpu=multi_gpu,
                              multi_label=True,
                              model_type='bert',
                              clear_cache=True)

    learner = BertLearner.from_pretrained_model(
        databunch,
        pretrained_path=pretrained_path,
        metrics=metrics,
        device=device_cuda,
        logger=logger,
        output_dir=OUTPUT_DIR,
        finetuned_wgts_path=None,
        warmup_steps=500,
        multi_gpu=multi_gpu,
        is_fp16=True,
        multi_label=True,
        logging_steps=20)

    if path_to_directory.split('/', 1)[1] in ['original', 'synthetic']:
        epochs = 20
    else:
        epochs = 10

    learner.fit(
        epochs=epochs,
        lr=6e-5,
        validate=True,  # Evaluate the model after each epoch
        schedule_type="warmup_cosine")

    learner.save_model()
    return

def __init__(self):
    databunch = BertDataBunch('train', 'train',
                              tokenizer='distilbert-base-uncased',
                              train_file='train.csv',
                              val_file='val.csv',
                              label_file='labels.csv',
                              text_col='text',
                              label_col='label',
                              batch_size_per_gpu=8,
                              max_seq_length=512,
                              multi_gpu=False,
                              multi_label=False,
                              model_type='distilbert')

    device_cuda = torch.device("cuda")
    metrics = [{'name': 'accuracy', 'function': accuracy}]
    logger = logging.getLogger()

    self.learner = BertLearner.from_pretrained_model(
        databunch,
        pretrained_path='distilbert-base-uncased',
        metrics=metrics,
        device=device_cuda,
        output_dir='models',
        warmup_steps=100,
        logger=logger,
        multi_gpu=False,
        is_fp16=False,  # install apex to use fp16 training
        multi_label=False,
        logging_steps=0)

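# Hedged companion method (not from the original): a thin prediction wrapper
# around the learner built in __init__. The method name and argument are
# assumptions; predict_batch is fast-bert's batch-inference call and returns
# one list of (label, probability) pairs per input text.
def predict(self, text):
    return self.learner.predict_batch([text])[0]
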
def train(self):
    databunch = BertDataBunch(
        self._args.data_dir,
        self._args.data_dir,
        tokenizer=self._model_path,
        train_file=self._args.train_file,
        val_file=self._args.eval_file,
        label_file=self._args.labels_file,
        text_col=self._args.text_col,
        label_col=self._args.label_col,
        batch_size_per_gpu=self._args.batch_size_per_gpu,
        max_seq_length=self._args.max_seq_length,
        multi_gpu=True,
        multi_label=self._args.multi_label,
        model_type='bert')

    device = torch.device('cuda')

    learner = BertLearner.from_pretrained_model(
        databunch,
        self._model_path,
        metrics=self.metrics(),
        device=device,
        logger=log,
        output_dir=self._output_dir,
        finetuned_wgts_path=None,
        warmup_steps=5,
        multi_gpu=True,
        is_fp16=self._args.fp16,
        multi_label=self._args.multi_label,
        logging_steps=0)

    learner.fit(self._args.num_train_epochs, self._args.learning_rate, validate=True)
    learner.validate()

def train(args):
    if args.is_onepanel:
        args.out_dir = os.path.join("/onepanel/output/", args.out_dir)
    if not os.path.exists(args.out_dir):
        os.mkdir(args.out_dir)

    logger = logging.getLogger()
    labels = ["anger", "anticipation", "disgust", "fear", "joy", "love",
              "optimism", "pessimism", "sadness", "surprise", "trust", "neutral"]

    databunch = BertDataBunch(".", ".",
                              tokenizer=args.pretrained_model,
                              train_file='nlp_train.csv',
                              label_file='labels.csv',
                              val_file="nlp_valid.csv",
                              text_col='text',
                              label_col=labels,
                              batch_size_per_gpu=args.batch_size,
                              max_seq_length=512,
                              multi_gpu=False,
                              multi_label=True,
                              model_type='bert')

    device_cuda = torch.device("cuda")
    metrics = [{'name': 'accuracy', 'function': accuracy}]

    learner = BertLearner.from_pretrained_model(
        databunch,
        pretrained_path=args.pretrained_model,
        metrics=metrics,
        device=device_cuda,
        logger=logger,
        output_dir=args.out_dir,
        finetuned_wgts_path=None,
        warmup_steps=200,
        multi_gpu=False,
        is_fp16=False,
        multi_label=True,
        logging_steps=10)

    learner.fit(epochs=args.epochs,
                lr=2e-3,
                schedule_type="warmup_cosine_hard_restarts",
                optimizer_type="lamb")  # validate=True

    learner.save_model()

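# Hedged inference sketch (not from the original): reload the weights that
# save_model() wrote above with fast-bert's BertClassificationPredictor.
# "model_out" is fast-bert's default save subdirectory; the function name,
# out_dir argument, and example text are placeholders.
from fast_bert.prediction import BertClassificationPredictor

def predict_emotions(out_dir):
    predictor = BertClassificationPredictor(
        model_path=os.path.join(out_dir, "model_out"),  # assumed save location
        label_path=".",  # directory holding labels.csv, as in training
        multi_label=True,
        model_type='bert',
        do_lower_case=True)
    # Returns (label, probability) pairs for each of the twelve emotion labels
    return predictor.predict("I can't wait for the weekend!")
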
                          model_type='bert')

device_cuda = torch.device('cuda')

metrics = []
metrics.append({'name': 'accuracy', 'function': accuracy})
metrics.append({'name': 'F1_macro', 'function': F1_macro})
metrics.append({'name': 'F1_micro', 'function': F1_micro})

learner = BertLearner.from_pretrained_model(
    databunch,
    pretrained_path='bert-base-german-dbmdz-cased',
    metrics=metrics,
    device=device_cuda,
    logger=logger,
    output_dir='./output/',
    finetuned_wgts_path=None,
    warmup_steps=500,
    multi_gpu=False,
    is_fp16=True,
    multi_label=False,
    logging_steps=500)

learner.fit(
    epochs=3,
    lr=6e-4,
    validate=True,  # Evaluate the model after each epoch
    schedule_type="warmup_cosine",
    optimizer_type="lamb")

learner.validate()

                          multi_label=False,
                          model_type='bert')

# Choose the metrics used for the error function in training
metrics = []
metrics.append({'name': 'accuracy', 'function': accuracy})

import logging
logger = logging.getLogger()

OUTPUT_DIR = "/content/drive/My Drive/Colab Notebooks/output"

# The learner contains the logic for training loop, validation loop,
# optimiser strategies and key metrics calculation
learner = BertLearner.from_pretrained_model(databunch, bert_model,
                                            metrics=metrics,
                                            device=device,
                                            logger=logger,
                                            output_dir=OUTPUT_DIR,
                                            finetuned_wgts_path=None,
                                            is_fp16=args['fp16'],
                                            loss_scale=args['loss_scale'],
                                            multi_gpu=multi_gpu,
                                            multi_label=False)

# Train the model
learner.fit(6, lr=args['learning_rate'], schedule_type="warmup_cosine")

# Save the model into a file
learner.save_and_reload(MODEL_PATH, "trained_model_name")

                          train_file='train.csv',
                          val_file='valid.csv',  # val.csv
                          label_file='labels.csv',
                          text_col='content',
                          label_col=label_cols,
                          batch_size_per_gpu=2,
                          max_seq_length=512,
                          multi_gpu=True,
                          multi_label=True,
                          model_type='bert')

from fast_bert.metrics import accuracy_multilabel
from fast_bert.learner_cls import BertLearner

metrics = [{'name': 'accuracy', 'function': accuracy_multilabel}]

learner = BertLearner.from_pretrained_model(
    databunch,
    pretrained_path='bert-base-uncased',
    metrics=metrics,
    device=device_cuda,
    logger=logger,
    output_dir=r"./output_dir",
    is_fp16=True,
    multi_gpu=True,
    multi_label=True)

learner.fit(6,
            lr=6e-5,
            validate=True,
            schedule_type="warmup_linear",
            optimizer_type="lamb")

learner.save_model()

def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--data_dir", default=None, type=str, required=True,
                        help="Path specifying the location of the dataset")
    parser.add_argument("--label_dir", default=None, type=str, required=True,
                        help="Path specifying the location of the labels.csv file")
    parser.add_argument("--output_dir", default=None, required=True, type=str,
                        help="Path specifying the location to save the results")
    parser.add_argument("--text_col", default=None, required=True, type=str,
                        help="The column name of the text")
    parser.add_argument("--batch_size", default=16, required=False, type=int,
                        help="Batch size per GPU")
    parser.add_argument("--max_seq_len", default=320, required=False, type=int,
                        help="Maximum length of the token sequence to input to BERT")
    # Note: argparse's type=bool treats any non-empty string (including
    # "False") as True; action='store_true' is the usual alternative.
    parser.add_argument("--multi_gpu", default=False, required=False, type=bool,
                        help="Whether to use multi-gpu for training")
    parser.add_argument("--epochs", default=6, type=int, required=False,
                        help="Number of epochs to train")
    parser.add_argument("--lr", default=6e-5, type=float, required=False,
                        help="Initial learning rate for training")
    parser.add_argument("--save_model", required=False, default=None,
                        help="Whether to save the model or not")
    parser.add_argument("--eval", required=False, type=bool, default=True,
                        help="Whether to run evaluation after each epoch")
    args = parser.parse_args()

    DATA_PATH = args.data_dir
    LABEL_PATH = args.label_dir
    OUTPUT_PATH = args.output_dir
    EPOCHS = args.epochs
    LR = args.lr
    EVAL = args.eval
    TEXT_COL = args.text_col
    BATCH_SIZE = args.batch_size
    MAX_SEQ_LEN = args.max_seq_len
    MULTI_GPU = args.multi_gpu

    labels = pd.read_csv(os.path.join(DATA_PATH, 'labels.csv'), header=None).values
    LABEL_LIST = [val[0] for val in labels]

    databunch = BertDataBunch(DATA_PATH, LABEL_PATH,
                              tokenizer='bert-base-uncased',
                              train_file='m_aspect_train.csv',
                              val_file='m_aspect_test.csv',
                              label_file='labels.csv',
                              text_col=TEXT_COL,
                              label_col=LABEL_LIST,
                              batch_size_per_gpu=BATCH_SIZE,
                              max_seq_length=MAX_SEQ_LEN,
                              multi_gpu=MULTI_GPU,
                              multi_label=True,
                              model_type='bert',
                              no_cache=True)
    # display(databunch.get_dl_from_texts)

    device_cuda = torch.device("cuda")
    metrics = [{'name': 'accuracy', 'function': accuracy_multilabel}]

    learner = BertLearner.from_pretrained_model(
        databunch,
        pretrained_path='bert-base-uncased',
        metrics=metrics,
        device=device_cuda,
        logger=None,
        output_dir=OUTPUT_PATH,
        finetuned_wgts_path=None,
        warmup_steps=500,
        multi_gpu=MULTI_GPU,
        is_fp16=False,
        multi_label=True,
        logging_steps=50)

    global_step, loss = learner.fit(
        epochs=EPOCHS,
        lr=LR,
        validate=EVAL,  # Evaluate the model after each epoch
        schedule_type="warmup_cosine",
        optimizer_type="lamb")
    print("global_step:", global_step, "loss:", loss)

    if args.save_model:
        learner.save_model()

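# Entry-point guard (not in the original snippet). The script name in the
# example command below is a placeholder.
if __name__ == "__main__":
    # Example invocation (illustrative paths):
    #   python train.py --data_dir ./data --label_dir ./data \
    #       --output_dir ./output --text_col text --epochs 6 --lr 6e-5
    main()
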
                          batch_size_per_gpu=64,
                          max_seq_length=50,
                          multi_gpu=False,
                          multi_label=False,
                          model_type='roberta')

logger = logging.getLogger()
device_cuda = torch.device("cuda")
metrics = [{'name': 'accuracy', 'function': accuracy}]

learner = BertLearner.from_pretrained_model(databunch,
                                            pretrained_path='roberta-base',
                                            metrics=metrics,
                                            device=device_cuda,
                                            logger=logger,
                                            output_dir=OUTPUT_DIR,
                                            finetuned_wgts_path=None,
                                            warmup_steps=500,
                                            multi_gpu=False,
                                            is_fp16=False,
                                            multi_label=False,
                                            logging_steps=4000)

# learner.lr_find(start_lr=1e-5, optimizer_type='lamb')

learner.fit(
    epochs=3,
    lr=6e-5,
    validate=True,  # Evaluate the model after each epoch
    schedule_type="warmup_cosine",
    optimizer_type="lamb")

                          max_seq_length=15,
                          multi_gpu=False,
                          multi_label=False,
                          model_type='bert')

OUTPUT_DIR = '../../bert_model/'
logger = logging.getLogger()
device_cuda = torch.device("cuda")
metrics = [{'name': 'accuracy', 'function': accuracy}]

learner = BertLearner.from_pretrained_model(databunch,
                                            pretrained_path=PRETRAINED_PATH,
                                            metrics=metrics,
                                            device=device_cuda,
                                            logger=logger,
                                            output_dir=OUTPUT_DIR,
                                            finetuned_wgts_path=None,
                                            warmup_steps=10000,
                                            multi_gpu=False,
                                            is_fp16=True,
                                            multi_label=False,
                                            logging_steps=0)

for i in range(3):
    try:
        learner.fit(
            epochs=1,
            lr=3e-4,
            validate=True,  # Evaluate the model after each epoch
            schedule_type="warmup_cosine",
            optimizer_type="lamb")
    except RuntimeError as e:
        # The original except clause is truncated in the source; logging and
        # continuing is an assumption to keep the retry loop runnable.
        logger.warning("epoch %d failed: %s", i, e)

import torch
from fast_bert.learner_cls import BertLearner
from fast_bert.metrics import accuracy
import logging

logger = logging.getLogger()
device_cuda = torch.device("cuda")
metrics = [{'name': 'accuracy', 'function': accuracy}]
OUTPUT_DIR = '/hdd/user4/xlnet_classfication3/output'

learner = BertLearner.from_pretrained_model(
    databunch,
    pretrained_path='xlnet-base-cased',
    metrics=metrics,
    device=device_cuda,
    logger=logger,
    output_dir=OUTPUT_DIR,
    finetuned_wgts_path=None,
    # finetuned_wgts_path='/hdd/user4/xlnet_classification3/output/model_out/pytorch_model.bin',
    warmup_steps=500,
    multi_gpu=False,
    is_fp16=False,
    multi_label=False,
    logging_steps=250)

learner.fit(
    epochs=10000,
    lr=6e-4,  # default = 6e-5
    validate=True,  # Evaluate the model after each epoch
    schedule_type="warmup_cosine",
    optimizer_type="lamb")

# learner.save_model()

                          multi_gpu=multi_gpu,
                          multi_label=False,
                          model_type='bert')

metrics = []
metrics.append({'name': 'accuracy', 'function': accuracy})
metrics.append({'name': 'roc_auc', 'function': roc_auc_2})
metrics.append({'name': 'fbeta', 'function': fbeta_2})

learner = BertLearner.from_pretrained_model(
    databunch,
    pretrained_path='bert-base-uncased',
    metrics=metrics,
    device=device,
    logger=logger,
    output_dir=OUTPUT_PATH,
    finetuned_wgts_path=None,
    warmup_steps=500,
    multi_gpu=multi_gpu,
    is_fp16=False,
    multi_label=False,
    logging_steps=-1)

learner.fit(
    epochs=4,
    lr=6e-5,
    validate=True,  # Evaluate the model after each epoch
    schedule_type="warmup_cosine",
    optimizer_type="lamb")

learner.save_model()

metrics.append({'name': 'roc_auc', 'function': roc_auc})
# metrics.append({'name': 'roc_auc_save_to_plot', 'function': roc_auc_save_to_plot})
metrics.append({'name': 'fbeta', 'function': fbeta})
metrics.append({'name': 'accuracy', 'function': accuracy})
metrics.append({'name': 'accuracy_multilabel', 'function': accuracy_multilabel})

learner = BertLearner.from_pretrained_model(
    databunch,
    pretrained_path='/scratch/da2734/twitter/mturk_mar6/output_100/model_out/',
    metrics=metrics,
    device=device,
    logger=logger,
    output_dir=args.output_dir,
    finetuned_wgts_path=FINETUNED_PATH,
    warmup_steps=args.warmup_steps,
    multi_gpu=args.multi_gpu,
    is_fp16=args.fp16,
    multi_label=True,
    logging_steps=0)

print('time taken to load all this stuff:', str(time.time() - start_time), 'seconds')

import time
import pyarrow.parquet as pq
from glob import glob

label_col="label", batch_size_per_gpu=32, max_seq_length=128, multi_gpu=False, multi_label=False, model_type="bert", ) learner = BertLearner.from_pretrained_model( databunch, pretrained_path=pretrained_path, metrics=metrics, device=device_cuda, logger=logger, output_dir=out_path, finetuned_wgts_path=None, warmup_steps=200, multi_gpu=False, is_fp16=True, fp16_opt_level="O2", multi_label=False, logging_steps=100, ) learner.fit( epochs=epochs, lr=lr, validate=True, schedule_type="warmup_cosine", optimizer_type="lamb", )
def train_bert(experiment_parameters, args):
    # logging
    run_start_time = datetime.datetime.today().strftime('%Y-%m-%d_%H-%M-%S')
    logfile = str(experiment_parameters.LOG_PATH / 'log-{}-{}.txt'.format(run_start_time, args["run_text"]))
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s - %(levelname)s - %(name)s - %(message)s',
        datefmt='%m/%d/%Y %H:%M:%S',
        handlers=[
            logging.FileHandler(logfile),
            logging.StreamHandler(sys.stdout)
        ])
    logger = logging.getLogger()

    # cuda
    device = torch.device('cuda')
    if torch.cuda.device_count() > 1:
        args.multi_gpu = True
    else:
        args.multi_gpu = False

    print()
    print('BERT training file: ', args['data_dir'], 'train.csv')

    # create a fast-bert-specific data format
    torch.manual_seed(args.seed)
    databunch = BertDataBunch(args['data_dir'],
                              experiment_parameters.LABEL_PATH,
                              experiment_parameters.tokenizer,
                              train_file='train.csv',
                              val_file=None,  # 'test.csv'
                              test_data='test.csv',
                              text_col="comment_text",
                              label_col=experiment_parameters.LABEL_COLS,
                              batch_size_per_gpu=args['train_batch_size'],
                              max_seq_length=args['max_seq_length'],
                              multi_gpu=args.multi_gpu,
                              multi_label=True,
                              model_type=args.model_type,
                              clear_cache=False)

    metrics = []
    metrics.append({'name': 'accuracy_thresh', 'function': accuracy_thresh})
    metrics.append({'name': 'roc_auc', 'function': roc_auc})
    metrics.append({'name': 'fbeta', 'function': fbeta})

    # create learner object
    learner = BertLearner.from_pretrained_model(databunch, args.model_name,
                                                metrics=metrics,
                                                device=device,
                                                logger=logger,
                                                output_dir=args.output_dir,
                                                finetuned_wgts_path=experiment_parameters.FINETUNED_PATH,
                                                warmup_steps=args.warmup_steps,
                                                multi_gpu=args.multi_gpu,
                                                is_fp16=args.fp16,
                                                multi_label=True,
                                                logging_steps=0)

    # train
    torch.manual_seed(args.seed)
    learner.fit(args.num_train_epochs, args.learning_rate, validate=False)

    # save
    learner.save_model()

    # free memory and exit
    del learner
    return

    multi_gpu=True,
    multi_label=True,
    backend='nccl',
    model_type='bert')

metrics = []
metrics.append({'name': 'accuracy_thresh', 'function': accuracy_thresh})
metrics.append({'name': 'roc_auc', 'function': roc_auc})
metrics.append({'name': 'fbeta', 'function': fbeta})

device_cuda = torch.device("cuda")
logger = logging.getLogger()

learner = BertLearner.from_pretrained_model(
    dataBunch=databunch,
    pretrained_path='./chinese_roberta_wwm_large_ext_pytorch',
    metrics=metrics,
    device=device_cuda,
    logger=logger,
    output_dir='./Data/loan/data/model/keda',
    multi_label=True)

learner.fit(
    epochs=6,
    lr=3e-5,
    validate=False,  # Skip per-epoch evaluation
    schedule_type="warmup_cosine",
    optimizer_type="lamb")

learner.save_model()

text_list = list(pd.read_csv('./Data/loan/new_data/test.csv')['text'].values)
output = learner.predict_batch(text_list)

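# Hedged follow-up (not from the original): persist the batch predictions next
# to the test file. predict_batch is assumed to return, per text, a list of
# (label, probability) pairs; the output path and column layout are assumptions.
pred_df = pd.DataFrame([{label: prob for label, prob in row} for row in output])
pred_df.to_csv('./Data/loan/new_data/test_predictions.csv', index=False)
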
def create_model(column, epoch):
    if not os.path.exists(
            '/scratch/da2734/twitter/jobs/running_on_200Msamples/logs/log_binary_pos_neg_{}/'.format(column)):
        os.makedirs(
            '/scratch/da2734/twitter/jobs/running_on_200Msamples/logs/log_binary_pos_neg_{}/'.format(column))

    LOG_PATH = Path(
        '/scratch/da2734/twitter/jobs/running_on_200Msamples/logs/log_binary_pos_neg_{}/'.format(column))
    print('LOG_PATH', LOG_PATH)

    DATA_PATH = Path('/scratch/da2734/twitter/data/may20_9Klabels/data_binary_pos_neg_balanced/')
    LABEL_PATH = Path('/scratch/da2734/twitter/data/may20_9Klabels/data_binary_pos_neg_balanced/')
    OUTPUT_PATH = Path(
        '/scratch/da2734/twitter/jobs/training_binary/models_may20_9Klabels/output_{}'.format(column))
    FINETUNED_PATH = None

    # Duplicate keys from the original dict ("no_cuda", "seed", "task_name")
    # have been collapsed; for "task_name" the later value ('intent') won
    # in the original literal anyway.
    args = Box({
        "run_text": "100Msamples",
        "train_size": -1,
        "val_size": -1,
        "log_path": LOG_PATH,
        "full_data_dir": DATA_PATH,
        "data_dir": DATA_PATH,
        "task_name": 'intent',
        "no_cuda": False,
        # "bert_model": BERT_PRETRAINED_PATH,
        "output_dir": OUTPUT_PATH,
        "max_seq_length": 512,
        "do_train": True,
        "do_eval": True,
        "do_lower_case": True,
        "train_batch_size": 8,
        "eval_batch_size": 16,
        "learning_rate": 5e-5,
        "num_train_epochs": 100,
        "warmup_proportion": 0.0,
        "local_rank": -1,
        "seed": 42,
        "gradient_accumulation_steps": 1,
        "optimize_on_cpu": False,
        "fp16": False,
        "fp16_opt_level": "O1",
        "weight_decay": 0.0,
        "adam_epsilon": 1e-8,
        "max_grad_norm": 1.0,
        "max_steps": -1,
        "warmup_steps": 500,
        "logging_steps": 50,
        "eval_all_checkpoints": True,
        "overwrite_output_dir": True,
        "overwrite_cache": True,
        "loss_scale": 128,
        "model_name": 'bert-base-uncased',
        "model_type": 'bert'
    })

    import logging
    logfile = str(LOG_PATH / 'log-{}-{}.txt'.format(run_start_time, args["run_text"]))
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s - %(levelname)s - %(name)s - %(message)s',
        datefmt='%m/%d/%Y %H:%M:%S',
        handlers=[
            logging.FileHandler(logfile),
            logging.StreamHandler(sys.stdout)
        ])
    logger = logging.getLogger()
    logger.info(args)

    device = torch.device('cuda')
    if torch.cuda.device_count() > 1:
        args.multi_gpu = True
    else:
        args.multi_gpu = False

    label_cols = ['class']

    databunch = BertDataBunch(
        args['data_dir'],
        LABEL_PATH,
        args.model_name,
        train_file='train_{}.csv'.format(column),
        val_file='val_{}.csv'.format(column),
        label_file='label_{}.csv'.format(column),
        # test_data='test.csv',
        text_col="text",  # name of the column in the train file that contains the tweet text
        label_col=label_cols,
        batch_size_per_gpu=args['train_batch_size'],
        max_seq_length=args['max_seq_length'],
        multi_gpu=args.multi_gpu,
        multi_label=False,
        model_type=args.model_type)

    num_labels = len(databunch.labels)
    print('num_labels', num_labels)
    print('time taken to load all this stuff:', str(time.time() - start_time), 'seconds')

    # metrics defined: https://github.com/kaushaltrivedi/fast-bert/blob/d89e2aa01d948d6d3cdea7ad106bf5792fea7dfa/fast_bert/metrics.py
    metrics = []
    # metrics.append({'name': 'accuracy_thresh', 'function': accuracy_thresh})
    # metrics.append({'name': 'roc_auc', 'function': roc_auc})
    # metrics.append({'name': 'fbeta', 'function': fbeta})
    metrics.append({'name': 'accuracy', 'function': accuracy})
    metrics.append({'name': 'roc_auc_save_to_plot_binary', 'function': roc_auc_save_to_plot_binary})
    # metrics.append({'name': 'accuracy_multilabel', 'function': accuracy_multilabel})

    learner = BertLearner.from_pretrained_model(
        databunch,
        pretrained_path='/scratch/da2734/twitter/jobs/training_binary/models_may20_9Klabels/output_{}/model_out_{}/'.format(column, epoch),
        metrics=metrics,
        device=device,
        logger=logger,
        output_dir=args.output_dir,
        finetuned_wgts_path=FINETUNED_PATH,
        warmup_steps=args.warmup_steps,
        multi_gpu=args.multi_gpu,
        is_fp16=args.fp16,
        multi_label=False,
        logging_steps=0)

    return learner

"is_relevant", ], batch_size_per_gpu=1, max_seq_length=2, multi_gpu=False, multi_label=True, model_type="bert", ) learner = BertLearner.from_pretrained_model( databunch, pretrained_path="bert-base-uncased", metrics=metrics, device=device_cuda, logger=logger, output_dir=OUTPUT_DIR, finetuned_wgts_path=None, multi_gpu=False, is_fp16=False, multi_label=True, logging_steps=50, ) learner.fit( epochs=6, lr=6e-5, # validate=True, # Evaluate the model after each epoch # # schedule_type="warmup_cosine", optimizer_type="adamW", ) learner.save_model()
    batch_size_per_gpu=args['train_batch_size'],
    max_seq_length=args['max_seq_length'],
    multi_gpu=args.multi_gpu,
    multi_label=True,
    model_type=args.model_type)

print(databunch.train_dl.dataset[0][3])

num_labels = len(databunch.labels)
print(num_labels)

metrics = []
metrics.append({'name': 'accuracy_thresh', 'function': accuracy_thresh})
metrics.append({'name': 'roc_auc', 'function': roc_auc})
metrics.append({'name': 'fbeta', 'function': fbeta})

learner = BertLearner.from_pretrained_model(databunch, args.model_name,
                                            metrics=metrics,
                                            device=device,
                                            logger=logger,
                                            output_dir=args.output_dir,
                                            finetuned_wgts_path=FINETUNED_PATH,
                                            warmup_steps=args.warmup_steps,
                                            multi_gpu=args.multi_gpu,
                                            is_fp16=args.fp16,
                                            multi_label=True,
                                            logging_steps=0)

learner.validate()
learner.save_model()

def train_fast_bert():
    MAX_LEN = 512  # previous model was 300
    text_col = 'script'
    label_col = [
        'Action', 'Adventure', 'Comedy', 'Crime', 'Drama', 'Fantasy',
        'Horror', 'Mystery', 'Romance', 'Sci-Fi', 'Thriller'
    ]
    DATA_PATH = Path('./data/')
    LABEL_PATH = DATA_PATH
    train_file = 'fast_train_' + str(MAX_LEN) + '.csv'
    val_file = 'fast_val_' + str(MAX_LEN) + '.csv'

    goodtogo = check_fastBert_data(MAX_LEN)
    if not goodtogo:
        die()

    MODEL_NAME = 'bert-base-uncased'
    databunch = BertDataBunch(DATA_PATH, LABEL_PATH,
                              tokenizer=MODEL_NAME,
                              train_file=train_file,
                              val_file=val_file,
                              label_file='fast_labels.csv',
                              text_col=text_col,
                              label_col=label_col,
                              batch_size_per_gpu=16,
                              max_seq_length=MAX_LEN,
                              multi_gpu=False,
                              multi_label=True,
                              model_type='bert')

    # **NOTE** remember to change `usePretrained` to True if we already have a fine-tuned model
    def my_accuracy_thresh(
        y_pred: Tensor,
        y_true: Tensor,
        thresh: float = 0.7,
        sigmoid: bool = False,
    ):
        "Compute accuracy when `y_pred` and `y_true` are the same size."
        if sigmoid:
            y_pred = y_pred.sigmoid()
        return ((y_pred > thresh) == y_true.bool()).float().mean().item()

    logging.basicConfig(level=logging.NOTSET)
    logger = logging.getLogger()
    device_cuda = torch.device("cuda")
    metrics = [{'name': 'accuracy_thresh', 'function': my_accuracy_thresh}]

    OUTPUTDIR = Path('./models/')
    MODEL_PATH = OUTPUTDIR / 'model_out_bert_cased'
    usePretrained = False
    if usePretrained:
        pretrained_path = MODEL_PATH  # to use the saved model
    else:
        pretrained_path = 'bert-base-uncased'

    # Setting up apex properly on Colab required downgrading the Torch version
    # (check first block of notebook for details)
    learner = BertLearner.from_pretrained_model(
        databunch,
        pretrained_path=pretrained_path,  # the original passed `usePretrained` (a bool) here, which is a bug
        metrics=metrics,
        device=device_cuda,
        logger=logger,
        output_dir=OUTPUTDIR,
        finetuned_wgts_path=None,
        warmup_steps=500,
        multi_gpu=False,
        is_fp16=False,  # need apex set up properly for this (note above)
        multi_label=True,
        logging_steps=50)

    learner.fit(
        epochs=5,
        lr=6e-4,
        validate=True,  # Evaluate the model after each epoch
        schedule_type="warmup_cosine",
        optimizer_type="lamb")
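    # Hedged follow-up (not in the original): persist the fine-tuned weights so
    # the usePretrained branch above can reload them on a later run. Note that
    # save_model() writes to output_dir / "model_out" by default, which differs
    # from MODEL_PATH ('model_out_bert_cased') above.
    learner.save_model()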