# Training setup for the 6-mer token-classification model on the
# "all_samples" (non-split) training file.

# Padded sequence length used for this data set.
# NOTE(review): presumably consumed by tokenize_and_pad_samples -- confirm.
MAX_LEN = 16670

genes, labels = read_non_split_file(
    '/home/brian/Downloads/all_samples_6-mer_train.txt')
seq_ids, masks, labels = tokenize_and_pad_samples(genes, labels)
print(seq_ids[0])
print(len(seq_ids))
print("Finished making data")

batch_size = 1

# Use the GPU when one is present, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = BertForTokenClassification(
    BertConfig.from_json_file(
        '/home/brian/attentive_splice/bert_configuration_all_hex.json'))
# NOTE(review): 4099 is presumably 4**6 hexamers plus 3 special tokens --
# confirm against the tokenizer vocabulary.
model.resize_token_embeddings(4099)
model.to(device)

optimizer = Adam(model.parameters(), lr=1e-3)  # alternative: lr=3e-5

# Class 1 is weighted 165x relative to class 0 in the loss.  The weights are
# placed on `device` (not hard-wired to CUDA) so the CPU fallback above
# keeps working.
class_weights = torch.tensor([1.0, 165.0], dtype=torch.float32).to(device)
loss = CrossEntropyLoss(weight=class_weights)

# Index of the last processed training step; overwritten by
# load_model_from_saved() when resuming.
last_i = 0


def load_model_from_saved():
    """Restore the saved step index and model weights from disk.

    Reads an integer from ``/home/brian/bert_last_i.txt`` into the
    module-level ``last_i`` and loads the state dict stored in
    ``/home/brian/bert_splice_weights.pt`` into the module-level ``model``.

    Raises:
        FileNotFoundError: If either checkpoint file is missing.
        ValueError: If the step-index file does not contain an integer.
    """
    # Without this declaration the assignment below would only create a
    # local variable and the resumed index would be silently discarded.
    global last_i
    with open('/home/brian/bert_last_i.txt', 'r') as last_i_file:
        last_i = int(last_i_file.read())
    # map_location lets a checkpoint written on a GPU load on a CPU-only host.
    model.load_state_dict(
        torch.load("/home/brian/bert_splice_weights.pt",
                   map_location=device))


def save_weights():
    """Placeholder docstring; the real body is outside the reviewed chunk.

    NOTE(review): only the ``def`` line of this function was visible, so no
    logic has been added or changed here.  Presumably it writes the model
    weights and ``last_i`` back to disk -- confirm against the full file.
    """
# Training setup for the 6-mer token-classification model on the
# "split_samples" training file.

batch_size = 1
# Padded sequence length used for this data set.
# NOTE(review): presumably consumed by process_data or the training loop --
# confirm.
MAX_LEN = 1002

tokenizer, formatted_hexamers, attention_masks, labels = process_data(
    '/home/brian/Downloads/split_samples_6-mer_train.txt')

# Use the GPU when one is present, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
n_gpu = torch.cuda.device_count()
# get_device_name(0) raises when no CUDA device exists, which would defeat
# the CPU fallback above, so only query it when a GPU is actually present.
if n_gpu > 0:
    torch.cuda.get_device_name(0)

model = BertForTokenClassification(
    BertConfig.from_json_file(
        '/home/brian/attentive_splice/bert_configuration_split_hex.json'))
# Size the embedding table to match the tokenizer's vocabulary.
model.resize_token_embeddings(len(tokenizer))
model.to(device)
print(model.bert.config)

# Resume-from-checkpoint logic, currently disabled:
#model.load_state_dict(torch.load("/home/brian/bert_splice_weights.pt"))
last_i = 0
#with open('/home/brian/bert_last_i.txt', 'r') as last_i_file:
#    i = last_i_file.read()
#    last_i = int(i)

# Evaluation / plotting imports (precision_recall_curve was imported twice
# in the original; one import is sufficient).
from inspect import signature

import matplotlib.pyplot as plt
from sklearn.metrics import auc
from sklearn.metrics import f1_score
from sklearn.metrics import precision_recall_curve