# Imports assumed by this script: the original excerpt does not show its
# import block, so sourcing BertConfig/BertForTokenClassification from
# Hugging Face transformers is an assumption.
import torch
from torch.nn import CrossEntropyLoss
from torch.optim import Adam
from transformers import BertConfig, BertForTokenClassification

MAX_LEN = 16670
# read_non_split_file and tokenize_and_pad_samples are project helpers
# defined elsewhere (not shown in this excerpt).
genes, labels = read_non_split_file(
    '/home/brian/Downloads/all_samples_6-mer_train.txt')
seq_ids, masks, labels = tokenize_and_pad_samples(genes, labels)
# Sanity check the tokenized output and dataset size.
print(seq_ids[0])
print(len(seq_ids))
print("Finished making data")

batch_size = 1

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = BertForTokenClassification(
    BertConfig.from_json_file(
        '/home/brian/attentive_splice/bert_configuration_all_hex.json'))
model.resize_token_embeddings(4099)  # 4^6 hexamers plus special tokens
model.to(device)
optimizer = Adam(model.parameters(), lr=1e-3)  # alternative: lr=3e-5
# Upweight the rare splice-site class against the dominant background class.
class_weights = torch.tensor([1.0, 165.0]).float().to(device)
loss = CrossEntropyLoss(weight=class_weights)
last_i = 0
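
# The excerpt stops before the training loop; below is a minimal assumed
# sketch of a single step wiring the pieces above together. The tensor
# names (input_ids, attention_mask, label_ids) are hypothetical.
def train_step(input_ids, attention_mask, label_ids):
    model.train()
    optimizer.zero_grad()
    # Per-token logits, shape (batch, seq_len, num_labels=2).
    logits = model(input_ids.to(device),
                   attention_mask=attention_mask.to(device))[0]
    # Flatten tokens so the class-weighted CrossEntropyLoss applies per token.
    batch_loss = loss(logits.view(-1, 2), label_ids.to(device).view(-1))
    batch_loss.backward()
    optimizer.step()
    return batch_loss.item()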


def load_model_from_saved():
    # Resume from the checkpoint written by save_weights() below.
    global last_i  # without this, the assignment only created a local variable
    with open('/home/brian/bert_last_i.txt', 'r') as last_i_file:
        last_i = int(last_i_file.read())
    model.load_state_dict(torch.load("/home/brian/bert_splice_weights.pt"))


def save_weights():
    # The body was truncated in the original; this is a minimal sketch that
    # mirrors load_model_from_saved(), using the same checkpoint paths.
    with open('/home/brian/bert_last_i.txt', 'w') as last_i_file:
        last_i_file.write(str(last_i))
    torch.save(model.state_dict(), "/home/brian/bert_splice_weights.pt")
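
# Typical use (assumed): call load_model_from_saved() once before training to
# resume from the checkpoint, and save_weights() periodically during training.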

# Example #2: the split-hexamer variant of the same pipeline.

batch_size = 1
MAX_LEN = 1002

# process_data is a project helper (not shown) that builds the tokenizer and
# the tokenized inputs for the split-hexamer data.
tokenizer, formatted_hexamers, attention_masks, labels = process_data(
    '/home/brian/Downloads/split_samples_6-mer_train.txt')

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
n_gpu = torch.cuda.device_count()
if n_gpu:
    print(torch.cuda.get_device_name(0))

model = BertForTokenClassification(
    BertConfig.from_json_file(
        '/home/brian/attentive_splice/bert_configuration_split_hex.json'))
model.resize_token_embeddings(len(tokenizer))
model.to(device)
print(model.bert.config)
# Uncomment to resume from a saved checkpoint:
# model.load_state_dict(torch.load("/home/brian/bert_splice_weights.pt"))
last_i = 0
# with open('/home/brian/bert_last_i.txt', 'r') as last_i_file:
#     last_i = int(last_i_file.read())

from inspect import signature

import matplotlib.pyplot as plt
from sklearn.metrics import auc, f1_score, precision_recall_curve
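
# The metric imports above have no accompanying evaluation code in this
# excerpt; a minimal assumed sketch follows. y_true / y_score are
# hypothetical: flattened per-token labels and predicted splice-class
# probabilities gathered during validation.
def plot_precision_recall(y_true, y_score):
    precision, recall, _ = precision_recall_curve(y_true, y_score)
    print("PR AUC:", auc(recall, precision))
    print("F1 at 0.5 threshold:", f1_score(y_true, y_score > 0.5))
    plt.step(recall, precision, where='post')
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.show()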