Exemplo n.º 1
0
def main(args):
    """Run ``Statistic`` on the training split of the 'pubmed_rct20k' dataset.

    ``args`` is accepted but not used by this function.

    Returns:
        Always 0.
    """
    dataset = Dataset('pubmed_rct20k')

    # Pull the two training attributes out first, then hand them over.
    train_documents = dataset.documents_train
    train_labels = dataset.labels_train
    Statistic(train_documents, train_labels)

    return 0
Exemplo n.º 2
0
def main(args):
    """Preprocess '20newsgroups', run a ``Classification`` and print predictions.

    The training documents and labels are passed through ``Preprocess`` with
    ``nb_sample=500``; the preprocessed data feeds a ``Classification`` whose
    ``list_labels`` is copied from the preprocessing step. After ``process()``
    the predictions for the first five documents are printed.

    ``args`` is accepted but not used by this function.

    Returns:
        Always 0.
    """
    dataset = Dataset('20newsgroups')
    prepared = Preprocess(
        documents=dataset.documents_train,
        labels=dataset.labels_train,
        nb_sample=500,
    )
    train_documents = prepared.documents
    train_labels = prepared.labels

    classifier = Classification(
        documents_train=train_documents,
        labels_train=train_labels,
    )
    # Reuse the label list computed during preprocessing.
    classifier.list_labels = prepared.list_labels
    classifier.process()

    print(classifier.predict(train_documents[:5]))
    return 0
Exemplo n.º 3
0
def main(args):
    """Fit a 'gpt2' ``Model`` on 'Short_Jokes' and print one generated text.

    The first 10000 training documents are wrapped in a generation data
    loader (batch size 32). The model is set up step by step, fitted through
    a ``Generation`` wrapper, then asked to continue a fixed prompt; the
    decoded result is printed.

    ``args`` is accepted but not used by this function.

    Returns:
        Always 0.
    """
    jokes = Dataset('Short_Jokes')

    gpt2 = Model(model_name='gpt2')
    train_loader = Create_DataLoader_generation(
        jokes.documents_train[:10000],
        batch_size=32,
    )

    # Model setup: the calls are kept in their original order.
    gpt2.load_type()
    gpt2.load_tokenizer()
    gpt2.load_class()
    gpt2.devices()
    gpt2.configuration(train_loader)

    generator = Generation(gpt2)
    generator.model.fit_generation(train_loader)

    generated = gpt2.predict_generation('What did you expect ?')
    print(decode_text(generated, gpt2.tokenizer))

    return 0
Exemplo n.º 4
0
from Manteia.Dataset import Dataset

# Build the 'Amazon Review Full' dataset with test=True and desc=True; the
# test attributes and the description are read further down.
ds = Dataset('Amazon Review Full', test=True, desc=True)

# Heading followed by the first five training documents and labels,
# one item per line.
print('Train : ', ds.documents_train[:5], ds.labels_train[:5], sep='\n')

# Same preview for the test split.
print('Test : ', ds.documents_test[:5], ds.labels_test[:5], sep='\n')

# Finally the description attached to the dataset.
print('Description :', ds.description, sep='\n')
Exemplo n.º 5
0
from Manteia.Dataset import Dataset

# Build the 'pubmed_rct20k' dataset with default options.
ds = Dataset('pubmed_rct20k')

# Heading followed by the first five training documents and labels,
# one item per line.
print('Train : ', ds.documents_train[:5], ds.labels_train[:5], sep='\n')

Exemplo n.º 6
0
from Manteia.Dataset import Dataset

# Build the 'Yelp Review Full' dataset with test=True and desc=True; the
# test attributes and the description are read further down.
ds = Dataset('Yelp Review Full', test=True, desc=True)

# Heading followed by the first five training documents and labels,
# one item per line.
print('Train : ', ds.documents_train[:5], ds.labels_train[:5], sep='\n')

# Same preview for the test split.
print('Test : ', ds.documents_test[:5], ds.labels_test[:5], sep='\n')

# Finally the description attached to the dataset.
print('Description :', ds.description, sep='\n')
Exemplo n.º 7
0
# Edit -> Notebook settings -> GPU (select the GPU runtime before running)
#import nltk
#nltk.download('wordnet')
from Manteia.Classification import Classification 
from Manteia.Model import *
from Manteia.Dataset import Dataset
from Manteia.Preprocess import list_labels
from Manteia.Augmentation import *
from sklearn.model_selection import train_test_split,KFold

# Load the 'drugscom' dataset and keep only the first 100 training documents
# and labels, each wrapped with np.array.
# NOTE(review): `np` is not imported explicitly in this snippet; presumably it
# is provided by one of the star imports above. Confirm.
ds=Dataset('drugscom')
ds.documents_train=np.array(ds.documents_train[:100])
ds.labels_train=np.array(ds.labels_train[:100])

# Create a 'bert' model with early stopping enabled, then load its type and
# tokenizer.
model = Model(model_name ='bert',early_stopping=True)
model.load_type()
model.load_tokenizer()
# Derive the label list from the (truncated) training labels and print it.
list_label=list_labels(ds.labels_train)
print(list_label)
# num_labels is set before load_class() is called; presumably load_class()
# reads it. Confirm against Manteia's Model.
model.num_labels=len(list_label)
model.load_class()
# Saved under the name 'model_init'; presumably a snapshot of the untrained
# model to restart from. Confirm.
model.save('model_init')



# Cross-validation
# Number of cross-validation passes.
nb_pass=4
def coss_validation_idx(nb_pass,nb_docs):
  docs_idx = [idx for idx in range(nb_docs)]
  train_idx, test_idx = [], []
  for pli in range(nb_pass):
Exemplo n.º 8
0
from Manteia.Dataset import Dataset

# Build the '20newsgroups' dataset with default options.
ds = Dataset('20newsgroups')

# Heading followed by the first five training documents and labels,
# one item per line.
print('Train : ', ds.documents_train[:5], ds.labels_train[:5], sep='\n')
Exemplo n.º 9
0
from Manteia.Dataset import Dataset

# Build the 'Yelp Review Polarity' dataset with test=True and desc=True; the
# test attributes and the description are read further down.
ds = Dataset('Yelp Review Polarity', test=True, desc=True)

# First five training documents and their labels.
print('Train : ')
print(ds.documents_train[:5])
print(ds.labels_train[:5])

# First five test documents and their labels. They get their own heading so
# they are not read as part of the training output above.
print('Test : ')
print(ds.documents_test[:5])
print(ds.labels_test[:5])

# Description attached to the dataset, under its own heading as well.
print('Description :')
print(ds.description)
Exemplo n.º 10
0
from Manteia.Dataset import Dataset

# Build the 'Yahoo! Answers' dataset with test=True and desc=True; the test
# attributes and the description are read further down.
ds = Dataset('Yahoo! Answers', test=True, desc=True)

# Heading followed by the first five training documents and labels,
# one item per line.
print('Train : ', ds.documents_train[:5], ds.labels_train[:5], sep='\n')

# Same preview for the test split.
print('Test : ', ds.documents_test[:5], ds.labels_test[:5], sep='\n')

# Description attached to the dataset.
print('Description :', ds.description, sep='\n')

# Label list exposed by the dataset object.
print('List labels :', ds.list_labels, sep='\n')
Exemplo n.º 11
0
from Manteia.Dataset import Dataset

# Build the 'drugscom' dataset with default options.
ds = Dataset('drugscom')

# Heading followed by the first five training documents and labels,
# one item per line.
print('Train : ', ds.documents_train[:5], ds.labels_train[:5], sep='\n')
Exemplo n.º 12
0
from Manteia.Dataset import Dataset

# Build the 'Amazon Review Polarity' dataset with test=True and desc=True; the
# test attributes and the description are read further down.
ds = Dataset('Amazon Review Polarity', test=True, desc=True)

# First five training documents and their labels.
print('Train : ')
print(ds.documents_train[:5])
print(ds.labels_train[:5])

# First five test documents and their labels. They get their own heading so
# they are not read as part of the training output above.
print('Test : ')
print(ds.documents_test[:5])
print(ds.labels_test[:5])

# Description attached to the dataset, under its own heading as well.
print('Description :')
print(ds.description)
Exemplo n.º 13
0
from Manteia.Dataset import Dataset

# Build the 'agnews' dataset with default options.
ds = Dataset('agnews')

# Heading followed by the first five training documents and labels,
# one item per line.
print('Train : ', ds.documents_train[:5], ds.labels_train[:5], sep='\n')


Exemplo n.º 14
0
from Manteia.Dataset import Dataset

# Build the 'DBPedia' dataset using the 'test/test/' path, with test=True,
# desc=True and classe=True; the test attributes, description and label list
# are read further down.
ds = Dataset(
    'DBPedia',
    path='test/test/',
    test=True,
    desc=True,
    classe=True,
)

# Heading followed by the first five training documents and labels,
# one item per line.
print('Train : ', ds.documents_train[:5], ds.labels_train[:5], sep='\n')

# Same preview for the test split.
print('Test : ', ds.documents_test[:5], ds.labels_test[:5], sep='\n')

# Description attached to the dataset.
print('Description :', ds.description, sep='\n')

# Label list exposed by the dataset object.
print('List labels :', ds.list_labels, sep='\n')
Exemplo n.º 15
0
from Manteia.Dataset import Dataset

# Build the 'SST-5' dataset with dev=True; the dev attributes are read below.
ds = Dataset('SST-5', dev=True)

# Heading followed by the first five dev documents and labels,
# one item per line.
print('Dev : ', ds.documents_dev[:5], ds.labels_dev[:5], sep='\n')
Exemplo n.º 16
0
from Manteia.Dataset import Dataset

# Build the 'Tweeter Airline Sentiment' dataset with default options.
ds = Dataset('Tweeter Airline Sentiment')

# Heading followed by the first five training documents and labels,
# one item per line.
print('Train : ', ds.documents_train[:5], ds.labels_train[:5], sep='\n')
Exemplo n.º 17
0
from Manteia.Dataset import Dataset

# Build the 'Short_Jokes' dataset with default options.
ds = Dataset('Short_Jokes')

# Heading followed by the first five training documents; no labels are
# printed for this dataset.
print('Train : ', ds.documents_train[:5], sep='\n')
Exemplo n.º 18
0
from Manteia.Dataset import Dataset

# Build the 'SST-2' dataset with default options.
ds = Dataset('SST-2')

# Heading followed by the first five training documents and labels,
# one item per line.
print('Train : ', ds.documents_train[:5], ds.labels_train[:5], sep='\n')
Exemplo n.º 19
0
from Manteia.Dataset import Dataset

# Build the 'trec' dataset with default options.
ds = Dataset('trec')

# Heading followed by the first five training documents and labels,
# one item per line.
print('Train : ', ds.documents_train[:5], ds.labels_train[:5], sep='\n')