Exemplo n.º 1
0
 def __init__(self,
              batch_size=32,
              hidden_layer=0,
              build_model=False,
              comparison=compare.SimMul(),
              positions=False,
              regression=False,
              trainable=False,
              siamese=False):
     """Store the model configuration and load the tokenizer.

     Every argument is kept on the instance under its own name, except
     ``regression``, which is stored as ``self.regression_model``.
     ``self.sentence_length`` is fixed at 250 and ``self.cleantext`` is an
     identity function.

     NOTE(review): the default ``comparison`` object is created once, when
     the method is defined, so instances relying on the default share it.
     """
     # Settings assigned before the tokenizer is loaded; they stay ahead of
     # load_tokenizer() in case that method reads any of them.
     self.sentence_length = 250
     self.batch_size = batch_size
     self.hidden_layer = hidden_layer
     self.trainable = trainable
     self.build_model = build_model

     self.tokenizer = self.load_tokenizer()

     # Text is passed through unchanged.
     self.cleantext = lambda t: t

     self.siamese = siamese
     self.positions = positions
     self.comparison = comparison
     self.regression_model = regression
Exemplo n.º 2
0
    def __init__(self,
                 sentence_length=SENTENCE_LENGTH,
                 batch_size=128,
                 embeddings=True,
                 hidden_layer=0,
                 build_model=False,
                 comparison=compare.SimMul(),
                 positions=False,
                 regression=False,
                 siamese=False):
        """Initialise the ``BasicNN`` base and store the similarity options.

        When ``build_model`` is false, ``False`` is handed to
        ``BasicNN.__init__`` in place of ``embeddings`` so that embeddings
        are not loaded, and the caller's ``embeddings`` value is recorded on
        the instance afterwards. ``regression`` is stored as
        ``self.regression_model``; the remaining options are stored under
        their own names.

        NOTE(review): the default ``comparison`` object is created once, when
        the method is defined, so instances relying on the default share it.
        """
        # Only ask the base class for embeddings if the model will be built.
        embeddings_for_base = embeddings if build_model else False
        BasicNN.__init__(self, sentence_length, batch_size,
                         embeddings_for_base, hidden_layer, build_model)
        if not build_model:
            # Remember the requested setting even though nothing was loaded.
            self.embeddings = embeddings

        self.build_model = build_model
        self.siamese = siamese
        self.positions = positions
        self.comparison = comparison
        self.regression_model = regression
Exemplo n.º 3
0
    def __init__(self,
                 corpusFile,
                 rougeFile,
                 metric=['SU4'],
                 rouge_labels=True,
                 labels="topn",
                 labelsthreshold=5,
                 nb_epoch=3,
                 verbose=2,
                 classification_type="Bi-LSTM",
                 embeddings=True,
                 hidden_layer=0,
                 dropout=0.5,
                 regression=False,
                 batch_size=128):
        """Initialise the classification system.

        ``corpusFile``, ``rougeFile``, ``metric``, ``rouge_labels``,
        ``labels`` and ``labelsthreshold`` are forwarded unchanged to
        ``BaseClassification.__init__``. ``nb_epoch``, ``verbose`` and
        ``dropout`` are stored on the instance.

        ``classification_type`` selects the ``nnc`` model that is built and
        stored in ``self.nnc``. ``embeddings``, ``hidden_layer``,
        ``batch_size`` and ``regression`` are passed to the model
        constructors that accept them. A ``classification_type`` with no
        entry in the table below leaves ``self.nnc`` as ``None``.

        NOTE(review): the default ``"Bi-LSTM"`` has no entry, so with default
        arguments ``self.nnc`` is ``None`` -- confirm this is intended.
        """
        BaseClassification.__init__(self,
                                    corpusFile,
                                    rougeFile,
                                    metric=metric,
                                    rouge_labels=rouge_labels,
                                    labels=labels,
                                    labelsthreshold=labelsthreshold)
        self.nb_epoch = nb_epoch
        self.verbose = verbose
        self.dropout = dropout

        # Keyword groups shared by several model families.
        sizes = {"hidden_layer": hidden_layer, "batch_size": batch_size}
        embedded = dict(sizes, embeddings=embeddings)
        bert_sim = dict(sizes, regression=regression, positions=True)
        embedded_sim = dict(embedded, regression=regression, positions=True)

        # classification_type -> (nnc class name, compare class name or None,
        # constructor keywords). Names are resolved lazily with getattr so
        # that only the selected entry touches ``nnc`` and ``compare``.
        models = {
            "BasicNN": ("BasicNN", None, embedded),
            "BERT": ("BasicBERT", None, sizes),
            "SimilaritiesBERT": ("SimilaritiesBERT", "SimMul", bert_sim),
            "LSTMSimilaritiesBERT":
                ("LSTMSimilaritiesBERT", "SimMul", bert_sim),
            "SimilaritiesBioBERT":
                ("SimilaritiesBioBERT", "SimMul", bert_sim),
            "LSTMSimilaritiesBioBERT":
                ("LSTMSimilaritiesBioBERT", "SimMul", bert_sim),
            "SimilaritiesBERTTrainable":
                ("SimilaritiesBERT", "SimMul", dict(bert_sim, trainable=True)),
            "Similarities": ("Similarities", "SimMul", embedded_sim),
            "SimilaritiesYu": ("Similarities", "SimYu", embedded_sim),
            "SimilaritiesEuc": ("Similarities", "SimEuc", embedded_sim),
            "CNNSimilarities": ("CNNSimilarities", "SimMul", embedded),
            "CNNSimilaritiesYu": ("CNNSimilarities", "SimYu", embedded),
            "CNNSimilaritiesEuc": ("CNNSimilarities", "SimEuc", embedded),
            "LSTMSimilarities": ("LSTMSimilarities", "SimMul", embedded_sim),
            "LSTMSiameseSimilarities":
                ("LSTMSimilarities", "SimMul",
                 dict(embedded_sim, siamese=True)),
            # The Yu variant uses compare.SimYu, like the other *Yu types.
            "LSTMSimilaritiesYu": ("LSTMSimilarities", "SimYu", embedded_sim),
            "LSTMSimilaritiesEuc":
                ("LSTMSimilarities", "SimEuc", embedded_sim),
        }

        self.nnc = None
        spec = models.get(classification_type)
        if spec is not None:
            class_name, comparison_name, kwargs = spec
            if comparison_name is not None:
                # Copy so the shared keyword groups above are never mutated.
                kwargs = dict(kwargs,
                              comparison=getattr(compare, comparison_name)())
            self.nnc = getattr(nnc, class_name)(**kwargs)
Exemplo n.º 4
0
 def __init__(self, layer1=50, batch_size=128, comparison=compare.SimMul()):
     """Store the settings and derive a descriptive ``name``.

     ``name`` is built from ``comparison.name`` and ``layer1`` using the
     pattern ``NNR-<comparison name>-relu(<layer1>)``.

     NOTE(review): the default ``comparison`` object is created once, when
     the method is defined, so instances relying on the default share it.
     """
     self.comparison = comparison
     self.batch_size = batch_size
     self.layer1 = layer1
     # Read back from the instance, as the attributes were just assigned.
     label_parts = (self.comparison.name, self.layer1)
     self.name = "NNR-%s-relu(%i)" % label_parts
Exemplo n.º 5
0
    def __init__(self,
                 corpusFile,
                 rougeFile,
                 metric=['SU4'],
                 nb_epoch=3,
                 verbose=2,
                 regression_type="Bi-LSTM",
                 embeddings=True,
                 use_peepholes=False,
                 hidden_layer=0,
                 dropout=0.5,
                 batch_size=128,
                 svd_components=None):
        """Initialise the regression system.

        ``corpusFile``, ``rougeFile`` and ``metric`` are forwarded to
        ``BaseRegression.__init__``. ``nb_epoch``, ``use_peepholes``,
        ``verbose`` and ``dropout`` are stored on the instance.

        ``regression_type`` selects the model that is built and stored in
        ``self.lstm``: either a class from ``lstm`` (constructed with
        ``embeddings``, ``hidden_layer`` and ``batch_size``) or a Tfidf
        baseline from ``regressionbaselines`` (constructed with
        ``batch_size`` and ``n_components``). An unrecognised
        ``regression_type`` leaves ``self.lstm`` as ``None``.

        ``svd_components`` is the ``n_components`` value for the Tfidf
        baselines. When it is ``None`` the value is read from the
        module-level ``args.svd_components``, which is what this method
        relied on before the parameter existed.
        """
        BaseRegression.__init__(self, corpusFile, rougeFile, metric)
        self.nb_epoch = nb_epoch
        self.use_peepholes = use_peepholes
        self.verbose = verbose
        self.dropout = dropout

        # regression_type -> (lstm class name, compare class name or None,
        # whether positions=True is passed). Names are resolved lazily with
        # getattr so only the selected entry touches ``lstm``/``compare``.
        neural_models = {
            "BasicNN": ("BasicNN", None, False),
            "CNN": ("CNN", None, False),
            "LSTM": ("LSTM", None, False),
            "Bi-LSTM": ("LSTMBidirectional", None, False),
            "Similarities": ("Similarities", "SimMul", True),
            "SimilaritiesYu": ("Similarities", "SimYu", True),
            "SimilaritiesEuc": ("Similarities", "SimEuc", True),
            "CNNSimilarities": ("CNNSimilarities", "SimMul", False),
            "CNNSimilaritiesYu": ("CNNSimilarities", "SimYu", False),
            "CNNSimilaritiesEuc": ("CNNSimilarities", "SimEuc", False),
            "LSTMSimilarities": ("LSTMSimilarities", "SimMul", True),
            # The Yu variant uses compare.SimYu, like the other *Yu types.
            "LSTMSimilaritiesYu": ("LSTMSimilarities", "SimYu", True),
            "LSTMSimilaritiesEuc": ("LSTMSimilarities", "SimEuc", True),
        }
        # regression_type -> (regressionbaselines class name,
        # compare class name or None).
        tfidf_models = {
            "TfidfNNR": ("TfidfNNR", None),
            "TfidfSimNNR": ("TfidfSimNNR", None),
            "TfidfSim2NNR": ("TfidfSim2NNR", "SimMul"),
            "TfidfSimYuNNR": ("TfidfSim2NNR", "SimYu"),
            "TfidfSimEucNNR": ("TfidfSim2NNR", "SimEuc"),
        }

        self.lstm = None
        if regression_type in neural_models:
            class_name, comparison_name, use_positions = \
                neural_models[regression_type]
            kwargs = {"embeddings": embeddings,
                      "hidden_layer": hidden_layer,
                      "batch_size": batch_size}
            if comparison_name is not None:
                kwargs["comparison"] = getattr(compare, comparison_name)()
            if use_positions:
                kwargs["positions"] = True
            self.lstm = getattr(lstm, class_name)(**kwargs)
        elif regression_type in tfidf_models:
            class_name, comparison_name = tfidf_models[regression_type]
            if svd_components is None:
                # Backward compatibility: fall back to the module-level
                # ``args`` only when the caller did not supply a value.
                svd_components = args.svd_components
            # The method's own regression_type and batch_size are used here,
            # not attributes of a module-level ``args`` object.
            kwargs = {"batch_size": batch_size,
                      "n_components": svd_components}
            if comparison_name is not None:
                kwargs["comparison"] = getattr(compare, comparison_name)()
            self.lstm = getattr(regressionbaselines, class_name)(**kwargs)