Example #1
    def __init__(self, embeddings=None, embeddings_path=None, labels_count=1, mlp_dim=512, mlp_layers_count=1, concat='simple', prob='auto'):
        super().__init__()

        self.labels_count = labels_count

        if embeddings is not None:
            # ID-to-vector mapping provided directly as a weight matrix
            # (explicit `is not None` check: truth-testing a multi-element tensor raises a RuntimeError)
            self.embed = nn.Embedding.from_pretrained(embeddings)
        elif embeddings_path:
            # Load the ID-to-vector mapping from a text file in word2vec format
            w2v_model = gensim.models.KeyedVectors.load_word2vec_format(embeddings_path)

            # Convert to PyTorch tensor
            weights = torch.FloatTensor(w2v_model.vectors)
            self.embed = nn.Embedding.from_pretrained(weights)
        else:
            raise ValueError('Either `embeddings` or `embeddings_path` must be set!')

        self.embedding_dim = self.embed.embedding_dim

        # Concat mode
        self.concat = concat
        self.concat_func, self.concat_dim = get_concat(concat, self.embedding_dim)

        self.mlp = get_mlp(input_dim=self.concat_dim,
                           output_dim=self.labels_count,
                           hidden_dim=mlp_dim,
                           hidden_layers_count=mlp_layers_count,
                           activation_cls=nn.ReLU)

        self.prob = self.get_classification_probability_layer(prob)
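
The `get_concat` and `get_mlp` helpers are not included in these snippets. Below is a minimal sketch of what they might look like, inferred purely from the call sites; the implementations are assumptions, with 'simple' concat taken to join two sibling representations (which would explain an MLP input of `2 * embedding_dim`):

import torch
from torch import nn

def get_concat(concat, embedding_dim):
    # Sketch (assumption): 'simple' mode concatenates the two input
    # representations along the feature axis, doubling the MLP input dim.
    if concat == 'simple':
        return (lambda a, b: torch.cat([a, b], dim=-1)), 2 * embedding_dim
    raise ValueError(f'Unsupported concat mode: {concat}')

def get_mlp(input_dim, output_dim, hidden_dim, hidden_layers_count, activation_cls):
    # Sketch (assumption): stack `hidden_layers_count` Linear+activation
    # blocks, then project down to `output_dim` (the number of labels).
    layers = []
    in_dim = input_dim
    for _ in range(hidden_layers_count):
        layers.append(nn.Linear(in_dim, hidden_dim))
        layers.append(activation_cls())
        in_dim = hidden_dim
    layers.append(nn.Linear(in_dim, output_dim))
    return nn.Sequential(*layers)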
Example #2
    def __init__(self, bert_model_path, hidden_dim=768, dropout=0.1, mlp_dim=512, mlp_layers_count=1, labels_count=1, bert_cls=None, concat='simple', prob='auto'):
        super().__init__()
        self.labels_count = labels_count

        self.embedding_dim = hidden_dim  # size of bert representations
        self.bert_cls = bert_cls if bert_cls else BertModel

        self.bert = self.bert_cls.from_pretrained(bert_model_path, output_hidden_states=False, output_attentions=False)
        self.dropout = nn.Dropout(dropout)

        # DistilBERT has no pooler layer of its own, so provide one manually
        self.distil_pooler = nn.Linear(self.embedding_dim, self.embedding_dim)

        # Concat mode
        self.concat = concat
        self.concat_func, self.concat_dim = get_concat(concat, self.embedding_dim)

        self.mlp = get_mlp(input_dim=self.concat_dim,
                           output_dim=self.labels_count,
                           hidden_dim=mlp_dim,
                           hidden_layers_count=mlp_layers_count,
                           activation_cls=nn.ReLU)

        # TODO: initialize the linear layers, e.g.:
        # nn.init.xavier_normal_(self.classifier.weight)
        #   Fills the tensor using the method from "Understanding the difficulty of training
        #   deep feedforward neural networks" (Glorot & Bengio, 2010), with a normal distribution.
        # nn.init.kaiming_normal_
        #   Fills the tensor using the method from "Delving deep into rectifiers: Surpassing
        #   human-level performance on ImageNet classification" (He et al., 2015), with a normal distribution.

        self.prob = self.get_classification_probability_layer(prob)
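
One way the TODO above could be resolved is sketched below. The helper name `init_mlp_weights` is hypothetical; Kaiming (He) init is chosen here to match the ReLU activations passed to `get_mlp`, with `nn.init.xavier_normal_` as the Glorot alternative named in the comments:

from torch import nn

def init_mlp_weights(mlp):
    # Sketch (assumption): mlp is an nn.Sequential containing nn.Linear layers.
    # Kaiming normal suits ReLU; swap in nn.init.xavier_normal_ for tanh/sigmoid.
    for module in mlp.modules():
        if isinstance(module, nn.Linear):
            nn.init.kaiming_normal_(module.weight, nonlinearity='relu')
            nn.init.zeros_(module.bias)

This would be called as `init_mlp_weights(self.mlp)` at the end of `__init__`, in place of the TODO.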
Example #3
    def __init__(self, bert_model_path, hidden_dim=768, dropout=0.1, mlp_dim=512, mlp_layers_count=1, labels_count=1, bert_cls=None, concat='simple', prob='auto'):
        super().__init__()
        self.labels_count = labels_count

        self.embedding_dim = hidden_dim  # size of bert representations
        self.bert_cls = bert_cls if bert_cls else BertModel

        self.bert = self.bert_cls.from_pretrained(bert_model_path, output_hidden_states=False)
        self.dropout = nn.Dropout(dropout)

        self.lstm = nn.LSTM(input_size=hidden_dim, hidden_size=hidden_dim, batch_first=True)

        # Concat mode
        self.concat = concat
        self.concat_func, self.concat_dim = get_concat(concat, self.embedding_dim)

        self.mlp = get_mlp(input_dim=self.concat_dim,
                           output_dim=self.labels_count,
                           hidden_dim=mlp_dim,
                           hidden_layers_count=mlp_layers_count,
                           activation_cls=nn.ReLU)

        self.prob = self.get_classification_probability_layer(prob)
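
For context, here is a hedged sketch of how the forward pass of this LSTM-pooled variant might look. The method names, the paired-input interface, and the two-argument `concat_func` are assumptions; the BERT call follows the Hugging Face transformers API, where index 0 of the model output is the last hidden state:

    def encode(self, input_ids, attention_mask):
        # Sketch (assumption): pool BERT token states via the LSTM's final hidden state
        token_states = self.bert(input_ids, attention_mask=attention_mask)[0]  # (batch, seq, hidden)
        token_states = self.dropout(token_states)
        _, (h_n, _) = self.lstm(token_states)  # h_n: (1, batch, hidden)
        return h_n.squeeze(0)                  # (batch, hidden)

    def forward(self, input_ids_a, mask_a, input_ids_b, mask_b):
        # Sketch: encode both sides of the pair, combine with the configured
        # concat mode, then classify through the MLP and probability layer
        a = self.encode(input_ids_a, mask_a)
        b = self.encode(input_ids_b, mask_b)
        return self.prob(self.mlp(self.concat_func(a, b)))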