def __init__(self,
             vocab: Vocabulary,
             bert_model: Union[str, BertModel],
             dropout: float = 0.0,
             num_labels: int = None,
             index: str = "tokens",
             label_namespace: str = "labels",
             trainable: bool = True,
             initializer: InitializerApplicator = InitializerApplicator()) -> None:
    super().__init__(vocab)
    if isinstance(bert_model, str):
        self.bert_model = PretrainedBertModel.load(bert_model)
    else:
        self.bert_model = bert_model
    self.bert_model.requires_grad = trainable

    in_features = self.bert_model.config.hidden_size
    # The label-vocabulary sizing is disabled; this variant always emits a single logit.
    # if num_labels:
    #     out_features = num_labels
    # else:
    #     out_features = vocab.get_vocab_size(label_namespace)
    out_features = 1

    self._dropout = torch.nn.Dropout(p=dropout)
    self._classification_layer = torch.nn.Linear(in_features, out_features)
    # self._accuracy = CategoricalAccuracy()
    # self._loss = torch.nn.CrossEntropyLoss()
    self._index = index
def __init__( self, vocab: Vocabulary, bert_model: BertModel, dropout: float = 0.0, index: str = "bert", trainable: bool = True, initializer: InitializerApplicator = InitializerApplicator(), regularizer: Optional[RegularizerApplicator] = None, ) -> None: super().__init__(vocab, regularizer) self._index = index self.bert_model = PretrainedBertModel.load(bert_model) hidden_size = self.bert_model.config.hidden_size for param in self.bert_model.parameters(): param.requires_grad = trainable self.dropout = Dropout(dropout) # 1. Instantiate any additional parts of your network self.start_linear = torch.nn.Linear(hidden_size, 1) self.end_linear = torch.nn.Linear(hidden_size, 1) self.loss_function = CrossEntropyLoss() # 2. DON'T FORGET TO INITIALIZE the additional parts of your network. initializer(self.start_linear) initializer(self.end_linear) # 3. Instantiate your metrics self._span_start_accuracy = CategoricalAccuracy() self._span_end_accuracy = CategoricalAccuracy()
def __init__(self,
             vocab,
             text_field_embedder,
             hidden_size=128,
             num_layers=2,
             dropout=0.5,
             tag_namespace='tags',
             initializer=None,
             metric=None):
    if initializer is None:
        initializer = InitializerApplicator()
    if metric is None:
        metric = SpanBasedF1Measure(vocab, tag_namespace=tag_namespace)
    super().__init__(vocab)
    self.text_field_embedder = text_field_embedder
    self.hidden_size = hidden_size
    self.num_layers = num_layers
    self.dropout = dropout
    self.tag_namespace = tag_namespace
    self.initializer = initializer
    self.metric = metric
    self.seq2seq_encoder = Seq2SeqEncoder.from_params(Params({
        'type': 'lstm',
        'input_size': text_field_embedder.get_output_dim(),
        'hidden_size': hidden_size,
        'num_layers': num_layers,
        'dropout': dropout,
        'bidirectional': True,
    }))
    self.num_tags = vocab.get_vocab_size(tag_namespace)
    self.tags_projection_layer = TimeDistributed(
        Linear(self.seq2seq_encoder.get_output_dim(), self.num_tags))
    self.crf = CRF(self.num_tags)
    self.initializer(self)
def __init__(
    self,
    vocab: Vocabulary,
    bert_model: PretrainedBertEmbedder,
    aggregate_feedforward: FeedForward,
    dropout: float = 0.0,
    index: str = "bert",
    label_namespace: str = "labels",
    initializer: InitializerApplicator = InitializerApplicator(),
    regularizer: Optional[RegularizerApplicator] = None,
) -> None:
    super().__init__(vocab, regularizer)
    self.bert_model = bert_model.bert_model
    self._label_namespace = label_namespace
    self._dropout = torch.nn.Dropout(p=dropout)
    self._classification_layer = aggregate_feedforward
    self._loss = torch.nn.CrossEntropyLoss()
    self._index = index
    self._f1 = BinaryThresholdF1()
    initializer(self._classification_layer)
def __init__(self,
             vocab: Vocabulary,
             mydatabase: str,
             schema_path: str,
             utterance_embedder: TextFieldEmbedder,
             action_embedding_dim: int,
             encoder: Seq2SeqEncoder,
             decoder_beam_search: BeamSearch,
             max_decoding_steps: int,
             input_attention: Attention,
             add_action_bias: bool = True,
             dropout: float = 0.0,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super().__init__(vocab, regularizer)
    self._utterance_embedder = utterance_embedder
    self._encoder = encoder
    self._max_decoding_steps = max_decoding_steps
    self._add_action_bias = add_action_bias
    self._dropout = torch.nn.Dropout(p=dropout)

    self._exact_match = Average()
    self._action_similarity = Average()
    self._valid_sql_query = SqlValidity(mydatabase=mydatabase)
    self._token_match = TokenSequenceAccuracy()
    self._kb_match = KnowledgeBaseConstsAccuracy(schema_path=schema_path)
    self._schema_free_match = GlobalTemplAccuracy(schema_path=schema_path)
    self._coverage_loss = CoverageAttentionLossMetric()

    # the padding value used by IndexField
    self._action_padding_index = -1

    num_actions = vocab.get_vocab_size("rule_labels")
    input_action_dim = action_embedding_dim
    if self._add_action_bias:
        input_action_dim += 1
    self._action_embedder = Embedding(num_embeddings=num_actions,
                                      embedding_dim=input_action_dim)
    self._output_action_embedder = Embedding(num_embeddings=num_actions,
                                             embedding_dim=action_embedding_dim)

    # This is what we pass as input in the first step of decoding, when we don't have a
    # previous action, or a previous utterance attention.
    self._first_action_embedding = torch.nn.Parameter(
        torch.FloatTensor(action_embedding_dim))
    self._first_attended_utterance = torch.nn.Parameter(
        torch.FloatTensor(encoder.get_output_dim()))
    torch.nn.init.normal_(self._first_action_embedding)
    torch.nn.init.normal_(self._first_attended_utterance)

    self._beam_search = decoder_beam_search
    self._decoder_trainer = MaximumMarginalLikelihood(beam_size=1)
    self._transition_function = BasicTransitionFunction(
        encoder_output_dim=self._encoder.get_output_dim(),
        action_embedding_dim=action_embedding_dim,
        input_attention=input_attention,
        add_action_bias=self._add_action_bias,
        dropout=dropout)
    initializer(self)
def __init__(self,
             vocab: Vocabulary,
             criterion: Criterion,
             evaluate_mode: bool = False,
             attention_dim: int = 1024,
             hidden_size: int = 1024,
             dropout: float = 0.1,
             vocab_size: int = 50264,
             model_name: str = 'roberta-base',
             namespace: str = 'bpe',
             index: str = 'roberta',
             padding_value: int = 1,
             use_context: bool = True,
             sampling_topk: int = 1,
             sampling_temp: float = 1.0,
             weigh_bert: bool = False,
             initializer: InitializerApplicator = InitializerApplicator()) -> None:
    super().__init__(vocab)
    # NOTE: the injected criterion is ignored here and replaced with BCE.
    self.criterion = nn.BCELoss()
    self.index = index
    self.namespace = namespace
    self.resnet = resnet152()
    self.roberta = torch.hub.load('pytorch/fairseq:2f7e3f3323', 'roberta.large')
    self.roberta.eval()
    self.use_context = use_context
    self.padding_idx = padding_value
    self.evaluate_mode = evaluate_mode
    self.sampling_topk = sampling_topk
    self.sampling_temp = sampling_temp
    self.weigh_bert = weigh_bert
    self.lstm = nn.LSTM(hidden_size=1024, input_size=1024,
                        num_layers=2, batch_first=True)
    self.loss_func = nn.MSELoss()

    size = 100
    self.conv = nn.Conv2d(2048, size, 7)
    self.linear = nn.Linear(1024, size)
    self.relu = nn.ReLU()

    if weigh_bert:
        self.bert_weight = nn.Parameter(torch.Tensor(25))
        nn.init.uniform_(self.bert_weight)

    self.n_batches = 0
    self.n_samples = 0
    self.sample_history = {}
    self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    self.dbr = "/specific/netapp5/joberant/nlp_fall_2021/shlomotannor/newscaptioning/dbr/"
    initializer(self)
def from_params(cls, vocab: Vocabulary, params: Params) -> 'DecomposableAttention':
    embedder_params = params.pop("text_field_embedder")
    text_field_embedder = TextFieldEmbedder.from_params(vocab, embedder_params)

    premise_encoder_params = params.pop("premise_encoder", None)
    if premise_encoder_params is not None:
        premise_encoder = Seq2SeqEncoder.from_params(premise_encoder_params)
    else:
        premise_encoder = None

    hypothesis_encoder_params = params.pop("hypothesis_encoder", None)
    if hypothesis_encoder_params is not None:
        hypothesis_encoder = Seq2SeqEncoder.from_params(hypothesis_encoder_params)
    else:
        hypothesis_encoder = None

    attend_feedforward = FeedForward.from_params(params.pop('attend_feedforward'))
    similarity_function = SimilarityFunction.from_params(params.pop("similarity_function"))
    compare_feedforward = FeedForward.from_params(params.pop('compare_feedforward'))
    aggregate_feedforward = FeedForward.from_params(params.pop('aggregate_feedforward'))
    initializer = InitializerApplicator.from_params(params.pop("initializer", []))

    return cls(vocab=vocab,
               text_field_embedder=text_field_embedder,
               attend_feedforward=attend_feedforward,
               similarity_function=similarity_function,
               compare_feedforward=compare_feedforward,
               aggregate_feedforward=aggregate_feedforward,
               initializer=initializer,
               premise_encoder=premise_encoder,
               hypothesis_encoder=hypothesis_encoder)
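# A minimal sketch of the configuration shape this `from_params` expects. The top-level
# keys mirror the pops above; the inner dimensions, activations, and the "dot_product"
# similarity type are illustrative assumptions, not values taken from any real experiment.
from allennlp.common import Params

example_params = Params({
    "text_field_embedder": {"tokens": {"type": "embedding", "embedding_dim": 300}},
    "attend_feedforward": {"input_dim": 300, "num_layers": 2,
                           "hidden_dims": 200, "activations": "relu"},
    "similarity_function": {"type": "dot_product"},
    "compare_feedforward": {"input_dim": 600, "num_layers": 2,
                            "hidden_dims": 200, "activations": "relu"},
    "aggregate_feedforward": {"input_dim": 400, "num_layers": 2,
                              "hidden_dims": [200, 3], "activations": ["relu", "linear"]},
    # "premise_encoder" and "hypothesis_encoder" are optional and omitted here.
})
# model = DecomposableAttention.from_params(vocab, example_params)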
def __init__( self, vocab: Vocabulary, bert_model: BertModel, dropout: float = 0.0, index: str = "bert", trainable: bool = True, initializer: InitializerApplicator = InitializerApplicator(), regularizer: Optional[RegularizerApplicator] = None, ) -> None: super().__init__(vocab, regularizer) self._index = index self.bert_model = PretrainedBertModel.load(bert_model) hidden_size = self.bert_model.config.hidden_size for param in self.bert_model.parameters(): param.requires_grad = trainable # TODO 1. Instantiate any additional parts of your network self.drop = torch.nn.Dropout(p=dropout) self.encoded2start_index = torch.nn.Linear( self.bert_model.config.hidden_size, 1) self.encoded2end_index = torch.nn.Linear( self.bert_model.config.hidden_size, 1) # TODO 2. DON'T FORGET TO INITIALIZE the additional parts of your network. initializer(self) # TODO 3. Instantiate your metrics self.start_acc = CategoricalAccuracy() self.end_acc = CategoricalAccuracy() self.span_acc = BooleanAccuracy()
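# A minimal sketch (not this model's actual forward method) of how the two heads above
# (encoded2start_index / encoded2end_index) are typically applied for span extraction.
# Assumptions: `encoded` is the [batch, seq_len, hidden] output of the BERT encoder, and
# `span_start` / `span_end` hold gold token indices of shape [batch].
import torch

def span_heads_sketch(encoded: torch.Tensor,
                      start_head: torch.nn.Linear,
                      end_head: torch.nn.Linear,
                      span_start: torch.Tensor,
                      span_end: torch.Tensor):
    start_logits = start_head(encoded).squeeze(-1)  # (batch, seq_len)
    end_logits = end_head(encoded).squeeze(-1)      # (batch, seq_len)
    loss_fn = torch.nn.CrossEntropyLoss()
    # Cross-entropy over token positions for the start and end independently.
    loss = loss_fn(start_logits, span_start) + loss_fn(end_logits, span_end)
    return start_logits, end_logits, loss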
def __init__(self,
             vocab: Vocabulary,
             embedding_size: int,
             encoder_hidden_size: int,
             encoder_num_layers: int,
             decoder: DecoderNet,
             decoder_type: str = "lstm",
             decoder_num_layers: int = 1,
             share_decoder_params: bool = True,  # only valid when decoder_type == `transformer`
             text_field_embedder: TextFieldEmbedder = None,
             start_token: str = "[CLS]",
             end_token: str = "[SEP]",
             index_name: str = "tokens",
             beam_size: int = 4,
             max_turn_len: int = 3,
             min_dec_len: int = 4,
             max_dec_len: int = 30,
             coverage_factor: float = 0.0,
             device: Union[int, str, List[int]] = -1,
             metrics: Optional[List[Metric]] = None,
             valid_metric_keys: List[str] = None,
             dropout_rate: float = 0.1,
             seed: int = 42,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: RegularizerApplicator = None):
    # Initialize the vocab and the regularizer
    Model.__init__(self, vocab, regularizer)

    # ----------- Define the embedding and the encoder ---------------
    # Embedder for the token sequence
    self._text_field_embedder = text_field_embedder

    # Define the encoder
    self.encoder = torch.nn.LSTM(input_size=embedding_size,
                                 hidden_size=encoder_hidden_size,
                                 num_layers=encoder_num_layers,
                                 batch_first=True,
                                 dropout=dropout_rate,
                                 bidirectional=True)
    self.encoder_num_layers = encoder_num_layers
    # The encoder is bidirectional while the decoder is unidirectional,
    # so project the encoder output down to the unidirectional dimension.
    self.bi2uni_dec_init_state = torch.nn.Linear(2 * encoder_hidden_size,
                                                 encoder_hidden_size)
    self.encoder_output_dim = encoder_hidden_size

    # ------------- Shared initialization ---------------------
    self.common_init(self.encoder_output_dim, decoder, decoder_type,
                     decoder_num_layers, share_decoder_params, start_token,
                     end_token, index_name, beam_size, min_dec_len, max_dec_len,
                     coverage_factor, device, metrics, valid_metric_keys, seed,
                     initializer)

    # -------------- Encoder-specific initialization ---------------
    # Get the embedding dimension
    embedding_size = self._text_field_embedder.get_output_dim()
    self.turn_embedding = torch.nn.Embedding(max_turn_len, embedding_size)
def __init__( self, vocab: Vocabulary, bert_model: Union[str, BertModel], dropout: float = 0.0, num_labels: int = None, index: str = "bert", label_namespace: str = "labels", trainable: bool = True, initializer: InitializerApplicator = InitializerApplicator() ) -> None: super().__init__(vocab) if isinstance(bert_model, str): self.bert_model = PretrainedBertModel.load(bert_model) else: self.bert_model = bert_model self.bert_model.requires_grad = trainable in_features = self.bert_model.config.hidden_size if num_labels: out_features = num_labels else: out_features = vocab.get_vocab_size(label_namespace) self._dropout = torch.nn.Dropout(p=dropout) self._tagger_layer = torch.nn.Linear(in_features, out_features) self._span_f1 = SpanBasedF1Measure(vocab, label_namespace, label_encoding='BIO') self._loss = torch.nn.CrossEntropyLoss() self._index = index initializer(self._tagger_layer)
def __init__(
    self,
    vocab: Vocabulary,
    text_field_embedder: TextFieldEmbedder,
    encoder: Seq2VecEncoder,
    dropout_proba: float = 0.5,
    initializer: InitializerApplicator = InitializerApplicator(),
    regularizer: RegularizerApplicator = None,
) -> None:
    super().__init__(vocab, regularizer)
    self.text_field_embedder = text_field_embedder
    self.encoder = encoder

    num_dense = encoder.get_output_dim() * 2
    self.projection = nn.Sequential(
        nn.BatchNorm1d(num_dense),
        nn.Dropout(p=dropout_proba),
        nn.Linear(num_dense, num_dense),
        nn.ReLU(inplace=True),
        nn.BatchNorm1d(num_dense),
        nn.Dropout(p=dropout_proba),
        nn.Linear(num_dense, 1),
        nn.Sigmoid(),
    )
    # The projection head already ends in a Sigmoid, so plain BCELoss is used here;
    # BCEWithLogitsLoss would apply a second sigmoid to the probabilities.
    self.lossfun = nn.BCELoss()
    self.metrics = {"accuracy": CategoricalAccuracy()}
    initializer(self)
def __init__(
    self,
    vocab: Vocabulary,
    bert_model: Union[str, LayeredPretrainedBertModel],
    dropout: float = 0.0,
    num_labels: int = None,
    index: str = "bert",
    label_namespace: str = "labels",
    trainable: bool = True,
    scaling_temperature: str = "1",
    temperature_threshold: float = -1,
    layer_indices: str = "23",
    multitask: bool = False,
    debug: bool = False,
    add_previous_layer_logits: bool = True,
    initializer: InitializerApplicator = InitializerApplicator()
) -> None:
    super().__init__(vocab)
    if isinstance(bert_model, str):
        self.bert_model = LayeredPretrainedBertModel.load(bert_model)
    else:
        self.bert_model = bert_model

    self._dropout = torch.nn.Dropout(p=dropout)
    self._add_previous_layer_logits = add_previous_layer_logits
    # e.g. layer_indices="3_7_11" selects BERT layers 3, 7 and 11.
    self._layer_indices = [int(x) for x in layer_indices.split("_")]
    self._sum_weights = torch.nn.ParameterList([
        torch.nn.Parameter(torch.randn(i + 1)) for i in self._layer_indices
    ])
    self._multitask = multitask
    self._debug = debug
    self._normalize_sum_weights()

    max_layer = max(self._layer_indices)
    assert max_layer < len(self.bert_model.encoder.layer), \
        "Received layer {} (in {}) which is not smaller than the number of layers " \
        "in the BERT model ({})".format(max_layer, layer_indices,
                                        len(self.bert_model.encoder.layer))
    # Remove all unused parameters above the highest requested layer.
    self.bert_model.encoder.layer = self.bert_model.encoder.layer[:max_layer + 1]
    for param in self.bert_model.parameters():
        param.requires_grad = trainable

    self._count_n_layers = CountNLayers(self._layer_indices)
    self._index = index
    self._scaling_temperatures = [float(x) for x in scaling_temperature.split("_")]
    self._temperature_threshold = temperature_threshold
def __init__(self,
             vocab: Vocabulary,
             utterance_embedder: TextFieldEmbedder,
             action_embedding_dim: int,
             encoder: Seq2SeqEncoder,
             decoder_beam_search: BeamSearch,
             max_decoding_steps: int,
             input_attention: Attention,
             database_file: str,
             add_action_bias: bool = True,
             dropout: float = 0.0,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super().__init__(vocab, regularizer)
    self._utterance_embedder = utterance_embedder
    self._encoder = encoder
    self._max_decoding_steps = max_decoding_steps
    self._add_action_bias = add_action_bias
    self._dropout = torch.nn.Dropout(p=dropout)

    self._exact_match = Average()
    self._valid_sql_query = Average()
    self._action_similarity = Average()
    self._denotation_accuracy = Average()

    self._executor = SqlExecutor(database_file)

    # the padding value used by IndexField
    self._action_padding_index = -1

    num_actions = vocab.get_vocab_size("rule_labels")
    input_action_dim = action_embedding_dim
    if self._add_action_bias:
        input_action_dim += 1
    self._action_embedder = Embedding(num_embeddings=num_actions,
                                      embedding_dim=input_action_dim)
    self._output_action_embedder = Embedding(num_embeddings=num_actions,
                                             embedding_dim=action_embedding_dim)

    # This is what we pass as input in the first step of decoding, when we don't have a
    # previous action, or a previous utterance attention.
    self._first_action_embedding = torch.nn.Parameter(
        torch.FloatTensor(action_embedding_dim))
    self._first_attended_utterance = torch.nn.Parameter(
        torch.FloatTensor(encoder.get_output_dim()))
    torch.nn.init.normal_(self._first_action_embedding)
    torch.nn.init.normal_(self._first_attended_utterance)

    self._beam_search = decoder_beam_search
    self._decoder_trainer = MaximumMarginalLikelihood(beam_size=1)
    self._transition_function = BasicTransitionFunction(
        encoder_output_dim=self._encoder.get_output_dim(),
        action_embedding_dim=action_embedding_dim,
        input_attention=input_attention,
        predict_start_type_separately=False,
        add_action_bias=self._add_action_bias,
        dropout=dropout)
    initializer(self)
def from_params(cls, vocab: Vocabulary, params: Params) -> 'SemanticRoleLabeler':
    embedder_params = params.pop("text_field_embedder")
    text_field_embedder = TextFieldEmbedder.from_params(vocab, embedder_params)
    stacked_encoder = Seq2SeqEncoder.from_params(params.pop("stacked_encoder"))
    binary_feature_dim = params.pop("binary_feature_dim")
    initializer = InitializerApplicator.from_params(params.pop("initializer", []))
    return cls(vocab=vocab,
               text_field_embedder=text_field_embedder,
               stacked_encoder=stacked_encoder,
               binary_feature_dim=binary_feature_dim,
               initializer=initializer)
def from_params(cls, vocab: Vocabulary, params: Params) -> 'SemanticRoleLabeler': """ With an empty ``params`` argument, this will instantiate a SRL model with the same configuration as published in the "Deep Semantic Role Labeling - What works and what's next" paper, as long as you've set ``allennlp.common.constants.GLOVE_PATH`` to the location of your gzipped 100-dimensional glove vectors. If you want to change parameters, the keys in the ``params`` object must match the constructor arguments above. """ default_embedder_params = { 'tokens': { 'type': 'embedding', 'pretrained_file': GLOVE_PATH, 'trainable': True } } embedder_params = params.pop("text_field_embedder", default_embedder_params) text_field_embedder = TextFieldEmbedder.from_params( vocab, embedder_params) default_lstm_params = { 'type': 'alternating_lstm', 'input_size': 101, # Because of the verb_indicator feature. 'hidden_size': 300, 'num_layers': 8, 'recurrent_dropout_probability': 0.1, 'use_highway': True } encoder_params = params.pop("stacked_encoder", default_lstm_params) stacked_encoder = Seq2SeqEncoder.from_params(encoder_params) default_initializer_params = { 'bias': { 'type': 'normal', 'std': 0.1 }, 'default': 'orthogonal' } initializer_params = params.pop('initializer', default_initializer_params) initializer = InitializerApplicator.from_params(initializer_params) return cls(vocab=vocab, text_field_embedder=text_field_embedder, stacked_encoder=stacked_encoder, initializer=initializer)
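# A minimal sketch of the "published configuration" path the docstring above describes,
# assuming ``GLOVE_PATH`` has been pointed at a local copy of the gzipped 100-d GloVe file
# (the path below is a placeholder, not a real location).
# from allennlp.common import constants, Params
# constants.GLOVE_PATH = "/path/to/glove.100d.txt.gz"
# model = SemanticRoleLabeler.from_params(vocab, Params({}))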
def from_params(cls, vocab: Vocabulary, params: Params) -> 'SentenceClassifier':
    embedder_params = params.pop("text_field_embedder")
    text_field_embedder = TextFieldEmbedder.from_params(vocab, embedder_params)
    question_encoder = Seq2VecEncoder.from_params(params.pop("question_encoder"))
    initializer = InitializerApplicator.from_params(params.pop('initializer', []))
    regularizer = RegularizerApplicator.from_params(params.pop('regularizer', []))
    return cls(vocab=vocab,
               text_field_embedder=text_field_embedder,
               question_encoder=question_encoder,
               initializer=initializer,
               regularizer=regularizer)
def __init__(self,
             vocab: Vocabulary,
             criterion: Criterion,
             evaluate_mode: bool = False,
             attention_dim: int = 1024,
             hidden_size: int = 1024,
             dropout: float = 0.1,
             vocab_size: int = 50264,
             model_name: str = 'roberta-base',
             namespace: str = 'bpe',
             index: str = 'roberta',
             padding_value: int = 1,
             use_context: bool = True,
             sampling_topk: int = 1,
             sampling_temp: float = 1.0,
             weigh_bert: bool = False,
             initializer: InitializerApplicator = InitializerApplicator()) -> None:
    super().__init__(vocab)
    self.criterion = criterion
    self.index = index
    self.namespace = namespace
    self.resnet = resnet152()
    self.roberta = torch.hub.load('pytorch/fairseq:2f7e3f3323', 'roberta.large')
    self.roberta.eval()
    self.use_context = use_context
    self.padding_idx = padding_value
    self.evaluate_mode = evaluate_mode
    self.sampling_topk = sampling_topk
    self.sampling_temp = sampling_temp
    self.weigh_bert = weigh_bert
    self.loss_func = nn.MSELoss()

    self.conv = nn.Conv2d(2048, 512, 7)
    self.linear = nn.Linear(2048, 512)
    self.relu = nn.ReLU()

    if weigh_bert:
        self.bert_weight = nn.Parameter(torch.Tensor(25))
        nn.init.uniform_(self.bert_weight)

    self.n_batches = 0
    self.n_samples = 0
    self.sample_history = {}
    initializer(self)
def from_params(cls, vocab: Vocabulary, params: Params) -> 'ToxicModel':
    embedder_params = params.pop("text_field_embedder")
    text_field_embedder = TextFieldEmbedder.from_params(vocab, embedder_params)
    encoder = Seq2VecEncoder.from_params(params.pop("encoder"))
    classifier_feedforward = FeedForward.from_params(params.pop("classifier_feedforward"))
    initializer = InitializerApplicator.from_params(params.pop('initializer', []))
    regularizer = RegularizerApplicator.from_params(params.pop('regularizer', []))
    return cls(vocab=vocab,
               text_field_embedder=text_field_embedder,
               encoder=encoder,
               classifier_feedforward=classifier_feedforward,
               initializer=initializer,
               regularizer=regularizer)
def __init__(self,
             vocab: Vocabulary,
             doc_field_embedder: TextFieldEmbedder,
             scorer: Scorer,
             validation_metrics: Dict[str, Metric],
             temperature: float = 2.0,
             alpha: float = 0.8,
             ranking_loss: bool = False,
             query_field_embedder: Optional[TextFieldEmbedder] = None,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None,
             idf_embedder: Optional[TextFieldEmbedder] = None,
             dropout: Optional[float] = 0.) -> None:
    super(LeToRWrapper, self).__init__(vocab, regularizer)
    self.query_field_embedder = query_field_embedder
    if self.query_field_embedder is None:
        self.query_field_embedder = doc_field_embedder
    self.doc_field_embedder = doc_field_embedder
    self.idf_embedder = idf_embedder
    self.scorer = scorer
    self.initializer = initializer
    self.regularizer = regularizer

    self.accuracy = CategoricalAccuracy()
    self.metrics = copy.deepcopy(validation_metrics)
    self.metrics.update({'accuracy': CategoricalAccuracy()})
    self.training_metrics = {
        True: ['accuracy'],
        False: validation_metrics.keys()
    }

    self.temperature = temperature
    self.kd_alpha = alpha
    self.classification_loss = nn.CrossEntropyLoss()
    self.kl_loss = nn.KLDivLoss(reduction='batchmean')

    # self.ranking_loss = ranking_loss
    # if self.ranking_loss:
    #     self.loss = nn.MarginRankingLoss(margin=1.0)
    # else:
    #     self.loss = nn.CrossEntropyLoss()
    initializer(self)
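# A minimal sketch of the conventional way `temperature` and `kd_alpha` are combined in
# knowledge distillation (how this model actually mixes them in its forward pass is not
# shown here): a temperature-scaled KL term against the teacher, rescaled by T^2, plus a
# cross-entropy term against the gold labels. The function and argument names are
# illustrative, not taken from the model above.
import torch
import torch.nn.functional as F

def kd_loss_sketch(student_logits: torch.Tensor,
                   teacher_logits: torch.Tensor,
                   labels: torch.Tensor,
                   temperature: float = 2.0,
                   alpha: float = 0.8) -> torch.Tensor:
    # Soft-target term: KL between temperature-softened distributions, scaled by T^2
    # so its gradient magnitude matches the hard-label term.
    soft = F.kl_div(F.log_softmax(student_logits / temperature, dim=-1),
                    F.softmax(teacher_logits / temperature, dim=-1),
                    reduction="batchmean") * temperature ** 2
    # Hard-label term: ordinary cross-entropy on the gold labels.
    hard = F.cross_entropy(student_logits, labels)
    return alpha * soft + (1 - alpha) * hard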
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             encoder: Seq2VecEncoder,
             classifier_feedforward: FeedForward,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super().__init__(vocab, regularizer)
    self.text_field_embedder = text_field_embedder
    self.num_classes = self.vocab.get_vocab_size("labels")
    self.encoder = encoder
    self.classifier_feedforward = classifier_feedforward
    self.f1 = MultiLabelF1Measure()
    self.loss = torch.nn.MultiLabelSoftMarginLoss()
    initializer(self)
def __init__(
    self,
    vocab: Vocabulary,
    model_name: str,
    num_labels: int,
    translation_factor: float = 0.5,
    seq_decoder: SeqDecoder = None,
    decoding_dim: int = 512,
    target_embedding_dim: int = 512,
    load_classifier: bool = False,
    transformer_trainable: bool = True,
    classifier_trainable: bool = True,
    dropout: float = 0.1,
    index: str = "transformer",
    label_namespace: str = "label",
    initializer: InitializerApplicator = InitializerApplicator(),
    regularizer: Optional[RegularizerApplicator] = None,
) -> None:
    super().__init__(vocab, regularizer)
    if not num_labels:
        num_labels = vocab.get_vocab_size(namespace=label_namespace)
    config = AutoConfig.from_pretrained(model_name)
    config.num_labels = num_labels
    self.transformer = AutoModel.from_pretrained(model_name, config=config)
    for param in self.transformer.parameters():
        param.requires_grad = transformer_trainable

    # Only BERT supports loading the pretrained classifier layer currently.
    if load_classifier:
        self.classifier = BertForNextSentencePrediction.from_pretrained(
            model_name, config=config).cls
        for param in self.classifier.parameters():
            param.requires_grad = classifier_trainable
    else:
        classifier = torch.nn.Linear(config.hidden_size, config.num_labels)
        initializer(classifier)
        self.classifier = torch.nn.Sequential(torch.nn.Dropout(dropout), classifier)

    self._accuracy = CategoricalAccuracy()
    self._loss = torch.nn.CrossEntropyLoss()
    self._index = index
    self._label_namespace = label_namespace
    self._translation_factor = translation_factor
    self._seq_decoder = seq_decoder
def __init__(
    self,
    vocab: Vocabulary,
    bert_model: str,  # model name or path passed to PretrainedBertModel.load
    dropout: float = 0.0,
    index: str = "bert",
    trainable: bool = True,
    initializer: InitializerApplicator = InitializerApplicator(),
    regularizer: Optional[RegularizerApplicator] = None,
) -> None:
    super().__init__(vocab, regularizer)
    self._index = index
    self.bert_model = PretrainedBertModel.load(bert_model)
    hidden_size = self.bert_model.config.hidden_size
    for param in self.bert_model.parameters():
        param.requires_grad = trainable
def __init__(
    self,
    vocab: Vocabulary,
    bert_model: Union[str, BertModel],
    dropout: float = 0.0,
    num_labels: int = None,
    index: str = "bert",
    label_namespace: str = "labels",
    trainable: bool = True,
    initializer: InitializerApplicator = InitializerApplicator(),
    regularizer: Optional[RegularizerApplicator] = None,
) -> None:
    super().__init__(vocab, regularizer)
    if isinstance(bert_model, str):
        self.bert_model = PretrainedBertModel.load(bert_model)
    else:
        self.bert_model = bert_model
    for param in self.bert_model.parameters():
        param.requires_grad = trainable

    in_features = self.bert_model.config.hidden_size
    self._label_namespace = label_namespace
    if num_labels:
        out_features = num_labels
    else:
        out_features = vocab.get_vocab_size(namespace=self._label_namespace)

    self._dropout = torch.nn.Dropout(p=dropout)
    self._classification_layer = torch.nn.Linear(in_features, out_features)
    self._accuracy = CategoricalAccuracy()
    # ****** add by jlk ******
    self._f1score = F1Measure(positive_label=1)
    # ****** add by jlk ******
    self._loss = torch.nn.CrossEntropyLoss()
    self._index = index
    initializer(self._classification_layer)
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             question_encoder: Seq2VecEncoder,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super().__init__(vocab, regularizer)
    self.text_field_embedder = text_field_embedder
    self.nb_classes = self.vocab.get_vocab_size("labels")
    self.question_encoder = question_encoder
    self.enc_dropout = torch.nn.Dropout(0.5)
    self.classifier_feedforward = Linear(question_encoder.get_output_dim(),
                                         self.nb_classes)
    self.ff_dropout = torch.nn.Dropout(0.5)
    self.metrics = {
        "accuracy": CategoricalAccuracy(),
    }
    self.loss = torch.nn.CrossEntropyLoss()
    initializer(self)
def __init__(self,
             vocab: Vocabulary,
             bert_model: Union[str, LayeredPretrainedBertModel],
             dropout: float = 0.0,
             num_labels: int = None,
             index: str = "bert",
             label_namespace: str = "labels",
             trainable: bool = True,
             scaling_temperature: str = "1",
             temperature_threshold: float = -1,
             layer_indices: str = "23",
             multitask: bool = False,
             debug: bool = False,
             add_previous_layer_logits: bool = True,
             print_selected_layer: bool = False,
             ensemble: str = None,
             initializer: InitializerApplicator = InitializerApplicator()) -> None:
    super().__init__(vocab, bert_model, dropout, num_labels, index,
                     label_namespace, trainable, scaling_temperature,
                     temperature_threshold, layer_indices, multitask, debug,
                     add_previous_layer_logits, initializer)
    self._accuracy = CategoricalAccuracy()
    self._loss = torch.nn.CrossEntropyLoss()
    self.print_selected_layer = print_selected_layer

    in_features = self.bert_model.config.hidden_size
    if num_labels:
        out_features = num_labels
    else:
        out_features = vocab.get_vocab_size(label_namespace)

    if ensemble is not None:
        self.ensemble = [float(x) for x in ensemble.split(",")]
    else:
        self.ensemble = None

    # One classification head per selected layer; each later head also receives the
    # logits of the previous heads when add_previous_layer_logits is set.
    self._classification_layers = torch.nn.ModuleList([
        torch.nn.Linear(in_features + (i * out_features * add_previous_layer_logits),
                        out_features)
        for i in range(len(self._layer_indices))
    ])
    for layer in self._classification_layers:
        initializer(layer)
def __init__(self,
             vocab: Vocabulary,
             decoder: Decoder,
             criterion: Criterion,
             evaluate_mode: bool = False,
             namespace: str = 'bpe',
             index: str = 'roberta',
             padding_value: int = 1,
             use_context: bool = True,
             sampling_topk: int = 1,
             sampling_temp: float = 1.0,
             max_caption_len: int = 50,
             weigh_bert: bool = False,
             initializer: InitializerApplicator = InitializerApplicator()) -> None:
    super().__init__(vocab)
    self.decoder = decoder
    self.criterion = criterion

    self.index = index
    self.namespace = namespace
    self.resnet = resnet152()
    self.roberta = torch.hub.load('pytorch/fairseq:2f7e3f3323', 'roberta.large')
    self.use_context = use_context
    self.padding_idx = padding_value
    self.evaluate_mode = evaluate_mode
    self.sampling_topk = sampling_topk
    self.sampling_temp = sampling_temp
    self.max_caption_len = max_caption_len
    self.weigh_bert = weigh_bert
    if weigh_bert:
        self.bert_weight = nn.Parameter(torch.Tensor(25))
        nn.init.uniform_(self.bert_weight)

    self.n_batches = 0
    self.n_samples = 0
    self.sample_history: Dict[str, float] = defaultdict(float)

    initializer(self)

    self.nlp = spacy.load('en_core_web_lg',
                          disable=['textcat', 'parser', 'tagger', 'ner'])
def __init__(self,
             vocab: Vocabulary,
             query_field_embedder: TextFieldEmbedder,
             doc_field_embedder: TextFieldEmbedder,
             scorer: Scorer,
             validation_metrics: Dict[str, Metric],
             temperature: float = 15.0,
             alpha: float = 0.8,
             ranking_loss: bool = False,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None,
             idf_embedder: Optional[TextFieldEmbedder] = None,
             dropout: float = 0.) -> None:
    super(LeToRWrapper, self).__init__(vocab, regularizer)
    self.embedder = doc_field_embedder
    self.idf_embedder = idf_embedder
    self.final_scorer = FeedForward(2, 1, 1, lambda x: x)
    self.scorer = scorer
    self.initializer = initializer
    self.regularizer = regularizer

    self.metrics = copy.deepcopy(validation_metrics)
    self.metrics.update({'accuracy': CategoricalAccuracy()})
    self.training_metrics = {
        True: ['accuracy'],
        False: validation_metrics.keys()
    }

    self.temperature = temperature
    self.kd_alpha = alpha

    # self.ranking_loss = ranking_loss
    # if self.ranking_loss:
    #     self.loss = nn.MarginRankingLoss(margin=1.0)
    # else:
    self.loss = nn.CrossEntropyLoss()
    initializer(self)
def __init__(
    self,
    vocab: Vocabulary,
    bert_model: PretrainedBertEmbedder,
    aggregate_feedforward: FeedForward,
    dropout: float = 0.0,
    index: str = "bert",
    label_namespace: str = "labels",
    initializer: InitializerApplicator = InitializerApplicator(),
    regularizer: Optional[RegularizerApplicator] = None,
) -> None:
    super().__init__(vocab, regularizer)
    self.bert_model = bert_model.bert_model
    self._label_namespace = label_namespace

    # Weight each class by its inverse frequency; fall back to uniform weights if the
    # label counts were not retained in the vocabulary.
    label_vocab = self.vocab.get_index_to_token_vocabulary('labels')
    try:
        print(list(self.vocab._retained_counter["labels"].items()))
        total_size = sum(self.vocab._retained_counter['labels'].values())
        self._class_weight = [0] * len(label_vocab)
        for i, t in label_vocab.items():
            self._class_weight[i] = total_size / self.vocab._retained_counter['labels'][t]
    except (KeyError, TypeError, AttributeError):
        self._class_weight = [1.0] * len(label_vocab)

    self._dropout = torch.nn.Dropout(p=dropout)
    self._pos_index = self.vocab.get_token_to_index_vocabulary(label_namespace)['True']
    self._classification_layer = aggregate_feedforward
    self._index = index
    self._loss = torch.nn.CrossEntropyLoss(weight=torch.tensor(self._class_weight))
    self._f1 = BinaryThresholdF1()
    initializer(self._classification_layer)
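# A small worked example of the inverse-frequency weighting computed above. The counts
# and label vocabulary below are made up for illustration only.
counts = {"False": 300, "True": 100}          # hypothetical retained label counter
label_vocab = {0: "False", 1: "True"}         # hypothetical index -> label mapping
total_size = sum(counts.values())             # 400
class_weight = [total_size / counts[label] for _, label in sorted(label_vocab.items())]
# The rarer "True" class gets four times the weight of "False" in the cross-entropy loss.
assert [round(w, 2) for w in class_weight] == [1.33, 4.0]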
def from_params(cls, vocab: Vocabulary, params: Params) -> 'DecAccSRL':
    embedder_params = params.pop("text_field_embedder")
    text_field_embedder = TextFieldEmbedder.from_params(vocab, embedder_params)

    premise_encoder_params = params.pop("premise_encoder", None)
    if premise_encoder_params is not None:
        premise_encoder = Seq2SeqEncoder.from_params(premise_encoder_params)
    else:
        premise_encoder = None

    hypothesis_encoder_params = params.pop("hypothesis_encoder", None)
    if hypothesis_encoder_params is not None:
        hypothesis_encoder = Seq2SeqEncoder.from_params(hypothesis_encoder_params)
    else:
        hypothesis_encoder = None

    srl_model_archive = params.pop('srl_model_archive', None)
    if srl_model_archive is not None:
        logger.info("Loading pretrained SRL model from %s", srl_model_archive)
        archive = load_archive(srl_model_archive)
        srl_model = archive.model
    else:
        srl_model = None

    attend_feedforward = FeedForward.from_params(params.pop('attend_feedforward'))
    similarity_function = SimilarityFunction.from_params(params.pop("similarity_function"))
    compare_feedforward = FeedForward.from_params(params.pop('compare_feedforward'))
    aggregate_feedforward = FeedForward.from_params(params.pop('aggregate_feedforward'))
    initializer = InitializerApplicator.from_params(params.pop("initializer", []))

    return cls(vocab=vocab,
               text_field_embedder=text_field_embedder,
               attend_feedforward=attend_feedforward,
               similarity_function=similarity_function,
               compare_feedforward=compare_feedforward,
               aggregate_feedforward=aggregate_feedforward,
               initializer=initializer,
               srl_model=srl_model,
               premise_encoder=premise_encoder,
               hypothesis_encoder=hypothesis_encoder)
def __init__(
    self,
    vocab: Vocabulary,
    transformer_embedder: Union[str, TransformerEmbedder, Params],
    dropout: float = 0.0,
    index: str = "bert",
    label_namespace: str = "labels",
    classification_type: str = 'multi-class',
    pos_label: str = None,
    threshold: float = 0.5,
    neg_weight: float = 1.0,
    initializer: InitializerApplicator = InitializerApplicator(),
    regularizer: Optional[RegularizerApplicator] = None,
) -> None:
    super().__init__(vocab=vocab,
                     classification_type=classification_type,
                     pos_label=pos_label,
                     threshold=threshold,
                     neg_weight=neg_weight,
                     label_namespace=label_namespace,
                     regularizer=regularizer)
    # For unknown reasons, the embedder cannot be instantiated from Params automatically
    # when a config file is used, so handle that case explicitly here.
    if isinstance(transformer_embedder, Params):
        self.transformer_model = TokenEmbedder.from_params(transformer_embedder,
                                                           vocab=vocab)
    elif isinstance(transformer_embedder, TransformerEmbedder):
        self.transformer_model = transformer_embedder
    else:
        logging.fatal("The embedder could not be instantiated.")
        exit()

    self.classification_layer = nn.Sequential(
        nn.Dropout(dropout),
        nn.Linear(self.transformer_model.get_output_dim(),
                  vocab.get_vocab_size(self._label_namespace)))
    self._index = index
    initializer(self.classification_layer)