Example #1
0
 def __init__(self, words, args):
     """Build a word-level language model: embedding -> SRU stack -> softmax.

     Args:
         words: vocabulary (its length fixes the embedding table size).
         args: namespace of hyper-parameters (n_e, n_d, depth, dropout,
             bias, layer_norm, rescale, n_proj, ...).
     """
     super(Model, self).__init__()
     self.args = args
     # Embedding width: an explicit args.n_e wins; otherwise cap it at
     # min(vocab size, hidden size).
     self.n_e = args.n_e or min(len(words), args.n_d)
     self.n_d = args.n_d
     self.depth = args.depth
     self.drop = nn.Dropout(args.dropout)
     self.n_V = len(words)
     self.embedding_layer = nn.Embedding(self.n_V, self.n_e)
     # First SRU layer maps n_e -> 4*n_d; the remaining depth-1 layers use
     # factorized (projected) linear maps of width 3*n_d.
     custom_modules = [CustomLinear(self.n_e, 4 * self.n_d, bias=False)]
     custom_modules.extend(
         flop.ProjectedLinear(self.n_d,
                              3 * self.n_d,
                              proj_features=args.n_proj,
                              bias=False)
         for _ in range(self.depth - 1))
     self.rnn = sru.SRU(
         self.n_e,
         self.n_d,
         self.depth,
         dropout=args.dropout,
         highway_bias=args.bias,
         layer_norm=args.layer_norm,
         rescale=args.rescale,
         custom_m=custom_modules,
     )
     self.output_layer = nn.Linear(self.n_d, self.n_V)
     self.init_weights()
Example #2
0
 def __init__(self, args):
     """Build an adaptive-softmax language model over an SRU stack.

     Input and output layers share the same adaptive cutoff schedule and
     (optionally, unless args.not_tie) their weights.

     Args:
         args: namespace of hyper-parameters (n_token, n_e, n_proj, n_d,
             depth, dropout, dropout_e, div_val, bias, layer_norm,
             rescale, not_tie, ...).
     """
     super(Model, self).__init__()
     self.args = args
     self.cutoffs = [10000, 20000, 40000, 60000, 100000]
     self.n_V = args.n_token
     # Fall back to the projection size when no embedding size is given.
     self.n_e = args.n_e or args.n_proj
     self.n_d = args.n_d
     self.depth = args.depth
     self.drop = nn.Dropout(args.dropout)
     # Input and output adaptive layers use identical frequency buckets.
     adaptive_kwargs = dict(
         div_val=args.div_val,
         div_freq=2,
         dropout=args.dropout_e,
     )
     self.embedding_layer = AdaptiveEmbedding(
         self.n_V,
         self.n_e,
         self.n_d,
         self.cutoffs,
         **adaptive_kwargs,
     )
     self.rnn = sru.SRU(
         self.n_d,
         self.n_d,
         self.depth,
         projection_size=args.n_proj,
         dropout=args.dropout,
         highway_bias=args.bias,
         layer_norm=args.layer_norm,
         rescale=args.rescale,
         custom_m=flop.ProjectedLinear(self.n_d,
                                       3 * self.n_d,
                                       proj_features=args.n_proj,
                                       bias=False),
     )
     self.output_layer = AdaptiveLogSoftmax(
         self.n_V,
         self.n_e,
         self.n_d,
         self.cutoffs,
         keep_order=False,
         **adaptive_kwargs,
     )
     self.init_weights()
     if not args.not_tie:
         self.tie_weights()