Example #1
    def __init__(self, args):
        super(Controller, self).__init__()
        self.args = args

        if self.args.network_type == 'rnn':
            self.num_tokens = [len(args.shared_rnn_activations)]
            for idx in range(self.args.num_blocks):
                self.num_tokens += [idx + 1, len(args.shared_rnn_activations)]
            self.func_names = args.shared_rnn_activations
        elif self.args.network_type == 'cnn':
            self.num_tokens = [len(args.shared_cnn_types), self.args.num_blocks]
            self.func_names = args.shared_cnn_types

        num_total_tokens = sum(self.num_tokens)

        self.encoder = nn.Embedding(num_total_tokens, args.controller_hid)
        self.lstm = nn.LSTMCell(
                args.controller_hid,
                args.controller_hid)

        pivot = 0
        self.decoders = []

        for idx, size in enumerate(self.num_tokens):
            decoder = nn.Linear(args.controller_hid, size)
            self.decoders.append(decoder)

        self._decoders = nn.ModuleList(self.decoders)

        self.reset_parameters()
        self.static_init_hidden = keydefaultdict(self.init_hidden)

        fn = lambda key: get_variable(
                t.zeros(key, self.args.controller_hid), self.args.cuda, requires_grad=False)
        self.static_inputs = keydefaultdict(fn)
Example #2
    def __init__(self, args):
        torch.nn.Module.__init__(self)
        self.args = args

        self.num_tokens = [
            len(args.augment_types),  # number of augmentation operations
            len(args.magnitude_types),  # number of magnitude bins
            len(args.prob_types)  # number of probability bins
        ] * self.args.op_num_pre_subpolicy * self.args.subpolicy_num
        num_total_tokens = sum(self.num_tokens)  # 30 tokens in total

        self.encoder = torch.nn.Embedding(num_total_tokens,
                                          args.controller_hid_size)
        self.lstm = torch.nn.LSTMCell(args.controller_hid_size,
                                      args.controller_hid_size)

        # At each step the LSTM decides which augmentation op to use, its magnitude, and its probability
        self.decoders = []
        for idx, size in enumerate(self.num_tokens):
            decoder = torch.nn.Linear(args.controller_hid_size, size)
            self.decoders.append(decoder)
        self._decoders = torch.nn.ModuleList(self.decoders)

        self._init_parameters()
        self.static_init_hidden = utils.keydefaultdict(self.init_hidden)

        def _get_default_hidden(key):
            return utils.get_variable(torch.zeros(
                key, self.args.controller_hid_size),
                                      self.args.cuda,
                                      requires_grad=False)

        self.static_inputs = utils.keydefaultdict(_get_default_hidden)
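
The constructors in these examples only build the pieces: a shared Embedding for all token ids, an LSTMCell, one Linear decoder per decision, and keydefaultdict caches of zero inputs and hidden states. Below is a minimal sketch of the autoregressive sampling loop that typically drives such a controller; it is an illustration, not code from the examples, and the batch and offset handling are simplified assumptions.

    def sample_sketch(self, batch_size=1):
        inputs = self.static_inputs[batch_size]       # zero input vector for the first step
        hidden = self.static_init_hidden[batch_size]  # zero (h, c) pair for the first step
        actions = []
        for step, decoder in enumerate(self._decoders):
            hx, cx = self.lstm(inputs, hidden)
            hidden = (hx, cx)
            logits = decoder(hx)                      # logits for this decision
            probs = torch.softmax(logits, dim=-1)
            action = probs.multinomial(num_samples=1) # sample one token id
            actions.append(action[:, 0])
            # feed the choice back in, offset so ids from different decoders
            # do not collide in the shared embedding table
            offset = sum(self.num_tokens[:step])
            inputs = self.encoder(action[:, 0] + offset)
        return actions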
Example #3
    def __init__(self, args):
        torch.nn.Module.__init__(self)
        #         args.cuda = True
        self.args = args
        self.controller_hid = 100
        self.num_tokens = [9, 24, 9]  ###

        num_total_tokens = self.num_tokens[1]

        self.encoder = torch.nn.Embedding(num_total_tokens,
                                          self.controller_hid)
        self.lstm = torch.nn.LSTMCell(self.controller_hid, self.controller_hid)

        self.decoders = []

        for i in range(self.num_tokens[0]):
            decoder = torch.nn.Linear(self.controller_hid, self.num_tokens[1])
            self.decoders.append(decoder)
            decoder = torch.nn.Linear(self.controller_hid, self.num_tokens[2])
            self.decoders.append(decoder)

        self._decoders = torch.nn.ModuleList(self.decoders)

        self.reset_parameters()
        self.static_init_hidden = utils.keydefaultdict(self.init_hidden)

        def _get_default_hidden(key):
            return utils.get_variable(torch.zeros(key, self.controller_hid),
                                      self.args.cuda,
                                      requires_grad=False)

        self.static_inputs = utils.keydefaultdict(_get_default_hidden)
Example #4
    def __init__(self, args):
        torch.nn.Module.__init__(self)
        self.args = args
        self.num_tokens = [len(OPS), 5]  # number of ops, number of blocks
        self.func_names = PRIMITIVES

        num_total_tokens = sum(self.num_tokens)

        self.encoder = torch.nn.Embedding(num_total_tokens,
                                          args.controller_hid)
        self.lstm = torch.nn.LSTMCell(args.controller_hid,
                                      args.controller_hid)  # 100, 100

        # TODO(brendan): Perhaps these weights in the decoder should be
        # shared? At least for the activation functions, which all have the
        # same size.

        self.decoders = []
        for idx, size in enumerate(self.num_tokens):
            decoder = torch.nn.Linear(args.controller_hid, size)
            self.decoders.append(decoder)

        self._decoders = torch.nn.ModuleList(self.decoders)

        self.reset_parameters()
        self.static_init_hidden = utils.keydefaultdict(self.init_hidden)

        def _get_default_hidden(key):
            return utils.get_variable(torch.zeros(key,
                                                  self.args.controller_hid),
                                      self.args.cuda,
                                      requires_grad=False)

        self.static_inputs = utils.keydefaultdict(_get_default_hidden)
Example #5
    def __init__(self, n_ops, n_nodes, device, controller_hid=None, lstm_num_layers=2):
        super(ArchMaster, self).__init__()
        self.K = sum([x + 2 for x in range(n_nodes)])
        self.n_ops = n_ops
        self.n_nodes = n_nodes
        self.device = device

        self.controller_hid = controller_hid
        self.attention_hid = self.controller_hid
        self.lstm_num_layers = lstm_num_layers

        # Embedding of (n_nodes+1) nodes
        # Note that the (n_nodes+2)-th node will not be used
        self.node_op_hidden = nn.Embedding(n_nodes + 1 + n_ops, self.controller_hid)
        self.emb_attn = nn.Linear(self.controller_hid, self.attention_hid, bias=False)
        self.hid_attn = nn.Linear(self.controller_hid, self.attention_hid, bias=False)
        self.v_attn = nn.Linear(self.controller_hid, 1, bias=False)
        self.w_soft = nn.Linear(self.controller_hid, self.n_ops)
        self.lstm = nn.LSTMCell(self.controller_hid, self.controller_hid)
        self.reset_parameters()
        self.static_init_hidden = utils.keydefaultdict(self.init_hidden)
        self.static_inputs = utils.keydefaultdict(self._get_default_hidden)
        self.tanh = nn.Tanh()
        self.prev_nodes, self.prev_ops = [], []
        self.query_index = torch.LongTensor(range(0, n_nodes+1)).to(device)
Example #6
    def __init__(self, args):
        torch.nn.Module.__init__(self)
        self.args = args

        self.num_tokens = [] 

        for idx in range(self.args._max_depth):
            self.num_tokens += [len(args.operations)]
        
        self.op_names = args.operations

        num_total_tokens = sum(self.num_tokens)

        self.encoder = torch.nn.Embedding(num_total_tokens,
                                          args.controller_hid)
        self.lstm = torch.nn.LSTMCell(args.controller_hid, args.controller_hid)

        # TODO(brendan): Perhaps these weights in the decoder should be
        # shared? At least for the activation functions, which all have the
        # same size.
        self.decoders = []
        for idx, size in enumerate(self.num_tokens):
            decoder = torch.nn.Linear(args.controller_hid, size)
            self.decoders.append(decoder)

        self._decoders = torch.nn.ModuleList(self.decoders)

        self.reset_parameters()
        self.static_init_hidden = utils.keydefaultdict(self.init_hidden)

        def _get_default_hidden(key):
            return torch.zeros(key, self.args.controller_hid)

        self.static_inputs = utils.keydefaultdict(_get_default_hidden)
Example #7
    def __init__(self, args):
        super(CNNMicroController, self).__init__(args)
        self.args = args

        if self.args.network_type == 'micro_cnn':
            # First node always take the input to the Cell.

            # For normal cell
            # self.num_normal_tokens = [len(args.shared_cnn_normal_types)]
            self.num_normal_tokens = []
            for idx in range(self.args.num_blocks):
                # NOTE for CNN, the node have two input and op rather than 1.
                self.num_normal_tokens += [
                    idx + 2, len(args.shared_cnn_normal_types)
                ] * 2
            self.normal_func_names = args.shared_cnn_normal_types

            # reduce_tokens
            # self.num_reduce_tokens = [len(args.shared_cnn_reduce_types)]
            self.num_reduce_tokens = []
            for idx in range(self.args.num_blocks):
                self.num_reduce_tokens += [
                    idx + 2, len(args.shared_cnn_reduce_types)
                ] * 2
            self.reduce_func_names = args.shared_cnn_reduce_types

            # Combine the num tokens as a full list.
            self.num_tokens = self.num_normal_tokens + self.num_reduce_tokens
            self.func_names = [self.normal_func_names, self.reduce_func_names]
        else:
            raise NotImplementedError(
                f'{self.args.network_type} is not supported yet')

        num_total_tokens = sum(self.num_tokens)

        self.encoder = torch.nn.Embedding(num_total_tokens,
                                          args.controller_hid)
        self.lstm = torch.nn.LSTMCell(args.controller_hid, args.controller_hid)

        self.decoders = []
        for idx, size in enumerate(self.num_tokens):
            decoder = torch.nn.Linear(args.controller_hid, size)
            self.decoders.append(decoder)

        self._decoders = torch.nn.ModuleList(self.decoders)

        self.reset_parameters()
        self.static_init_hidden = utils.keydefaultdict(self.init_hidden)

        def _get_default_hidden(key):
            return utils.get_variable(torch.zeros(key,
                                                  self.args.controller_hid),
                                      self.args.cuda,
                                      requires_grad=False)

        self.static_inputs = utils.keydefaultdict(_get_default_hidden)
Example #8
    def __init__(self, args):
        torch.nn.Module.__init__(self)
        self.args = args

        if self.args.network_type == 'rnn':
            # NOTE(brendan): `num_tokens` here is just the activation function
            # for every even step,
            self.num_tokens = [len(args.shared_rnn_activations)]  # ['tanh', 'ReLU', 'identity', 'sigmoid']
            for idx in range(self.args.num_blocks):  #num_blocks:default=12
                self.num_tokens += [idx + 1, len(args.shared_rnn_activations)]
            # After the loop above, num_tokens becomes [4, 1, 4, 2, 4, 3, 4, 4, 4, 5, 4, 6, 4, 7, 4, 8, 4, 9, 4, 10, 4, 11, 4, 12, 4]:
            # each 4 is the choice of activation function, 1-12 is the index of the previous node
            self.func_names = args.shared_rnn_activations  # "['tanh', 'ReLU', 'identity', 'sigmoid']"
        elif self.args.network_type == 'cnn':
            self.num_tokens = [
                len(args.shared_cnn_types), self.args.num_blocks
            ]
            self.func_names = args.shared_cnn_types

        num_total_tokens = sum(self.num_tokens)  #num_total_tokens=130

        self.encoder = torch.nn.Embedding(num_total_tokens,
                                          args.controller_hid)  #(130,100)
        self.lstm = torch.nn.LSTMCell(args.controller_hid,
                                      args.controller_hid)  #(100,100)

        # TODO(brendan): Perhaps these weights in the decoder should be
        # shared? At least for the activation functions, which all have the
        # same size.
        self.decoders = []  #len(self.decoders)=25=len(self.num_tokens)
        for idx, size in enumerate(self.num_tokens):
            decoder = torch.nn.Linear(args.controller_hid, size)
            self.decoders.append(decoder)
        # ModuleList can be indexed like a regular Python list, but the modules it contains
        # are properly registered and visible to all Module methods.
        # It lets us manage several modules without naming each one; effectively an array of Modules.
        self._decoders = torch.nn.ModuleList(self.decoders)

        self.reset_parameters()
        self.static_init_hidden = utils.keydefaultdict(self.init_hidden)

        # returns a Variable wrapping a zero Tensor of shape [key, controller_hid]
        def _get_default_hidden(key):
            return utils.get_variable(torch.zeros(key,
                                                  self.args.controller_hid),
                                      self.args.cuda,
                                      requires_grad=False)

        # keydefaultdict is a defaultdict whose __missing__ calls default_factory(key) to
        # build the missing value, so defaults here are produced by _get_default_hidden.
        # static_inputs is therefore a defaultdict that lazily yields zero tensors.
        self.static_inputs = utils.keydefaultdict(_get_default_hidden)
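
Several of the comments above describe utils.keydefaultdict without showing it. A minimal sketch of the idea (an assumption about its implementation, not code taken from these examples): a defaultdict whose __missing__ passes the missing key to the factory, so static_inputs[batch_size] lazily creates and caches a zero tensor sized for that batch.

import collections

class keydefaultdict(collections.defaultdict):
    def __missing__(self, key):
        if self.default_factory is None:
            raise KeyError(key)
        # unlike a plain defaultdict, the key itself is passed to the factory
        value = self[key] = self.default_factory(key)
        return value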
Example #9
    def __init__(self, args):
        torch.nn.Module.__init__(self)
        self.args = args

        if self.args.network_type == 'rnn':
            # NOTE(brendan): `num_tokens` here is just the activation function
            # for every even step,
            self.num_tokens = [len(args.shared_rnn_activations)] #[]
            for idx in range(self.args.num_blocks):
                self.num_tokens += [idx + 1,
                                    len(args.shared_rnn_activations)]
            self.func_names = args.shared_rnn_activations
        elif self.args.network_type == 'cnn':
            self.num_tokens = [len(args.shared_cnn_types)] #[4]
            # print("----cnn_num_blocks", args.cnn_num_blocks)
            for idx in range( sum(args.cnn_num_blocks) - 1):
                # [4 2 4 3 4 4 4 5 4 6 4 7 4 8 4 9 4 10 4 11 4 12 4 ]
                self.num_tokens += [idx + 2,
                                    len(args.shared_cnn_types)]
                # one larger because the original image can also be used as a skip-connection input
            self.func_names = args.shared_cnn_types

        num_total_tokens = sum(self.num_tokens)
        # if self.args.network_type == 'rnn':
        self.encoder = torch.nn.Embedding(num_total_tokens,
                                            args.controller_hid)
        self.lstm = torch.nn.LSTMCell(args.controller_hid, args.controller_hid)

        # TODO(brendan): Perhaps these weights in the decoder should be
        # shared? At least for the activation functions, which all have the
        # same size.
        self.decoders = []
        # print("4444444  ")
        for idx, size in enumerate(self.num_tokens):
            # 4 2 4 3 4 4 4 5 4 6 4 7 4 8 4 9 4 10 4 11 4 12 4
            decoder = torch.nn.Linear(args.controller_hid, size)
            self.decoders.append(decoder)

        self._decoders = torch.nn.ModuleList(self.decoders)
        # print("222222")
        self.reset_parameters()
        self.static_init_hidden = utils.keydefaultdict(self.init_hidden)

        def _get_default_hidden(key):
            return utils.get_variable(
                torch.zeros(key, self.args.controller_hid),
                self.args.cuda,
                requires_grad=False)

        self.static_inputs = utils.keydefaultdict(_get_default_hidden)
Example #10
    def filter_deleted_structure(self, responses):
        deleted_things = keydefaultdict(
            lambda t: t.closest_deleted_ancestor() is not None)
        filtered_responses = [
            r for r in responses if not deleted_things[r.qnode_measure]
        ]
        return filtered_responses, []
Example #11
    def __init__(self, args, corpus):
        models.shared_base.SharedModel.__init__(self)

        self.args = args
        self.corpus = corpus
        self.forward_eval = 0

        self.decoder = nn.Linear(args.shared_hid, corpus.num_tokens)
        self.encoder = EmbeddingDropout(corpus.num_tokens,
                                        args.shared_embed,
                                        dropout=args.shared_dropoute)
        self.lockdrop = LockedDropout()

        if self.args.tie_weights:
            self.decoder.weight = self.encoder.weight

        # NOTE(brendan): Since W^{x, c} and W^{h, c} are always summed, there
        # is no point duplicating their bias offset parameter. Likewise for
        # W^{x, h} and W^{h, h}.
        self.w_xc = nn.Linear(args.shared_embed, args.shared_hid)
        self.w_xh = nn.Linear(args.shared_embed, args.shared_hid)

        # The raw weights are stored here because the hidden-to-hidden weights
        # are weight dropped on the forward pass.
        self.w_hc_raw = torch.nn.Parameter(
            torch.Tensor(args.shared_hid, args.shared_hid))
        self.w_hh_raw = torch.nn.Parameter(
            torch.Tensor(args.shared_hid, args.shared_hid))
        self.w_hc = None
        self.w_hh = None

        self.w_h = collections.defaultdict(dict)
        self.w_c = collections.defaultdict(dict)

        for idx in range(args.num_blocks):
            for jdx in range(idx + 1, args.num_blocks):
                self.w_h[idx][jdx] = nn.Linear(args.shared_hid,
                                               args.shared_hid,
                                               bias=False)
                self.w_c[idx][jdx] = nn.Linear(args.shared_hid,
                                               args.shared_hid,
                                               bias=False)

        self._w_h = nn.ModuleList(
            [self.w_h[idx][jdx] for idx in self.w_h for jdx in self.w_h[idx]])
        self._w_c = nn.ModuleList(
            [self.w_c[idx][jdx] for idx in self.w_c for jdx in self.w_c[idx]])

        if args.mode == 'train':
            self.batch_norm = nn.BatchNorm1d(args.shared_hid)
        else:
            self.batch_norm = None

        self.reset_parameters()
        self.static_init_hidden = utils.keydefaultdict(self.init_hidden)

        logger.info(f'# of parameters: {format(self.num_parameters, ",d")}')
Example #12
    def __init__(self, args):
        torch.nn.Module.__init__(self)
        self.args = args
        self.baseline = None

        if self.args.network_type == 'rnn':
            # NOTE(brendan): `num_tokens` here is just the activation function
            # for every even step,
            self.num_tokens = [len(args.shared_rnn_activations)]
            for idx in range(self.args.num_blocks):
                self.num_tokens += [idx + 1, len(args.shared_rnn_activations)]
            self.func_names = args.shared_rnn_activations
        elif self.args.network_type == 'cnn':
            self.num_tokens = [
                len(args.shared_cnn_types), self.args.num_blocks
            ]
            self.func_names = args.shared_cnn_types

        num_total_tokens = sum(self.num_tokens)

        self.encoder = torch.nn.Embedding(num_total_tokens,
                                          args.controller_hid)
        self.lstm = torch.nn.LSTMCell(args.controller_hid, args.controller_hid)

        # TODO(brendan): Perhaps these weights in the decoder should be
        # shared? At least for the activation functions, which all have the
        # same size.
        self.decoders = []
        for idx, size in enumerate(self.num_tokens):
            decoder = torch.nn.Linear(args.controller_hid, size)
            self.decoders.append(decoder)

        self._decoders = torch.nn.ModuleList(self.decoders)

        self.reset_parameters()
        self.static_init_hidden = utils.keydefaultdict(self.init_hidden)

        def _get_default_hidden(key):
            return utils.get_variable(torch.zeros(key,
                                                  self.args.controller_hid),
                                      self.args.cuda,
                                      requires_grad=False)

        self.static_inputs = utils.keydefaultdict(_get_default_hidden)
Example #13
    def __init__(self, args):
        super(Controller, self).__init__()
        self.args = args
        self.num_dags = 1
        if self.args.use_single_controller and self.args.use_dual_controller:
            self.num_dags += 1
        if self.args.use_single_controller  and self.args.use_atten_controller:
            self.num_dags += 1

        if self.args.network_type in ['rnn','seq2seq','classification']:
            self.num_tokens = [len(args.rnn_activations)]
            for idx in range(self.args.num_blocks):
                self.num_tokens += [idx + 1, len(args.rnn_activations)]

            self.func_names = args.rnn_activations
        elif self.args.network_type == 'cnn':
            self.num_tokens = [len(args.cnn_types), self.args.num_blocks]
            self.func_names = args.cnn_types
        else:
            raise Exception(f'Unknown network type: {self.args.network_type}')

        num_total_tokens = sum(self.num_tokens)

        self.encoder = nn.Embedding(num_total_tokens, args.controller_hid)
        self.lstm = nn.LSTMCell(
                args.controller_hid,
                args.controller_hid)

        pivot = 0
        self.decoders = []

        for idx, size in enumerate(self.num_tokens):
            decoder = nn.Linear(args.controller_hid, size)
            self.decoders.append(decoder)

        self._decoders = nn.ModuleList(self.decoders)

        self.reset_parameters()
        self.static_init_hidden = keydefaultdict(self.init_hidden)

        fn = lambda key: get_variable(
                t.zeros(key, self.args.controller_hid), self.args.cuda, requires_grad=False)
        self.static_inputs = keydefaultdict(fn)
Example #14
    def __init__(self, args, corpus):
        shared_base.SharedModel.__init__(self)

        self.args = args
        self.corpus = corpus

        self.decoder = torch.nn.Linear(in_features=args.shared_hid,
                                       out_features=corpus.num_tokens)
        self.encoder = EmbeddingDropout(corpus.num_tokens,
                                        args.shared_embed,
                                        dropout=args.shared_dropoute)
        self.lockdrop = LockedDropout()

        ## ???
        if self.args.tie_weights:
            self.decoder.weight = self.encoder.weight

        self.w_xc = torch.nn.Linear(args.shared_embed, args.shared_hid)
        self.w_xh = torch.nn.Linear(args.shared_embed, args.shared_hid)

        # The raw weights are stored here because the hidden-to-hidden weights
        # are weight dropped on the forward pass.
        self.w_hc_raw = torch.nn.Parameter(
            torch.Tensor(args.shared_hid, args.shared_hid))
        self.w_hh_raw = torch.nn.Parameter(
            torch.Tensor(args.shared_hid, args.shared_hid))
        self.w_hc = None
        self.w_hh = None

        self.w_h = collections.defaultdict(dict)
        self.w_c = collections.defaultdict(dict)

        for idx in range(args.num_blocks):
            for jdx in range(idx + 1, args.num_blocks):
                self.w_h[idx][jdx] = torch.nn.Linear(args.shared_hid,
                                                     args.shared_hid,
                                                     bias=False)
                self.w_c[idx][jdx] = torch.nn.Linear(args.shared_hid,
                                                     args.shared_hid,
                                                     bias=False)

        self._w_h = torch.nn.ModuleList(
            [self.w_h[idx][jdx] for idx in self.w_h for jdx in self.w_h[idx]])
        self._w_c = torch.nn.ModuleList(
            [self.w_c[idx][jdx] for idx in self.w_c for jdx in self.w_c[idx]])

        if args.mode == 'train':
            self.batch_norm = torch.nn.BatchNorm1d(args.shared_hid)
        else:
            self.batch_norm = None

        self.reset_parameters()
        self.static_init_hidden = utils.keydefaultdict(self.init_hidden)
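
The w_hc/w_hh attributes above start out as None; the comment says the raw hidden-to-hidden matrices are weight dropped on the forward pass. A minimal sketch of that step (assumed, not taken from this code; the drop_prob value would come from something like a shared weight-drop argument):

import torch.nn.functional as F

def _drop_hidden_weights(self, drop_prob):
    # DropConnect-style dropout on the raw matrices; the dropped copies are the ones used in forward()
    self.w_hh = F.dropout(self.w_hh_raw, p=drop_prob, training=self.training)
    self.w_hc = F.dropout(self.w_hc_raw, p=drop_prob, training=self.training)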
Example #15
    def __init__(self, args):
        torch.nn.Module.__init__(self)
        self.args = args

        if self.args.network_type == 'rnn':
            self.num_tokens = [len(args.shared_rnn_activations)]
            for idx in range(self.args.num_blocks):
                self.num_tokens += [idx + 1, len(args.shared_rnn_activations)]
            self.func_names = args.shared_rnn_activations
        elif self.args.network_type == 'micro_cnn':

            # First node always take the input to the Cell.
            return
        else:
            raise NotImplementedError(
                f'{self.args.network_type} is not supported yet')

        num_total_tokens = sum(self.num_tokens)

        self.encoder = torch.nn.Embedding(num_total_tokens,
                                          args.controller_hid)
        self.lstm = torch.nn.LSTMCell(args.controller_hid, args.controller_hid)

        self.decoders = []
        for idx, size in enumerate(self.num_tokens):
            decoder = torch.nn.Linear(args.controller_hid, size)
            self.decoders.append(decoder)

        self._decoders = torch.nn.ModuleList(self.decoders)

        self.reset_parameters()
        self.static_init_hidden = utils.keydefaultdict(self.init_hidden)

        def _get_default_hidden(key):
            return utils.get_variable(torch.zeros(key,
                                                  self.args.controller_hid),
                                      self.args.cuda,
                                      requires_grad=False)

        self.static_inputs = utils.keydefaultdict(_get_default_hidden)
Example #16
    def __init__(self, args, corpus):
        super(RNN, self).__init__()

        self.args = args
        self.corpus = corpus

        self.encoder = nn.Embedding(corpus.num_tokens, args.shared_embed)
        self.decoder = nn.Linear(args.shared_hid, corpus.num_tokens)
        self.lockdrop = LockedDropout()

        if self.args.tie_weights:
            self.decoder.weight = self.encoder.weight

        self.w_xh = nn.Linear(args.shared_embed + args.shared_hid,
                              args.shared_hid)
        self.w_xc = nn.Linear(args.shared_embed + args.shared_hid,
                              args.shared_hid)

        self.w_h, self.w_c = defaultdict(dict), defaultdict(dict)

        for idx in range(args.num_blocks):
            for jdx in range(idx + 1, args.num_blocks):
                self.w_h[idx][jdx] = nn.Linear(args.shared_hid,
                                               args.shared_hid,
                                               bias=False)
                self.w_c[idx][jdx] = nn.Linear(args.shared_hid,
                                               args.shared_hid,
                                               bias=False)

        self._w_h = nn.ModuleList(
            [self.w_h[idx][jdx] for idx in self.w_h for jdx in self.w_h[idx]])
        self._w_c = nn.ModuleList(
            [self.w_c[idx][jdx] for idx in self.w_c for jdx in self.w_c[idx]])

        if args.mode == 'train':
            self.batch_norm = nn.BatchNorm1d(args.shared_hid)
        else:
            self.batch_norm = None

        self.reset_parameters()
        self.static_init_hidden = keydefaultdict(self.init_hidden)

        logger.info(f"# of parameters: {format(self.num_parameters, ',d')}")
Example #17
    def __init__(self, args):
        torch.nn.Module.__init__(self)
        self.args = args
        self.num_tokens = []
        self.arch_layer = args.layers + 1
        self.multi_layer = args.multi_layer
        self.every_cell = args.every_cell
        if self.multi_layer or self.every_cell:
            layers = self.arch_layer
        else:
            layers = 1
        for _ in range(layers):
            for idx in range(self.args.num_blocks):
                self.num_tokens += [
                    idx + 1, idx + 1,
                    len(args.shared_cnn_types),
                    len(args.shared_cnn_types)
                ]
        if self.every_cell:
            for _ in range(layers - 1):
                for idx in range(self.args.num_blocks):
                    self.num_tokens += [
                        idx + 1, idx + 1,
                        len(args.shared_cnn_types),
                        len(args.shared_cnn_types)
                    ]

        self.func_names = args.shared_cnn_types

        num_total_tokens = sum(self.num_tokens)

        self.encoder = torch.nn.Embedding(num_total_tokens,
                                          args.controller_hid)

        if self.args.rnn_type == 'lstm':
            self.lstm = []
            for _ in range(self.args.rnn_layer):
                self.lstm.append(
                    torch.nn.LSTMCell(args.controller_hid,
                                      args.controller_hid))
            self._lstm = torch.nn.ModuleList(self.lstm)

        if self.args.rnn_type == 'rnn':
            self.rnn = []
            for _ in range(self.args.rnn_layer):
                self.rnn.append(
                    torch.nn.RNNCell(args.controller_hid, args.controller_hid))
            self._rnn = torch.nn.ModuleList(self.rnn)

        #self.lstm = torch.nn.LSTMCell(args.controller_hid, args.controller_hid)

        # TODO(brendan): Perhaps these weights in the decoder should be
        # shared? At least for the activation functions, which all have the
        # same size.
        self.decoders = []
        for _ in range(layers):
            for idx, size in enumerate(self.num_tokens):
                decoder = torch.nn.Linear(args.controller_hid, size)
                self.decoders.append(decoder)

        if self.every_cell:
            for _ in range(layers - 1):
                for idx, size in enumerate(self.num_tokens):
                    decoder = torch.nn.Linear(args.controller_hid, size)
                    self.decoders.append(decoder)

        self._decoders = torch.nn.ModuleList(self.decoders)

        self.reset_parameters()
        self.static_init_hidden = utils.keydefaultdict(self.init_hidden)

        def _get_default_hidden(key):
            return utils.get_variable(torch.zeros(key,
                                                  self.args.controller_hid),
                                      self.args.cuda,
                                      requires_grad=False)

        self.static_inputs = utils.keydefaultdict(_get_default_hidden)
Example #18
    def __init__(self, args, corpus):
        """
        :param args: 命令行参数
        :param corpus: 数据集
        :properties
            decoder,从1000到10000的映射,一个全链接层
            encoder,一个自定义的EmbeddingDropout层,从10000,到1000的映射,可以设置dropout
            lockdrop,一个单独dropout层,作用?
            args.tie_weights:作用不明,用encode的权重覆盖decoder的权重?这也不是一个网络结构,怎么能覆盖呢?
            w_xc,w_xh,w_hc,w_hh,w_hc_raw,w_hh_raw,w_h,w_c,RNN的参数矩阵这里执行的就是一些初始化的工作
            static_init_hidden,作用,在forward中hidden不存在的时候可以设置一个hidden保证程序执行
        """
        models.shared_base.SharedModel.__init__(self)  #构造父类

        self.args = args
        self.corpus = corpus
        # Linear layer mapping from 1000 back to 10000, i.e. from the hidden dimension back to token ids
        self.decoder = nn.Linear(
            args.shared_hid, corpus.num_tokens
        )  # shared_hid=1000, corpus.num_tokens=10000 (the corpus contains 10000 distinct tokens)
        # encoder maps from 10000 down to 1000
        self.encoder = EmbeddingDropout(
            corpus.num_tokens,
            args.shared_embed,  # shared_embed=1000, the hidden dimension
            dropout=args.shared_dropoute)  # shared_dropoute=0.1
        self.lockdrop = LockedDropout()  # a standalone dropout layer
        # tie the decoder weight to the encoder weight
        if self.args.tie_weights:
            self.decoder.weight = self.encoder.weight

        # NOTE(brendan): Since W^{x, c} and W^{h, c} are always summed, there
        # is no point duplicating their bias offset parameter. Likewise for
        # W^{x, h} and W^{h, h}.
        self.w_xc = nn.Linear(args.shared_embed, args.shared_hid)  #(1000,1000)
        self.w_xh = nn.Linear(args.shared_embed, args.shared_hid)

        # The raw weights are stored here because the hidden-to-hidden weights
        # are weight dropped on the forward pass.
        self.w_hc_raw = torch.nn.Parameter(
            torch.Tensor(args.shared_hid, args.shared_hid))
        self.w_hh_raw = torch.nn.Parameter(
            torch.Tensor(args.shared_hid, args.shared_hid))
        self.w_hc = None  # w_hc and w_hh are produced in forward() by applying weight dropout to w_hc_raw and w_hh_raw
        self.w_hh = None

        self.w_h = collections.defaultdict(dict)  # defaultdict(dict): every missing entry is itself a dict
        self.w_c = collections.defaultdict(dict)

        for idx in range(args.num_blocks):
            for jdx in range(idx + 1, args.num_blocks):
                # two-level dict forming a triangular matrix of layers: each pair idx < jdx gets its own w_h and w_c
                self.w_h[idx][jdx] = nn.Linear(args.shared_hid,
                                               args.shared_hid,
                                               bias=False)
                self.w_c[idx][jdx] = nn.Linear(args.shared_hid,
                                               args.shared_hid,
                                               bias=False)
        # flatten the dict matrix above into ModuleLists (_w_h and _w_c) so the parameters are registered
        self._w_h = nn.ModuleList(
            [self.w_h[idx][jdx] for idx in self.w_h for jdx in self.w_h[idx]])
        self._w_c = nn.ModuleList(
            [self.w_c[idx][jdx] for idx in self.w_c for jdx in self.w_c[idx]])

        if args.mode == 'train':
            self.batch_norm = nn.BatchNorm1d(args.shared_hid)
        else:
            self.batch_norm = None
        # reset the parameters
        self.reset_parameters()
        # keydefaultdict subclasses defaultdict and implements __missing__: when a key has no
        # value yet, init_hidden(key) is called to create one, which is an all-zero Variable.
        self.static_init_hidden = utils.keydefaultdict(
            self.init_hidden)  # init_hidden returns an all-zero Variable

        logger.info('# of parameters: {0}'.format(
            format(self.num_parameters, ",d")))
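
The tie_weights line works because nn.Linear(shared_hid, num_tokens).weight and the embedding weight both have shape (num_tokens, hidden) once shared_embed equals shared_hid, so the decoder can simply reuse the encoder's parameter. A standalone sketch with made-up sizes (not from the example):

import torch.nn as nn

num_tokens, hid = 10000, 1000
encoder = nn.Embedding(num_tokens, hid)  # weight shape: (10000, 1000)
decoder = nn.Linear(hid, num_tokens)     # weight shape: (10000, 1000)
decoder.weight = encoder.weight          # one shared Parameter updated by both paths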
Example #19
import csv
import os
import sys

from utils import keydefaultdict

input_path = sys.argv[1]
input_filename = os.path.splitext(input_path)[0]

reader = csv.DictReader(open(input_path))


def make_csv_writer(solution_depth):
    'Uses input_filename and reader.fieldnames globals'
    path = "%s_depth%s.csv" % (input_filename, solution_depth)
    writer = csv.DictWriter(open(path, 'wb'), reader.fieldnames)
    writer.writeheader()
    return writer


gridname_to_csv_writer = keydefaultdict(make_csv_writer)
solution_depth_columns = [
    col_name for col_name in reader.fieldnames
    if col_name.endswith(" Solution Depth")
]

for row in reader:
    for col_name in solution_depth_columns:
        depth = int(row[col_name])
        if depth != -1:
            gridname_to_csv_writer[depth].writerow(row)
            break
    else:
        gridname_to_csv_writer[-1].writerow(row)
Example #20
    def __init__(self, args, num_layers=3, skip_conn=False, controller_hid=100, cuda=True, mode="train",
                 softmax_temperature=5.0, tanh_c=2.5):
        torch.nn.Module.__init__(self)
        self.mode = mode
        self.num_layers = num_layers
        self.skip_conn = skip_conn
        self.controller_hid = controller_hid
        self.is_cuda = cuda

        if args and args.softmax_temperature:
            self.softmax_temperature = args.softmax_temperature
        else:
            self.softmax_temperature = softmax_temperature
        if args and args.tanh_c:
            self.tanh_c = args.tanh_c
        else:
            self.tanh_c = tanh_c

        self.num_tokens = []
        state_space_length = []

        if not skip_conn:
            keys = state_space.keys()
            for key in keys:
                state_space_length.append(len(state_space[key]))
            for _ in range(self.num_layers):
                self.num_tokens += state_space_length
        else:
            keys = state_space.keys()
            for idx in range(1, self.num_layers + 1):
                self.num_tokens += [idx]
                for key in keys:
                    self.num_tokens += [len(state_space[key])]

        num_total_tokens = sum(self.num_tokens)

        self.encoder = torch.nn.Embedding(num_total_tokens,
                                          controller_hid)
        self.lstm = torch.nn.LSTMCell(controller_hid, controller_hid)

        self.decoders = []
        if not skip_conn:
            # share the same decoder
            for idx, size in enumerate(state_space_length):
                decoder = torch.nn.Linear(controller_hid, size)
                self.decoders.append(decoder)
        else:

            state_decoder = []  # shared decoder
            for idx, size in enumerate(state_space_length):
                decoder = torch.nn.Linear(controller_hid, size)
                state_decoder.append(decoder)

            for idx in range(1, self.num_layers + 1):
                # skip_connection
                decoder = torch.nn.Linear(controller_hid, idx)
                self.decoders.append(decoder)
                # common action
                for decoder in state_decoder:
                    self.decoders.append(decoder)

        self._decoders = torch.nn.ModuleList(self.decoders)

        self.reset_parameters()
        self.static_init_hidden = utils.keydefaultdict(self.init_hidden)

        def _get_default_hidden(key):
            return utils.get_variable(
                torch.zeros(key, controller_hid),
                cuda,
                requires_grad=False)

        self.static_inputs = utils.keydefaultdict(_get_default_hidden)
Example #21
    def __init__(self, args, corpus):
        super(RNN, self).__init__(args)

        self.args = args
        self.corpus = corpus

        self.decoder = nn.Linear(args.shared_hid, corpus.num_tokens)
        self.encoder = EmbeddingDropout(corpus.num_tokens,
                                        args.shared_embed,
                                        dropout=args.shared_dropoute)
        self.lockdrop = LockedDropout()

        if self.args.tie_weights:
            self.decoder.weight = self.encoder.weight

        self.w_xc = nn.Linear(args.shared_embed, args.shared_hid)
        self.w_xh = nn.Linear(args.shared_embed, args.shared_hid)

        # The raw weights are stored here because the hidden-to-hidden weights
        # are weight dropped on the forward pass.
        self.w_hc_raw = torch.nn.Parameter(
            torch.Tensor(args.shared_hid, args.shared_hid))
        self.w_hh_raw = torch.nn.Parameter(
            torch.Tensor(args.shared_hid, args.shared_hid))
        self.w_hc = None
        self.w_hh = None

        # should remove? or keep as it? since it is in the module list.
        self.w_h = collections.defaultdict(dict)
        self.w_c = collections.defaultdict(dict)

        # keep connections for WPL module
        self.connections = dict()

        self.w_h_opt = collections.defaultdict(dict)
        self.w_c_opt = collections.defaultdict(dict)

        self.fisher = {}
        self.fisher['w_h'] = collections.defaultdict(dict)
        self.fisher['w_c'] = collections.defaultdict(dict)

        for idx in range(args.num_blocks):
            for jdx in range(idx + 1, args.num_blocks):
                self.w_h[idx][jdx] = self.connections[node_to_key((idx, jdx, 'h'))] = \
                    nn.Linear(args.shared_hid, args.shared_hid, bias=False)
                self.w_c[idx][jdx] = self.connections[node_to_key((idx, jdx, 'c'))] = \
                    nn.Linear(args.shared_hid, args.shared_hid, bias=False)

        self._w_h = nn.ModuleList(
            [self.w_h[idx][jdx] for idx in self.w_h for jdx in self.w_h[idx]])
        self._w_c = nn.ModuleList(
            [self.w_c[idx][jdx] for idx in self.w_c for jdx in self.w_c[idx]])
        if args.mode == 'train':
            self.batch_norm = nn.BatchNorm1d(args.shared_hid)
        else:
            self.batch_norm = None

        self.reset_parameters()
        self.static_init_hidden = utils.keydefaultdict(self.init_hidden)

        self.wpl_monitored_modules = self.connections

        logger.info(f'# of parameters: {format(self.num_parameters, ",d")}')