def build_cell_stems(self, in_shapes:TensorShapesList, conf_cell:Config,
                     cell_index:int)\
        ->Tuple[TensorShapes, List[OpDesc]]:
    """Create the two preprocessing stem ops for a cell and their output shapes.

    A cell takes input from the previous two modules; both stems project their
    input to the channel count of this cell's first node. If the previous cell
    reduced (doubled channels or halved spatial size), the prev-prev input is
    preprocessed with a reducing stem so both stems emit matching shapes.
    """
    # TODO: support multiple stems
    assert len(in_shapes) >= 2, "we must have outputs from at least two previous modules"

    # Channels of the previous two outputs. At network start these are the model
    # stems; typically they match, but for imagenet-style stems the last one may
    # carry twice the channels of the one before it.
    prev_shape = in_shapes[-1][0]
    prev_prev_shape = in_shapes[-2][0]
    prev_ch = self.get_ch(prev_shape)
    prev_prev_ch = self.get_ch(prev_prev_shape)

    # Did the previous layer reduce? Either channels doubled or height halved.
    was_reduction = (prev_ch == prev_prev_ch * 2) or (prev_prev_shape[2] == prev_shape[2] * 2)

    # Channels this cell's nodes will produce: init with first node in cell.
    node_ch_out = self.node_channels[cell_index][0]

    # Stems map the two previous outputs to the node channel count. When the
    # previous cell reduced, prepr_reduce additionally brings prev-prev up to
    # the matching resolution/channels.
    s0_name = 'prepr_reduce' if was_reduction else 'prepr_normal'
    s0_op = OpDesc(s0_name,
                   params={'conv': ConvMacroParams(prev_prev_ch, node_ch_out)},
                   in_len=1, trainables=None)
    s1_op = OpDesc('prepr_normal',
                   params={'conv': ConvMacroParams(prev_ch, node_ch_out)},
                   in_len=1, trainables=None)

    # Both stem outputs share one shape: node channels, with H and W reset to -1
    # (unknown; for imagenet H/W would otherwise be fractional after reduction).
    out_shape0 = copy.deepcopy(prev_shape)
    out_shape0[0], out_shape0[2], out_shape0[3] = node_ch_out, -1, -1
    out_shape1 = copy.deepcopy(out_shape0)

    return [out_shape0, out_shape1], [s0_op, s1_op]
def build_nodes(self, stem_shapes:TensorShapes, conf_cell:Config,
                cell_index:int, cell_type:CellType, node_count:int,
                in_shape:TensorShape, out_shape:TensorShape) \
        ->Tuple[TensorShapes, List[NodeDesc]]:
    """Build `node_count` nodes where every edge carries a 'div_op'.

    Node i gets one edge from each earlier state (s0, s1, and nodes 0..i-1).
    Stride handling: only edges reading s0/s1 (input ids < 2) use stride 2 in a
    reduction cell; later states are derived from those, so they are already at
    the reduced resolution. Channel change is carried by conv_params instead.
    """
    assert in_shape[0] == out_shape[0]

    is_reduction = (cell_type == CellType.Reduction)
    # One shared macro-param object for the whole cell, as channels are uniform.
    conv_params = ConvMacroParams(in_shape[0], out_shape[0])

    nodes: List[NodeDesc] = []
    for node_idx in range(node_count):
        node_edges = []
        for state_id in range(node_idx + 2):
            stride = 2 if is_reduction and state_id < 2 else 1
            op_desc = OpDesc('div_op',
                             params={'conv': conv_params, 'stride': stride},
                             in_len=1, trainables=None, children=None)
            node_edges.append(EdgeDesc(op_desc, input_ids=[state_id]))
        nodes.append(NodeDesc(edges=node_edges, conv_params=conv_params))

    out_shapes = [copy.deepcopy(out_shape) for _ in range(node_count)]
    return out_shapes, nodes
def build_model_stems(self, in_shapes:TensorShapesList,
                      conf_model_desc:Config)->List[OpDesc]:
    """Build the model-level stem ops from config and record their output shapes.

    Appends one shape entry to `in_shapes` per stem so that each cell can read
    its two inputs uniformly; normally both stems match, but (e.g. imagenet)
    they may apply different spatial reductions.
    """
    # TODO: why do we need stem_multiplier?
    # TODO: in original paper stems are always affine
    conf_model_stems = self.get_conf_model_stems()

    init_node_ch:int = conf_model_stems['init_node_ch']
    stem_multiplier:int = conf_model_stems['stem_multiplier']
    ops:List[str] = conf_model_stems['ops']

    out_channels = init_node_ch * stem_multiplier
    # Channels of the first input tensor feed the stem convolutions.
    conv_params = ConvMacroParams(self.get_ch(in_shapes[-1][0]), out_channels)

    stems = [OpDesc(name=op_name, params={'conv': conv_params},
                    in_len=1, trainables=None)
             for op_name in ops]

    # Spatial reduction factor applied by each stem; typically identical across
    # stems but may differ (e.g. imagenet).
    stem_reductions = ModelDescBuilder._stem_reductions(stems)

    # Record one output shape per stem. H and W are left unresolved as negative
    # fractions of the reduction so downstream code can detect the reduction
    # ratio; for imagenet-like nets the second stem halves the resolution again.
    for stem_reduction in stem_reductions:
        in_shapes.append([[out_channels, -1,
                           -1.0/stem_reduction, -1.0/stem_reduction]])

    return stems
def build_nodes_from_template(self, stem_shapes:TensorShapes, conf_cell:Config,
                              cell_index:int) \
        ->Tuple[TensorShapes, List[NodeDesc]]:
    """Instantiate this cell's nodes by cloning the edges of its template cell.

    Each cloned edge gets fresh macro conv params sized from the stem output
    channels and has its trainables cleared; node-level conv_params are carried
    over from the template unchanged.
    """
    cell_template = self.get_cell_template(cell_index)
    assert cell_template is not None
    assert cell_template.cell_type == self.get_cell_type(cell_index)

    nodes:List[NodeDesc] = []
    for template_node in cell_template.nodes():
        # New macro params per edge; in == out channels taken from stem shape.
        # TODO: check for compatibility?
        cloned_edges = [edge.clone(
                            conv_params=ConvMacroParams(
                                self.get_ch(stem_shapes[0]),
                                self.get_ch(stem_shapes[0])),
                            clear_trainables=True)
                        for edge in template_node.edges]
        nodes.append(NodeDesc(edges=cloned_edges,
                              conv_params=template_node.conv_params))

    out_shapes = [copy.deepcopy(stem_shapes[0]) for _ in cell_template.nodes()]
    return out_shapes, nodes
def build_nodes(self, stem_shapes:TensorShapes, conf_cell:Config,
                cell_index:int, cell_type:CellType, node_count:int,
                in_shape:TensorShape, out_shape:TensorShape) \
        ->Tuple[TensorShapes, List[NodeDesc]]:
    """Build `node_count` nodes where every edge carries an 'xnas_op'.

    Node i has one edge per earlier state (s0, s1 and previous nodes); edges
    that read s0/s1 use stride 2 in a reduction cell, everything else stride 1.
    """
    assert in_shape[0] == out_shape[0]

    is_reduction = (cell_type == CellType.Reduction)
    # Shared conv macro params for all edges of the cell.
    conv_params = ConvMacroParams(in_shape[0], out_shape[0])

    nodes: List[NodeDesc] = []
    for node_idx in range(node_count):
        node_edges = []
        for state_id in range(node_idx + 2):
            stride = 2 if is_reduction and state_id < 2 else 1
            op_desc = OpDesc('xnas_op',
                             params={'conv': conv_params, 'stride': stride},
                             in_len=1, trainables=None, children=None)
            node_edges.append(EdgeDesc(op_desc, input_ids=[state_id]))
        nodes.append(NodeDesc(edges=node_edges, conv_params=conv_params))

    out_shapes = [copy.deepcopy(out_shape) for _ in range(node_count)]
    return out_shapes, nodes
def build_nodes(self, stem_shapes:TensorShapes, conf_cell:Config,
                cell_index:int, cell_type:CellType, node_count:int,
                in_shape:TensorShape, out_shape:TensorShape) \
        ->Tuple[TensorShapes, List[NodeDesc]]:
    """Build nodes from the nasbench101 adjacency matrix.

    Each node gets a single 'nasbench101_op' edge whose input_ids list all
    connected predecessors. Matrix row/col 0 is the cell input; internal
    vertices therefore live at index i+1. Input ids are offset by 2 because
    ids 0 and 1 are reserved for stems s0/s1.
    """
    assert in_shape[0] == out_shape[0]

    conv_params = ConvMacroParams(in_shape[0], out_shape[0])

    nodes:List[NodeDesc] = []
    for node_idx in range(node_count):
        matrix_col = node_idx + 1  # nasbench internal node starts at 1

        # A direct connection from the cell input needs a projection first.
        first_proj = False
        input_ids = []
        if self._cell_matrix[0, matrix_col]:
            input_ids.append(0)  # connect to s0
            first_proj = True

        # Collect connections from every earlier internal vertex.
        for prev_idx in range(node_idx):
            if self._cell_matrix[prev_idx + 1, matrix_col]:
                input_ids.append(prev_idx + 2)  # offset because of s0, s1

        op_desc = OpDesc('nasbench101_op',
                         params={
                             'conv': conv_params,
                             'stride': 1,
                             # offset because of input node
                             'vertex_op': self._vertex_ops[node_idx + 1],
                             'first_proj': first_proj
                         },
                         in_len=len(input_ids), trainables=None,
                         children=None)  # TODO: should we pass children here?
        edge = EdgeDesc(op_desc, input_ids=input_ids)
        nodes.append(NodeDesc(edges=[edge], conv_params=conv_params))

    out_shapes = [copy.deepcopy(out_shape) for _ in range(node_count)]
    return out_shapes, nodes
def build_nodes(self, stem_shapes:TensorShapes, conf_cell:Config,
                cell_index:int, cell_type:CellType, node_count:int,
                in_shape:TensorShape, out_shape:TensorShape) \
        ->Tuple[TensorShapes, List[NodeDesc]]:
    """Build nodes from the pre-sampled (op name, input state) pairs.

    The op set for this cell type supplies, per node, the parallel lists of op
    names and the states they read. Edges reading s0/s1 (state < 2) use
    stride 2 in a reduction cell.
    """
    assert in_shape[0] == out_shape[0]

    is_reduction = (cell_type == CellType.Reduction)
    op_set = self._reduction_ops if is_reduction else self._normal_ops
    assert node_count == len(op_set.ops_and_ins)

    conv_params = ConvMacroParams(in_shape[0], out_shape[0])

    nodes: List[NodeDesc] = []
    for op_names, to_states in op_set.ops_and_ins:
        # One edge per sampled (op, source-state) pair.
        node_edges = []
        for op_name, to_state in zip(op_names, to_states):
            stride = 2 if is_reduction and to_state < 2 else 1
            op_desc = OpDesc(op_name,
                             params={'conv': conv_params, 'stride': stride},
                             in_len=1, trainables=None, children=None)
            node_edges.append(EdgeDesc(op_desc, input_ids=[to_state]))
        nodes.append(NodeDesc(edges=node_edges, conv_params=conv_params))

    out_shapes = [copy.deepcopy(out_shape) for _ in range(node_count)]
    return out_shapes, nodes
def build_nodes(self, stem_shapes:TensorShapes, conf_cell:Config,
                cell_index:int, cell_type:CellType, node_count:int,
                in_shape:TensorShape, out_shape:TensorShape) \
        ->Tuple[TensorShapes, List[NodeDesc]]:
    """Build the petridish seed cell: one node with a skip connection to s1.

    This minimal cell is the starting model; PetridishSearcher later grows it
    by appending one node to the parent after each sampling step.
    """
    assert in_shape[0] == out_shape[0]

    is_reduction = (cell_type == CellType.Reduction)

    # Channels for conv filters; node out channels equal its in channels.
    conv_params = ConvMacroParams(in_shape[0], out_shape[0])

    # Identity edge from s1 into the single seed node; reduce spatially when
    # this is a reduction cell.
    skip_op = OpDesc('skip_connect',
                     params={
                         'conv': conv_params,
                         'stride': 2 if is_reduction else 1
                     },
                     in_len=1, trainables=None, children=None)
    seed_edge = EdgeDesc(skip_op, input_ids=[1])
    nodes = [NodeDesc(edges=[seed_edge], conv_params=conv_params)]

    out_shapes = [copy.deepcopy(out_shape) for _ in nodes]
    return out_shapes, nodes
def build_model_pool(self, in_shapes:TensorShapesList, conf_model_desc:Config)\
        ->OpDesc:
    """Build the model's final pooling op (channel-preserving).

    Records the pool's output shape in `in_shapes` as a copy of the current
    last shape, since the configured post-op does not change channels.
    """
    post_op_name = conf_model_desc['model_post_op']

    final_shape = in_shapes[-1][0]
    in_shapes.append([copy.deepcopy(final_shape)])

    channels = final_shape[0]  # in channels == out channels for the pool
    return OpDesc(post_op_name,
                  params={'conv': ConvMacroParams(channels, channels)},
                  in_len=1, trainables=None)
def build_nodes(self, stem_shapes:TensorShapes, conf_cell:Config,
                cell_index:int, cell_type:CellType, node_count:int,
                in_shape:TensorShape, out_shape:TensorShape) \
        ->Tuple[TensorShapes, List[NodeDesc]]:
    """Default node builder: `node_count` nodes with no edges.

    Subclasses/searchers are expected to populate edges later; each node still
    carries its own conv macro params so edges added later know the channels.
    """
    nodes:List[NodeDesc] = []
    for _ in range(node_count):
        # Fresh ConvMacroParams per node (not shared) so each node's params
        # can be adjusted independently later.
        nodes.append(NodeDesc(edges=[],
                              conv_params=ConvMacroParams(in_shape[0],
                                                          out_shape[0])))

    out_shapes = [copy.deepcopy(out_shape) for _ in range(node_count)]
    return out_shapes, nodes
def build_cell_post_op(self, stem_shapes:TensorShapes,
                       node_shapes:TensorShapes, conf_cell:Config,
                       cell_index:int)\
        -> Tuple[TensorShape, OpDesc]:
    """Build the cell's post op (e.g. concat+proj) and its output shape.

    Channel bookkeeping (in channels, out channels, number of node states the
    post op consumes) is delegated to `_post_op_ch`.
    """
    post_op_name = conf_cell['cell_post_op']
    op_ch_in, cell_ch_out, out_states = self._post_op_ch(post_op_name,
                                                         node_shapes)

    post_op_desc = OpDesc(post_op_name,
                          {
                              'conv': ConvMacroParams(op_ch_in, cell_ch_out),
                              'out_states': out_states
                          },
                          in_len=1, trainables=None, children=None)

    # Same spatial shape as the last node, with channels set to the post-op's
    # output channels.
    out_shape = copy.deepcopy(node_shapes[-1])
    out_shape[0] = cell_ch_out

    return out_shape, post_op_desc
def __init__(self, op_desc: OpDesc, affine: bool) -> None:
    """MultiOp combines multiple child ops into a single op.

    The children to combine are passed via `op_desc.children`, and the input
    each child consumes via `op_desc.children_ins`. forward() receives a list
    of inputs; each child processes its designated input and the children's
    outputs are then merged back to `ch_out` channels by a projection op.
    """
    super().__init__()

    child_descs = op_desc.children
    child_ins = op_desc.children_ins
    assert child_descs is not None and child_ins is not None \
        and len(child_descs) == len(child_ins)

    # Conv macro params, typically specified by the macro builder; every child
    # is forced to use the same params.
    conv_params: ConvMacroParams = op_desc.params['conv']

    self._ops = nn.ModuleList()
    self._ins: List[int] = []

    for input_id, child_desc in zip(child_ins, child_descs):
        child_desc.params['conv'] = conv_params
        self._ops.append(Op.create(child_desc, affine=affine))
        self._ins.append(input_id)

    # Children's outputs are concatenated, so the projection sees
    # ch_out * num_children channels and maps them back to ch_out.
    concat_channels = conv_params.ch_out * len(self._ins)
    adj_desc = OpDesc('proj_channels',
                      {
                          'conv': ConvMacroParams(concat_channels,
                                                  conv_params.ch_out),
                          'out_states': len(self._ins)
                      },
                      in_len=1, trainables=None, children=None)
    self._ch_adj = Op.create(adj_desc, affine=affine)