def MakeMade(scale, cols_to_train, seed, fixed_ordering=None):
    """Construct a `made.MADE` model over `cols_to_train` and move it to DEVICE.

    Architecture switches (layer count, input/output encodings, direct I/O
    connections, residual connections, column masking and order inversion)
    are read from the module-level `args`.

    Args:
        scale: Width of every hidden layer when `args.layers` is positive.
        cols_to_train: Columns to model; each must provide DistributionSize().
        seed: Seed forwarded to the model.  A seed of None or 0 selects the
            natural ordering.
        fixed_ordering: Optional ordering forwarded to the model.  It is
            passed through InvertOrder first when `args.inv_order` is set.

    Returns:
        The constructed model after `.to(DEVICE)`.
    """
    if args.inv_order:
        print('Inverting order!')
        fixed_ordering = InvertOrder(fixed_ordering)

    num_inputs = len(cols_to_train)

    if args.layers > 0:
        layer_widths = [scale] * args.layers
    else:
        # Fallback stack used when no positive layer count is configured.
        layer_widths = [512, 256, 512, 128, 1024]

    total_outputs = sum([col.DistributionSize() for col in cols_to_train])
    bins_per_column = [col.DistributionSize() for col in cols_to_train]

    # Only a non-zero seed switches the model away from the natural ordering.
    use_natural_ordering = seed is None or seed == 0

    net = made.MADE(
        nin=num_inputs,
        hidden_sizes=layer_widths,
        nout=total_outputs,
        input_bins=bins_per_column,
        input_encoding=args.input_encoding,
        output_encoding=args.output_encoding,
        embed_size=32,
        seed=seed,
        do_direct_io_connections=args.direct_io,
        natural_ordering=use_natural_ordering,
        residual_connections=args.residual,
        fixed_ordering=fixed_ordering,
        column_masking=args.column_masking,
    )
    return net.to(DEVICE)
def MakeMade(
        table,
        scale,
        layers,
        cols_to_train,
        seed,
        factor_table=None,
        fixed_ordering=None,
        special_orders=0,
        order_content_only=True,
        order_indicators_at_front=True,
        inv_order=True,
        residual=True,
        direct_io=True,
        input_encoding='embed',
        output_encoding='embed',
        embed_size=32,
        dropout=True,
        grouped_dropout=False,
        per_row_dropout=False,
        fixed_dropout_ratio=False,
        input_no_emb_if_leq=False,
        embs_tied=True,
        resmade_drop_prob=0.,
        # Join specific:
        num_joined_tables=None,
        table_dropout=None,
        table_num_columns=None,
        table_column_types=None,
        table_indexes=None,
        table_primary_index=None,
        # DMoL
        num_dmol=0,
        scale_input=False,
        dmol_cols=None):
    """Build a `made.MADE` model and, optionally, attach special orderings.

    Args:
        table: Not referenced in this function; kept so existing callers
            keep working.
        scale: Width of every hidden layer when `layers` is positive.
        layers: Number of hidden layers; a non-positive value selects the
            default stack [512, 256, 512, 128, 1024].
        cols_to_train: List of columns to model.  Each must expose `.name`
            and `DistributionSize()`.
        seed: Forwarded to the model.
        factor_table: Forwarded to the model.  When truthy, its
            `fact_col_mapping` is also used to keep each group of sub-columns
            consecutive and in order inside every special ordering.
        fixed_ordering: Forwarded to the model.
        special_orders: Number of orderings to generate.  The model is built
            with `num_masks=max(1, special_orders)`; no orderings are
            attached when this is not positive.
        order_content_only: If true, columns whose names do not start with
            '__' are permuted among themselves, '__in_' columns among
            themselves, and the remaining columns among themselves.
            Otherwise all columns are permuted together.
        order_indicators_at_front: With `order_content_only`, place the
            '__in_' group before the content group instead of after it.
        inv_order: If true, each ordering is passed through
            `utils.InvertOrder` before being attached.
        residual: Forwarded as `residual_connections`.
        direct_io: Forwarded as `do_direct_io_connections`.
        input_encoding, output_encoding, embed_size, input_no_emb_if_leq,
        embs_tied, resmade_drop_prob, grouped_dropout, per_row_dropout:
            Forwarded to the model unchanged.
        dropout: Forwarded as `dropout_p`.
        fixed_dropout_ratio: Forwarded as `fixed_dropout_p`.
        num_joined_tables, table_dropout, table_num_columns,
        table_column_types, table_indexes, table_primary_index: Join-related
            settings forwarded to the model unchanged.
        num_dmol, scale_input: Forwarded to the model unchanged.
        dmol_cols: Optional collection of column names; the positions of the
            matching columns are forwarded as `dmol_col_indexes`.  None (the
            default) or an empty collection selects no columns.

    Returns:
        The model on `train_utils.get_device()`.  When `special_orders` is
        positive, `model.orderings` is set to the generated list of orderings.
    """
    # `dmol_cols` defaults to None rather than a shared mutable list.
    dmol_col_indexes = []
    if dmol_cols:
        dmol_col_indexes = [
            i for i, col in enumerate(cols_to_train) if col.name in dmol_cols
        ]

    num_cols = len(cols_to_train)
    # Computed once and reused for both `nout` and `input_bins`.
    input_bins = [c.DistributionSize() for c in cols_to_train]

    model = made.MADE(
        nin=num_cols,
        hidden_sizes=[scale] *
        layers if layers > 0 else [512, 256, 512, 128, 1024],
        nout=sum(input_bins),
        num_masks=max(1, special_orders),
        natural_ordering=True,
        input_bins=input_bins,
        do_direct_io_connections=direct_io,
        input_encoding=input_encoding,
        output_encoding=output_encoding,
        embed_size=embed_size,
        input_no_emb_if_leq=input_no_emb_if_leq,
        embs_tied=embs_tied,
        residual_connections=residual,
        factor_table=factor_table,
        seed=seed,
        fixed_ordering=fixed_ordering,
        resmade_drop_prob=resmade_drop_prob,
        # Wildcard skipping:
        dropout_p=dropout,
        fixed_dropout_p=fixed_dropout_ratio,
        grouped_dropout=grouped_dropout,
        learnable_unk=True,
        per_row_dropout=per_row_dropout,
        # DMoL
        num_dmol=num_dmol,
        scale_input=scale_input,
        dmol_col_indexes=dmol_col_indexes,
        # Join support.
        num_joined_tables=num_joined_tables,
        table_dropout=table_dropout,
        table_num_columns=table_num_columns,
        table_column_types=table_column_types,
        table_indexes=table_indexes,
        table_primary_index=table_primary_index,
    ).to(train_utils.get_device())

    if special_orders <= 0:
        return model

    orders = []

    if order_content_only:
        print('Leaving out virtual columns from orderings')
        num_content = sum(
            1 for c in cols_to_train if not c.name.startswith('__'))
        num_indicators = sum(
            1 for c in cols_to_train if c.name.startswith('__in_'))
        content_end = num_content
        indicators_end = num_content + num_indicators

        # Data: { content }, { indicators }, { fanouts }.
        for i in range(special_orders):
            # Seed i + 1 makes the i-th ordering reproducible across runs.
            rng = np.random.RandomState(i + 1)
            content = rng.permutation(np.arange(content_end))
            inds = rng.permutation(np.arange(content_end, indicators_end))
            fanouts = rng.permutation(np.arange(indicators_end, num_cols))

            if order_indicators_at_front:
                # Model: { indicators }, { content }, { fanouts },
                # permute each bracket independently.
                groups = (inds, content, fanouts)
            else:
                # Model: { content }, { indicators }, { fanouts },
                # permute each bracket independently.
                groups = (content, inds, fanouts)
            order = np.concatenate(groups).reshape(-1)
            assert len(np.unique(order)) == num_cols, order
            orders.append(order)
    else:
        # Permute content & virtual columns together.
        for i in range(special_orders):
            orders.append(
                np.random.RandomState(i + 1).permutation(np.arange(num_cols)))

    if factor_table:
        # Correct for subvar ordering.
        for i, order in enumerate(orders):
            # This could have [..., 6, ..., 4, ..., 5, ...].
            # So we map them back into:
            # This could have [..., 4, 5, 6, ...].
            # Subvars have to be in order and also consecutive.
            for _orig_col, sub_cols in factor_table.fact_col_mapping.items():
                first_subvar_index = cols_to_train.index(sub_cols[0])
                print('Before', order)
                for j in range(1, len(sub_cols)):
                    subvar_index = cols_to_train.index(sub_cols[j])
                    # Pull the sub-column out, then re-insert it j slots
                    # after wherever the first sub-column currently sits.
                    order = np.delete(order,
                                      np.argwhere(order == subvar_index))
                    first_pos = np.argwhere(order == first_subvar_index)[0][0]
                    order = np.insert(order, first_pos + j, subvar_index)
                orders[i] = order
                print('After', order)

    print('Special orders', np.array(orders))

    if inv_order:
        for i, order in enumerate(orders):
            orders[i] = np.asarray(utils.InvertOrder(order))
        print('Inverted special orders:', orders)

    model.orderings = orders
    return model