def test_db_sanity_last_n(self):
    print()
    db_path = './profile_data/Sin_in-1-out-1/'
    db_name = 'ann.db'
    db.interact.sanity_last_n_commit('meta|ann', db_name=db_name, db_path=db_path)
    db.interact.sanity_last_n_commit(db_name=db_name, db_path=db_path)
    printf('CHECK THE DB YOURSELF TO SEE IF TEST IS PASSED!', type='WARN', separator='*')
def test_db_interact(self):
    print()
    db_path = './profile_data/Sin_in-1-out-1/'
    db_name = 'ann.db'
    meta_table = 'meta|ann'
    data_table = 'profile_cost|ann'
    db.interact.db_control_dim(meta_table, data_table, 'learn_rate', db_path=db_path, db_name=db_name)
    printf('CHECK THE DB YOURSELF TO SEE IF TEST IS PASSED!', type='WARN', separator='*')
def test_db_basic(self):
    print()
    db_name = 'cool.db'
    table_name = 'stuff'
    db_path = './unittest/db/basic/'
    db.basic.populate_db(['this', 'is', 'cool'], ['INTEGER', 'TEXT', 'REAL'], 0,
                         [['zero', 0.], ['two', 0.2], ['one', 0.1]],
                         db_path=db_path, db_name=db_name, table_name=table_name)
    db.basic.sanity_db('is', ['zero'], 'stuff', db_name=db_name, db_path=db_path)
    db.basic.sanity_db(['is'], 'zero', 'stuff', db_name=db_name, db_path=db_path)
    db.basic.sanity_db('shhhh', 'zero', 'stuff', db_name=db_name, db_path=db_path)
    printf('CHECK THE DB YOURSELF TO SEE IF TEST IS PASSED!', type='WARN', separator='*')
def load_as_array(db_fullpath, table, attr_list, size=-1, c=None):
    """
    load data from db as a numpy array

    attr names in attr_list may or may not be surrounded by '[' and ']'
    """
    conn = None
    if not c:
        conn = sqlite3.connect(db_fullpath)
        c = conn.cursor()
    attr_list = surround_by_brackets(attr_list)
    table = surround_by_brackets(table)
    tot_num = count_entry(db_fullpath, table, c=c)
    size = tot_num if size == -1 else size
    if tot_num < size:
        printf('db doesn\'t have enough entries to load!', type='ERROR')
    c.execute('SELECT {} FROM {} LIMIT {}'.format(','.join(attr_list), table, size))
    ret = np.array(c.fetchall())
    if conn:
        conn.close()
    return ret
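# A minimal, standalone sketch of the pattern load_as_array wraps: SELECT the
# named columns with a LIMIT, then feed fetchall() to np.array. The table and
# column names ('demo', 'x0', 'x1') are invented for illustration and are not
# part of this repo.
import sqlite3
import numpy as np

conn = sqlite3.connect(':memory:')
c = conn.cursor()
c.execute('CREATE TABLE [demo] ([x0] REAL, [x1] REAL)')
c.executemany('INSERT INTO [demo] VALUES (?,?)', [(0., 1.), (2., 3.), (4., 5.)])
# roughly what load_as_array(db_fullpath, 'demo', ['x0','x1'], size=2, c=c) does
c.execute('SELECT [x0],[x1] FROM [demo] LIMIT 2')
arr = np.array(c.fetchall())    # shape (2, 2), dtype float64
conn.close()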
def sanity_last_n_commit(*table, num_run=1, db_name=DB_NAME, db_path=DB_DIR_PARENT, time_attr=TIME_ATTR):
    """
    delete the entries with the latest populate_time, for all tables with the time attr

    ARGUMENTS:
        table       if table=(), then delete entries for all tables,
                    otherwise only delete for the tables in *table
        num_run     delete entries with the last (num_run) populate times
        time_attr   the name of the time attribute
    """
    db_fullpath = '{}/{}'.format(db_path, db_name)
    conn = sqlite3.connect(db_fullpath)
    c = conn.cursor()
    if len(table) == 0:
        table = list(c.execute('SELECT name FROM sqlite_master WHERE type=\'table\''))
        table = list(map(lambda x: '[{}]'.format(x[0]), table))
    else:
        table = list(map(lambda x: '[{}]'.format(x), table))
    # filter the table list down to those that actually contain the time_attr
    table_flt = []
    for tbl in table:
        tbl_attr = list(get_attr_info(tbl, enclosing=False, db_fullpath=db_fullpath).keys())
        if time_attr in tbl_attr:
            table_flt += [tbl]
    time_attr = surround_by_brackets(time_attr)
    time_set = set()
    for tbl in table_flt:
        cur_time_set = set(c.execute('SELECT DISTINCT {} FROM {}'.format(time_attr, tbl)))
        time_set |= set(map(lambda x: x[0], cur_time_set))
    conn.close()
    time_len = len(time_set)
    num_run = time_len if num_run > time_len else num_run
    time_list = sorted(list(time_set))[time_len - num_run:]
    for tbl in table_flt:
        for t in time_list:
            sanity_db(time_attr[1:-1], t, tbl[1:-1], db_name=db_name, db_path=db_path)
    printf('Done: cleared last {} commits for {}'.format(num_run, table_flt))
    bad_table = set(table) - set(table_flt)
    if bad_table:
        printf('tables {} don\'t have attr {}', bad_table, time_attr, type='WARN')
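# A standalone sketch of the clean-up pattern above: collect the distinct
# populate times, keep the last num_run of them, and delete the matching rows.
# The table name 't' and attr 'populate_time' are invented for illustration.
import sqlite3

conn = sqlite3.connect(':memory:')
c = conn.cursor()
c.execute('CREATE TABLE [t] ([populate_time] REAL, [val] TEXT)')
c.executemany('INSERT INTO [t] VALUES (?,?)',
              [(1., 'a'), (1., 'b'), (2., 'c'), (3., 'd')])
num_run = 2
times = sorted({r[0] for r in c.execute('SELECT DISTINCT [populate_time] FROM [t]')})
for t in times[len(times) - num_run:]:
    c.execute('DELETE FROM [t] WHERE [populate_time]=?', (t,))
conn.commit()   # rows with populate_time 2. and 3. are gone
conn.close()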
def _load_db(self, yaml_model, timestamp, profile=True):
    """
    load data from sqlite3 db: suitable for 1D objective function
    """
    db_dir = conf.TRAINING_DIR
    db_name = yaml_model['data_path']
    db_table = yaml_model['data_table']
    db_fullpath = '{}/{}'.format(db_dir, db_name)
    data_size = yaml_model['data_size']
    test_size = yaml_model['test_size']
    start_time = timeit.default_timer()
    conn = sqlite3.connect(db_fullpath)
    c = conn.cursor()
    end_time = timeit.default_timer()
    printf('time spent on db connection: {:.3f}', end_time - start_time)
    # check that the table exists
    start_time = timeit.default_timer()
    if not db.util.is_table_exist(db_fullpath, db_table, c=c):
        printf('table does not exist: {}\npath: {}', db_table, db_fullpath, type='ERROR')
        exit()
    # set up x,y attr name lists
    data_attr = list(db.util.get_attr_info(db_table, c=c, enclosing=False).keys())
    regex_x = re.compile(r'^x\d+$')
    regex_y = re.compile(r'^y\d+$')
    attr_x = sorted([itm for itm in data_attr if regex_x.match(itm)])
    attr_y = sorted([itm for itm in data_attr if regex_y.match(itm)])
    # load from db
    data_entire = db.util.load_as_array(db_fullpath, db_table, attr_x, size=(data_size + test_size), c=c)
    self.data = data_entire[0:data_size]
    self.test_d = data_entire[-test_size:]
    target_entire = db.util.load_as_array(db_fullpath, db_table, attr_y, size=(data_size + test_size), c=c)
    self.target = target_entire[0:data_size]
    self.test_t = target_entire[-test_size:]
    end_time = timeit.default_timer()
    printf('time spent on load data: {:.3f}', end_time - start_time)
    # store raw input into profile db
    prof_subdir = yaml_model['obj_name']
    if profile:
        # don't store y: targets are stored when training starts
        start_time = timeit.default_timer()
        regex_xothers = re.compile(r'^x.*$')
        attr_xothers = [itm for itm in data_attr if regex_xothers.match(itm) and itm not in attr_x]
        xothers = db.util.load_as_array(db_fullpath, db_table, attr_xothers, size=data_size, c=c)
        attr_full = attr_x + attr_xothers
        data_util.profile_input_data(prof_subdir, timestamp, attr_full, self.data, xothers)
        end_time = timeit.default_timer()
        printf('time spent on storing training data into db: {:.3f}', end_time - start_time)
    conn.close()
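# A small standalone sketch of the attribute split performed in _load_db:
# input columns are named x0..xN, targets y0..yM, and any other column
# starting with 'x' is picked up separately for profiling. The column names
# below are invented for illustration.
import re

data_attr = ['x0', 'x1', 'xAvg', 'y0', 'populate_time']
regex_x = re.compile(r'^x\d+$')
regex_y = re.compile(r'^y\d+$')
attr_x = sorted(a for a in data_attr if regex_x.match(a))   # ['x0', 'x1']
attr_y = sorted(a for a in data_attr if regex_y.match(a))   # ['y0']
attr_xothers = [a for a in data_attr
                if re.match(r'^x.*$', a) and a not in attr_x]  # ['xAvg']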
def test_lin3_lin2_single_tuple(self, verbose=True):
    net = Net_structure([3, 2], [Node_linear], Cost_sqr)
    net.set_w_b([array(range(6)).reshape(3, 2)], [zeros((1, 2))])
    if verbose:
        printf('INITIAL NET')
        printf(net, type=None, separator=None)
    data = array([[11, 22, 33], [-1, 2, 82]])
    net_op = array([[176., 242.], [332., 415.]])
    target = array([[5., 4.], [2., 5.]])
    test_net_op = net.net_act_forward(data)
    printf('net forward output')
    printf(test_net_op, type=None, separator=None)
    assert_array_equal(net_op, test_net_op, ACT_FORWARD_FAIL)
    self.assertSequenceEqual(net_op.shape, test_net_op.shape, ACT_FORWARD_FAIL)
    assert_array_equal(net.cost.act_forward(net_op, target), [42942.5, 138500.])
    assert_array_equal(net.cost.c_d_y(net_op, target), array([[171., 238.], [330., 410.]]))
    assert_array_equal(net.activ_list[0].yn_d_yn1(net.y_list[1], net.w_list[0]),
                       array([net.w_list[0], net.w_list[0]]))
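# An independent numpy check of the numbers asserted above: a single linear
# layer computes y = x.dot(W) + b, and Cost_sqr is 0.5 * sum((y - t)^2) per
# sample, with gradient y - t. This reproduces the expected arrays without
# going through Net_structure.
import numpy as np

W = np.arange(6).reshape(3, 2).astype(float)
b = np.zeros((1, 2))
x = np.array([[11., 22., 33.], [-1., 2., 82.]])
t = np.array([[5., 4.], [2., 5.]])
y = x.dot(W) + b                          # [[176. 242.] [332. 415.]]
cost = 0.5 * ((y - t) ** 2).sum(axis=1)   # [ 42942.5 138500. ]
grad = y - t                              # [[171. 238.] [330. 410.]]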
def test_conv(self):
    print()
    image_path_gs = './test/grayscale.jpg'
    output_path_gs = './test/output_grayscale.jpg'
    image_path_rgb = './test/rgb.jpg'
    output_path_rgb = './test/output_rgb.jpg'
    kernel_core = np.array([[0, -1, 0], [-1, 5, -1], [0, -1, 0]])
    conv_layer = conv.Node_conv
    from PIL import Image
    printf('conv grayscale')
    layer_img_gs = np.asarray(Image.open(image_path_gs))
    Y, X = layer_img_gs.shape
    C = 1
    layer_img_gs = layer_img_gs.reshape(Y, X, C)
    layer_img_gs = layer_img_gs.transpose((2, 0, 1)).reshape(1, C, Y, X)
    kernel_gs = np.zeros((1, 1, 3, 3))
    kernel_gs[0, 0, :, :] = kernel_core
    output_img_gs = conv_layer.act_forward(layer_img_gs, np.swapaxes(kernel_gs, 0, 1), 1, 1)
    output_img_gs = output_img_gs.reshape(C, Y, X).transpose(1, 2, 0)
    output_img_gs = output_img_gs.reshape(Y, X)
    Image.fromarray(np.uint8(output_img_gs.clip(0, 255))).save(output_path_gs)
    printf('conv rgb')
    layer_img_rgb = np.array(Image.open(image_path_rgb))
    Y, X, C = layer_img_rgb.shape
    layer_img_rgb = layer_img_rgb.transpose((2, 0, 1)).reshape(1, C, Y, X)
    kernel_rgb = np.zeros((3, 3, 3, 3))
    kernel_rgb[0, 0, :, :] = kernel_core
    kernel_rgb[1, 1, :, :] = kernel_core
    kernel_rgb[2, 2, :, :] = kernel_core
    output_img_rgb = conv_layer.act_forward(layer_img_rgb, np.swapaxes(kernel_rgb, 0, 1), 1, 1)
    output_img_rgb = output_img_rgb.reshape(C, Y, X).transpose(1, 2, 0)
    Image.fromarray(np.uint8(output_img_rgb.clip(0, 255))).save(output_path_rgb)
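# A standalone sketch of the layout shuffling done in test_conv: PIL yields
# images as (H, W, C); the conv layer expects batched (N, C, H, W), and the
# result is transposed back before saving. Shown on random data instead of a
# real image so it runs without the test fixtures.
import numpy as np

img = np.random.randint(0, 256, size=(4, 5, 3)).astype(float)  # H=4, W=5, C=3
Y, X, C = img.shape
batched = img.transpose(2, 0, 1).reshape(1, C, Y, X)    # HWC -> NCHW, batch of 1
restored = batched.reshape(C, Y, X).transpose(1, 2, 0)  # back to HWC
assert (img == restored).all()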
def trainingFunc(funcName, input_size, output_size):
    """
    first class function: return a closure of the actual training function

    ARGUMENTS:
        funcName    the description of how the training function should behave
    RETURN:
        the actual training function, and its attribute list
    """
    attr_list = ["x{}".format(i) for i in range(input_size)]
    attr_list += ["y{}".format(i) for i in range(output_size)]
    if funcName == "sigmoid":
        if output_size > 1:
            printf(_OP_SIZE_ERR, funcName, type="ERROR")
            return
        def sigmoid(xList, output_range):
            """
            simple sigmoid function: most suitable for neurons of sigmoid activation
            can be treated as a baseline
            """
            op_avg = (output_range[0] + output_range[1]) / 2
            op_rag = output_range[1] - output_range[0]
            xAvg = reduce(lambda x1, x2: x1 + x2, xList) / len(xList)
            # map sigmoid(xAvg) from (0,1) onto output_range
            return xList + [op_rag * (1 / (1 + exp(-xAvg)) - 0.5) + op_avg] + [xAvg]
        attr_list += ["xAvg"]
        return sigmoid, attr_list
    elif funcName == "lin":
        if output_size > 1:
            printf(_OP_SIZE_ERR, funcName, type="ERROR")
            return
        def lin(xList, output_range):
            assert len(xList) >= 1
            # linear combination of the inputs, weighted by their index
            x_lin = sum(i * x for i, x in enumerate(xList))
            return xList + [4 * x_lin] + [x_lin]
        attr_list += ["xLin"]
        return lin, attr_list
    elif funcName == "sin":
        # simple sin function
        if output_size > 1:
            printf(_OP_SIZE_ERR, funcName, type="ERROR")
            return
        def sine(xList, output_range):
            assert len(xList) >= 1
            op_avg = (output_range[0] + output_range[1]) / 2
            op_rag = output_range[1] - output_range[0]
            sinAvg = reduce(lambda x1, x2: x1 + x2, xList) / len(xList)
            return xList + [op_rag * sin(sinAvg) + op_avg] + [sinAvg]
        attr_list += ["xAvg"]
        return sine, attr_list
    elif funcName == "random":
        def rand(xList, output_range):
            return xList + [uniform(output_range[0], output_range[1])]
        return rand, attr_list
    elif funcName == "ann":
        # output is generated by the ANN, and the data is intended
        # to be learned by an ANN in return
        def forwardANN(xList, struct, activ_list, cost_type):
            net = Net_structure(struct, [activation_dict[n] for n in activ_list], cost_dict[cost_type])
            w_list = []
            b_list = []
            for l in range(len(struct) - 1):
                w_list += [(array(range(struct[l] * struct[l + 1])).reshape(struct[l], struct[l + 1]) + float(l)) / 100.0]
                b_list += [(array(range(struct[l + 1])) - float(l)) / 100.0]
            net.set_w_b(w_list, b_list)
            return xList + list(net.net_act_forward(array(xList)))
        return forwardANN, attr_list
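# Usage sketch (illustrative values, not from the repo): the factory returns
# (closure, attr_list), so a caller would do
#
#     gen, attrs = trainingFunc('sigmoid', 2, 1)
#     row = gen([0.3, -0.7], output_range=(-1., 1.))
#     # attrs == ['x0', 'x1', 'y0', 'xAvg']; row appends the scaled sigmoid
#     # of the input mean, then the mean itself (-0.2), to the inputs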
              'learn_rate', 'inc_rate', 'dec_rate', 'momentum']

def parse_args():
    parser = argparse.ArgumentParser('db analysis, specific for ANN application')
    parser.add_argument('-t', '--data_table', type=str, metavar='DATA',
                        default=_DATA_TABLE, choices=_DATA_TABLE_CHOICES,
                        help='table containing data info of training')
    parser.add_argument('-atr', '--attr_list', type=str, metavar='ATR', nargs='+',
                        choices=_ATTR_RANGE, help='which variables to control')
    parser.add_argument('-d', '--db_name', type=str, metavar='DB_NAME', default='ann.db',
                        help='name of the db file, e.g.: ann.db')
    return parser.parse_args()

if __name__ == '__main__':
    args = parse_args()
    if args.data_table == 'profile_cost|ann':
        temp_table = ANALYSIS_TABLE_COST
    elif args.data_table == 'output_data|ann':
        temp_table = ANALYSIS_TABLE_OUTPUT
    else:
        temp_table = ANALYSIS_TABLE
        printf('table is neither cost nor output. DOUBLE CHECK!', type='WARN')
    db_control_dim(_META_TABLE, args.data_table, *args.attr_list,
                   temp_table=temp_table, db_name=args.db_name)
    if args.data_table == 'output_data|ann':
        join_input_output_table(ANALYSIS_DB, DB_NAME)
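# Invocation sketch (the script's own filename is not shown in this section,
# so 'db_analysis.py' is invented for illustration):
#
#     python db_analysis.py -t 'profile_cost|ann' -atr learn_rate momentum -d ann.db
#
# which picks ANALYSIS_TABLE_COST as temp_table and slices the cost data along
# the learn_rate and momentum dimensions via db_control_dim.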