def main2():
    dnn = DNN(input=28 * 28, layers=[DropoutLayer(160, LQ), Layer(10, LCE)], eta=0.05, lmbda=1)  # 98%
    dnn.initialize_rand()
    train, test, validation = load_mnist_simple()
    f_names = [f'mnist_expaned_k0{i}.pkl.gz' for i in range(50)]
    shuffle(f_names)
    for f_name in f_names:
        print(f_name)
        with timing("load"):
            raw_data = load_data(f_name)
        with timing("shuffle"):
            shuffle(raw_data)
        with timing("reshape"):
            data = [(x.reshape((784, 1)), y) for x, y in islice(raw_data, 100000)]
        del raw_data
        with timing("learn"):
            dnn.learn(data)
        del data
        print('TEST:', dnn.test(test))
def main(self) -> int:
    input_lines = self.parse_input()
    with timing():
        print(f"Part 1 (original): {self.compute_1(input_lines)}")
    for name, fn in self._alternate_solutions_1:
        with timing():
            print(f"Part 1 ({name}): {fn(input_lines)}")
    with timing():
        print(f"Part 2 (original): {self.compute_2(input_lines)}")
    for name, fn in self._alternate_solutions_2:
        with timing():
            print(f"Part 2 ({name}): {fn(input_lines)}")
    return 0
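# None of these snippets ship the `timing` helper itself. A minimal sketch of
# the context-manager form used in the two mains above, and as utils.timing(...)
# in the snippet below (an assumption, not the original helper):
import time
from contextlib import contextmanager

@contextmanager
def timing(label=""):
    # report wall-clock time spent inside the `with` block
    start = time.perf_counter()
    try:
        yield
    finally:
        print(f"{label or 'elapsed'}: {time.perf_counter() - start:.3f}s")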
def __init__(self, config_file, model_file, weights_file):
    with open(config_file) as f:
        self.config = json.load(f)
    self.maxlen = self.config.get('MAXLEN', 32)
    self.invert = self.config.get('INVERT', True)
    self.ngram = self.config.get('NGRAM', 5)
    self.pad_words_input = self.config.get('PAD_WORDS_INPUT', True)
    self.codec = CharacterCodec(utils.ALPHABET, self.maxlen)
    if self.config.get('BASE_CODEC_INPUT', False):
        self.input_codec = CharacterCodec(utils.BASE_ALPHABET, self.maxlen)
    else:
        self.input_codec = self.codec
    with utils.timing('Create model'):
        with open(model_file) as f:
            self.model = model_from_json(f.read())
    with utils.timing('Compile model'):
        self.model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    with utils.timing('Load weights'):
        self.model.load_weights(weights_file)
def main():
    train, test, validation = load_mnist_simple()
    # x, y = train[0]
    # print("x: ", x.shape)
    # print("y: ", y)
    with timing(""):
        # dnn = DNN(input=28 * 28, layers=[Layer(30, LQ), Layer(10, LCE)], eta=0.05)  # 96%
        # dnn = DNN(input=28 * 28, layers=[Layer(30, LQ), Layer(10, SM)], eta=0.001)  # 68%
        # dnn = DNN(input=28 * 28, layers=[Layer(100, LQ), Layer(10, LCE)], eta=0.05, lmbda=5)  # 98%
        # dnn = DNN(input=28 * 28, layers=[DropoutLayer(100, LQ), Layer(10, LCE)], eta=0.05)  # 97.5%
        dnn = DNN(input=28 * 28, layers=[DropoutLayer(160, LQ), Layer(10, LCE)], eta=0.05, lmbda=3)
        dnn.initialize_rand()
        dnn.learn(train, epochs=30, test=validation, batch_size=29)
        print('test:', dnn.test(test))
        print(dnn.stats())
def prepare_datasets(dataset_name: str, working_folder: str, s3_bucket: str = None,
                     s3_folder: str = None, target: str = None, headers: bool = False,
                     verbose: bool = False, clean: bool = False, preprocessor_name: str = None,
                     scale: bool = False, categ_encoding: bool = False, categ_features: list = None):
    # timing
    start_time = time.time()
    latest_time = start_time

    preproc_class = get_preprocessor_class(preprocessor_name)
    preprocessor = preproc_class(target, categ_features)

    # loading
    logging.info('Loading raw dataset: {}...'.format(dataset_name))
    frame = load_raw_dataset(dataset_name, target, preproc_class, working_folder,
                             s3_bucket, s3_folder, headers, verbose, clean)
    preprocessor.initialize(frame)
    latest_time = timing('loading data', latest_time)

    # pre-processing
    logging.info('Pre-processing...')
    frame = preprocessor.pre_process(frame)
    latest_time = timing('pre-processing', latest_time)

    # splitting
    logging.info('Splitting dataset...')
    frames_dict = preproc_class.split_datasets(
        frame,
        [("train", TRAIN_PROPORTION), ("valid", VALID_PROPORTION), ("test", TEST_PROPORTION)],
        target)
    latest_time = timing('splitting', latest_time)

    # normalizing
    if scale:
        logging.info('Normalizing datasets...')
        frames_dict = preprocessor.normalize(frames_dict)
        latest_time = timing('normalizing', latest_time)

    # one-hot encoding
    if categ_encoding:
        logging.info('Encoding categorical features...')
        frames_dict = preprocessor.categorical_encoding(frames_dict)
        latest_time = timing('encoding', latest_time)

    preprocessor.log_dataset_features(frames_dict)

    # saving
    logging.info('Saving datasets...')
    save_datasets(frames_dict, working_folder, dataset_name + '_preprocessed',
                  s3_bucket, s3_folder, verbose)
    latest_time = timing('saving', latest_time)

    timing('total time', start_time)
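# prepare_datasets() (and train_level_model() below) use a function-style
# timing(step, since) that logs the time since the previous checkpoint and
# returns a fresh one. A minimal sketch under that assumption:
import time
import logging

def timing(step_name, since):
    # log elapsed seconds since `since` and return the new checkpoint
    now = time.time()
    logging.info('%s: %.2fs', step_name, now - since)
    return now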
def train_level_model(train_args) -> None:
    # timing
    start_time = time.time()
    latest_time = start_time

    # destination folder and files
    timestamp = datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f')[:-3]
    training_job_common_dir = os.path.join(train_args.working_folder,
                                           train_args.dataset_name,
                                           train_args.config_id)
    training_job_dir = os.path.join(training_job_common_dir, timestamp)
    if not os.path.exists(training_job_dir):
        logging.info('creating training job dir: {}'.format(training_job_dir))
        os.makedirs(training_job_dir)

    # set logging
    fh = logging.FileHandler(os.path.join(training_job_dir, 'logging.log'))
    fh.setLevel(logging.DEBUG)
    formatter = logging.Formatter(
        '%(asctime)s:%(levelname)s:%(filename)s:%(funcName)s:%(lineno)d: %(message)s')
    fh.setFormatter(formatter)
    logging.getLogger().addHandler(fh)

    # loading model parameters and performing feature selection
    logging.info('Loading training parameters...')
    logging.info('Training config file: {}'.format(train_args.config_filename))
    logging.info('Training model id: {}'.format(train_args.config_id))
    config = load_config('model_{}.yaml'.format(train_args.config_filename))
    features, target, algo, hyperparameters = get_training_config(config, train_args.config_id)
    infra_s3 = load_infra_s3(train_args.infra_s3)
    infra_sm = load_infra_sm(train_args.infra_sm)
    logging.info('Infra S3: {}'.format(infra_s3))
    logging.info('Infra SageMaker: {}'.format(infra_sm))
    logging.info('Selected features: {}'.format(features if features else 'all'))
    logging.info('Target: {}'.format(target))
    if train_args.model_id:
        logging.info('AWS model ID: {}'.format(train_args.model_id))

    # initializing model object
    logging.info('Initializing model object...')
    if 'sklearn' in algo.lower():
        model = eval(algo)(train_args.dataset_name, hyperparameters, infra_s3, features,
                           target, train_args.data_folder, training_job_dir,
                           train_args.clean, train_args.model_id)
    elif 'aws' in algo.lower():
        if infra_s3 is None or infra_sm is None:
            raise ValueError('Parameters --infra-s3 and --infra-sm are required for SageMaker algorithms')
        model = eval(algo)(train_args.dataset_name, hyperparameters, infra_s3, infra_sm,
                           features, target, train_args.data_folder, training_job_common_dir,
                           training_job_dir, train_args.model_id, train_args.clean)
    elif 'h2o' in algo.lower():
        model = eval(algo)(train_args.dataset_name, hyperparameters, infra_s3, features,
                           target, train_args.h2o, train_args.data_folder, training_job_dir,
                           train_args.clean, train_args.model_id)
    else:
        logging.error('Unknown algo: {}. Exiting.'.format(algo))
        raise ValueError('Unknown algo: {}'.format(algo))

    # training
    if 't' in train_args.actions:
        logging.info('training {}...'.format(algo))
        logging.info('hyper-parameters: {}'.format(hyperparameters))
        model.train()
        latest_time = timing('training', latest_time)

    # predict
    if 'p' in train_args.actions:
        model.predict()
        latest_time = timing('predict', latest_time)

    # evaluate results
    if 'e' in train_args.actions:
        model.evaluate()

    # deploy
    if 'd' in train_args.actions:
        model.deploy()

    # remove
    if 'r' in train_args.actions:
        model.delete_endpoint()

    # grid search
    if 'g' in train_args.actions:
        logging.info('grid search {}...'.format(algo))
        model.grid_search()
        latest_time = timing('grid search', latest_time)

    # final timing
    timing('total time', start_time)
    # tail of olc(): greedily walk the overlap graph, extending the current
    # super-string while the node has outgoing edges
    super_strings = [node.value]
    while overlap_graph:
        while node.has_out:
            node_id, overlap = node.get_next_node_id_and_overlap()
            node = overlap_graph[node_id]
            overlap_graph.remove_node(node)
            super_strings[-1] += node.value[overlap:]
        if overlap_graph:
            node = overlap_graph.get_node_with_smallest_number_of_entries()
            overlap_graph.remove_node(node)
            super_strings.append(node.value)
    super_strings = [super_string for super_string in super_strings
                     if len(super_string) > minimal_super_string_length]
    return super_strings


def consensus(contigs):
    raise NotImplementedError


if __name__ == '__main__':
    # DEBUG
    logging.basicConfig(level=logging.DEBUG)
    from io_utils import parse_input, dump_output
    from algorithms.error_corrections import CorrectedReads

    data = parse_input('./sample_data/reads_1_percent_bad.fasta')
    with timing():
        super_string = olc(CorrectedReads(data))
    print(len(max(super_string, key=len)))
    dump_output('./sample_data/con.fasta', super_string)
def hk_read(self, hk):
    """
    Purpose: read a gzipped housekeeping (hk) data file and repackage it
    input  : hk - path to an hk data file
    outputs: mega_hk - list of (time, name, data) arrays, one per timestamp
    calls  : None
    """
    mega_hk = []
    name = []
    data = []
    time = []
    hk_file = gzip.open(hk)
    try:
        for line in hk_file:
            fields = line.strip().split(",")
            t_type = str(fields[0])
            time_stamp = float(fields[1])
            name1 = str(fields[2])
            name2 = str(fields[3])
            names = (name1 + "_" + name2).replace('"', '')
            name.append(names.replace(' ', '_'))
            # ==================================================================
            if t_type == 't' and names != 'HKMBv1b0_SYNC_number' and names != 'HKMBv2b0_SYNC_number':
                if self.offset.value != 0.0:  # check to see if offset has been set
                    sync_time = ut.utc_to_sync(time_stamp, self.offset)
                    time.append(float(sync_time))
                    data.append(float(fields[4]))
                else:
                    # this won't go into the file anyway, so it doesn't matter what it does
                    time.append(float(time_stamp))
                    data.append(float(fields[4]))
            elif t_type == 't' and names == 'HKMBv1b0_SYNC_number':
                # append sync number as timestamp, rather than network time
                time.append(float(fields[4]))
                data.append(float(time_stamp))
                with self.offset.get_lock():
                    self.offset.value = ut.timing(float(fields[1]), float(fields[4]))
                    print(colored((fields[1], fields[4]), 'magenta'))
                    print(colored('Offset: %s , %s' % (float(fields[1]), self.offset.value), 'red'))
                    sys.stdout.flush()
                self.time_tuple[0] = time[-1]
                self.time_tuple[1] = data[-1]
            elif t_type == 't' and names == 'HKMBv2b0_SYNC_number':
                time.append(float(fields[4]))
                data.append(float(time_stamp))
            else:
                time.append(float(time_stamp))
                data.append(float(fields[4]))
            # ==================================================================
    except IOError:
        print(colored('HK FILE CORRUPT! %s' % hk, 'red'))
        self.bad_counter += 1

    # making dict entry for name as integer ====================================
    # Creating a dictionary of HK sensors; writes the dictionary to hk_dict.txt
    if self.n == 0:
        if os.path.exists(self.dir2 + '/hk_dict.txt'):  # if we already have a saved dictionary
            f = open(self.dir2 + '/hk_dict.txt', 'r')
            dict_data = f.read()
            f.close()
            self.name_dict = eval(dict_data)
            for i in range(len(name)):
                if name[i] not in self.name_dict.values():
                    self.name_dict.update({len(self.name_dict.keys()) + 1.0: name[i]})
        else:
            master_names = []
            for i in range(len(name)):
                if name[i] not in master_names:
                    master_names.append(name[i])
            name_num = np.arange(0.0, len(master_names), 1.0)
            self.name_dict = dict(zip(name_num, master_names))
    else:
        for i in range(len(name)):
            if name[i] not in self.name_dict.values():
                self.name_dict.update({len(self.name_dict.keys()) + 1.0: name[i]})
    # ==========================================================================
    f = open(self.dir2 + '/hk_dict.txt', 'w')
    f.write(str(self.name_dict))
    f.close()

    # ==============================================
    # Sort data by time index; builds mega_hk below
    sort_name = [x for _, x in sorted(zip(time, name))]
    sort_data = [x for _, x in sorted(zip(time, data))]
    sort_time = sorted(time)
    # ==============================================

    # loop through and append to the final array until a new time index is found
    l = 0
    for i in range(len(sort_time) - 1):
        # change all hk sensor names to integers for storage
        # num = int(self.name_dict.keys()[self.name_dict.values().index(sort_name[i])])
        for k, v in self.name_dict.items():  # was iteritems() in the Python 2 original
            if v == sort_name[i]:
                num = int(k)
                val = v
        sort_name[i] = float(num)
        # only increment index for a new timestamp, not for file num or for t ==
        if l == 0:  # start of a new time (or new file)
            new_time = sort_time[i]
            time2 = np.zeros(500)
            names2 = np.zeros(500)
            data2 = np.zeros(500)
            time2[num] = sort_time[i]
            names2[num] = float(sort_name[i])
            data2[num] = sort_data[i]
            l += 1
        else:
            if new_time == sort_time[i]:
                time2[num] = sort_time[i]
                names2[num] = float(sort_name[i])
                data2[num] = sort_data[i]
            else:
                new_time = sort_time[i]
                l = 0  # reset timer for new timestamp
                # ==============================================================
                if len(self.name_dict.keys()) <= 500:  # make sure num of sensors isn't over the array limit
                    # make monolithic array, only of one timestamp
                    hk_data = np.array((time2, names2, data2))
                    mega_hk.append(hk_data)
                else:
                    print(len(self.name_dict.keys()))
                    print(colored("Number of reported sensors over size limit!", 'red'))
                time2 = np.zeros(500)
                names2 = np.zeros(500)
                data2 = np.zeros(500)
                time2[num] = sort_time[i]
                names2[num] = float(sort_name[i])
                data2[num] = sort_data[i]
        # ======================================================================
    # send data to append_hk
    return mega_hk
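# `ut.timing` and `ut.utc_to_sync` above are not shown anywhere in these
# snippets. A heavily hedged sketch, assuming the offset is simply the
# difference between the network (UTC) timestamp and the SYNC counter:
def timing(utc_stamp, sync_number):
    # hypothetical: offset such that sync = utc - offset
    return utc_stamp - sync_number

def utc_to_sync(utc_stamp, offset):
    # hypothetical inverse; `offset` is a multiprocessing.Value in the caller
    return utc_stamp - offset.value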
    # tail of the preceding function, which builds an affine transform matrix
    return transform


def expand(data):
    # transforms = [transform_f(A) for A in generate_distortions()]
    transforms = list(generate_distortions())
    total = len(data)
    for i, (x, y) in enumerate(data):
        for f in transforms:
            yield sp_ndi.affine_transform(x, f, prefilter=False), y
            # yield sp_ndi.geometric_transform(x, f, prefilter=False), y
        print(f"{i}/{total} done")


if __name__ == '__main__':
    train, test, validate = load_mnist_simple(shape=(28, 28))
    # expand(train)
    # dump_mnist('mnist_test_dump.pkl.gz', train)
    chunk = 1000
    for i in range(0, 50):
        a = i * chunk
        b = (i + 1) * chunk
        print(a, b)
        data = list(expand(train[a:b]))
        # with timing("npz"):
        #     np.savez_compressed(f'{DATA_PATH}/mnist_expaned_k0{i}.npz', data)
        with timing("pickle gz"):
            dump_data(f'{DATA_PATH}/mnist_expaned_k0{i}.pkl.gz', data)
    # np.save('mnist_test_dump.npy', train)
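# `dump_data` / `load_data` (also used by main2() above) are not shown. A
# minimal sketch, assuming the .pkl.gz extension means a gzip-compressed pickle:
import gzip
import pickle

def dump_data(path, obj):
    # write any picklable object as a gzip-compressed pickle
    with gzip.open(path, 'wb') as f:
        pickle.dump(obj, f, protocol=pickle.HIGHEST_PROTOCOL)

def load_data(path):
    # inverse of dump_data
    with gzip.open(path, 'rb') as f:
        return pickle.load(f)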
def move():
    data = {}
    time_remaining = [150]  # leave 50ms for network
    position = None
    path = None
    next_move = list()
    thread_pool = list()
    potential_snake_positions = list()
    direction = None

    with timing("bottle", time_remaining):
        data = bottle.request.json

    try:
        with timing("data parsing", time_remaining):
            board = Board(**data)
            snake = board.get_snake(data['you'])
            direction = general_direction(board, snake.head, snake.attributes['health_points'])
            move = direction  # fallback

            for enemy_snake in board.snakes:
                if enemy_snake.attributes['id'] != snake.attributes['id']:
                    # and enemy_snake.attributes['health_points'] >= snake.attributes['health_points']:
                    potential_snake_positions.extend([
                        position for position in enemy_snake.potential_positions()
                        if board.inside(position)
                    ])

            # find number of empty squares in every direction
            number_of_squares = list()
            for cell in neighbours(snake.head):
                if board.inside(cell):
                    count = len(flood_fill(board, cell, False))
                    number_of_squares.append((cell, count))
                    if count <= 10:
                        potential_snake_positions.append(cell)

            if (number_of_squares[0][1] <= 10 and number_of_squares[1][1] <= 10
                    and number_of_squares[2][1] <= 10 and number_of_squares[3][1] <= 10):
                largest = reduce(
                    lambda carry, direction: carry if carry[1] > direction[1] else direction,
                    number_of_squares, number_of_squares[0])
                potential_snake_positions.remove(largest[0])

            print potential_snake_positions

        with timing("need_food", time_remaining):
            food = need_food(board, snake.head, snake.attributes['health_points'])

        if food:
            # if snake.attributes['health_points'] < 30:
            #     potential_snake_positions = []
            with timing("find_food", time_remaining):
                food_positions = find_food(snake.head, snake.attributes['health_points'], board, food)
                positions = [position[0] for position in food_positions]
                # positions = list(set([position[0] for position in food_positions]) - set(potential_snake_positions))
                print positions
                print [board.get_cell(position) for position in positions]

                for position in positions:
                    # pass the callable and its args so the search runs on the thread;
                    # the original called bfs() inline, executing it synchronously and
                    # handing Thread its return value
                    t = Thread(target=bfs,
                               args=(snake.head, position, board,
                                     potential_snake_positions, next_move))
                    # alternative tried with no blocked positions:
                    # t = Thread(target=bfs, args=(snake.head, position, board, [], next_move))
                    thread_pool.append(t)
                for thread in thread_pool:
                    thread.start()
                    thread.join()

                next_move = filter(lambda path: not len(path) == 0, next_move)
                path = min(next_move, key=len)
                move = get_direction(snake.head, path[0])
        else:
            # with timing("flood_fill", time_remaining):
            #     flood_fill(board.vacant, snake.head, True)
            with timing("find_safest_position", time_remaining):
                positions = find_safest_position(snake.head, direction, board)
                positions = [position[0] for position in positions]
                # positions = list(set([position[0] for position in positions]) - set(potential_snake_positions))
                print positions
                print [board.get_cell(position) for position in positions]

                for position in positions:
                    t = Thread(target=bfs,
                               args=(snake.head, position, board,
                                     potential_snake_positions, next_move))
                    # alternative tried with no blocked positions:
                    # t = Thread(target=bfs, args=(snake.head, position, board, [], next_move))
                    thread_pool.append(t)
                for thread in thread_pool:
                    thread.start()
                    thread.join()

                path = max(next_move, key=len)
                move = get_direction(snake.head, path[0])
    except Exception as e:
        print "WTF", e.message
        print next_move
        print path
        print move

        if len(next_move) == 0:
            print "CHANGING MOVE"
            with timing("floodfill", time_remaining):
                floods = {
                    "up": len(flood_fill(board, (snake.head[0], snake.head[1] - 1))),
                    "down": len(flood_fill(board, (snake.head[0], snake.head[1] + 1))),
                    "right": len(flood_fill(board, (snake.head[0] + 1, snake.head[1]))),
                    "left": len(flood_fill(board, (snake.head[0] - 1, snake.head[1]))),
                }
                move = max(floods.iterkeys(), key=(lambda key: floods[key]))

    # don't be stupid
    m_move = add(snake.head, DIR_VECTORS[DIR_NAMES.index(move)])
    if board.inside(m_move) and board.get_cell(m_move) == 1:
        print "CHANGING MOVE"
        for direction in DIR_NAMES:
            m_move = add(snake.head, DIR_VECTORS[DIR_NAMES.index(direction)])
            if board.inside(m_move) and board.get_cell(m_move) != 1:
                move = direction

    print "moving", move
    return {'move': move, 'taunt': random.choice(TAUNTS)}
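# move() passes `timing` a label plus the mutable time_remaining budget. A
# minimal sketch (an assumption, kept Python-2 compatible like the snippet)
# that logs the elapsed time and deducts it from the millisecond budget:
import time
from contextlib import contextmanager

@contextmanager
def timing(label, time_remaining):
    start = time.time()
    yield
    elapsed_ms = (time.time() - start) * 1000
    # deduct the elapsed milliseconds from the shared budget
    time_remaining[0] -= elapsed_ms
    print("%s took %.1fms (%.1fms left)" % (label, elapsed_ms, time_remaining[0]))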