Example #1
def main2():
    dnn = DNN(input=28 * 28,
              layers=[DropoutLayer(160, LQ),
                      Layer(10, LCE)],
              eta=0.05,
              lmbda=1)  # 98%
    dnn.initialize_rand()
    train, test, validation = load_mnist_simple()

    f_names = [f'mnist_expaned_k0{i}.pkl.gz' for i in range(50)]
    shuffle(f_names)
    for f_name in f_names:
        print(f_name)
        with timing("load"):
            raw_data = load_data(f_name)
        with timing("shuffle"):
            shuffle(raw_data)
        with timing("reshape"):
            data = [(x.reshape((784, 1)), y)
                    for x, y in islice(raw_data, 100000)]
            del raw_data
        with timing("learn"):
            dnn.learn(data)
        del data
        print('TEST:', dnn.test(test))
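
Several of the examples on this page call timing("label") as a context manager around a block of work. The helper itself is not shown here; below is a minimal sketch of such a labeled timer, assuming it only needs to print elapsed wall-clock time (the actual timing/utils.timing imported by these projects may differ):

import time
from contextlib import contextmanager

@contextmanager
def timing(label=""):
    # Start a wall-clock timer, run the body of the with-block,
    # then print how long it took under the given label.
    start = time.perf_counter()
    try:
        yield
    finally:
        print(f"{label}: {time.perf_counter() - start:.3f}s")
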
Example #2
    def main(self) -> int:
        input_lines = self.parse_input()
        with timing():
            print(f"Part 1 (original): {self.compute_1(input_lines)}")

        for name, fn in self._alternate_solutions_1:
            with timing():
                print(f"Part 1 ({name}): {fn(input_lines)}")

        with timing():
            print(f"Part 2 (original): {self.compute_2(input_lines)}")

        for name, fn in self._alternate_solutions_2:
            with timing():
                print(f"Part 2 ({name}): {fn(input_lines)}")

        return 0
Example #3
    def __init__(self, config_file, model_file, weights_file):
        with open(config_file) as f:
            self.config = json.load(f)

        self.maxlen = self.config.get('MAXLEN', 32)
        self.invert = self.config.get('INVERT', True)
        self.ngram = self.config.get('NGRAM', 5)
        self.pad_words_input = self.config.get('PAD_WORDS_INPUT', True)

        self.codec = CharacterCodec(utils.ALPHABET, self.maxlen)
        if self.config.get('BASE_CODEC_INPUT', False):
            self.input_codec = CharacterCodec(utils.BASE_ALPHABET, self.maxlen)
        else:
            self.input_codec = self.codec

        with utils.timing('Create model'):
            with open(model_file) as f:
                self.model = model_from_json(f.read())
        with utils.timing('Compile model'):
            self.model.compile(loss='categorical_crossentropy',
                               optimizer='adam',
                               metrics=['accuracy'])
        with utils.timing('Load weights'):
            self.model.load_weights(weights_file)
Example #4
def main():
    train, test, validation = load_mnist_simple()
    # x, y = train[0]
    # print("x: ", x.shape)
    # print("y: ", y)

    with timing(f""):
        # dnn = DNN(input=28 * 28, layers=[Layer(30, LQ), Layer(10, LCE)], eta=0.05)  # 96%
        # dnn = DNN(input=28 * 28, layers=[Layer(30, LQ), Layer(10, SM)], eta=0.001)  # 68%
        # dnn = DNN(input=28 * 28, layers=[Layer(100, LQ), Layer(10, LCE)], eta=0.05, lmbda=5)  # 98%
        # dnn = DNN(input=28 * 28, layers=[DropoutLayer(100, LQ), Layer(10, LCE)], eta=0.05)  # 97.5%
        dnn = DNN(input=28 * 28, layers=[DropoutLayer(160, LQ), Layer(10, LCE)], eta=0.05, lmbda=3)
        dnn.initialize_rand()
        dnn.learn(train, epochs=30, test=validation, batch_size=29)

    print('test:', dnn.test(test))
    print(dnn.stats())
Example #5
def prepare_datasets(dataset_name: str,
                     working_folder: str,
                     s3_bucket: str = None,
                     s3_folder: str = None,
                     target: str = None,
                     headers: bool = False,
                     verbose: bool = False,
                     clean: bool = False,
                     preprocessor_name: str = None,
                     scale: bool = False,
                     categ_encoding: bool = False,
                     categ_features: list = None):
    # timing
    start_time = time.time()
    latest_time = start_time
    preproc_class = get_preprocessor_class(preprocessor_name)
    preprocessor = preproc_class(target, categ_features)

    # loading
    logging.info('Loading raw dataset: {}...'.format(dataset_name))
    frame = load_raw_dataset(dataset_name, target, preproc_class,
                             working_folder, s3_bucket, s3_folder, headers,
                             verbose, clean)
    preprocessor.initialize(frame)
    latest_time = timing('loading data', latest_time)

    # pre-processing
    logging.info('Pre-processing...')
    frame = preprocessor.pre_process(frame)
    latest_time = timing('pre-processing', latest_time)

    # splitting
    logging.info('Splitting dataset...')
    frames_dict = preproc_class.split_datasets(frame,
                                               [("train", TRAIN_PROPORTION),
                                                ("valid", VALID_PROPORTION),
                                                ("test", TEST_PROPORTION)],
                                               target)
    latest_time = timing('splitting', latest_time)

    # normalizing
    if scale:
        logging.info('Normalizing datasets...')
        frames_dict = preprocessor.normalize(frames_dict)
        latest_time = timing('normalizing', latest_time)

    # one-hot-encoding
    if categ_encoding:
        logging.info('Encoding categorical features...')
        frames_dict = preprocessor.categorical_encoding(frames_dict)
        latest_time = timing('encoding', latest_time)

    preprocessor.log_dataset_features(frames_dict)

    # saving
    logging.info('Saving datasets...')
    save_datasets(frames_dict, working_folder, dataset_name + '_preprocessed',
                  s3_bucket, s3_folder, verbose)
    latest_time = timing('saving', latest_time)

    timing('total time', start_time)
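
In the example above, timing('label', since) is used as a plain function rather than a context manager: it reports the time elapsed since a previous checkpoint and returns a new one, which the caller stores back into latest_time. A sketch under that assumption (the logging format is a guess, not this project's actual helper):

import time
import logging

def timing(label, since):
    # Log how many seconds have passed since `since` and
    # return the current time as the next checkpoint.
    now = time.time()
    logging.info('%s took %.2fs', label, now - since)
    return now
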
Example #6
def train_level_model(train_args) -> None:

    # timing
    start_time = time.time()
    latest_time = start_time

    # destination folder and files
    timestamp = datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f')[:-3]
    training_job_common_dir = os.path.join(train_args.working_folder,
                                           train_args.dataset_name,
                                           train_args.config_id)
    training_job_dir = os.path.join(training_job_common_dir, timestamp)

    if not os.path.exists(training_job_dir):
        logging.info('creating training job dir: {}'.format(training_job_dir))
        os.makedirs(training_job_dir)

    # set logging
    fh = logging.FileHandler(os.path.join(training_job_dir, 'logging.log'))
    fh.setLevel(logging.DEBUG)
    formatter = logging.Formatter(
        '%(asctime)s:%(levelname)s:%(filename)s:%(funcName)s:%(lineno)d: %(message)s'
    )
    fh.setFormatter(formatter)
    logging.getLogger().addHandler(fh)

    # loading model parameters and performing feature selection
    logging.info('Loading training parameters...')
    logging.info('Training config file: {}'.format(train_args.config_filename))
    logging.info('Training model id: {}'.format(train_args.config_id))

    config = load_config('model_{}.yaml'.format(train_args.config_filename))
    features, target, algo, hyperparameters = get_training_config(
        config, train_args.config_id)
    infra_s3 = load_infra_s3(train_args.infra_s3)
    infra_sm = load_infra_sm(train_args.infra_sm)
    logging.info('Infra S3: {}'.format(infra_s3))
    logging.info('Infra SageMaker: {}'.format(infra_sm))

    logging.info(
        'Selected features: {}'.format(features if features else 'all'))
    logging.info('Target: {}'.format(target))
    if train_args.model_id:
        logging.info('AWS model ID: {}'.format(train_args.model_id))

    # initializing model object
    logging.info('Initializing model object...')

    if 'sklearn' in algo.lower():
        model = eval(algo)(train_args.dataset_name, hyperparameters, infra_s3,
                           features, target, train_args.data_folder,
                           training_job_dir, train_args.clean,
                           train_args.model_id)
    elif 'aws' in algo.lower():
        if infra_s3 is None or infra_sm is None:
            raise ValueError(
                'Parameters --infra-s3 and --infra-sm are required for SageMaker algorithms'
            )
        model = eval(algo)(train_args.dataset_name, hyperparameters, infra_s3,
                           infra_sm, features, target, train_args.data_folder,
                           training_job_common_dir, training_job_dir,
                           train_args.model_id, train_args.clean)
    elif 'h2o' in algo.lower():
        model = eval(algo)(train_args.dataset_name, hyperparameters, infra_s3,
                           features, target, train_args.h2o,
                           train_args.data_folder, training_job_dir,
                           train_args.clean, train_args.model_id)
    else:
        logging.error('Unknown algo: {}. Exiting.'.format(algo))
        raise ValueError

    # Training
    if 't' in train_args.actions:
        logging.info('training {}...'.format(algo))
        logging.info('hyper-parameters: {}'.format(hyperparameters))
        model.train()
        timing('training', latest_time)

    # Predict
    if 'p' in train_args.actions:
        model.predict()
        timing('predict', latest_time)

    # Evaluate results
    if 'e' in train_args.actions:
        model.evaluate()

    # Deploy
    if 'd' in train_args.actions:
        model.deploy()

    # Remove
    if 'r' in train_args.actions:
        model.delete_endpoint()

    # Grid search
    if 'g' in train_args.actions:
        logging.info('grid search {}...'.format(algo))
        model.grid_search()
        timing('grid search', latest_time)

    # Final timing
    timing('total time', start_time)

    return
Example #7
File: olc.py Project: Behoston/tsg2_zal_2
    super_strings = [node.value]
    while overlap_graph:
        while node.has_out:
            node_id, overlap = node.get_next_node_id_and_overlap()
            node = overlap_graph[node_id]
            overlap_graph.remove_node(node)
            super_strings[-1] += node.value[overlap:]
        if overlap_graph:
            node = overlap_graph.get_node_with_smallest_number_of_entries()
            overlap_graph.remove_node(node)
            super_strings.append(node.value)
    super_strings = [super_string for super_string in super_strings if len(super_string) > minimal_super_string_length]
    return super_strings


def consensus(contigs):
    raise NotImplementedError


if __name__ == '__main__':
    # DEBUG
    logging.basicConfig(level=logging.DEBUG)
    from io_utils import parse_input, dump_output
    from algorithms.error_corrections import CorrectedReads

    data = parse_input('./sample_data/reads_1_percent_bad.fasta')
    with timing():
        super_string = olc(CorrectedReads(data))
    print(len(max(super_string, key=len)))
    dump_output('./sample_data/con.fasta', super_string)
Example #8
    def hk_read(self, hk):
        """
        Purpose: to read hk files and maybe more? idk
        input : hk - hk data file
        outputs: mega_hk - idk
        calls : None
        """
        mega_hk = []
        name = []
        data = []
        time = []
        file = gzip.open(hk)
        try:
            for line in file:
                fields = line.strip().split(",")
                t_type = str(fields[0])
                time_stamp = float(fields[1])
                name1 = str(fields[2])
                name2 = str(fields[3])
                names = (name1 + "_" + name2).replace('"', '')
                name.append(names.replace(' ', '_'))
                # ============================================================================================
                if t_type == 't' and names != 'HKMBv1b0_SYNC_number' and names != 'HKMBv2b0_SYNC_number':

                    if self.offset.value != 0.0:
                        sync_time = ut.utc_to_sync(
                            time_stamp,
                            self.offset)  # check to see if offset has been set
                        time.append(float(sync_time))
                        data.append(float(fields[4]))
                    else:
                        time.append(
                            float(time_stamp)
                        )  # this won't go into file anyway, so doesn't matter what it does
                        data.append(float(fields[4]))

                elif t_type == 't' and names == 'HKMBv1b0_SYNC_number':
                    time.append(
                        float(fields[4])
                    )  # append sync number as timestamp, rather than network time
                    data.append(float(time_stamp))
                    with self.offset.get_lock():
                        self.offset.value = ut.timing(float(fields[1]),
                                                      float(fields[4]))
                        print(colored((fields[1], fields[4]), 'magenta'))
                        print(
                            colored(
                                'Offset: %s , %s' %
                                (float(fields[1]), self.offset.value), 'red'))
                        sys.stdout.flush()
                    self.time_tuple[0] = time[-1]
                    self.time_tuple[1] = data[-1]

                elif t_type == 't' and names == 'HKMBv2b0_SYNC_number':
                    time.append(float(fields[4]))
                    data.append(float(time_stamp))

                else:
                    time.append(float(time_stamp))
                    data.append(float(fields[4]))
                # ==============================================================================================

        except IOError:
            print(colored('HK FILE CORRUPT! %s' % (file), 'red'))
            self.bad_counter += 1

        # making dict entry for name as integer ====================================
        """
            Creating a Dictionary of HK Sensors
            :return: writes dictionary to file (.txt)
        """
        if self.n == 0:
            if os.path.exists(
                    self.dir2 +
                    '/hk_dict.txt'):  # if we already have a saved dictionary :
                f = open(self.dir2 + '/hk_dict.txt', 'r')
                dict_data = f.read()
                f.close()
                self.name_dict = eval(dict_data)
                for i in range(len(name)):
                    if name[i] not in self.name_dict.values():
                        self.name_dict.update(
                            {len(self.name_dict.keys()) + 1.0: name[i]})
            else:
                master_names = []
                for i in range(len(name)):
                    if name[i] not in master_names:
                        master_names.append(name[i])
                name_num = np.arange(0.0, len(master_names), 1.0)
                self.name_dict = dict(zip(name_num, master_names))

        else:
            for i in range(len(name)):
                if name[i] not in self.name_dict.values():
                    self.name_dict.update(
                        {len(self.name_dict.keys()) + 1.0: name[i]})
        # =========================================================================
        f = open(self.dir2 + '/hk_dict.txt', 'w')
        f.write(str(self.name_dict))
        f.close()

        #==============================================
        '''
            Routine for Sorting Data by Time Index
            :return: mega_hk
        '''
        sort_name = [x for _, x in sorted(zip(time, name))]
        sort_data = [x for _, x in sorted(zip(time, data))]
        sort_time = sorted(time)
        #==============================================
        # loop through and append to final array until new time index is found
        l = 0
        for i in range(len(sort_time) - 1):
            # change all hk sensor names to integers for storage
            # num = int(self.name_dict.keys()[self.name_dict.values().index(sort_name[i])])
            for k, v in self.name_dict.iteritems():
                if v == sort_name[i]:
                    num = int(k)
                    val = v
            sort_name[i] = float(num)

            # only increment index for a new timestamp, not for file num or for t =======
            if l == 0:  # if start of a new time (or new file)
                new_time = sort_time[i]
                time2 = np.zeros(500)
                names2 = np.zeros(500)
                data2 = np.zeros(500)
                time2[num] = sort_time[i]
                names2[num] = float(sort_name[i])
                data2[num] = sort_data[i]
                l += 1

            else:
                if new_time == sort_time[i]:
                    time2[num] = sort_time[i]
                    names2[num] = float(sort_name[i])
                    data2[num] = sort_data[i]

                else:
                    new_time = sort_time[i]
                    l = 0  # reset timer for new timestamp
                    # ==================================================================
                    if len(
                            self.name_dict.keys()
                    ) <= 500:  # make sure num of sensors isn't over array limit
                        hk_data = np.array(
                            (time2, names2, data2
                             ))  # make monolithic array, only of one timestamp
                        mega_hk.append(hk_data)

                    else:
                        print(len(self.name_dict.keys()))
                        print(
                            colored(
                                "Number of reported sensors over size limit!",
                                'red'))

                    time2 = np.zeros(500)
                    names2 = np.zeros(500)
                    data2 = np.zeros(500)
                    time2[num] = sort_time[i]
                    names2[num] = float(sort_name[i])
                    data2[num] = sort_data[i]
                    # ==================================================================
        # send data to append_hk
        return mega_hk
Example #9
    return transform


def expand(data):
    # transforms = [transform_f(A) for A in generate_distortions()]
    transforms = list(generate_distortions())
    total = len(data)
    for i, (x, y) in enumerate(data):
        for f in transforms:
            yield sp_ndi.affine_transform(x, f, prefilter=False), y
            # yield sp_ndi.geometric_transform(x, f, prefilter=False), y
        print(f"{i}/{total} done")


if __name__ == '__main__':
    train, test, validate = load_mnist_simple(shape=(28, 28))
    # expand(train)
    # dump_mnist('mnist_test_dump.pkl.gz', train)
    chunk = 1000
    for i in range(0, 50):
        a = i * chunk
        b = (i + 1) * chunk
        print(a, b)
        data = list(expand(train[a:b]))
        # with timing("npz"):
        #     np.savez_compressed(f'{DATA_PATH}/mnist_expaned_k0{i}.npz', data)
        with timing("pickle gz"):
            dump_data(f'{DATA_PATH}/mnist_expaned_k0{i}.pkl.gz', data)
    # np.save('mnist_test_dump.npy', train)
Example #10
def move():
    data = {}
    time_remaining = [150]  # leave 50ms for network
    position = None
    path = None
    next_move = list()
    thread_pool = list()
    potential_snake_positions = list()
    direction = None

    with timing("bottle", time_remaining):
        data = bottle.request.json

    try:
        with timing("data parsing", time_remaining):
            board = Board(**data)
            snake = board.get_snake(data['you'])
            direction = general_direction(board, snake.head,
                                          snake.attributes['health_points'])
            move = direction  # fallback

        for enemy_snake in board.snakes:
            if enemy_snake.attributes['id'] != snake.attributes[
                    'id']:  # and enemy_snake.attributes['health_points'] >= snake.attributes['health_points']:
                potential_snake_positions.extend([
                    position for position in enemy_snake.potential_positions()
                    if board.inside(position)
                ])

        number_of_squares = list()
        # find number of empty squares in every direction.
        for cell in neighbours(snake.head):
            if board.inside(cell):
                count = len(flood_fill(board, cell, False))
                number_of_squares.append((cell, count))
                if count <= 10: potential_snake_positions.append(cell)

        if number_of_squares[0][1] <= 10 and number_of_squares[1][
                1] <= 10 and number_of_squares[2][
                    1] <= 10 and number_of_squares[3][1] <= 10:
            largest = reduce(
                lambda carry, direction: carry
                if carry[1] > direction[1] else direction, number_of_squares,
                number_of_squares[0])
            potential_snake_positions.remove(largest[0])

        print potential_snake_positions

        with timing("need_food", time_remaining):
            food = need_food(board, snake.head,
                             snake.attributes['health_points'])

        if food:
            #if snake.attributes['health_points'] < 30:
            #potential_snake_positions = []

            with timing("find_food", time_remaining):
                food_positions = find_food(snake.head,
                                           snake.attributes['health_points'],
                                           board, food)
                positions = [position[0] for position in food_positions]
                # positions = list(set([ position[0] for position in food_positions ]) - set(potential_snake_positions))
                print positions
                print[board.get_cell(position) for position in positions]

                for position in positions:
                    t = Thread(
                        target=bfs(snake.head, position, board,
                                   potential_snake_positions, next_move))
                    t = Thread(
                        target=bfs(snake.head, position, board, [], next_move))

                    thread_pool.append(t)

                for thread in thread_pool:
                    thread.start()
                    thread.join()

                next_move = filter(lambda path: not len(path) == 0, next_move)

                path = min(next_move, key=len)
                move = get_direction(snake.head, path[0])
        else:
            #with timing("flood_fill", time_remaining):
            # flood_fill(board.vacant, snake.head, True)
            with timing("find_safest_position", time_remaining):
                positions = find_safest_position(snake.head, direction, board)
                positions = [position[0] for position in positions]
                # positions = list(set([position[0] for position in positions]) - set(potential_snake_positions))
                print positions
                print[board.get_cell(position) for position in positions]

                for position in positions:
                    t = Thread(
                        target=bfs(snake.head, position, board,
                                   potential_snake_positions, next_move))
                    t = Thread(
                        target=bfs(snake.head, position, board, [], next_move))

                    thread_pool.append(t)

                for thread in thread_pool:
                    thread.start()
                    thread.join()

                path = max(next_move, key=len)
                move = get_direction(snake.head, path[0])
    except Exception as e:
        print "WTF", e.message

    print next_move
    print path
    print move

    if len(next_move) == 0:
        print "CHANGING MOVE"
        with timing("floodfill", time_remaining):
            floods = {
                "up": len(flood_fill(board,
                                     (snake.head[0], snake.head[1] - 1))),
                "down":
                len(flood_fill(board, (snake.head[0], snake.head[1] + 1))),
                "right":
                len(flood_fill(board, (snake.head[0] + 1, snake.head[1]))),
                "left":
                len(flood_fill(board, (snake.head[0] - 1, snake.head[1])))
            }

            move = max(floods.iterkeys(), key=(lambda key: floods[key]))

    # don't be stupid
    m_move = add(snake.head, DIR_VECTORS[DIR_NAMES.index(move)])
    if board.inside(m_move) and board.get_cell(m_move) == 1:
        print "CHANGING MOVE"
        for direction in DIR_NAMES:
            m_move = add(snake.head, DIR_VECTORS[DIR_NAMES.index(direction)])
            if board.inside(m_move) and board.get_cell(m_move) != 1:
                move = direction

    print "moving", move
    return {'move': move, 'taunt': random.choice(TAUNTS)}
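
The last example passes a mutable time_remaining list along with the label, apparently so the context manager can subtract each block's elapsed milliseconds from a per-request budget (150 ms, leaving 50 ms for the network). A hedged sketch of that variant; the budget bookkeeping is inferred from the call sites, not taken from the project:

import time
from contextlib import contextmanager

@contextmanager
def timing(label, time_remaining=None):
    # Time the with-block; if a one-element [ms] budget list is given,
    # deduct the elapsed milliseconds from it so later blocks can check
    # how much time is left in the request.
    start = time.perf_counter()
    try:
        yield
    finally:
        elapsed_ms = (time.perf_counter() - start) * 1000.0
        if time_remaining is not None:
            time_remaining[0] -= elapsed_ms
        print("%s took %.1f ms" % (label, elapsed_ms))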