Example #1
def get_values(observation):
    return flatten(concat(
        observation['my_car'].values(),
        mapcat(methodcaller('values'), sorted(observation['other_cars'], key=itemgetter('position_length'))),  # Sort by distance, nearest first. Might it be better to also split front from back?
        mapcat(methodcaller('values'), sorted(observation['obstacles' ], key=itemgetter('position_length'))),  # noqa: E202
        mapcat(methodcaller('values'), sorted(observation['stars'     ], key=itemgetter('position_length')))   # noqa: E202
    ))
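For reference, `concat`, `flatten`, and `mapcat` above come from the funcy library, where (in recent funcy versions) they return lazy iterators; a minimal sketch of their behavior on toy data, assuming only funcy is installed:

from funcy import concat, flatten, mapcat

# concat chains several iterables into one lazy sequence.
list(concat([1, 2], (3,), iter([4])))  # [1, 2, 3, 4]

# flatten unnests recursively; mapcat maps and then concatenates the results.
list(flatten([[1, [2]], [3]]))  # [1, 2, 3]
list(mapcat(lambda d: d.values(), [{'a': 1}, {'b': 2}]))  # [1, 2]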
Example #2
def prepare_file_path(kwargs):
    """Determine file path from the first output name.

    Used in creating .dvc files.
    """
    from dvc.dvcfile import DVC_FILE, DVC_FILE_SUFFIX

    out = first(
        concat(
            kwargs.get("outs", []),
            kwargs.get("outs_no_cache", []),
            kwargs.get("metrics", []),
            kwargs.get("metrics_no_cache", []),
            kwargs.get("plots", []),
            kwargs.get("plots_no_cache", []),
            kwargs.get("outs_persist", []),
            kwargs.get("outs_persist_no_cache", []),
            kwargs.get("checkpoints", []),
            without([kwargs.get("live", None)], None),
        )
    )

    return (
        os.path.basename(os.path.normpath(out)) + DVC_FILE_SUFFIX
        if out
        else DVC_FILE
    )
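funcy's `first` returns the first element of a (possibly lazy) sequence, or `None` when it is empty, and `without` drops the given values, which is why an absent `live` entry disappears before `first` sees it. A minimal sketch, assuming funcy:

from funcy import concat, first, without

kwargs = {"outs": [], "metrics": ["scores.json"]}
first(concat(kwargs.get("outs", []), kwargs.get("metrics", [])))  # 'scores.json'
list(without([None, "out.csv"], None))  # ['out.csv']
first([])  # None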
Example #3
def group_equal(iterable, equal=operator.eq):
    groups = []
    for x in iterable:
        if not contains(list(concat(*groups)), x, equal=equal):
            groups.append([x])
        else:
            i = [i for i, g in enumerate(groups) if contains(g, x, equal=equal)][0]
            groups[i].append(x)
    return groups
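`contains` here is not a funcy function; it appears to be a project-specific membership helper taking a custom equality predicate. A usage sketch with a hypothetical stand-in for it:

import operator
from funcy import concat

def contains(coll, x, equal=operator.eq):
    # Hypothetical helper: membership test under a custom equality predicate.
    return any(equal(y, x) for y in coll)

group_equal([1, 1.0, 2, 'a', 2])  # [[1, 1.0], [2, 2], ['a']]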
Example #4
    def _random_position(self, sigma):
        return first(
            filter(
                lambda p: all(
                    map(lambda b: (b.position - p).length >= 50,
                        concat(self.cars, self.obstacles, self.stars))),
                filter(
                    lambda p: 100 < p.length < 950,
                    repeatedly(lambda: pymunk.Vec2d(
                        self.game_random.gauss(0, sigma), 0).rotated(
                            self.game_random.uniform(0, pi * 2))))))
Example #5
def main():
    (x_train, y_train), (x_validation, y_validation) = load_data()

    model = Model(*juxt(identity, computational_graph(y_train.shape[1]))(Input(
        shape=x_train.shape[1:])))
    model.compile(loss='categorical_crossentropy',
                  optimizer=SGD(momentum=0.9),
                  metrics=['accuracy'])  # The paper says nesterov=True, but the code actually had False...

    model.summary()
    # plot_model(model, to_file='./results/model.png')

    train_data = ImageDataGenerator(featurewise_center=True,
                                    featurewise_std_normalization=True,
                                    width_shift_range=0.125,
                                    height_shift_range=0.125,
                                    horizontal_flip=True)
    validation_data = ImageDataGenerator(featurewise_center=True,
                                         featurewise_std_normalization=True)

    for data in (train_data, validation_data):
        data.fit(x_train)  # Realistically, fitting the featurewise statistics on x_validation isn't an option...

    batch_size = 128
    epoch_size = 200

    results = model.fit_generator(
        train_data.flow(x_train, y_train, batch_size=batch_size),
        steps_per_epoch=x_train.shape[0] // batch_size,
        epochs=epoch_size,
        callbacks=[
            LearningRateScheduler(
                partial(
                    getitem,
                    tuple(
                        take(
                            epoch_size,
                            concat(repeat(0.1, 60), repeat(0.02, 60),
                                   repeat(0.004, 40), repeat(0.0008))))))
        ],
        validation_data=validation_data.flow(x_validation,
                                             y_validation,
                                             batch_size=batch_size),
        validation_steps=x_validation.shape[0] // batch_size)

    with open('./results/history.pickle', 'wb') as f:
        pickle.dump(results.history, f)

    save_model(model, './results/model.h5')

    del model
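The LearningRateScheduler callback above is built from a lookup table: `concat` of several `repeat`s lazily yields the per-epoch learning rates, `take` truncates the infinite tail, and `partial(getitem, ...)` turns the resulting tuple into a function of the epoch index. A minimal sketch, assuming funcy:

from functools import partial
from operator import getitem
from funcy import concat, repeat, take

epoch_size = 5
schedule = partial(getitem, tuple(take(epoch_size, concat(repeat(0.1, 2), repeat(0.02)))))
[schedule(epoch) for epoch in range(epoch_size)]  # [0.1, 0.1, 0.02, 0.02, 0.02]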
Example #6
def get_interesting_repos(g: Github, session: Any) -> List[Repository]:
    repos: List[Repository] = []

    grepos = g.search_repositories(query='stars:>250 forks:>50',
                                   sort='stars',
                                   order='desc')
    records = zip(grepos, fy.repeat('most_stars'))

    grepos = g.search_repositories(query='forks:>5 topic:kaggle-competition',
                                   sort='stars',
                                   order='desc')
    records = fy.concat(records, zip(grepos, fy.repeat('kaggle')))

    grepos = g.search_repositories(query='forks:>5 topic:tensorflow-model',
                                   sort='stars',
                                   order='desc')
    records = fy.concat(records, zip(grepos, fy.repeat('tensorflow-model')))

    grepos = g.search_repositories(
        query='cookiecutterdatascience in:readme forks:>5 stars:>0 fork:true',
        sort='stars',
        order='desc')
    records = fy.concat(records,
                        zip(grepos, fy.repeat('cookiecutterdatascience')))

    for grepo, search_method in tqdm(records):
        repo = (session.query(Repository).filter(
            Repository.id == grepo.full_name).one_or_none())
        if repo is None:
            repo = Repository(
                id=grepo.full_name,
                owner=grepo.owner.login,
                name=grepo.name,
                description=grepo.description,
                search_method=search_method,
            )
            repos.append(repo)

    return repos
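`fy.concat` chains the tagged result streams lazily, so nothing is fetched from the GitHub API until the tqdm loop iterates over `records`. A minimal sketch of tagging streams this way, assuming funcy imported as fy:

import funcy as fy

records = zip(['repo1'], fy.repeat('most_stars'))
records = fy.concat(records, zip(['repo2', 'repo3'], fy.repeat('kaggle')))
list(records)  # [('repo1', 'most_stars'), ('repo2', 'kaggle'), ('repo3', 'kaggle')]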
Example #7
    def _append_car(self, position, angle):
        car = Car(self.space)
        car.set_position_and_angle(position, angle)
        car.crash_energy = 0
        car.score = 0

        for shape in concat(
                car.shapes,
                mapcat(lambda tire: tire.shapes,
                       (car.tire_lf, car.tire_rf, car.tire_lr, car.tire_rr))):
            shape.collision_type = 1

        self.cars.append(car)
Example #8
File: run.py Project: chaytanyasinha/dvc
def _get_file_path(kwargs):
    from dvc.dvcfile import DVC_FILE_SUFFIX, DVC_FILE

    out = first(
        concat(
            kwargs.get("outs", []),
            kwargs.get("outs_no_cache", []),
            kwargs.get("metrics", []),
            kwargs.get("metrics_no_cache", []),
            kwargs.get("outs_persist", []),
            kwargs.get("outs_persist_no_cache", []),
        ))

    return (os.path.basename(os.path.normpath(out)) +
            DVC_FILE_SUFFIX if out else DVC_FILE)
Example #9
    def _create_observation(cls, game):
        def get_values(observation):
            return flatten(concat(
                observation['my_car'].values(),
                mapcat(methodcaller('values'), sorted(observation['other_cars'], key=itemgetter('position_length'))),  # Sort by distance, nearest first. Might it be better to also split front from back?
                mapcat(methodcaller('values'), sorted(observation['obstacles' ], key=itemgetter('position_length'))),  # noqa: E202
                mapcat(methodcaller('values'), sorted(observation['stars'     ], key=itemgetter('position_length')))   # noqa: E202
            ))

        observation = (
            np.array(tuple(get_values(game.create_observation(game.cars[0]))), np.float32) /  # noqa: W504
            np.array(tuple(concat(
                (
                    1000,                   # my_car.position.x
                    1000,                   # my_car.position.y
                    np.pi,                  # my_car.angle
                    np.pi,                  # my_car.velocity_angle
                    MAX_SPEED / FPS,        # my_car.velocity_length
                    np.pi,                  # my_car.steering_angle
                    10,                     # my_car.steering_torque
                    30,                     # my_car.score
                    10 * FPS,               # my_car.crash_energy
                ),
                mapcat(lambda _: (
                    np.pi,                  # other_car.position_angle
                    1000,                   # other_car.position_length
                    np.pi,                  # other_car.angle
                    np.pi,                  # other_car.velocity_angle
                    MAX_SPEED / FPS * 2,    # other_car.velocity_length
                    np.pi,                  # other_car.steering_angle
                    30,                     # other_car.score
                    10 * FPS,               # other_car.crash_energy
                ), range(7)),
                mapcat(lambda _: (
                    np.pi,                  # obstacle.position_angle
                    1000                    # obstacle.position_length
                ), range(OBSTACLE_COUNT)),
                mapcat(lambda _: (
                    np.pi,                  # star.position_angle
                    1000                    # star.position_length
                ), range(STAR_COUNT)),
            )), dtype=np.float32)
        )

        observation[observation < -1] = -1
        observation[observation >  1] =  1  # noqa: E222

        return observation
Example #10
    def step(self):
        self.elapse += 1
        self.actions = []

        for car, player in zip(self.cars, concat(self.players, repeat(None))):
            # Get the action from the player.
            acceleration, braking, steering = player.get_action(
                self.create_observation(car)) if player else (0, 0, 0)

            # Normalize the actions.
            acceleration = self._clip(acceleration, -1, 1)
            braking = self._clip(braking, 0, 1)  # noqa: E221, E241
            steering = self._clip(steering, -1, 1)  # noqa: E221, E241

            # Record the normalized actions.
            self.actions.append((acceleration, braking, steering))

            # A car that has crashed and broken down cannot act until it has been repaired.
            if car.crash_energy > 0:
                car.crash_energy = max(car.crash_energy - 100000, 0)
                continue

            # Add small Gaussian noise to the actions to create some fluctuation. A separate Random instance is used here, because changing where the next star appears would likely make reinforcement learning harder.
            acceleration = self._clip(
                acceleration + self.control_random.gauss(0, 0.05), -1, 1)
            braking = self._clip(braking + self.control_random.gauss(0, 0.05),
                                 0, 1)  # noqa: E221, E241
            steering = self._clip(steering +
                                  self.control_random.gauss(0, 0.05), -1,
                                  1)  # noqa: E221

            # アクションを実行します。
            car.accelerate(acceleration * 20000)
            car.brake(braking * 200000)
            car.steer(steering * 20000)

        self.space.step(1 / FPS)

        for star in filter(lambda star: star.is_catched, self.stars):
            self._reset_star_position(star)
            star.is_catched = False

        return self.elapse >= GAME_PERIOD_SEC * FPS  # The game ends after GAME_PERIOD_SEC.
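Zipping the cars against `concat(self.players, repeat(None))` pads the player list with an endless tail of `None`s, so every car is paired with either a player or `None` and no index bookkeeping is needed. A minimal sketch, assuming funcy:

from funcy import concat, repeat

cars = ['car1', 'car2', 'car3']
players = ['alice']
list(zip(cars, concat(players, repeat(None))))
# [('car1', 'alice'), ('car2', None), ('car3', None)]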
Example #11
def _get_file_path(kwargs):
    from dvc.dvcfile import DVC_FILE, DVC_FILE_SUFFIX

    out = first(
        concat(
            kwargs.get("outs", []),
            kwargs.get("outs_no_cache", []),
            kwargs.get("metrics", []),
            kwargs.get("metrics_no_cache", []),
            kwargs.get("plots", []),
            kwargs.get("plots_no_cache", []),
            kwargs.get("outs_persist", []),
            kwargs.get("outs_persist_no_cache", []),
            kwargs.get("checkpoints", []),
            without([kwargs.get("live", None)], None),
        ))

    return (os.path.basename(os.path.normpath(out)) +
            DVC_FILE_SUFFIX if out else DVC_FILE)
Example #12
    def __init__(self):
        self._seed = None
        self.name = 'SelfDriving'

        self.action_space = gym.spaces.Box(np.array((-1, -1, -1),
                                                    dtype=np.float32),
                                           np.array((1, 1, 1),
                                                    dtype=np.float32),
                                           dtype=np.float32)

        self.observation_space = gym.spaces.Box(
            np.array(
                tuple(
                    concat(
                        (
                            -1,  # my_car.position.x
                            -1,  # my_car.position.y
                            -1,  # my_car.angle
                            -1,  # my_car.velocity_angle
                            0,  # my_car.velocity_length
                            -1,  # my_car.steering_angle
                            -1,  # my_car.steering_torque
                            0,  # my_car.score
                            0,  # my_car.crash_energy
                        ),
                        mapcat(
                            lambda _: (
                                -1,  # other_car.position_angle
                                0,  # other_car.position_length
                                -1,  # other_car.angle
                                -1,  # other_car.velocity_angle
                                0,  # other_car.velocity_length
                                -1,  # other_car.steering_angle
                                0,  # other_car.score
                                0,  # other_car.crash_energy
                            ),
                            range(7)),
                        mapcat(
                            lambda _: (
                                -1,  # obstacle.position_angle
                                0  # obstacle.position_length
                            ),
                            range(OBSTACLE_COUNT)),
                        mapcat(
                            lambda _: (
                                -1,  # star.position_angle
                                0  # star.position_length
                            ),
                            range(STAR_COUNT)),
                    )),
                dtype=np.float32),
            np.array(
                tuple(
                    concat(
                        (
                            1,  # my_car.position.x
                            1,  # my_car.position.y
                            1,  # my_car.angle
                            1,  # my_car.velocity_angle
                            1,  # my_car.velocity_length
                            1,  # my_car.steering_angle
                            1,  # my_car.steering_torque
                            1,  # my_car.score
                            1,  # my_car.crash_energy
                        ),
                        mapcat(
                            lambda _: (
                                1,  # other_car.position_angle
                                1,  # other_car.position_length
                                1,  # other_car.angle
                                1,  # other_car.velocity_angle
                                1,  # other_car.velocity_length
                                1,  # other_car.steering_angle
                                1,  # other_car.score
                                1,  # other_car.crash_energy
                            ),
                            range(7)),
                        mapcat(
                            lambda _: (
                                1,  # obstacle.position_angle
                                1  # obstacle.position_length
                            ),
                            range(OBSTACLE_COUNT)),
                        mapcat(
                            lambda _: (
                                1,  # star.position_angle
                                1  # star.position_length
                            ),
                            range(STAR_COUNT)),
                    )),
                dtype=np.float32),
            dtype=np.float32)

        self.screen = None

        self.reset()
Example #13
    def _process(
        self,
        named_cache,
        remote,
        jobs=None,
        show_checksums=False,
        download=False,
    ):
        logger.debug(
            "Preparing to {} '{}'".format(
                "download data from" if download else "upload data to",
                remote.path_info,
            )
        )

        if download:
            func = partial(
                remote.download,
                dir_mode=self._dir_mode,
                file_mode=self._file_mode,
            )
            status = STATUS_DELETED
            desc = "Downloading"
        else:
            func = remote.upload
            status = STATUS_NEW
            desc = "Uploading"

        if jobs is None:
            jobs = remote.JOBS

        dir_status, file_status, dir_contents = self._status(
            named_cache,
            remote,
            jobs=jobs,
            show_checksums=show_checksums,
            download=download,
        )

        dir_plans = self._get_plans(download, remote, dir_status, status)
        file_plans = self._get_plans(download, remote, file_status, status)

        total = len(dir_plans[0]) + len(file_plans[0])
        if total == 0:
            return 0

        with Tqdm(total=total, unit="file", desc=desc) as pbar:
            func = pbar.wrap_fn(func)
            with ThreadPoolExecutor(max_workers=jobs) as executor:
                if download:
                    fails = sum(executor.map(func, *dir_plans))
                    fails += sum(executor.map(func, *file_plans))
                else:
                    # for uploads, push files first, and any .dir files last

                    file_futures = {}
                    for from_info, to_info, name, checksum in zip(*file_plans):
                        file_futures[checksum] = executor.submit(
                            func, from_info, to_info, name
                        )
                    dir_futures = {}
                    for from_info, to_info, name, dir_checksum in zip(
                        *dir_plans
                    ):
                        wait_futures = {
                            future
                            for file_checksum, future in file_futures.items()
                            if file_checksum in dir_contents[dir_checksum]
                        }
                        dir_futures[dir_checksum] = executor.submit(
                            self._dir_upload,
                            func,
                            wait_futures,
                            from_info,
                            to_info,
                            name,
                        )
                    fails = sum(
                        future.result()
                        for future in concat(
                            file_futures.values(), dir_futures.values()
                        )
                    )

        if fails:
            if download:
                remote.index.clear()
                raise DownloadError(fails)
            raise UploadError(fails)

        if not download:
            # index successfully pushed dirs
            for dir_checksum, future in dir_futures.items():
                if future.result() == 0:
                    file_checksums = dir_contents[dir_checksum]
                    logger.debug(
                        "Indexing pushed dir '{}' with "
                        "'{}' nested files".format(
                            dir_checksum, len(file_checksums)
                        )
                    )
                    remote.index.update([dir_checksum], file_checksums)

        return len(dir_plans[0]) + len(file_plans[0])
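`_dir_upload` is not shown in this example. Judging from the call site, it plausibly waits for the file futures belonging to the directory before pushing the .dir file, and skips the push when any of them failed; a speculative sketch of that pattern, not DVC's actual implementation:

from concurrent.futures import wait

def _dir_upload(func, futures, from_info, to_info, name):
    # Wait until every file contained in this directory has been pushed.
    wait(futures)
    if any(future.result() for future in futures):
        # Some file upload failed; don't advertise the directory as complete.
        return 1
    return func(from_info, to_info, name)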
Example #14
File: local.py Project: shizacat/dvc
    def _process(
        self,
        named_cache,
        remote,
        jobs=None,
        show_checksums=False,
        download=False,
    ):
        logger.debug("Preparing to {} '{}'".format(
            "download data from" if download else "upload data to",
            remote.path_info,
        ))

        if download:
            func = partial(
                remote.download,
                dir_mode=self._dir_mode,
                file_mode=self._file_mode,
            )
            status = STATUS_DELETED
        else:
            func = remote.upload
            status = STATUS_NEW

        if jobs is None:
            jobs = remote.JOBS

        dir_status, file_status, dir_paths = self._status(
            named_cache,
            remote,
            jobs=jobs,
            show_checksums=show_checksums,
            download=download,
        )

        dir_plans = self._get_plans(download, remote, dir_status, status)
        file_plans = self._get_plans(download, remote, file_status, status)

        if len(dir_plans[0]) + len(file_plans[0]) == 0:
            return 0

        with ThreadPoolExecutor(max_workers=jobs) as executor:
            if download:
                fails = sum(executor.map(func, *dir_plans))
                fails += sum(executor.map(func, *file_plans))
            else:
                # for uploads, push files first, and any .dir files last

                file_futures = {}
                for from_info, to_info, name in zip(*file_plans):
                    file_futures[to_info] = executor.submit(
                        func, from_info, to_info, name)
                dir_futures = {}
                for from_info, to_info, name in zip(*dir_plans):
                    wait_futures = {
                        future
                        for file_path, future in file_futures.items()
                        if file_path in dir_paths[to_info]
                    }
                    dir_futures[to_info] = executor.submit(
                        self._dir_upload,
                        func,
                        wait_futures,
                        from_info,
                        to_info,
                        name,
                    )
                fails = sum(future.result() for future in concat(
                    file_futures.values(), dir_futures.values()))

        if fails:
            if download:
                raise DownloadError(fails)
            raise UploadError(fails)

        return len(dir_plans[0]) + len(file_plans[0])
Example #15
    def compute_output_shape(self, input_shape):
        return tuple(concat(butlast(input_shape), (self.output_channel_size,)))
Example #16
def run():
    batch_size = 32
    num_classes = 10
    epochs = 200

    with tf.device("/cpu:0"):
        (x_train, y_train), (x_test, y_test) = cifar10.load_data()
        # Convert class vectors to binary class matrices.
        y_train = to_categorical(y_train, num_classes)
        y_test = to_categorical(y_test, num_classes)

        x_train = x_train.astype('float32', copy=False)
        x_test = x_test.astype('float32', copy=False)
        x_train /= 255
        x_test /= 255

    optimizer = Adam(lr=0.001)

    model = SqueezeNet(classes=num_classes)
    squeezenet_model_file = './sqz_log/model.h5'
    if os.path.exists(squeezenet_model_file):
        model.layers.pop()
        model = Model(name="sqzn_no_softmax",
                      inputs=model.input,
                      outputs=model.layers[-1].output)
        model.load_weights(squeezenet_model_file, by_name=True)
    else:
        # train a new SqueezeNet
        model.compile(loss='categorical_crossentropy',
                      optimizer=optimizer,
                      metrics=['accuracy'])

        # train_data = ImageDataGenerator(featurewise_center=True, featurewise_std_normalization=True,
        #                                 width_shift_range=0.125, height_shift_range=0.125, horizontal_flip=True)
        # validation_data = ImageDataGenerator(featurewise_center=True, featurewise_std_normalization=True)
        train_data = ImageDataGenerator()
        validation_data = ImageDataGenerator()
        for data in (train_data, validation_data):
            data.fit(x_train)

        callbacks = [
            LearningRateScheduler(
                partial(
                    getitem,
                    tuple(
                        take(
                            epochs,
                            concat(repeat(0.01, 1), repeat(0.1, 99),
                                   repeat(0.01, 50), repeat(0.001)))))),
            ModelCheckpoint(filepath=squeezenet_model_file),
            TensorBoard(log_dir="./sqz_log", batch_size=batch_size)
        ]
        results = model.fit_generator(
            train_data.flow(x_train, y_train, batch_size=batch_size),
            steps_per_epoch=x_train.shape[0] // batch_size,
            epochs=epochs,
            callbacks=callbacks,
            validation_data=validation_data.flow(x_test,
                                                 y_test,
                                                 batch_size=batch_size),
            validation_steps=x_test.shape[0] // batch_size)

        with open('./sqz_log/history.pickle', 'wb') as f:
            pickle.dump(results.history, f)
        save_model(model, squeezenet_model_file)

    # Build the siamese architecture
    # model_cut = Model(name="sqzn_no_softmax", inputs=model.input, outputs=model.layers[-1].output)
    # model_cut.load_weights(squeezenet_model_file, by_name=True)
    # with tf.device("/cpu:0"):
    #     model_cut.summary()

    input_shape = x_train.shape[1:]

    im_in1 = Input(shape=input_shape)
    im_in2 = Input(shape=input_shape)
    feat_x1 = model(im_in1)
    feat_x2 = model(im_in2)
    lambda_merge = Lambda(euclidean_distance,
                          output_shape=(1, ))([feat_x1, feat_x2])

    siamese = Model(name="siamese",
                    inputs=[im_in1, im_in2],
                    outputs=lambda_merge)
    with tf.device("/cpu:0"):
        siamese.summary()

    optimizer = RMSprop()  # SGD(momentum=0.9)
    siamese.compile(optimizer=optimizer,
                    loss=contrastive_loss,
                    metrics=[accuracy])

    def make_img_pair(identical, from_train):
        """Select an image pair."""
        label = np.random.randint(0, num_classes)
        if identical:
            if from_train:
                idx = np.nonzero(y_train[:, label] == 1)[0]
            else:
                idx = np.nonzero(y_test[:, label] == 1)[0]

            # pick two distinct positions randomly, then map them back to
            # dataset indexes
            id1 = np.random.randint(0, idx.shape[0])
            id2 = np.random.randint(0, idx.shape[0])
            while id1 == id2:
                id2 = np.random.randint(0, idx.shape[0])
            id1, id2 = idx[id1], idx[id2]
        else:
            if from_train:
                idx1 = np.nonzero(y_train[:, label] == 1)[0]
                idx2 = np.nonzero(y_train[:,
                                          (label + 1) % num_classes] == 1)[0]
            else:
                idx1 = np.nonzero(y_test[:, label] == 1)[0]
                idx2 = np.nonzero(y_test[:,
                                         (label + 1) % num_classes] == 1)[0]

            # pick one dataset index from each class randomly
            id1 = idx1[np.random.randint(0, idx1.shape[0])]
            id2 = idx2[np.random.randint(0, idx2.shape[0])]

        if from_train:
            return np.array([x_train[id1], x_train[id2]])
        else:
            return np.array([x_test[id1], x_test[id2]])

    def generator(from_train):
        while True:
            X = [[None, None]] * batch_size
            y = [[None]] * batch_size
            indexes = np.arange(batch_size)
            identical = True
            for i in indexes:
                X[i] = make_img_pair(identical, from_train)
                y[i] = [1 if identical else 0]
                identical = not identical
            np.random.shuffle(indexes)
            X = np.asarray(X)[indexes]
            y = np.asarray(y)[indexes]
            # print("generator: from_train:", from_train, " - X:", X.shape, "- y:", y.shape)
            yield [X[:, 0], X[:, 1]], y

    siamese_model_file = "./siam_log/siamese.h5"
    epochs = 100
    callbacks = [
        LearningRateScheduler(
            partial(
                getitem,
                tuple(
                    take(
                        epochs,
                        concat(repeat(0.01, 1), repeat(0.1, 99),
                               repeat(0.01, 50), repeat(0.001)))))),
        ModelCheckpoint(filepath=siamese_model_file),
        TensorBoard(log_dir="./siam_log", batch_size=batch_size)
    ]
    outputs = siamese.fit_generator(
        generator(from_train=True),
        initial_epoch=0,
        steps_per_epoch=x_train.shape[0] // batch_size,
        epochs=epochs,
        validation_data=generator(from_train=False),
        validation_steps=x_test.shape[0] // batch_size,
        callbacks=callbacks)

    with open('./siam_log/history.pickle', 'wb') as f:
        pickle.dump(outputs.history, f)
    save_model(siamese, siamese_model_file)
Example #17
def main():
    import os
    with tf.device("/cpu:0"):
        (x_train, y_train), (x_validation, y_validation) = load_data()

    batch_size = 32
    epochs = 200
    input_shape = Input(shape=x_train.shape[1:])
    model_file = './results/model.h5'
    if os.path.exists(model_file):
        model = load_model(model_file)
        # with tf.device("/cpu:0"):
        #     validation_data = ImageDataGenerator(featurewise_center=True, featurewise_std_normalization=True)
    else:
        model = Model(*juxt(identity, computational_graph(y_train.shape[1]))(
            input_shape))
        model.compile(loss='categorical_crossentropy',
                      optimizer=SGD(momentum=0.9),
                      metrics=['accuracy'])

        with tf.device("/cpu:0"):
            train_data = ImageDataGenerator(featurewise_center=True,
                                            featurewise_std_normalization=True,
                                            width_shift_range=0.125,
                                            height_shift_range=0.125,
                                            horizontal_flip=True)
            validation_data = ImageDataGenerator(
                featurewise_center=True, featurewise_std_normalization=True)

        for data in (train_data, validation_data):
            data.fit(x_train)  # Realistically, fitting the featurewise statistics on x_validation isn't an option...

        results = model.fit_generator(
            train_data.flow(x_train, y_train, batch_size=batch_size),
            steps_per_epoch=x_train.shape[0] // batch_size,
            epochs=epochs,
            callbacks=[
                LearningRateScheduler(
                    partial(
                        getitem,
                        tuple(
                            take(
                                epochs,
                                concat(repeat(0.01, 1), repeat(0.1, 99),
                                       repeat(0.01, 50), repeat(0.001))))))
            ],
            validation_data=validation_data.flow(x_validation,
                                                 y_validation,
                                                 batch_size=batch_size),
            validation_steps=x_validation.shape[0] // batch_size)

        with open('./results/history.pickle', 'wb') as f:
            pickle.dump(results.history, f)
        save_model(model, model_file)

    try:
        with tf.device("/cpu:0"):
            # model.summary()
            # print("=== AFTER POPPING THE LAST ===")
            model.layers.pop()
            # model.summary()
            # generate_confusion_matrix(model, x_validation, y_validation, batch_size)
            # plot_model(model, to_file='./results/model.png')
    except Exception as ex:
        print("plot_model failed with error:", repr(ex), "\nMoving on...")

    siamese(input_shape, model)
Example #18
File: local.py Project: VanaMartin/dvc
    def _process(
        self,
        named_cache,
        remote,
        jobs=None,
        show_checksums=False,
        download=False,
    ):
        logger.debug("Preparing to {} '{}'".format(
            "download data from" if download else "upload data to",
            remote.tree.path_info,
        ))

        if download:
            func = partial(
                _log_exceptions(remote.tree.download, "download"),
                dir_mode=self.tree.dir_mode,
                file_mode=self.tree.file_mode,
            )
            status = STATUS_DELETED
            desc = "Downloading"
        else:
            func = _log_exceptions(remote.tree.upload, "upload")
            status = STATUS_NEW
            desc = "Uploading"

        if jobs is None:
            jobs = remote.tree.JOBS

        dir_status, file_status, dir_contents = self._status(
            named_cache,
            remote,
            jobs=jobs,
            show_checksums=show_checksums,
            download=download,
        )

        dir_plans, _ = self._get_plans(download, remote, dir_status, status)
        file_plans, missing_files = self._get_plans(download, remote,
                                                    file_status, status)

        total = len(dir_plans[0]) + len(file_plans[0])
        if total == 0:
            return 0

        with Tqdm(total=total, unit="file", desc=desc) as pbar:
            func = pbar.wrap_fn(func)
            with ThreadPoolExecutor(max_workers=jobs) as executor:
                if download:
                    from_infos, to_infos, names, _ = (
                        d + f for d, f in zip(dir_plans, file_plans))
                    fails = sum(executor.map(func, from_infos, to_infos,
                                             names))
                else:
                    # for uploads, push files first, and any .dir files last

                    file_futures = {}
                    for from_info, to_info, name, hash_ in zip(*file_plans):
                        file_futures[hash_] = executor.submit(
                            func, from_info, to_info, name)
                    dir_futures = {}
                    for from_info, to_info, name, dir_hash in zip(*dir_plans):
                        # if for some reason a file contained in this dir is
                        # missing both locally and in the remote, we want to
                        # push whatever file content we have, but should not
                        # push .dir file
                        for file_hash in missing_files:
                            if file_hash in dir_contents[dir_hash]:
                                logger.debug(
                                    "directory '%s' contains missing files,"
                                    "skipping .dir file upload",
                                    name,
                                )
                                break
                        else:
                            wait_futures = {
                                future
                                for file_hash, future in file_futures.items()
                                if file_hash in dir_contents[dir_hash]
                            }
                            dir_futures[dir_hash] = executor.submit(
                                self._dir_upload,
                                func,
                                wait_futures,
                                from_info,
                                to_info,
                                name,
                            )
                    fails = sum(future.result() for future in concat(
                        file_futures.values(), dir_futures.values()))

        if fails:
            if download:
                remote.index.clear()
                raise DownloadError(fails)
            raise UploadError(fails)

        if not download:
            # index successfully pushed dirs
            for dir_hash, future in dir_futures.items():
                if future.result() == 0:
                    file_hashes = dir_contents[dir_hash]
                    logger.debug("Indexing pushed dir '{}' with "
                                 "'{}' nested files".format(
                                     dir_hash, len(file_hashes)))
                    remote.index.update([dir_hash], file_hashes)

        return len(dir_plans[0]) + len(file_plans[0])
Example #19
def export_twitterUser_emotion_analysis(db='UserPost', collection="user_post"):
    client = MongoClient()
    db_tweets = client['%s' % db]
    collect_tweets = db_tweets['%s' % collection]
    db_user = client['Twitter']
    collect_user = db_user['twitter']
    from funcy import flatten, concat, group_by
    # Extract the set of users from the existing posts.
    pipeline = [
        {"$match": {
            "site": "twitter"
        }},
        {"$group": {
            "_id": "$user.id_str",
            "count": {"$sum": 1}
        }}
    ]
    result = list(collect_tweets.aggregate(pipeline))
    formatDocs = []
    for id in list(map(lambda x: x['_id'], result)):
        # Look up the stored user record for this id.
        user_for_id = collect_user.find_one({'id_str': id})
        # Count all posts by this user.
        user_for_id_tweets_count = collect_tweets.count({"user.id_str": id, "site": 'twitter'})
        # print(user_for_id_tweets_count)
        if user_for_id_tweets_count > 0:
            aggregate_for_user_tweets = collect_tweets.aggregate([
                {
                    "$match": {
                        "user.id_str": id,
                        "site": 'twitter'
                    }
                },
                {"$group": {
                    "_id": "$user.id_str",
                    "text":{"$push":"$text"}
                }}
            ])

            user_tweets_texts = list(aggregate_for_user_tweets)[0]
            # print(len(user_tweets_texts['text']))

            # print(texts)
            if len(user_tweets_texts['text']) > 300:
                ops = [{'url': 'https://tone-analyzer-demo.ng.bluemix.net/api/tone',
                        'data': ''.join(user_tweets_texts['text'][i:i + 300])}
                       for i in range(0, len(user_tweets_texts['text']), 300)]
            else:
                texts = ''.join(user_tweets_texts['text'])
                ops = [{'url': 'https://tone-analyzer-demo.ng.bluemix.net/api/tone',
                        'data': texts}]
            # print(ops)
            analyzer = asynchronous_request_facebook_api(ops)
            # print(analyzer[0])
            final_result = list(concat(flatten(map(lambda x: x['document_tone']['tones'], analyzer))))
            group_result = group_by(lambda x: x['tone_name'], final_result)
            
            formatDocs.append({})
            print(len(formatDocs))
        else:
            print(id)
    df2 = pd.DataFrame(formatDocs)
    df2 = df2.applymap(lambda x: x.encode('unicode_escape').
                       decode('utf-8') if isinstance(x, str) else x)
    # print(docs)
    df2.to_excel('./export_data/%s/user_summary/%s.xlsx' % ("twitter", "twitter_user_summary"),
                 sheet_name='Sheet1')
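funcy's `group_by` buckets the flattened tone entries by tone name, returning a defaultdict of lists. A minimal sketch, assuming funcy:

from funcy import concat, group_by

tones = list(concat([{'tone_name': 'Joy'}], [{'tone_name': 'Fear'}, {'tone_name': 'Joy'}]))
group_by(lambda x: x['tone_name'], tones)
# defaultdict: {'Joy': [{'tone_name': 'Joy'}, {'tone_name': 'Joy'}], 'Fear': [{'tone_name': 'Fear'}]}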
Example #20
def main():

	#
	# CIFAR-10
	#
	cifar = CIFAR_10()

	#
	# x_train.shape		= (50000, 32, 32, 3)
	# y_train.shape		= (50000, 10)
	# x_validation.shape= (10000, 32, 32, 3)
	# y_validation.shape= (10000, 10)
	#
	data			= cifar.load_data()
	x_train			= data['training_data']
	y_train			= data['training_label']
	x_validation	= data['validation_data']
	y_validation	= data['validation_label']
	print("x_train.shape=", x_train.shape)
	print("y_train.shape=", y_train.shape)
	print("x_validation.shape=", x_validation.shape)
	print("y_validation.shape=", y_validation.shape)


	#
	# SqueezeNet
	#
	squeeze = SqueezeNet()
	i = Input(shape=x_train.shape[1:])
	o = squeeze.make_graph(y_train.shape[1])(i)

	#
	# model
	#
	model = Model(inputs=i, outputs=o)

	#
	# compile model
	#
	model.compile(
			loss='categorical_crossentropy',
			optimizer=SGD(momentum=0.9),
			metrics=['accuracy']
			)

	#
	# generator in ImageDataGenerator by keras
	#
	train_data = ImageDataGenerator(
			featurewise_center=True,
			featurewise_std_normalization=True,
			width_shift_range=0.125,
			height_shift_range=0.125,
			horizontal_flip=True
			)
	validation_data = ImageDataGenerator(
			featurewise_center=True,
			featurewise_std_normalization=True
			)
	for data in (train_data, validation_data):
		data.fit(x_train)  # Realistically, fitting the featurewise statistics on x_validation isn't an option...

	#
	# check pickle
	#
	# file_pickle = "./results/history.pickle"
	model_path		= "./results"
	model_file  	= model_path + "/model.h5"
	model_weights	= model_path + "/weights.h5"
	print(f"models: model={model_file}, weight={model_weights}" )
	# print(f"models: arch  =", options['file_arch'])
	# print(f"models: weight=", options['model_weights'])
	if not path.exists(model_path):
		os.mkdir(model_path)

	#
	# print model
	#
	from lib_utils import print_model_summary
	print_model_summary(model, "./results/network.txt", "model.png")


	#
	# check model, if not exist trained model, we have to make trained parameters for model.
	#
	if not path.exists(model_file):

		#
		# fit generator
		#
		batch_size = 1000	# 100
		epochs     = 1		# 200
		results = model.fit_generator(
			#
			# generate train data (ImageDataGenerator by keras)
			#
			train_data.flow(x_train, y_train, batch_size=batch_size),

			#
			# steps/epoch
			#
			steps_per_epoch=x_train.shape[0] // batch_size,

			#
			# epoch
			#
			epochs=epochs,

			#
			# callbacks
			#
			callbacks = [
				LearningRateScheduler(
					partial(
						getitem,
						tuple(take(epochs, concat(repeat(0.010, 1), repeat(0.100, 99), repeat(0.010, 50), repeat(0.001))))
						)
					)
				],
			#
			# generate validation data (ImageDataGenerator by keras)
			#
			validation_data=validation_data.flow(x_validation, y_validation, batch_size=batch_size),

			#
			# validation step
			#
			validation_steps=x_validation.shape[0] // batch_size,

			#
			# max_queue_size
			#
			max_queue_size=4
			)

		#
		# save keras model
		#
		from lib_utils import save_model_by_keras
		save_model_by_keras(model, model_file, model_weights)

		# del model

	else:
		#
		# load keras model
		#
		if path.exists(model_file):
			print("load model...")
			from lib_utils import load_model_by_keras
			model = load_model_by_keras(model_file, model_weights)
			print("load model...done")
		else:
			print("load model...: not found=", model_file, model_weights )

	#
	# check version
	#
	from lib_utils import get_version
	get_version(model_file)

		
	#
	# evaluate
	#
	"""
	print("model evaluate...")
	score = lmodel.evaluate(x_validation, y_validation, verbose=1)
	print("model evaluate: loss=", score[0])
	print("model evaluate: accuracy=", score[1])
	"""

	#
	# prediction
	#
	print("model prediction...")
	# lmodel.predict(y_validation.shape[1])
	# lmodel.predict(x_train.shape[1:])
	print("x_validation.shape=", x_validation.shape)
	print("x_validation.shape[0]=", x_validation.shape[0])
	print("x_validation.shape[1]=", x_validation.shape[1])
	print("x_validation.shape[2]=", x_validation.shape[2])
	print("x_validation.shape[3]=", x_validation.shape[3])
	i0 = x_validation[0:1]
	i1 = x_validation.reshape(10000,32,32,3)
	i2 = i1[0]
	print("i0.shape=", i0.shape)
	print("i1.shape=", i1.shape)
	print("i2.shape=", i2.shape)
	# lmodel.predict(i0, verbose=1)
	predo = model.predict(x_validation, verbose=1)[0]
	print(predo)

	"""
	"""
	preds = model.predict(x_validation, verbose=1)

	# for pre in preds:
	# 	y = pre.argmax()
	# 	print("label: ", y_validation[y])

	print('done')