Example #1
    def __init__(self):
        self.replay_buffer = replaybuffer.ReplayBuffer(5000)

        self.env = PendulumEnv()

        observation = self.env.reset()

        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

        # Instantiate the models
        state_size = 3
        action_size = 1
        self.state_dreamer = models.StateDreamer(state_size, action_size)
        self.reward_dreamer = models.RewardDreamer(state_size)
        self.actor = models.Actor(state_size, action_size)
        self.critic = models.Critic(state_size, action_size)

        # Move the models to the device
        self.state_dreamer.to(self.device)
        self.reward_dreamer.to(self.device)
        self.actor.to(self.device)
        self.critic.to(self.device)

        # Create an optimiser for each model
        self.state_dreamer_optimizer = optim.SGD(
            self.state_dreamer.parameters(), lr=0.01, momentum=0.9)
        self.reward_dreamer_optimizer = optim.SGD(
            self.reward_dreamer.parameters(), lr=0.01, momentum=0.9)
        self.actor_optimizer = optim.SGD(self.actor.parameters(),
                                         lr=0.0001,
                                         momentum=0.9)
        self.critic_optimizer = optim.SGD(self.critic.parameters(),
                                          lr=0.001,
                                          momentum=0.9)
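The paired dreamer models imply a model-based loop: the actor can be trained on imagined rollouts rather than only on replayed transitions. A minimal sketch of such a rollout, assuming StateDreamer maps (state, action) to the next state and RewardDreamer maps a state to a reward; both signatures are assumptions, since the model definitions are not shown:

    def imagine_rollout(self, state, horizon=15):
        """Hypothetical helper: roll the learned models forward from `state`.

        Assumes state_dreamer(state, action) -> next_state and
        reward_dreamer(state) -> reward; neither signature appears above.
        """
        states, rewards = [], []
        for _ in range(horizon):
            action = self.actor(state)
            state = self.state_dreamer(state, action)
            rewards.append(self.reward_dreamer(state))
            states.append(state)
        return states, rewards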
Example #2
def post_new_actors(payload):
    req_data = json.loads(request.data.decode("utf-8"))
    try:
        model_format = models.Actor(name=req_data["name"],
                                    age=req_data["age"],
                                    gender=req_data["gender"])
        models.Actor.insert(model_format)
    except Exception as e:
        print(e)
        print("New actor failed to post to database.")
        # Surface the failure instead of falling through to a 200.
        return json.dumps({"error": "new actor failed to post"}), 500
    return json.dumps(req_data), 200
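For reference, a call against this endpoint might look like the following; the URL and route are assumptions, since the route decorator is not shown:

import requests  # hypothetical client; the endpoint URL is an assumption

resp = requests.post("http://localhost:5000/actors",
                     json={"name": "Ada", "age": 36, "gender": "female"})
print(resp.status_code, resp.json())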
Example #3
    def scrape_site(self, nzaa_id):
        """All the functions to update a site record.

        Log in to ArchSite (if necessary). Go to the record, get the
        source, process this into an extract, then process this into
        the site record structures.

        Extract values for actor, feature and period. If no record for
        each value is found, create one and add this site to its
        relations table.

        Compute an MD5 checksum for the values and compare this with
        the stored version.

        Check for the existence of a site record, and an update0 record.

        Save the records into the db appropriately.

        """

        message = "Scraping site " + nzaa_id
        if self.VERBOSE:
            print message

        s = None
        u = None
        update_id = nzaa_id + '-0'
        html = None
        extract = None

        now = datetime.datetime.now(pytz.timezone('NZ'))
        unchanged = False

        if self.login():
            html = self.visit_site(nzaa_id)
            if html:
                extract = self.extract_values(html)

        if not extract:
            message = "No site record found for " + nzaa_id
            if self.VERBOSE:
                print message
            return None

        self.extract = extract

        # Create an MD5 checksum of the extracted values. Encoding each
        # value as UTF-8 copes with the macrons, dashes and smart quotes
        # that appear in ArchSite records.
        md5 = hashlib.md5()
        for k in sorted(extract.keys()):
            try:
                md5.update(unicode(extract[k]).encode('utf-8') + "\n")
            except UnicodeError as e:
                line = nzaa_id + " " + str(e) + "\n"
                dumpfile = "/home/malcolm/tmp/scrape_dumps.txt"
                with open(dumpfile, "a") as f:
                    f.write(line)

        digest = md5.hexdigest()

        # Build the structures necessary to update the database tables.
        (new_site, site, update0) = self.process_extract(extract)

        # Try finding a site record.
        try:
            s = models.Site.objects.get(nzaa_id=nzaa_id)
            if digest == s.digest:
                unchanged = True
                s.extracted = now
                message = ("Record unchanged since " +
                           unicode(s.last_change.replace(microsecond=0)))
                log = (
                    '127.0.0.1',
                    'scrape',
                    message,
                )
                if self.VERBOSE:
                    print message

            else:
                message = "Updating existing site record for " + nzaa_id
                s.digest = digest
                s.last_change = now
                self.logging(message)
                if self.VERBOSE:
                    print message

                log = ('127.0.0.1', 'scrape',
                       "Updating record from ArchSite.")
                s.__dict__.update(**site)

        except models.Site.DoesNotExist:
            message = "Creating site record for " + nzaa_id
            self.logging(message)
            if self.VERBOSE:
                print message
            data = new_site.copy()
            s = models.Site(**data)

            s.created = datetime.datetime.now(pytz.utc)
            s.created_by = 'scrape'
            s.digest = digest
            s.last_change = now

            log = (
                '127.0.0.1',
                'scrape',
                "Creating record from from ArchSite.",
            )

            point = Point(s.easting, s.northing, srid=2193)
            if not s.region:
                s.region = s.get_region()

            if not s.tla:
                s.tla = s.get_tla()

            if not s.island:
                s.island = s.get_island()

            if self.VERBOSE:
                print "Saving site record", s

        s.save(log=log)

        # If there are no changes, then we have done all we have to do.
        if unchanged:
            return None

        # Deal with actors.
        sourcenames = s.list_actors()
        for sourcename in sourcenames:
            try:
                a = models.Actor.objects.get(sourcename=sourcename)
            except models.Actor.DoesNotExist:
                a = models.Actor(sourcename=sourcename)
                a.save()
            a.sites.add(s)

        # Deal with features.
        features = s.list_features()
        if features:
            for feature in features:
                try:
                    f = models.Feature.objects.get(name=feature)
                except models.Feature.DoesNotExist:
                    f = models.Feature(name=feature)
                    f.save()
                f.sites.add(s)

        # Deal with periods.
        periods = s.list_periods()
        for period in periods:
            try:
                p = models.Periods.objects.get(name=period)
            except models.Periods.DoesNotExist:
                p = models.Periods(name=period)
                p.save()
            p.sites.add(s)

        update0['site'] = s
        update0['update_id'] = update_id

        try:
            u = models.Update.objects.get(update_id=update_id)
            u.nzaa_id = s
            message = "Updating existing record for " + update_id
            self.logging(message)
            if self.VERBOSE:
                print message
            u.__dict__.update(**update0)

        except models.Update.DoesNotExist:
            message = "Creating update record for " + update_id
            self.logging(message)
            if self.VERBOSE:
                print message
            u = models.Update(**update0)
            u.nzaa_id = s
            u.created = datetime.datetime.now(pytz.utc)
            u.created_by = 'scrape'

        u.save(log=log)
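Each of the actor/feature/period blocks above hand-writes Django's get-or-create pattern; with the default manager, each block could collapse to a single call (a sketch, not the author's code):

a, _created = models.Actor.objects.get_or_create(sourcename=sourcename)
a.sites.add(s)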
Example #4
def main():
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # * Step 1: init data folders
    print("init data folders")

    # * Init character folders for dataset construction
    metatrain_character_folders, metatest_character_folders = tg.mini_imagenet_folders(
    )

    # * Step 2: init neural networks
    print("init neural networks")

    feature_encoder = models.CNNEncoder()
    actor = models.Actor(FEATURE_DIM, RELATION_DIM, CLASS_NUM)
    critic = models.Critic(FEATURE_DIM, RELATION_DIM)

    #feature_encoder = torch.nn.DataParallel(feature_encoder)
    #actor = torch.nn.DataParallel(actor)
    #critic = torch.nn.DataParallel(critic)

    feature_encoder.train()
    actor.train()
    critic.train()

    feature_encoder.apply(models.weights_init)
    actor.apply(models.weights_init)
    critic.apply(models.weights_init)

    feature_encoder.to(device)
    actor.to(device)
    critic.to(device)

    agent = a2cAgent.A2CAgent(actor, critic, GAMMA, ENTROPY_WEIGHT,
                              FEATURE_DIM, RELATION_DIM, CLASS_NUM, device)

    #feature_encoder.eval()
    #relation_network.eval()

    feature_encoder_path = (f"./models/miniimagenet_feature_encoder_"
                            f"{CLASS_NUM}way_{SAMPLE_NUM_PER_CLASS}shot.pkl")
    if os.path.exists(feature_encoder_path):
        feature_encoder.load_state_dict(torch.load(feature_encoder_path))
        print("load feature encoder success")

    actor_path = (f"./models/miniimagenet_actor_network_"
                  f"{CLASS_NUM}way_{SAMPLE_NUM_PER_CLASS}shot.pkl")
    if os.path.exists(actor_path):
        actor.load_state_dict(torch.load(actor_path))
        print("load actor network success")

    critic_path = (f"./models/miniimagenet_critic_network_"
                   f"{CLASS_NUM}way_{SAMPLE_NUM_PER_CLASS}shot.pkl")
    if os.path.exists(critic_path):
        critic.load_state_dict(torch.load(critic_path))
        print("load critic network success")

    max_accuracy_list = []
    mean_accuracy_list = []
    for episode in range(1):
        total_accuracy = []
        for i in range(TEST_EPISODE):
            # * Generate env
            env_states_list = []
            env_labels_list = []
            number_of_query_image = 15
            task = tg.MiniImagenetTask(metatest_character_folders, CLASS_NUM,
                                       SAMPLE_NUM_PER_CLASS,
                                       number_of_query_image)
            sample_dataloader = tg.get_mini_imagenet_data_loader(
                task,
                num_per_class=SAMPLE_NUM_PER_CLASS,
                split="train",
                shuffle=False)
            test_dataloader = tg.get_mini_imagenet_data_loader(
                task,
                num_per_class=number_of_query_image,
                split="test",
                shuffle=True)

            sample_images, sample_labels = next(iter(sample_dataloader))
            test_images, test_labels = next(iter(test_dataloader))

            sample_images, sample_labels = sample_images.to(
                device), sample_labels.to(device)
            test_images, test_labels = test_images.to(device), test_labels.to(
                device)

            # * calculate features
            sample_features = feature_encoder(sample_images)
            sample_features = sample_features.view(CLASS_NUM,
                                                   SAMPLE_NUM_PER_CLASS,
                                                   FEATURE_DIM, 19, 19)
            sample_features = torch.sum(sample_features, 1).squeeze(1)
            test_features = feature_encoder(test_images)

            # * calculate relations
            # * each query sample is paired with every class feature map to
            # * form the (batch, FEATURE_DIM * 2, 19, 19) relation input

            sample_features_ext = sample_features.unsqueeze(0).repeat(
                number_of_query_image * CLASS_NUM, 1, 1, 1, 1)
            test_features_ext = test_features.unsqueeze(0).repeat(
                CLASS_NUM, 1, 1, 1, 1)
            test_features_ext = torch.transpose(test_features_ext, 0, 1)

            relation_pairs = torch.cat(
                (sample_features_ext, test_features_ext),
                2).view(-1, FEATURE_DIM * 2, 19, 19)
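            # Worked shape check (assuming FEATURE_DIM = 64, CLASS_NUM = 5,
            # SAMPLE_NUM_PER_CLASS = 5, number_of_query_image = 15):
            #   sample_features     : (5, 64, 19, 19) after summing over shots
            #   sample_features_ext : (75, 5, 64, 19, 19)
            #   test_features_ext   : (75, 5, 64, 19, 19) after the transpose
            #   relation_pairs      : (375, 128, 19, 19)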
            env_states_list.append(relation_pairs)
            env_labels_list.append(test_labels)

            test_env = a2cAgent.env(env_states_list, env_labels_list)
            rewards = agent.test(test_env)
            test_accuracy = rewards / len(test_labels)
            print(test_accuracy)
            total_accuracy.append(test_accuracy)

        mean_accuracy, conf_int = mean_confidence_interval(total_accuracy)
        print(f"Total accuracy : {mean_accuracy:.4f}")
        print(f"confidence interval : {conf_int:.4f}")
Example #5
def main():
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # * Step 1: init data folders
    print("init data folders")

    # * Init character folders for dataset construction
    metatrain_character_folders, metatest_character_folders = tg.mini_imagenet_folders(
    )

    # * Step 2: init neural networks
    print("init neural networks")

    feature_encoder = models.CNNEncoder()
    actor = models.Actor(FEATURE_DIM, RELATION_DIM, CLASS_NUM)
    critic = models.Critic(FEATURE_DIM, RELATION_DIM)

    #feature_encoder = torch.nn.DataParallel(feature_encoder)
    #actor = torch.nn.DataParallel(actor)
    #critic = torch.nn.DataParallel(critic)

    feature_encoder.train()
    actor.train()
    critic.train()

    feature_encoder.apply(models.weights_init)
    actor.apply(models.weights_init)
    critic.apply(models.weights_init)

    feature_encoder.to(device)
    actor.to(device)
    critic.to(device)

    cross_entropy = nn.CrossEntropyLoss()

    feature_encoder_optim = torch.optim.Adam(feature_encoder.parameters(),
                                             lr=LEARNING_RATE)
    feature_encoder_scheduler = StepLR(feature_encoder_optim,
                                       step_size=10000,
                                       gamma=0.5)

    actor_optim = torch.optim.Adam(actor.parameters(), lr=2.5 * LEARNING_RATE)
    actor_scheduler = StepLR(actor_optim, step_size=10000, gamma=0.5)

    critic_optim = torch.optim.Adam(critic.parameters(),
                                    lr=2.5 * LEARNING_RATE * 10)
    critic_scheduler = StepLR(critic_optim, step_size=10000, gamma=0.5)

    agent = a2cAgent.A2CAgent(actor, critic, GAMMA, ENTROPY_WEIGHT, CLASS_NUM,
                              device)

    feature_encoder_path = (f"./models/miniimagenet_feature_encoder_"
                            f"{CLASS_NUM}way_{SAMPLE_NUM_PER_CLASS}shot.pkl")
    if os.path.exists(feature_encoder_path):
        feature_encoder.load_state_dict(torch.load(feature_encoder_path))
        print("load feature encoder success")

    actor_path = (f"./models/miniimagenet_actor_network_"
                  f"{CLASS_NUM}way_{SAMPLE_NUM_PER_CLASS}shot.pkl")
    if os.path.exists(actor_path):
        actor.load_state_dict(torch.load(actor_path))
        print("load actor network success")

    critic_path = (f"./models/miniimagenet_critic_network_"
                   f"{CLASS_NUM}way_{SAMPLE_NUM_PER_CLASS}shot.pkl")
    if os.path.exists(critic_path):
        critic.load_state_dict(torch.load(critic_path))
        print("load critic network success")

    # * Step 3: train
    print("Training...")

    last_accuracy = 0.0
    mbal_loss_list = []
    mbcl_loss_list = []
    loss_list = []
    number_of_query_image = 15
    for episode in range(EPISODE):
        #print(f"EPISODE : {episode}")
        policy_losses = []
        value_losses = []

        for meta_batch in range(META_BATCH_RANGE):
            meta_env_states_list = []
            meta_env_labels_list = []
            for inner_batch in range(INNER_BATCH_RANGE):
                # * Generate environment
                env_states_list = []
                env_labels_list = []
                for env in range(ENV_LENGTH):
                    task = tg.MiniImagenetTask(metatrain_character_folders,
                                               CLASS_NUM, SAMPLE_NUM_PER_CLASS,
                                               number_of_query_image)
                    sample_dataloader = tg.get_mini_imagenet_data_loader(
                        task,
                        num_per_class=SAMPLE_NUM_PER_CLASS,
                        split="train",
                        shuffle=False)
                    batch_dataloader = tg.get_mini_imagenet_data_loader(
                        task, num_per_class=5, split="test", shuffle=True)

                    samples, sample_labels = next(iter(sample_dataloader))
                    samples, sample_labels = samples.to(
                        device), sample_labels.to(device)
                    for batches, batch_labels in batch_dataloader:
                        batches, batch_labels = batches.to(
                            device), batch_labels.to(device)

                        inner_sample_features = feature_encoder(samples)
                        inner_sample_features = inner_sample_features.view(
                            CLASS_NUM, SAMPLE_NUM_PER_CLASS, FEATURE_DIM, 19,
                            19)
                        inner_sample_features = torch.sum(
                            inner_sample_features, 1).squeeze(1)

                        inner_batch_features = feature_encoder(batches)
                        inner_sample_feature_ext = inner_sample_features.unsqueeze(
                            0).repeat(5 * CLASS_NUM, 1, 1, 1, 1)
                        inner_batch_features_ext = inner_batch_features.unsqueeze(
                            0).repeat(CLASS_NUM, 1, 1, 1, 1)
                        inner_batch_features_ext = torch.transpose(
                            inner_batch_features_ext, 0, 1)

                        inner_relation_pairs = torch.cat(
                            (inner_sample_feature_ext,
                             inner_batch_features_ext),
                            2).view(-1, FEATURE_DIM * 2, 19, 19)
                        env_states_list.append(inner_relation_pairs)
                        env_labels_list.append(batch_labels)

                inner_env = a2cAgent.env(env_states_list, env_labels_list)
                agent.train(inner_env, inner_update=True)

            for meta_env in range(META_ENV_LENGTH):
                task = tg.MiniImagenetTask(metatrain_character_folders,
                                           CLASS_NUM, SAMPLE_NUM_PER_CLASS,
                                           number_of_query_image)
                sample_dataloader = tg.get_mini_imagenet_data_loader(
                    task,
                    num_per_class=SAMPLE_NUM_PER_CLASS,
                    split="train",
                    shuffle=False)
                batch_dataloader = tg.get_mini_imagenet_data_loader(
                    task,
                    num_per_class=number_of_query_image,
                    split="test",
                    shuffle=True)
                # * num_per_class : number of query images

                # * sample datas
                samples, sample_labels = next(iter(sample_dataloader))
                samples, sample_labels = samples.to(device), sample_labels.to(
                    device)
                # * Generate env for meta update
                batches, batch_labels = next(iter(batch_dataloader))
                # * init dataset
                # * sample_dataloader provides the support samples to compare
                # * against; batch_dataloader provides the query batches
                batches, batch_labels = batches.to(device), batch_labels.to(
                    device)

                # * calculates features
                #feature_encoder.weight = feature_fast_weights

                sample_features = feature_encoder(samples)
                sample_features = sample_features.view(CLASS_NUM,
                                                       SAMPLE_NUM_PER_CLASS,
                                                       FEATURE_DIM, 19, 19)
                sample_features = torch.sum(sample_features, 1).squeeze(1)
                batch_features = feature_encoder(batches)

                # * calculate relations
                # * each query sample is paired with every class feature map
                # * to form the (batch, FEATURE_DIM * 2, 19, 19) relation input
                sample_features_ext = sample_features.unsqueeze(0).repeat(
                    number_of_query_image * CLASS_NUM, 1, 1, 1, 1)
                batch_features_ext = batch_features.unsqueeze(0).repeat(
                    CLASS_NUM, 1, 1, 1, 1)
                batch_features_ext = torch.transpose(batch_features_ext, 0, 1)
                relation_pairs = torch.cat(
                    (sample_features_ext, batch_features_ext),
                    2).view(-1, FEATURE_DIM * 2, 19, 19)

                meta_env_states_list.append(relation_pairs)
                meta_env_labels_list.append(batch_labels)

            meta_env = a2cAgent.env(meta_env_states_list, meta_env_labels_list)
            agent.train(meta_env,
                        policy_loss_list=policy_losses,
                        value_loss_list=value_losses)

        feature_encoder_optim.zero_grad()
        actor_optim.zero_grad()
        critic_optim.zero_grad()

        meta_batch_actor_loss = torch.stack(policy_losses).mean()
        meta_batch_critic_loss = torch.stack(value_losses).mean()

        meta_batch_actor_loss.backward(retain_graph=True)
        meta_batch_critic_loss.backward()

        # Clip only after backward() has populated the gradients.
        torch.nn.utils.clip_grad_norm_(feature_encoder.parameters(), 0.5)
        torch.nn.utils.clip_grad_norm_(actor.parameters(), 0.5)
        torch.nn.utils.clip_grad_norm_(critic.parameters(), 0.5)

        feature_encoder_optim.step()
        actor_optim.step()
        critic_optim.step()

        feature_encoder_scheduler.step()
        actor_scheduler.step()
        critic_scheduler.step()

        if (episode + 1) % 100 == 0:
            mbal = meta_batch_actor_loss.cpu().detach().numpy()
            mbcl = meta_batch_critic_loss.cpu().detach().numpy()
            print(
                f"episode : {episode+1}, meta_batch_actor_loss : {mbal:.4f}, meta_batch_critic_loss : {mbcl:.4f}"
            )

            mbal_loss_list.append(mbal)
            mbcl_loss_list.append(mbcl)
            loss_list.append(mbal + mbcl)

        if (episode + 1) % 500 == 0:
            print("Testing...")
            total_reward = 0

            total_num_of_test_samples = 0
            for i in range(TEST_EPISODE):
                # * Generate env
                env_states_list = []
                env_labels_list = []

                number_of_query_image = 10
                task = tg.MiniImagenetTask(metatest_character_folders,
                                           CLASS_NUM, SAMPLE_NUM_PER_CLASS,
                                           number_of_query_image)
                sample_dataloader = tg.get_mini_imagenet_data_loader(
                    task,
                    num_per_class=SAMPLE_NUM_PER_CLASS,
                    split="train",
                    shuffle=False)
                test_dataloader = tg.get_mini_imagenet_data_loader(
                    task,
                    num_per_class=number_of_query_image,
                    split="test",
                    shuffle=True)
                sample_images, sample_labels = next(iter(sample_dataloader))
                sample_images, sample_labels = sample_images.to(
                    device), sample_labels.to(device)

                test_images, test_labels = next(iter(test_dataloader))
                total_num_of_test_samples += len(test_labels)
                test_images, test_labels = test_images.to(
                    device), test_labels.to(device)

                # * calculate features
                sample_features = feature_encoder(sample_images)
                sample_features = sample_features.view(CLASS_NUM,
                                                       SAMPLE_NUM_PER_CLASS,
                                                       FEATURE_DIM, 19, 19)
                sample_features = torch.sum(sample_features, 1).squeeze(1)
                test_features = feature_encoder(test_images)

                # * calculate relations
                # * each query sample is paired with every class feature map
                # * to form the (batch, FEATURE_DIM * 2, 19, 19) relation input

                sample_features_ext = sample_features.unsqueeze(0).repeat(
                    number_of_query_image * CLASS_NUM, 1, 1, 1, 1)
                test_features_ext = test_features.unsqueeze(0).repeat(
                    CLASS_NUM, 1, 1, 1, 1)
                test_features_ext = torch.transpose(test_features_ext, 0, 1)

                relation_pairs = torch.cat(
                    (sample_features_ext, test_features_ext),
                    2).view(-1, FEATURE_DIM * 2, 19, 19)
                env_states_list.append(relation_pairs)
                env_labels_list.append(test_labels)

                test_env = a2cAgent.env(env_states_list, env_labels_list)
                rewards = agent.test(test_env)
                total_reward += rewards

            test_accuracy = total_reward / (1.0 * total_num_of_test_samples)

            mean_loss = np.mean(loss_list)
            mean_actor_loss = np.mean(mbal_loss_list)
            mean_critic_loss = np.mean(mbcl_loss_list)

            print(f'mean loss : {mean_loss}')
            print("test accuracy : ", test_accuracy)

            writer.add_scalar('1.loss', mean_loss, episode + 1)
            writer.add_scalar('2.mean_actor_loss', mean_actor_loss,
                              episode + 1)
            writer.add_scalar('3.mean_critic_loss', mean_critic_loss,
                              episode + 1)
            writer.add_scalar('4.test accuracy', test_accuracy, episode + 1)

            loss_list = []
            mbal_loss_list = []
            mbcl_loss_list = []

            if test_accuracy > last_accuracy:
                # save networks
                torch.save(
                    feature_encoder.state_dict(),
                    str("./models/miniimagenet_feature_encoder_" +
                        str(CLASS_NUM) + "way_" + str(SAMPLE_NUM_PER_CLASS) +
                        "shot.pkl"))
                torch.save(
                    actor.state_dict(),
                    str("./models/miniimagenet_actor_network_" +
                        str(CLASS_NUM) + "way_" + str(SAMPLE_NUM_PER_CLASS) +
                        "shot.pkl"))

                torch.save(
                    critic.state_dict(),
                    str("./models/miniimagenet_critic_network_" +
                        str(CLASS_NUM) + "way_" + str(SAMPLE_NUM_PER_CLASS) +
                        "shot.pkl"))
                print("save networks for episode:", episode)
                last_accuracy = test_accuracy
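The `writer` used in the test block above is never created in the snippet; presumably it is a TensorBoard SummaryWriter set up at module level, along these lines (an assumption):

from torch.utils.tensorboard import SummaryWriter

writer = SummaryWriter()  # assumed module-level setup; not in the original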
Example #6
    def __init__(self,
                 config,
                 state_size,
                 action_size,
                 num_agents,
                 seed,
                 per=True):
        """Initialize an Agent object.
        
        Params
        ======
            config (config): instance of a config-class, which stores all the hyperparameters
            state_size (int): dimension of each state
            action_size (int): dimension of each action
            seed (int): random seed
        """

        self.config = config
        self.epsilon = self.config.EPSILON_START

        self.state_size = state_size
        self.action_size = action_size
        self.num_agents = num_agents
        self.seed = seed

        # Initialize bins
        self.v_min = 0
        self.v_max = 5
        self.n_atoms = 51
        self.delta = (self.v_max - self.v_min) / float(self.n_atoms - 1)
        self.bin_centers = torch.from_numpy(
            np.array([
                self.v_min + i * self.delta for i in range(self.n_atoms)
            ]).reshape(-1, 1)).to(self.config.device)

        # Initialize the Actor and Critic Networks
        self.actor_local = models.Actor(state_size,
                                        action_size).to(self.config.device)
        self.actor_target = models.Actor(state_size,
                                         action_size).to(self.config.device)
        self.actor_optimizer = torch.optim.Adam(self.actor_local.parameters(),
                                                self.config.LR_actor)

        self.critic_local = models.Critic(state_size, action_size,
                                          self.n_atoms).to(self.config.device)
        self.critic_target = models.Critic(state_size, action_size,
                                           self.n_atoms).to(self.config.device)
        self.critic_optimizer = torch.optim.Adam(
            self.critic_local.parameters(),
            self.config.LR_critic,
            weight_decay=self.config.weight_decay)

        # Initialize the random-noise-process for action-noise
        self.is_training = True
        self.noise = OUNoise((self.num_agents, self.action_size), self.seed)

        # Hard update the target networks to have the same parameters as the local networks
        for target_param, param in zip(self.actor_target.parameters(),
                                       self.actor_local.parameters()):
            target_param.data.copy_(param.data)
        for target_param, param in zip(self.critic_target.parameters(),
                                       self.critic_local.parameters()):
            target_param.data.copy_(param.data)

        # Initialize the replay buffer (note: a uniform buffer is created
        # here regardless of the `per` flag)
        self.memory = ReplayBuffer(self.config.BUFFER_SIZE,
                                   self.config.BATCH_SIZE, seed,
                                   self.config.device, self.config.N_BOOTSTRAP)

        # Initialize time step (for updating every UPDATE_EVERY steps)
        self.t_step = 0
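The hard copy above syncs the targets only once, at construction; DDPG-style agents then track the local networks with a Polyak soft update during learning. A sketch of the companion method (the call site and the value of `tau` are assumptions):

    def soft_update(self, local_model, target_model, tau):
        # Polyak averaging: target <- tau * local + (1 - tau) * target.
        for target_param, param in zip(target_model.parameters(),
                                       local_model.parameters()):
            target_param.data.copy_(tau * param.data +
                                    (1.0 - tau) * target_param.data)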
Example #7
    # Set up the environment
    env = gym.make('BipedalWalker-v3')

    state_dimension = env.observation_space.shape[0]
    action_dimension = env.action_space.shape[0]
    action_max = env.action_space.high[0]

    print("State dimension: {}".format(state_dimension))
    print("Action dimension: {}".format(action_dimension))
    print("Action max: {}".format(action_max))

    load_models = False

    # Create the actor and critic networks

    actor = models.Actor(state_dimension, action_dimension, action_max)
    target_actor = models.Actor(state_dimension, action_dimension, action_max)
    actor_optimizer = torch.optim.Adam(actor.parameters(),
                                       lr=ACTOR_LEARNING_RATE)

    critic = models.Critic(state_dimension, action_dimension)
    target_critic = models.Critic(state_dimension, action_dimension)
    critic_optimizer = torch.optim.Adam(critic.parameters(),
                                        lr=CRITIC_LEARNING_RATE)

    # Copy the local weights into the target networks

    for target_param, param in zip(target_actor.parameters(),
                                   actor.parameters()):
        target_param.data.copy_(param.data)

    for target_param, param in zip(target_critic.parameters(),
                                   critic.parameters()):
        target_param.data.copy_(param.data)
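A minimal interaction step with these networks might look as follows; this assumes the classic Gym step API (4-tuple return) and that exploration noise is added elsewhere:

state = env.reset()
state_t = torch.from_numpy(state).float().unsqueeze(0)
with torch.no_grad():
    action = actor(state_t).squeeze(0).numpy()
next_state, reward, done, info = env.step(action)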
Example #8
def post_actors(request: schemas.Actor, db: Session = Depends(get_db)):
    new_actor = models.Actor(actor_name=request.actor_name)
    db.add(new_actor)
    db.commit()
    db.refresh(new_actor)
    return new_actor
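The route decorator is not part of the snippet; with FastAPI the handler would typically be registered along these lines (the path and the `app` name are assumptions):

@app.post("/actors")  # hypothetical registration; path is an assumption
def post_actors(request: schemas.Actor, db: Session = Depends(get_db)):
    ...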