Example #1
    def check_rank(self, av_id: int, times=0):
        ''' fetch archive stats for av_id, append them to its history csv
            and send a mail when the video enters a day-ranking list '''
        rank_list = self.rank_map.get(av_id, [])
        changeHeaders({'Referer': self.BASIC_AV_URL % av_id})
        rank, score = 0, 0  # defaults in case rank_list is empty
        if len(rank_list):
            score = int(rank_list[1])
            rank = int(rank_list[0])

        url = self.ARCHIVE_STAT_URL % av_id
        json_req = get_request_proxy(url, 1)

        # retry up to 3 times when the response is missing or malformed
        if not self.have_error(json_req):
            if times < 3:
                self.check_rank(av_id, times + 1)
            return
        json_req = json_req['data']
        need = [
            'view', 'like', 'coin', 'favorite', 'reply', 'share', 'danmaku'
        ]
        data = [json_req[index] for index in need]
        if not self.check_view(av_id, data[0]):
            if times < 3:
                self.check_rank(av_id, times + 1)
            return
        if len(rank_list):
            data = [time_str(), *data, *rank_list[:2], *rank_list[3:5]]
        else:
            data = [time_str(), *data]

        with codecs.open('%s%d.csv' % (history_dir, av_id),
                         'a',
                         encoding='utf-8') as f:
            f.write(','.join([str(index) for index in data]) + '\n')

        if self.check_rank_list(av_id, rank_list):
            av_id_id = int(av_id) * 10 + int(rank_list[-1])
            if av_id_id not in self.rank:
                self.rank[av_id_id] = [rank_list[0] // 10]
            else:
                self.rank[av_id_id].append(rank_list[0] // 10)
            self.last_rank[av_id_id] = rank_list[0]
            send_email(
                '%dday List || Rank: %d Score: %d' %
                (int(rank_list[-1]), rank, score),
                '%dday List || Rank: %d Score: %d' %
                (int(rank_list[-1]), rank, score))
        # stop tracking the video once more than one day has passed since its
        # last check (or since the monitor started, if it was never checked)
        if av_id in self.last_check and int(
                time.time()) - self.last_check[av_id] > one_day:
            del self.rank_map[av_id]
        elif av_id not in self.last_check and int(
                time.time()) > one_day + self.begin_timestamp:
            del self.rank_map[av_id]
        self.last_view[av_id] = data[1]
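Every example on this page relies on a project helper time_str() that is not shown. A minimal sketch of what such a helper could look like, inferred from the calls above (time_str(), time_str(format='%H'), time_str(-1, '%Y-%m-%d'), time_str(some_timestamp)); this is an assumption, not the project's actual implementation:

    import time

    def time_str(time_stamp: int = -1, format: str = '%Y-%m-%d %H:%M:%S') -> str:
        ''' format a unix timestamp; -1 (the default) means "now" '''
        # the keyword name `format` mirrors the call time_str(format='%H') above
        if time_stamp == -1:
            time_stamp = time.time()
        return time.strftime(format, time.localtime(time_stamp))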
Example #2
 def get_star_num(self, mid: int, times: int, load_disk=False):
     ''' get star num'''
     url = self.RELATION_STAT_URL % mid
     header = {
         **headers,
         **{
             'Origin': self.BILIBILI_URL,
             'Referer': self.AV_URL
         }
     }
     if 'Host' in header:
         del header['Host']
     req = get_request_proxy(url, 2, header=header)
     if req is None or req.status_code != 200 or len(
             req.text) < 8 or '{' not in req.text:
         if times < 3:
             self.get_star_num(mid, times + 1, load_disk)
         return
     try:
         # the endpoint wraps the json payload; strip the wrapper before parsing
         json_req = json.loads(req.text[7:-1])
         self.star[mid] = json_req['data']['follower']
         if load_disk and self.check_star(mid, self.star[mid]):
             self.last_star[mid] = self.star[mid]
             with open('{}star.csv'.format(data_dir), 'a') as f:
                 f.write('%s,%d\n' % (time_str(), self.star[mid]))
     except Exception:
         pass  # malformed or unexpected payload; skip this round
Example #3
    def public_monitor(self, av_id: int, times: int):
        ''' record stats when a video goes public and again one day after data_time '''
        self.public_list.append(av_id)
        data_time, mid = self.public[av_id]
        self.get_star_num(mid, 0)
        self.check_rank_v2(av_id, 0)
        time.sleep(5)
        follower = self.star.get(mid, 0)
        origin_data = self.data_v2.get(av_id, [])
        sleep_time = data_time + one_day - int(time.time())
        if sleep_time < 0:
            return  # the one-day mark has already passed; nothing left to record
        print('Monitor Begin %d' % (av_id))
        time.sleep(sleep_time)
        self.get_star_num(mid, 0)
        self.check_rank_v2(av_id, 0)

        time.sleep(5)
        follower_2 = self.star.get(mid, 0)
        one_day_data = self.data_v2.get(av_id, [])

        data = [
            time_str(data_time), av_id, follower, follower_2, *origin_data,
            *one_day_data
        ]
        with codecs.open(data_dir + 'public.csv', 'a', encoding='utf-8') as f:
            f.write(','.join([str(ii) for ii in data]) + '\n')
Example #4
 def generate_other_params(self,
                           hotel_id: int = 4889292,
                           city_id: int = 2,
                           startDate: str = None,
                           depDate: str = None):
     ''' generate other params '''
     # default arguments are evaluated once at import time, so build the
     # check-in / check-out dates per call instead of in the signature
     if startDate is None:
         startDate = time_str(-1, '%Y-%m-%d')
     if depDate is None:
         depDate = time_str(int(time.time() + one_day), '%Y-%m-%d')
     params = {
         'psid': None,
         'MasterHotelID': hotel_id,
         'hotel': hotel_id,
         'EDM': 'F',
         'roomId': None,
         'IncludeRoom': None,
         'city': city_id,
         'showspothotel': 'T',
         'supplier': None,
         'IsDecoupleSpotHotelAndGroup': 'F',
         'contrast': 0,
         'brand': 776,
         'startDate': startDate,
         'depDate': depDate,
         'IsFlash': 'F',
         'RequestTravelMoney': 'F',
         'hsids': None,
         'IsJustConfirm': None,
         'contyped': 0,
         'priceInfo': -1,
         'equip': None,
         'filter': None,
         'productcode': None,
         'couponList': None,
         'abForHuaZhu': None,
         'defaultLoad': 'T',
         'esfiltertag': None,
         'estagid': None,
         'Currency': None,
         'Exchange': None,
         'minRoomId': 0,
         'maskDiscount': 0,
         'TmFromList': 'F',
         'th': 119,
         'RoomGuestCount': '1,1,0'
     }
     return params
Example #5
    def get_check(self):
        ''' check comment '''
        now_hour = int(time_str(format='%H'))
        now_min = int(time_str(format='%M'))
        now_time = now_hour + now_min / 60
        if 0.5 < now_time < 8.5:
            return  # skip the quiet hours between 00:30 and 08:30
        if os.path.exists('{}comment.pkl'.format(comment_dir)):
            with open('{}comment.pkl'.format(comment_dir), 'rb') as f:
                self.comment = pickle.load(f)
        if self.assign_up_mid == -1:
            return
        url = self.MEMBER_SUBMIT_URL % self.assign_up_mid
        json_req = get_request_proxy(url, 1)
        if json_req is None or 'data' not in json_req or 'vlist' not in json_req[
                'data']:
            if can_retry(url):
                self.get_check()
            return
        av_id_list = [[ii['aid'], ii['comment']]
                      for ii in json_req['data']['vlist']]
        if self.basic_av_id not in [ii[0] for ii in av_id_list]:
            if can_retry(url):
                self.get_check()
            return

        threading_list = []
        for (ii, jj) in av_id_list:
            if ii not in self.comment:
                self.comment[ii] = {}
            work = threading.Thread(target=self.comment_check_schedule,
                                    args=(ii, jj))
            threading_list.append(work)
        for work in threading_list:
            work.start()
        for work in threading_list:
            work.join()
        with open('{}comment.pkl'.format(comment_dir), 'wb') as f:
            pickle.dump(self.comment, f)
        return av_id_list
Example #6
    def check_rank_v2(self, av_id: int, times=0):
        ''' fetch archive stats for av_id and cache them in self.data_v2 '''
        rank_list = self.rank_map.get(av_id, [])
        changeHeaders({'Referer': self.BASIC_AV_URL % av_id})

        url = self.ARCHIVE_STAT_URL % av_id
        json_req = get_request_proxy(url, 1)

        if not self.have_error(json_req):
            if times < 3:
                self.check_rank_v2(av_id, times + 1)
            return
        json_req = json_req['data']
        need = [
            'view', 'like', 'coin', 'favorite', 'reply', 'share', 'danmaku'
        ]
        data = [json_req[index] for index in need]
        if len(rank_list):
            data = [time_str(), *data, *rank_list[:2], *rank_list[-2:]]
        else:
            data = [time_str(), *data]
        self.data_v2[av_id] = data
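Examples #1, #2, #5 and #6 retry a failed request by calling themselves again while times < 3. The same guard can be written as a plain loop; a self-contained sketch of that retry pattern (the fetch_stat name and the use of urllib are illustrative assumptions, not the project's get_request_proxy):

    import json
    import urllib.request

    def fetch_stat(url: str, max_times: int = 3):
        ''' fetch a json document, retrying up to max_times before giving up '''
        for _ in range(max_times):
            try:
                with urllib.request.urlopen(url, timeout=10) as resp:
                    body = json.loads(resp.read().decode('utf-8'))
                if 'data' in body:  # same kind of sanity check as have_error()
                    return body['data']
            except (OSError, ValueError):  # network error or malformed json
                continue
        return None  # every attempt failed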
Example #7
 def get_comment_detail(self,
                        comment: dict,
                        av_id: int,
                        pn: int,
                        parent_floor=None):
     ''' get comment detail '''
     ctime = time_str(comment['ctime'])
     wait_list = ['floor', 'member', 'content', 'like']
     wait_list_mem = ['uname', 'sex', 'sign', 'level_info']
     wait_list_content = ['message', 'plat']
     floor, member, content, like = [comment[ii] for ii in wait_list]
     uname, sex, sign, level = [member[ii] for ii in wait_list_mem]
     current_level = level['current_level']
     content, plat = [content[ii] for ii in wait_list_content]
     req_list = [
         floor, ctime, like, plat, current_level, uname, sex, content, sign
     ]
     self.have_bad_comment(req_list, av_id, pn, parent_floor)
     # strip commas and newlines from the sign and message fields
     # so each row stays a single valid csv line
     req_list[-1] = req_list[-1].replace(',', ' ').replace('\n', ' ')
     req_list[-2] = req_list[-2].replace(',', ' ').replace('\n', ' ')
     return req_list
Example #8
    def _setup_default_logging(self, default_level=logging.INFO):

        format = "%(asctime)s - %(levelname)s - %(name)s -   %(message)s"
        dataset_name = get_dataset_name(self.config.dataset_path)
        output_dir = os.path.join(dataset_name, f'x{self.L}')
        os.makedirs(output_dir, exist_ok=True)

        writer = SummaryWriter(comment=f'{dataset_name}_{self.L}')

        logger = logging.getLogger('train')
        logger.setLevel(default_level)

        time_stamp = time_str()
        logging.basicConfig(  # unlike the root logger, a custom logger can’t be configured using basicConfig()
            filename=os.path.join(output_dir, f'{time_stamp}_{self.L}_labelled_instances.log'),
            format=log_format,
            datefmt="%m/%d/%Y %H:%M:%S",
            level=default_level)
        # to avoid double printing when creating new instances of class
        if not logger.handlers:
            console_handler = logging.StreamHandler(sys.stdout)
            console_handler.setLevel(default_level)
            console_handler.setFormatter(logging.Formatter(log_format))
            logger.addHandler(console_handler)
        #
        logger.info(dict(self.config._get_kwargs()))
        if self.device != 'cpu':
            logger.info(f'Device used: {self.device}_{torch.cuda.get_device_name(self.device)}')
        logger.info(f'Model:  {self.model.module.__class__ if isinstance(self.model, torch.nn.DataParallel) else self.model.__class__}')
        logger.info(f'Num_labels: {self.L}')
        logger.info(f'Image_size: {self.config.im_size}')
        logger.info(f'Cropsize: {self.config.cropsize}')
        logger.info("Total params: {:.2f}M".format(
            sum(p.numel() for p in self.model.parameters()) / 1e6))

        return logger, writer, time_stamp
Example #9
 def log_write(self, url):
     """
     failure log
     """
     with codecs.open("proxy.log", 'a', encoding='utf-8') as f:
         f.write(time_str() + url + '\n')
Example #10
    def __init__(self):

        parser = argparse.ArgumentParser()
        parser.add_argument('-d', '--sys_device_ids', type=eval, default=(0, ))
        parser.add_argument('-r', '--run', type=int, default=1)
        parser.add_argument('--set_seed', type=str2bool, default=False)
        parser.add_argument(
            '--dataset',
            type=str,
            default='market1501',
            choices=['market1501', 'cuhk03', 'duke', 'combined'])
        parser.add_argument('--trainset_part',
                            type=str,
                            default='trainval',
                            choices=['trainval', 'train'])
        parser.add_argument('--model_type',
                            type=str,
                            default='densenet121',
                            choices=[
                                'resnet50', 'densenet121', 'preActResnet50',
                                'resnet50mid'
                            ])
        parser.add_argument('--apply_random_erasing',
                            type=str2bool,
                            default=False)
        parser.add_argument('--resize_h_w', type=eval, default=(256, 128))
        # These several only for training set
        parser.add_argument('--crop_prob', type=float, default=0)
        parser.add_argument('--crop_ratio', type=float, default=1)
        parser.add_argument('--mirror', type=str2bool, default=True)
        parser.add_argument('--ids_per_batch', type=int, default=32)
        parser.add_argument('--ims_per_id', type=int, default=4)

        parser.add_argument('--log_to_file', type=str2bool, default=True)
        parser.add_argument('--steps_per_log', type=int, default=20)
        parser.add_argument('--epochs_per_val', type=int, default=1e10)

        parser.add_argument('--last_conv_stride',
                            type=int,
                            default=1,
                            choices=[1, 2])
        parser.add_argument('--normalize_feature',
                            type=str2bool,
                            default=False)
        parser.add_argument('--margin', type=float, default=0.3)

        parser.add_argument('--only_test', type=str2bool, default=False)
        parser.add_argument('--resume', type=str2bool, default=False)
        parser.add_argument('--exp_dir', type=str, default='')
        parser.add_argument('--model_weight_file', type=str, default='')

        parser.add_argument('--base_lr', type=float, default=2e-4)
        parser.add_argument('--lr_decay_type',
                            type=str,
                            default='exp',
                            choices=['exp', 'staircase'])
        parser.add_argument('--exp_decay_at_epoch', type=int, default=151)
        parser.add_argument('--staircase_decay_at_epochs',
                            type=eval,
                            default=(
                                101,
                                201,
                            ))
        parser.add_argument('--staircase_decay_multiply_factor',
                            type=float,
                            default=0.1)
        parser.add_argument('--total_epochs', type=int, default=300)
        parser.add_argument(
            '--softmax_loss_weight',
            default=0.9,
            type=float,
            help='weight assigned to the softmax loss, between 0 and 1')
        parser.add_argument(
            '--add_softmax_loss',
            default=True,
            type=str2bool,
            help='loss will be a combination of triplet and softmax loss')

        args = parser.parse_args()

        # gpu ids
        self.sys_device_ids = args.sys_device_ids

        # If you want to make your results exactly reproducible, you have
        # to fix a random seed.
        if args.set_seed:
            self.seed = 1
        else:
            self.seed = None

        # The experiments can be run for several times and performances be averaged.
        # `run` starts from `1`, not `0`.
        self.run = args.run

        ###########
        # Dataset #
        ###########

        # If you want to make your results exactly reproducible, you have
        # to also set num of threads to 1 during training.
        if self.seed is not None:
            self.prefetch_threads = 1
        else:
            self.prefetch_threads = 2

        self.dataset = args.dataset
        self.trainset_part = args.trainset_part

        # Image Processing

        # Just for training set
        self.crop_prob = args.crop_prob
        self.crop_ratio = args.crop_ratio
        self.resize_h_w = args.resize_h_w

        # Whether to scale by 1/255
        self.scale_im = True
        self.im_mean = [0.486, 0.459, 0.408]
        self.im_std = [0.229, 0.224, 0.225]

        self.train_mirror_type = 'random' if args.mirror else None

        self.ids_per_batch = args.ids_per_batch
        self.ims_per_id = args.ims_per_id
        self.train_final_batch = False
        self.train_shuffle = True
        self.random_erasing = args.apply_random_erasing

        self.test_batch_size = 32
        self.test_final_batch = True
        self.test_mirror_type = None
        self.test_shuffle = False

        dataset_kwargs = dict(name=self.dataset,
                              resize_h_w=self.resize_h_w,
                              scale=self.scale_im,
                              im_mean=self.im_mean,
                              im_std=self.im_std,
                              batch_dims='NCHW',
                              num_prefetch_threads=self.prefetch_threads)

        prng = np.random
        if self.seed is not None:
            prng = np.random.RandomState(self.seed)
        self.train_set_kwargs = dict(part=self.trainset_part,
                                     ids_per_batch=self.ids_per_batch,
                                     ims_per_id=self.ims_per_id,
                                     final_batch=self.train_final_batch,
                                     shuffle=self.train_shuffle,
                                     crop_prob=self.crop_prob,
                                     crop_ratio=self.crop_ratio,
                                     mirror_type=self.train_mirror_type,
                                     is_random_erasing=self.random_erasing,
                                     prng=prng)
        self.train_set_kwargs.update(dataset_kwargs)

        prng = np.random
        if self.seed is not None:
            prng = np.random.RandomState(self.seed)
        self.val_set_kwargs = dict(part='val',
                                   batch_size=self.test_batch_size,
                                   final_batch=self.test_final_batch,
                                   shuffle=self.test_shuffle,
                                   mirror_type=self.test_mirror_type,
                                   prng=prng)
        self.val_set_kwargs.update(dataset_kwargs)

        ###############
        # ReID Model  #
        ###############

        # The last block of ResNet has stride 2. We can set the stride to 1 so that
        # the spatial resolution before global pooling is doubled.
        self.last_conv_stride = args.last_conv_stride

        # Whether to normalize feature to unit length along the Channel dimension,
        # before computing distance
        self.normalize_feature = args.normalize_feature

        # Margin of triplet loss
        self.margin = args.margin

        #############
        # Training  #
        #############

        self.weight_decay = 0.0005

        # Initial learning rate
        self.base_lr = args.base_lr
        self.lr_decay_type = args.lr_decay_type
        self.exp_decay_at_epoch = args.exp_decay_at_epoch
        self.staircase_decay_at_epochs = args.staircase_decay_at_epochs
        self.staircase_decay_multiply_factor = args.staircase_decay_multiply_factor
        # Number of epochs to train
        self.total_epochs = args.total_epochs

        # How often (in epochs) to test on val set.
        self.epochs_per_val = args.epochs_per_val

        # How often (in batches) to log. If only need to log the average
        # information for each epoch, set this to a large value, e.g. 1e10.
        self.steps_per_log = args.steps_per_log

        # Only test and without training.
        self.only_test = args.only_test

        self.resume = args.resume

        #######
        # Log #
        #######

        # If True,
        # 1) stdout and stderr will be redirected to file,
        # 2) training loss etc will be written to tensorboard,
        # 3) checkpoint will be saved
        self.log_to_file = args.log_to_file

        # The root dir of logs.
        if args.exp_dir == '':
            self.exp_dir = osp.join(
                'exp/train',
                '{}'.format(self.dataset),
                #
                'lcs_{}_'.format(self.last_conv_stride) +
                ('nf_' if self.normalize_feature else 'not_nf_') +
                'margin_{}_'.format(tfs(self.margin)) +
                'lr_{}_'.format(tfs(self.base_lr)) +
                '{}_'.format(self.lr_decay_type) +
                ('decay_at_{}_'.format(self.exp_decay_at_epoch)
                 if self.lr_decay_type == 'exp' else
                 'decay_at_{}_factor_{}_'.format(
                     '_'.join([str(e) for e in args.staircase_decay_at_epochs]),
                     tfs(self.staircase_decay_multiply_factor))) +
                'total_{}'.format(self.total_epochs),
                #
                'run{}'.format(self.run),
            )
        else:
            self.exp_dir = args.exp_dir

        self.stdout_file = osp.join(self.exp_dir,
                                    'stdout_{}.txt'.format(time_str()))
        self.stderr_file = osp.join(self.exp_dir,
                                    'stderr_{}.txt'.format(time_str()))

        # Saving model weights and optimizer states, for resuming.
        self.ckpt_file = osp.join(self.exp_dir, 'ckpt.pth')
        self.model_type = args.model_type
        # Just for loading a pretrained model; no optimizer state is needed.
        self.model_weight_file = args.model_weight_file

        # usage of softmax
        self.softmax_loss_weight = args.softmax_loss_weight
        self.add_softmax_loss = args.add_softmax_loss
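The argument parsers in this example and in the ones below pass type=str2bool for boolean flags, because plain type=bool would turn any non-empty string (including 'False') into True. str2bool is a helper that is not shown here; a common sketch of it, offered as an assumption rather than the project's actual code:

    import argparse

    def str2bool(v: str) -> bool:
        ''' map the usual command-line spellings of a boolean to True/False '''
        if v.lower() in ('yes', 'true', 't', '1'):
            return True
        if v.lower() in ('no', 'false', 'f', '0'):
            return False
        raise argparse.ArgumentTypeError('boolean value expected, got %r' % v)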
Example #11
    def __init__(self):

        parser = argparse.ArgumentParser()
        parser.add_argument('-d', '--sys_device_ids', type=eval, default=(0, ))
        parser.add_argument('--dataset',
                            type=str,
                            default='market1501',
                            choices=['market1501', 'cuhk03', 'duke'])

        parser.add_argument('--num_queries', type=int, default=16)
        parser.add_argument('--rank_list_size', type=int, default=10)

        parser.add_argument('--resize_h_w', type=eval, default=(256, 128))
        parser.add_argument('--last_conv_stride',
                            type=int,
                            default=1,
                            choices=[1, 2])
        parser.add_argument('--normalize_feature',
                            type=str2bool,
                            default=False)

        parser.add_argument('--log_to_file', type=str2bool, default=True)
        parser.add_argument('--exp_dir', type=str, default='')
        parser.add_argument('--ckpt_file', type=str, default='')
        parser.add_argument('--model_weight_file', type=str, default='')
        parser.add_argument('--opt-level',
                            type=str,
                            default='O0',
                            choices=['O0', 'O1', 'O2', 'O3'])

        parser.add_argument(
            '--net',
            type=str,
            default='mobilenetV2',
            choices=['resnet50', 'shuffelnetV2', 'mobilenetV2'])

        args = parser.parse_args()

        # gpu ids
        self.sys_device_ids = args.sys_device_ids

        self.num_queries = args.num_queries
        self.rank_list_size = args.rank_list_size

        ###########
        # Dataset #
        ###########

        self.dataset = args.dataset
        self.prefetch_threads = 2

        # Image Processing

        self.resize_h_w = args.resize_h_w

        # Whether to scale by 1/255
        self.scale_im = True
        self.im_mean = [0.486, 0.459, 0.408]
        self.im_std = [0.229, 0.224, 0.225]

        self.test_mirror_type = None
        self.test_batch_size = 32
        self.test_final_batch = True
        self.test_shuffle = False

        dataset_kwargs = dict(name=self.dataset,
                              resize_h_w=self.resize_h_w,
                              scale=self.scale_im,
                              im_mean=self.im_mean,
                              im_std=self.im_std,
                              batch_dims='NCHW',
                              num_prefetch_threads=self.prefetch_threads)

        prng = np.random
        self.test_set_kwargs = dict(part='test',
                                    batch_size=self.test_batch_size,
                                    final_batch=self.test_final_batch,
                                    shuffle=self.test_shuffle,
                                    mirror_type=self.test_mirror_type,
                                    prng=prng)
        self.test_set_kwargs.update(dataset_kwargs)

        ###############
        # ReID Model  #
        ###############

        # The last block of ResNet has stride 2. We can set the stride to 1 so that
        # the spatial resolution before global pooling is doubled.
        self.last_conv_stride = args.last_conv_stride
        self.opt_level = args.opt_level
        self.net = args.net

        # Whether to normalize feature to unit length along the Channel dimension,
        # before computing distance
        self.normalize_feature = args.normalize_feature

        #######
        # Log #
        #######

        # If True, stdout and stderr will be redirected to file
        self.log_to_file = args.log_to_file

        # The root dir of logs.
        if args.exp_dir == '':
            self.exp_dir = osp.join(
                'exp/visualize_rank_list',
                '{}'.format(self.dataset),
            )
        else:
            self.exp_dir = args.exp_dir

        self.stdout_file = osp.join(self.exp_dir,
                                    'stdout_{}.txt'.format(time_str()))
        self.stderr_file = osp.join(self.exp_dir,
                                    'stderr_{}.txt'.format(time_str()))

        # Model weights and optimizer states, for resuming.
        self.ckpt_file = args.ckpt_file
        # Just for loading a pretrained model; no optimizer state is needed.
        self.model_weight_file = args.model_weight_file
Example #12
        # update weights; a gradient descent step
        optimizer.step()

        ############
        # step log #
        ############
        # log loss for this batch
        loss_meter.update(to_scalar(loss))
        # write to tensorboard; used for visualization
        writer.add_scalar('train/total_loss_iter', to_scalar(loss),
                          step + 1 + dataset_L * epoch)
        # print the log for every "steps_per_log" batches or the final batch
        if (step + 1) % cfg.steps_per_log == 0 or (step + 1) % len(train_loader) == 0:
            log = '{}, Step {}/{} in Ep {}, {:.2f}s, loss:{:.4f}'.format(
                time_str(), step + 1, dataset_L, epoch + 1,
                time.time() - step_st, loss_meter.val)
            print(log)

    # update the learning rate
    scheduler.step()
    ##############
    # epoch log  #
    ##############
    # add tensorboard log for this epoch
    writer.add_scalar('train/total_avgloss_epoch', loss_meter.avg, epoch + 1)
    # print the log for this epoch
    log = 'Ep{}, {:.2f}s, loss {:.4f}'.format(epoch + 1,
                                              time.time() - ep_st,
                                              loss_meter.avg)
    print(log)
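loss_meter in this fragment exposes update(), .val and .avg, which matches the usual running-average meter used in PyTorch training loops. A minimal sketch under that assumption (not necessarily the repository's own class):

    class AverageMeter:
        ''' keep the latest value and the running average of a scalar '''

        def __init__(self):
            self.val, self.sum, self.count, self.avg = 0.0, 0.0, 0, 0.0

        def update(self, val, n=1):
            self.val = val
            self.sum += val * n
            self.count += n
            self.avg = self.sum / self.count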
Example #13
    def __init__(self):

        parser = argparse.ArgumentParser()
        # Seed for generating random numbers
        parser.add_argument('--set_seed', type=str2bool, default=False)
        # Specify the device, gpu/cpu; -1 for cpu ; >0 for gpu
        parser.add_argument('-d', '--sys_device_ids', type=eval, default=())
        ## dataset parameter
        # the data file that has not been split into train, val and test sets
        parser.add_argument('--raw_data', type=str, default='')
        # the path to the folder that holds the data file
        parser.add_argument('--data_root', type=str, default='')
        # the dataset name
        parser.add_argument('--dataset',
                            type=str,
                            default='Beyer1986v1',
                            choices=[
                                'Beyer1986v1', 'Beyer1986v2', 'Beyer1986v3',
                                'data_FF', 'values_FAB', 'data_tot'
                            ])
        # batch size used for training
        parser.add_argument('--batch_size', type=int, default=4)
        # number of workers to fetch the data in parallel
        parser.add_argument('--workers', type=int, default=2)
        # fraction of the data used for the training set
        parser.add_argument('--portion_train', type=float, default=0.7)
        # fraction of the data used for the validation set
        parser.add_argument('--portion_val', type=float, default=0.15)
        ## model
        # the dim of inputs
        parser.add_argument('--num_input', type=int, default=3)
        # the dim of output
        parser.add_argument('--num_output', type=int, default=2)
        # how many hidden layers does the model have
        parser.add_argument('--num_h', type=int, default=1)
        # weight decay for the stochastic gradient descent algorithm
        parser.add_argument('--sgd_weight_decay', type=float, default=0.0005)
        # momentum for the stochastic gradient descent algorithm
        parser.add_argument('--sgd_momentum', type=float, default=0.9)
        # learning rate
        parser.add_argument('--lr', type=float, default=0.001)
        # the number of neurons per hidden layer
        parser.add_argument('--neurons_per_hlayer',
                            type=int,
                            nargs='+',
                            default=[3, 8])
        # decay the learning rate at the epochs listed in '--epochs_per_decay'
        parser.add_argument('--epochs_per_decay',
                            type=int,
                            nargs='+',
                            default=[10, 15])
        parser.add_argument('--staircase_decay_multiple_factor',
                            type=float,
                            default=0.1)
        # total number of epochs to train
        parser.add_argument('--total_epochs', type=int, default=10)
        ## utils
        # the name for the saved model
        parser.add_argument('--ckpt_file', type=str, default='')
        # whether to load the model weights from an existing ckpt file
        parser.add_argument('--load_model_weight',
                            type=str2bool,
                            default=False)
        # only test the model without training; a model path must be provided
        parser.add_argument('--test_only', type=str2bool, default=False)
        # the folder to store the experiment logs, model weights, etc.
        parser.add_argument('--exp_dir', type=str, default='')
        # whether to save the logs generated during training to a file
        parser.add_argument('--log_to_file', type=str2bool, default=True)
        # print a log line every '--steps_per_log' training steps
        parser.add_argument('--steps_per_log', type=int, default=2)
        # validate the current model every '--epochs_per_val' epochs
        parser.add_argument('--epochs_per_val', type=int, default=10)
        # save the model to disk every '--epochs_per_save' epochs
        parser.add_argument('--epochs_per_save', type=int, default=50)
        # the index of the experiment run; integer required
        parser.add_argument('--run', type=int, default=1)
        # parse the arguments
        args = parser.parse_args()

        # add those arguments as attributes of the class
        self.sys_device_ids = args.sys_device_ids
        # random
        self.set_seed = args.set_seed
        if self.set_seed:
            self.rand_seed = 0
        else:
            self.rand_seed = None
        # run time index
        self.run = args.run
        # Dataset #
        self.data_root = args.data_root
        self.dataset_name = args.dataset
        self.raw_data = args.raw_data
        self.mean = [0.485, 0.456, 0.406]
        self.std = [0.229, 0.224, 0.225]
        self.batch_size = args.batch_size
        self.workers = args.workers
        self.input_size = args.num_input
        self.output_size = args.num_output
        self.portion_train = args.portion_train
        self.portion_val = args.portion_val
        # optimization
        self.sgd_momentum = args.sgd_momentum
        self.sgd_weight_decay = args.sgd_weight_decay
        self.lr = args.lr
        self.epochs_per_decay = args.epochs_per_decay
        self.staircase_decay_multiple_factor = args.staircase_decay_multiple_factor
        self.total_epochs = args.total_epochs

        # utils
        self.ckpt_file = args.ckpt_file
        self.load_model_weight = args.load_model_weight
        if self.load_model_weight and self.ckpt_file == '':
            raise ValueError(
                'Please input the ckpt_file if you want to resume training')
        self.test_only = args.test_only
        self.exp_dir = args.exp_dir
        self.log_to_file = args.log_to_file
        self.steps_per_log = args.steps_per_log
        self.epochs_per_val = args.epochs_per_val
        self.epochs_per_save = args.epochs_per_save
        self.run = args.run

        # for model
        model_kwargs = dict()
        model_kwargs['input_size'] = self.input_size
        model_kwargs['output_size'] = self.output_size
        model_kwargs['num_h'] = args.num_h
        model_kwargs['hidden_size'] = args.neurons_per_hlayer
        self.model_kwargs = model_kwargs
        # for evaluation

        # create folder for experiments
        if self.exp_dir == '':
            self.exp_dir = os.path.join('exp', '{}'.format(self.dataset_name),
                                        'run{}'.format(self.run))
        # the txt files that store the logs printed during training
        self.stdout_file = os.path.join(
            self.exp_dir, 'log', 'stdout_{}.txt'.format(time_str()))
        self.stderr_file = os.path.join(
            self.exp_dir, 'log', 'stderr_{}.txt'.format(time_str()))
        may_mkdir(self.stdout_file)
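may_mkdir is another helper that is not shown; since it is called with a file path (self.stdout_file), it presumably creates the missing parent directories before anything is written there. A sketch under that assumption, not the project's actual code:

    import os

    def may_mkdir(path: str):
        ''' create the parent directory of a file path if it does not exist '''
        parent = os.path.dirname(os.path.abspath(path))
        os.makedirs(parent, exist_ok=True)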
Example #14
 def get_data(self, origin_data: list) -> str:
     ''' build one csv row: the current time followed by origin_data '''
     return ','.join(str(ii) for ii in [time_str(), *origin_data]) + '\n'
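For illustration, the row that get_data builds can be reproduced outside the class; time_str is stubbed to a fixed string here purely for the demonstration:

    def time_str():
        return '2020-01-01 00:00:00'  # stand-in for the project helper

    origin_data = [1000, 25, 3]
    line = ','.join(str(ii) for ii in [time_str(), *origin_data]) + '\n'
    print(line, end='')  # -> 2020-01-01 00:00:00,1000,25,3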