def get_tweet(self):
    Logger().warning(__name__ + ':TwGreb:get_tweet')
    params = (
        ('include_profile_interstitial_type', '1'),
        ('include_blocking', '1'),
        ('include_blocked_by', '1'),
        ('include_followed_by', '1'),
        ('include_want_retweets', '1'),
        ('include_mute_edge', '1'),
        ('include_can_dm', '1'),
        ('include_can_media_tag', '1'),
        ('skip_status', '1'),
        ('cards_platform', 'Web-12'),
        ('include_cards', '1'),
        ('include_ext_alt_text', 'true'),
        ('include_quote_count', 'true'),
        ('include_reply_count', '1'),
        ('tweet_mode', 'extended'),
        ('include_entities', 'true'),
        ('include_user_entities', 'true'),
        ('include_ext_media_color', 'true'),
        ('include_ext_media_availability', 'true'),
        ('send_error_codes', 'true'),
        ('simple_quoted_tweet', 'true'),
        ('include_tweet_replies', 'true'),
        ('count', f'{self.tweet_count}'),  # number of tweets to fetch
        ('userId', self.config.User_id),
        ('ext', 'mediaStats,highlightedLabel'),
    )
    try:
        response = loads(
            requests.get(f'https://api.twitter.com/2/timeline/profile/{self.config.User_id}.json',
                         headers=self.header, params=params, proxies=self.proxy).text)
    except Exception as e:
        raise RequestException(f'Request url failed:{e}')
    else:
        Logger().info('Fetched {} tweets for user {}'.format(
            len(response['timeline']['instructions'][0]['addEntries']['entries']), self.username))
        for entry in response['timeline']['instructions'][0]['addEntries']['entries']:
            try:
                entry_id = str(entry['sortIndex'])
                tweet_single = response['globalObjects']['tweets'][entry_id]
                yield _Tweet(tw=tweet_single, username=self.username)  # TODO : future implementation
            except KeyError:
                # non-tweet entries (e.g. cursors) have no matching tweet object; skip them
                pass
def Tweet(config):
    Logger().warning(__name__ + ':Tweet')
    config.Following = False
    config.Followers = False
    config.Profile = False
    config.Topic_Profile = False
    config.Tweet = True
    return TwGreb(config).run()
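# Usage sketch (assumption, not from the source): a minimal driver showing how the
# Tweet() entry point might be called. The attribute names below mirror the ones
# this module reads (Username, User_id, Tweet_count, Proxy, Cookie); the Config
# class itself and the values are hypothetical.
if __name__ == '__main__':
    config = Config()
    config.Username = 'jack'   # screen name to scrape
    config.User_id = None      # resolved from Username inside run()
    config.Tweet_count = 40
    config.Proxy = None
    config.Cookie = ''
    for tweet in Tweet(config):  # Tweet() returns the get_tweet() generator
        print(tweet)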
def process_request(request):
    """
    Session middleware: reject any non-login request whose sid cookie
    cannot be found in the cache.
    :param request:
    :return:
    """
    # print "start", time.time()
    if "login" in request.path:
        return
    # request.COOKIES["sid"] = "9342c00a6cb65a2d35e2bd48cc2ab163"
    sid = request.COOKIES.get("sid")
    content = cache.get(sid)
    if content:
        chinese_name = content.get("chinese_name")
        Logger().debug("{0}: request, url is: {1}".format(
            chinese_name, request.path.encode("utf-8")))
        request.COOKIES["chinese_name"] = chinese_name
    else:
        return my_response(code=-1, msg="Login timed out!")
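# Sketch (assumption): my_response is not defined in this section; in a Django
# middleware like the one above it could plausibly be a thin JsonResponse wrapper
# along these lines. The envelope shape is a guess, not the project's actual API.
from django.http import JsonResponse

def my_response(code=0, msg="", data=None):
    # uniform JSON envelope: {"code": ..., "msg": ..., "data": ...}
    return JsonResponse({"code": code, "msg": msg, "data": data})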
def following(self, cursor=None):
    '''
    Fetch the accounts this user is following (requires a login cookie).
    Sleeps 1s on every call to reduce the risk of the account being banned.
    :return:
    '''
    time.sleep(1)
    Logger().warning(__name__ + ':TwGreb:following')
    if cursor:
        params = (
            ('variables',
             '{"userId":"%s","count":20,"cursor":"%s","withHighlightedLabel":false,"withTweetQuoteCount":false,"includePromotedContent":false,"withTweetResult":false,"withUserResult":false}' % (
                 self.config.User_id, cursor)),
        )
    else:
        params = (
            ('variables',
             '{"userId":"%s","count":20,"withHighlightedLabel":false,"withTweetQuoteCount":false,"includePromotedContent":false,"withTweetResult":false,"withUserResult":false}' % (
                 self.config.User_id)),
        )
    headers = self.header
    headers['cookie'] = self.config.Cookie
    # the ct0 cookie value doubles as the CSRF token
    try:
        headers['x-csrf-token'] = str(re.findall(r'ct0=(.*?);', headers['cookie'])[0]).strip()
    except IndexError:
        headers['x-csrf-token'] = headers['cookie'].split('ct0=')[1].strip()
    if headers['x-csrf-token'] == '':
        raise TwGrebtException('Please enter the correct cookie')
    try:
        response = requests.get('https://twitter.com/i/api/graphql/kr_QEk14pqAKwSD8DIWycA/Following',
                                proxies=self.proxy, headers=headers, params=params)
    except Exception as e:
        raise RequestException(f'Request url failed:{e}')
    else:
        if 'errors' in response.text:
            msg = f'Error response:{response.text}'
            raise TwGrebtException(msg)
        instructions = response.json()['data']['user']['following_timeline']['timeline']['instructions']
        # the entries instruction sits at index 2 when extra instructions are present
        index = 2 if len(instructions) > 1 else 0
        users_msg = [User(usermsg, user_tpye='followers')
                     for usermsg in instructions[index]['entries'][:20]]
        # entry 20 is the bottom cursor used to request the next page
        cursors = instructions[index]['entries'][20]['content']['value']
        return users_msg, cursors
def get_following(self):
    Logger().warning(__name__ + ':TwGreb:get_following')
    cursor = None
    follower_msg = list()
    if self.followers_count:
        # each page returns 20 users; keep paging until we have at least enough
        for _ in range(int(self.followers_count / 20) + 1):
            users_msg, cursor = self.following(cursor)
            follower_msg.extend(users_msg)
        # trim the overshoot from the last page
        return follower_msg[:self.followers_count]
    return follower_msg
def __init__(self, config):
    Logger().warning(__name__ + ':TwGreb:__init__')
    self.config = config
    self.token = guest_token.Token(self.config)
    self.token.refresh()
    self.header = {
        'authorization': bearer,
        'x-guest-token': self.config.Guest_token
    }
    self.proxy = {
        'http': self.config.Proxy,
        'https': self.config.Proxy
    }
    self.username = self.config.Username
    self.tweet_count = self.config.Tweet_count
    self.cookie = self.config.Cookie
    self.followers_count = self.config.Followers_count
def get_topic_user(self):
    '''
    Fetch N user profiles under a topic/search (N is the number of displayed
    tweets; the number of users returned is usually not equal to N).
    :param search:
    :return: User instances
    '''
    Logger().warning(__name__ + ':TwGreb:get_topic_user')
    params = (
        ('include_profile_interstitial_type', '1'),
        ('include_blocking', '1'),
        ('include_blocked_by', '1'),
        ('include_followed_by', '1'),
        ('include_want_retweets', '1'),
        ('include_mute_edge', '1'),
        ('include_can_dm', '1'),
        ('include_can_media_tag', '1'),
        ('skip_status', '1'),
        ('cards_platform', 'Web-12'),
        ('include_cards', '1'),
        ('include_ext_alt_text', 'true'),
        ('include_quote_count', 'true'),
        ('include_reply_count', '1'),
        ('tweet_mode', 'extended'),
        ('include_entities', 'true'),
        ('include_user_entities', 'true'),
        ('include_ext_media_color', 'true'),
        ('include_ext_media_availability', 'true'),
        ('send_error_codes', 'true'),
        ('simple_quoted_tweet', 'true'),
        ('q', self.config.Topic_search),
        ('count', f'{self.config.Topic_count}'),
        ('query_source', 'typed_query'),
        ('pc', '1'),
        ('spelling_corrections', '1'),
        ('ext', 'mediaStats,highlightedLabel'),
    )
    try:
        response = loads(
            requests.get('https://twitter.com/i/api/2/search/adaptive.json', headers=self.header,
                         params=params, proxies=self.proxy).text)
    except Exception as e:
        raise RequestException(f'Request url failed:{e}')
    else:
        for user_data in response['globalObjects']['users'].values():
            yield User(ur=user_data, user_tpye='topic')
def get_user_id(self):
    '''
    Fetch a single user's profile by screen name.
    :return: a list containing one User instance
    '''
    Logger().warning(__name__ + ':TwGreb:get_user_id')
    params = {'screen_name': self.username, 'withHighlightedLabel': False}
    _url = 'https://api.twitter.com/graphql/jMaTS-_Ea8vh9rpKggJbCQ/UserByScreenName?variables={}' \
        .format(self.dict_to_url(params))
    try:
        response = requests.get(_url, headers=self.header, proxies=self.proxy)
    except Exception as e:
        raise RequestException(f'Request url failed:{e}')
    else:
        if 'errors' in response.text:
            msg = f'Error response:{response.text}'
            raise TwGrebtException(msg)
        return [User(loads(response.text))]
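# Sketch (assumption): dict_to_url is referenced above but not defined in this
# section. For a GraphQL endpoint that takes a ?variables=<json> query string,
# it plausibly JSON-encodes the dict and percent-escapes the result:
from json import dumps
from urllib.parse import quote

def dict_to_url(self, params):
    # serialize the variables dict to JSON, then URL-encode it
    return quote(dumps(params))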
def run(config):
    model = get_model(config).to(device)
    # model_params = [{'params': model.encoder.parameters(), 'lr': config.OPTIMIZER.ENCODER_LR},
    #                 {'params': model.decoder.parameters(), 'lr': config.OPTIMIZER.DECODER_LR}]
    optimizer = get_optimizer(config, model.parameters())
    # optimizer = get_optimizer(config, model_params)

    checkpoint = utils.checkpoint.get_initial_checkpoint(config)
    if checkpoint is not None:
        last_epoch, score, loss = utils.checkpoint.load_checkpoint(config, model, checkpoint)
    else:
        print('[*] no checkpoint found')
        last_epoch, score, loss = -1, -1, float('inf')
    print('last epoch:{} score:{:.4f} loss:{:.4f}'.format(last_epoch, score, loss))

    optimizer.param_groups[0]['initial_lr'] = config.OPTIMIZER.LR
    # optimizer.param_groups[0]['initial_lr'] = config.OPTIMIZER.ENCODER_LR
    # optimizer.param_groups[1]['initial_lr'] = config.OPTIMIZER.DECODER_LR

    scheduler = get_scheduler(config, optimizer, last_epoch)
    if config.SCHEDULER.NAME == 'multi_step':
        # fast-forward the LR to where a fresh multi_step schedule would be at last_epoch
        milestones = scheduler.state_dict()['milestones']
        step_count = len([i for i in milestones if i < last_epoch])
        optimizer.param_groups[0]['lr'] *= scheduler.state_dict()['gamma'] ** step_count
        # optimizer.param_groups[1]['lr'] *= scheduler.state_dict()['gamma'] ** step_count
    if last_epoch != -1:
        scheduler.step()

    log_train = Logger()
    log_val = Logger()
    log_train.open(os.path.join(config.TRAIN_DIR, 'log_train.txt'), mode='a')
    log_val.open(os.path.join(config.TRAIN_DIR, 'log_val.txt'), mode='a')

    train_loader = get_dataloader(config, 'train', transform=Albu(config.ALBU))
    val_loader = get_dataloader(config, 'val')

    train(config, model, train_loader, val_loader, optimizer, scheduler,
          log_train, log_val, last_epoch + 1, score, loss)
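# Sketch (assumption): a standalone illustration of the multi_step fast-forward
# above. Resuming at epoch 25 with milestones [10, 20] and gamma 0.1 means two
# milestones have already passed, so the LR must be scaled by 0.1**2 before
# training continues. None of these numbers come from the source config.
import torch

def fast_forward_lr(optimizer, milestones, gamma, last_epoch):
    # count how many milestones the resumed run has already crossed
    step_count = len([m for m in milestones if m < last_epoch])
    for group in optimizer.param_groups:
        group['lr'] *= gamma ** step_count

opt = torch.optim.SGD([torch.nn.Parameter(torch.zeros(1))], lr=0.1)
fast_forward_lr(opt, milestones=[10, 20], gamma=0.1, last_epoch=25)
print(opt.param_groups[0]['lr'])  # ~0.001 (0.1 * 0.1**2)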
def run(config):
    model = get_model(config).to(device)
    optimizer = get_optimizer(config, model.parameters())

    checkpoint = utils.checkpoint.get_initial_checkpoint(config)
    if checkpoint is not None:
        last_epoch, score, loss = utils.checkpoint.load_checkpoint(
            config, model, checkpoint)
    else:
        print('[*] no checkpoint found')
        last_epoch, score, loss = -1, -1, float('inf')
    print('last epoch:{} score:{:.4f} loss:{:.4f}'.format(
        last_epoch, score, loss))

    optimizer.param_groups[0]['initial_lr'] = config.OPTIMIZER.LR
    scheduler = get_scheduler(config, optimizer, last_epoch)
    if config.SCHEDULER.NAME == 'multi_step':
        milestones = scheduler.state_dict()['milestones']
        step_count = len([i for i in milestones if i < last_epoch])
        optimizer.param_groups[0]['lr'] *= scheduler.state_dict()['gamma'] ** step_count
    if last_epoch != -1:
        scheduler.step()

    # writer = SummaryWriter(os.path.join(config.TRAIN_DIR, 'logs'))
    log_train = Logger()
    log_val = Logger()
    log_train.open(os.path.join(config.TRAIN_DIR, 'log_train.txt'), mode='a')
    log_val.open(os.path.join(config.TRAIN_DIR, 'log_val.txt'), mode='a')

    augmentation = Albu_Seg() if config.TASK == 'seg' else Albu_Cls()
    train_loader = get_dataloader(config, 'train', transform=augmentation)
    val_loader = get_dataloader(config, 'val')

    train(config, model, train_loader, val_loader, optimizer, scheduler,
          log_train, log_val, last_epoch + 1, score, loss)
def main():
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
    model = efficientnet(True, image_size=image_size, num_classes=len(labels))
    # resume training from a checkpoint
    # load_params(model, "XXX.pth")
    # freeze_layers(model, ["conv_first", "layer1"])
    pg0, pg1, pg2 = [], [], []  # bn_weight, weight, bias
    for k, v in model.named_modules():
        # bias
        if hasattr(v, 'bias') and isinstance(v.bias, nn.Parameter):
            pg2.append(v.bias)  # biases. no decay
        # weight
        if isinstance(v, nn.BatchNorm2d):
            pg0.append(v.weight)  # no decay
        elif hasattr(v, 'weight') and isinstance(v.weight, nn.Parameter):
            pg1.append(v.weight)  # apply decay
    optim = torch.optim.SGD(pg0, 0, 0.9)  # bn_weight
    optim.add_param_group({'params': pg1, 'weight_decay': 1e-4})  # add pg1 with weight_decay
    optim.add_param_group({'params': pg2})  # add pg2 (biases)
    del pg0, pg1, pg2

    train_dataset = get_dataset_from_pickle(
        os.path.join(dataset_dir, pkl_folder, train_pickle_fname), train_transform)
    val_dataset = get_dataset_from_pickle(
        os.path.join(dataset_dir, pkl_folder, val_pickle_fname), test_transform)
    train_loader = DataLoader(train_dataset, batch_size, True, pin_memory=True, num_workers=num_workers)
    val_loader = DataLoader(val_dataset, batch_size, False, pin_memory=True, num_workers=num_workers)

    acc_counter = AccCounter(labels)
    saver = Saver(model)
    save_dir = saver.save_dir
    print("Config: %s" % comment, flush=True)
    with open(os.path.join(save_dir, "config.txt"), "w") as f:
        for k, v in comment.items():
            f.write("%s: %s\n" % (k, v))
    writer = SummaryWriter(logdir=save_dir)
    logger = Logger(50, writer)
    checker = Checker({"Test": Tester(model, val_loader, device, acc_counter, "all")},
                      saver, 1, 0, logger)
    lr_scheduler = LRScheduler(optim, lr_func)
    trainer = Trainer(model, train_loader, loss_fn, optim, device, lr_scheduler, logger, checker)
    trainer.train(epochs)
    writer.close()
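# Sketch (assumption): a quick check of the three-way parameter split built in
# main() above -- BatchNorm weights and all biases get no weight decay, ordinary
# weights get 1e-4. The tiny model here is only for illustration.
import torch
import torch.nn as nn

model = nn.Sequential(nn.Conv2d(3, 8, 3, bias=True), nn.BatchNorm2d(8))
pg0, pg1, pg2 = [], [], []
for v in model.modules():
    if hasattr(v, 'bias') and isinstance(v.bias, nn.Parameter):
        pg2.append(v.bias)
    if isinstance(v, nn.BatchNorm2d):
        pg0.append(v.weight)
    elif hasattr(v, 'weight') and isinstance(v.weight, nn.Parameter):
        pg1.append(v.weight)
optim = torch.optim.SGD(pg0, 0, 0.9)
optim.add_param_group({'params': pg1, 'weight_decay': 1e-4})
optim.add_param_group({'params': pg2})
for i, g in enumerate(optim.param_groups):
    print(i, len(g['params']), g.get('weight_decay', 0))
# group 0: bn weights (decay 0), group 1: conv weights (decay 1e-4), group 2: biases (decay 0)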
def run(self):
    if self.config.Username is not None and self.config.User_id is None:
        Logger().warning(__name__ + ':TwGreb:main:username')
        self.config.User_id = self.get_user_id()[0].id
        if self.config.User_id is None:
            raise ValueError("Cannot find twitter account with name = " + self.config.Username)
    # todo
    if self.config.Following:
        Logger().warning(__name__ + ':TwGreb:main:follow')
        return self.get_following()
    elif self.config.Profile:
        Logger().warning(__name__ + ':TwGreb:main:profile')
        return self.get_user_id()
    elif self.config.Topic_Profile:
        Logger().warning(__name__ + ':TwGreb:main:topic_profile')
        return self.get_topic_user()
    elif self.config.Followers:
        return Logger().warning(__name__ + ':TwGreb:main:Followers Not online')
    elif self.config.Tweet:
        Logger().warning(__name__ + ':TwGreb:main:Tweet')
        return self.get_tweet()
    else:
        Logger().warning(__name__ + ':TwGreb:main:no-more-tweets')
def run(config):
    # TODO: change to get_model
    sm.set_framework('tf.keras')  # segmentation_models 2.0 tf.keras support
    backbone = 'mobilenetv2'
    model = sm.Unet(backbone, input_shape=(256, 256, 3), encoder_weights=None,
                    activation='sigmoid')
    # alternatives: activation='identity', decoder_attention_type='scse', encoder_weights='imagenet'
    model.summary()

    # TODO: optimizer change
    # optimizer = tf.keras.optimizers.Adam(learning_rate_schedule)
    optimizer = tf.keras.optimizers.Adam(learning_rate=config.OPTIMIZER.LR)

    # loss
    criterion = FocalLoss()  # DiceLoss() / tf.keras.losses.BinaryCrossentropy()

    checkpoint = None
    # checkpoint = utils.checkpoint.get_initial_checkpoint(config)
    if checkpoint is not None:
        last_epoch, score, loss = utils.checkpoint.load_checkpoint(config, model, checkpoint)
        # utils.checkpoint.load_checkpoint_legacy(config, model, checkpoint)
    else:
        print('[*] no checkpoint found')
        last_epoch, score, loss = -1, -1, float('inf')
    print('last epoch:{} score:{:.4f} loss:{:.4f}'.format(last_epoch, score, loss))

    # optimizer.param_groups[0]['initial_lr'] = config.OPTIMIZER.LR
    writer = SummaryWriter(os.path.join(config.TRAIN_DIR + config.RECIPE, 'logs'))
    log_train = Logger()
    log_val = Logger()
    log_train.open(os.path.join(config.TRAIN_DIR + config.RECIPE, 'log_train.txt'), mode='a')
    log_val.open(os.path.join(config.TRAIN_DIR + config.RECIPE, 'log_val.txt'), mode='a')

    # train_dataset = Dataset(config, 'train', None)
    # train_loader = train_dataset.DataGenerator(config.DATA_DIR, batch_size=config.TRAIN.BATCH_SIZE, shuffle=True)
    train_loader = BatchGenerator(config, 'train', config.TRAIN.BATCH_SIZE, None)
    train_datasize = len(train_loader)  # train_dataset.get_length()
    # val_dataset = Dataset(config, 'val', None)
    # val_loader = val_dataset.DataGenerator(config.DATA_DIR, batch_size=config.TRAIN.BATCH_SIZE, shuffle=False)
    val_loader = BatchGenerator(config, 'val', config.EVAL.BATCH_SIZE, None)
    val_datasize = len(val_loader)  # val_dataset.get_length()

    # TODO: add transform
    train(config, model, train_loader, val_loader, optimizer, log_train, log_val,
          last_epoch + 1, score, loss, writer, (train_datasize, val_datasize), criterion)
    model.save_weights("model.h5")
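# Sketch (assumption): FocalLoss is referenced above but not defined in this
# section. A binary focal loss for tf.keras could look like this; the alpha/gamma
# defaults are the common ones from Lin et al., not taken from the source.
import tensorflow as tf

class FocalLoss(tf.keras.losses.Loss):
    def __init__(self, alpha=0.25, gamma=2.0, **kwargs):
        super().__init__(**kwargs)
        self.alpha = alpha
        self.gamma = gamma

    def call(self, y_true, y_pred):
        y_true = tf.cast(y_true, y_pred.dtype)
        eps = tf.keras.backend.epsilon()
        y_pred = tf.clip_by_value(y_pred, eps, 1.0 - eps)
        # p_t: the model's probability for the true class
        p_t = y_true * y_pred + (1.0 - y_true) * (1.0 - y_pred)
        alpha_t = y_true * self.alpha + (1.0 - y_true) * (1.0 - self.alpha)
        # down-weight easy examples by (1 - p_t)^gamma
        return -alpha_t * tf.pow(1.0 - p_t, self.gamma) * tf.math.log(p_t)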
def followers(self):
    Logger().warning(__name__ + ':TwGreb:followers')
    # TODO : future implementation
    pass