def __init__(self, *args, **kwargs):
    """Initialise the module and submit Facebook credentials if a login form is shown.

    All positional and keyword arguments are forwarded unchanged to the
    parent ``__init__``. Afterwards the Facebook front page is requested
    through ``REQ``; when a form with action ``/login/`` is found, the
    credentials from ``conf`` are submitted. If a ``/login/`` form is found
    again and its ``url`` contains ``validate-password``, the same
    credentials are submitted a second time.
    """
    super().__init__(*args, **kwargs)

    REQ.get('https://facebook.com/')
    login_form = REQ.form(action='/login/')
    if not login_form:
        # No login form was found on the page: nothing to submit.
        return

    credentials = {
        'email': conf.FACEBOOK_USERNAME,
        'pass': conf.FACEBOOK_PASSWORD,
    }
    REQ.submit_form(data=credentials, form=login_form)

    # A login form may be offered again after the first submit; resubmit
    # only when its url mentions 'validate-password'.
    followup_form = REQ.form(action='/login/')
    if not followup_form:
        return
    if 'validate-password' in followup_form['url']:
        REQ.submit_form(data=credentials, form=followup_form)
def _get(url, lock=Lock()):
    """Fetch ``url`` via ``REQ``, signing in first when a login page is returned.

    The URL is requested once to probe for the login form markers
    (``id="id_login"`` and ``id="id_password"``). If both are present and
    ``Statistic.LOGGED_IN`` is still false, the login form is submitted while
    holding ``lock``. The URL is then requested again and that response is
    returned; URLs containing ``AJAX`` get an ``x-requested-with`` header and,
    when the ``csrftoken`` cookie is available, an ``x-csrftoken`` header.

    Args:
        url: Address to fetch.
        lock: Lock guarding the sign-in. The default is created once at
            definition time, so all calls relying on it share one lock.

    Returns:
        Whatever ``REQ.get(url, headers=...)`` returns.

    Raises:
        ExceptionParseStandings: On ``FailOnGetResponse`` when the first
            exception argument has no ``code`` equal to 500, or when the
            15th attempt fails.
    """
    max_attempts = 15
    for attempt in range(1, max_attempts + 1):
        try:
            probe = REQ.get(url)
            login_required = 'id="id_login"' in probe and 'id="id_password"' in probe
            if login_required:
                with lock:
                    # Re-check under the lock so the form is submitted once.
                    if not Statistic.LOGGED_IN:
                        REQ.get(Statistic.LOGIN_URL_)
                        REQ.submit_form(
                            {
                                'login': conf.HACKEREARTH_USERNAME,
                                'password': conf.HACKEREARTH_PASSWORD,
                                'signin': 'Log In',
                            },
                            limit=0,
                        )
                        Statistic.LOGGED_IN = True

            headers = {}
            if 'AJAX' in url:
                headers['x-requested-with'] = 'XMLHttpRequest'
                csrftoken = REQ.get_cookie('csrftoken')
                if csrftoken:
                    headers['x-csrftoken'] = csrftoken
            return REQ.get(url, headers=headers)
        except FailOnGetResponse as e:
            cause = e.args[0]
            is_server_error = getattr(cause, 'code', None) == 500
            if attempt == max_attempts or not is_server_error:
                raise ExceptionParseStandings(cause)
            # Back off a little longer after every failed attempt.
            sleep(2 * attempt)
def __init__(self, *args, **kwargs):
    """Initialise the module and submit GeeksforGeeks credentials if a login form is shown.

    All arguments are forwarded to the parent ``__init__``. The auth page is
    then requested; when a form with id ``Login`` is found on it, the
    ``setLoginToken.php`` endpoint is requested and the credentials from
    ``conf`` are submitted to ``auth.php`` using that form.
    """
    super().__init__(*args, **kwargs)

    landing_page = REQ.get('https://auth.geeksforgeeks.org/')
    login_form = REQ.form(page=landing_page, action=None, fid='Login')
    if not login_form:
        # No login form was found on the page: nothing to submit.
        return

    REQ.get('https://auth.geeksforgeeks.org/setLoginToken.php')
    credentials = {
        'user': conf.GEEKSFORGEEKS_USERNAME,
        'pass': conf.GEEKSFORGEEKS_PASSWORD,
    }
    REQ.submit_form(
        url='https://auth.geeksforgeeks.org/auth.php',
        data=credentials,
        form=login_form,
    )
def get_standings(self, users=None, statistics=None):
    """Fetch and parse the "fastest solvers" table for one Project Euler problem.

    When ``self.standings_url`` is empty it is derived from ``self.key``.
    If the fetched page has no ``sign_out`` form, up to 20 sign-in attempts
    are made; each one OCRs a captcha image until a five-digit string is
    read, then submits the sign-in form. After a successful sign-in the
    standings page is fetched again.

    Args:
        users: Not used by this implementation.
        statistics: Not used by this implementation.

    Returns:
        dict with ``result`` (rows keyed by member), ``url`` and
        ``problems``; ``timing_statistic_delta`` is added when fewer than
        100 rows were parsed and the contest started less than 30 days ago.

    Raises:
        ExceptionParseStandings: When all 20 sign-in attempts end with a
            warning message on the response page.
    """
    if not self.standings_url:
        self.standings_url = f'https://projecteuler.net/fastest={self.key}'

    user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36'  # noqa
    page = REQ.get(self.standings_url, headers={'User-Agent': user_agent})

    signed_in = re.search('<form[^>]*action="sign_out"[^>]*>', page)
    if not signed_in:
        for _ in range(20):
            # Keep requesting fresh captchas until OCR yields exactly five digits.
            captcha = None
            while captcha is None:
                nonce = f'{random.random():.16f}'
                image_bytes = REQ.get(f'https://projecteuler.net/captcha/show_captcha.php?{nonce}')
                image = Image.open(io.BytesIO(image_bytes))
                recognized = pytesseract.image_to_string(image, config='--oem 0 --psm 13 digits').strip()
                if re.match('^[0-9]{5}$', recognized):
                    captcha = recognized

            REQ.get('https://projecteuler.net/sign_in')
            page = REQ.submit_form(
                name='sign_in_form',
                action=None,
                data={
                    'username': conf.PROJECTEULER_USERNAME,
                    'password': conf.PROJECTEULER_PASSWORD,
                    'captcha': captcha,
                    'remember_me': '1',
                },
            )
            warning = re.search('<p[^>]*class="warning"[^>]*>(?P<message>[^<]*)</p>', page)
            if not warning:
                # No warning paragraph on the response: stop retrying.
                break
            REQ.print(warning.group('message'))
        else:
            raise ExceptionParseStandings('Did not recognize captcha for sign in')
        page = REQ.get(self.standings_url)

    result = {}

    # The problem title is whatever follows the first '.' in the contest name.
    problem_name = self.name.split('.', 1)[1].strip()
    problems_info = [{'name': problem_name, 'url': self.url}]

    table_match = re.search('<table[^>]*>.*?</table>', page, re.DOTALL)
    rows = parsed_table.ParsedTable(table_match.group(0)) if table_match else ()

    for cells in rows:
        row = OrderedDict()
        row['solving'] = 1

        for header, cell in cells.items():
            if isinstance(cell, list):
                # A list-valued cell is unpacked as (place, country).
                place_cell, country_cell = cell
                row['place'] = re.match('[0-9]+', place_cell.value).group(0)
                country = first(country_cell.column.node.xpath('.//@title'))
                if country:
                    row['country'] = country
            elif header == 'Time To Solve':
                # Parse "<n> <unit>, <n> <unit>, ..." into relativedelta
                # keyword arguments, pluralising unit names when needed.
                units = {}
                for part in cell.value.split(', '):
                    amount, unit = part.split()
                    if unit[-1] != 's':
                        unit += 's'
                    units[unit] = int(amount)
                span = relativedelta(**units)
                now = timezone.now()
                # presumably done to turn the relativedelta into a plain
                # duration that offers total_seconds()
                elapsed = now - (now - span)
                row['penalty'] = f'{elapsed.total_seconds() / 60:.2f}'
            elif header == 'User':
                row['member'] = first(cell.column.node.xpath('.//@title')) or cell.value
            else:
                row[header.lower()] = cell.value

        # Every listed row is recorded as having solved the single problem.
        solved = row.setdefault('problems', {}).setdefault(problem_name, {})
        solved['result'] = '+'
        solved['binary'] = True
        row['_skip_for_problem_stat'] = True

        if 'member' in row:
            result[row['member']] = row

    standings = {
        'result': result,
        'url': self.standings_url,
        'problems': problems_info,
    }

    if len(result) < 100:
        age = timezone.now() - self.start_time
        if age < timedelta(days=1):
            standings['timing_statistic_delta'] = timedelta(minutes=60)
        elif age < timedelta(days=30):
            standings['timing_statistic_delta'] = timedelta(days=1)

    return standings
def get_users_infos(users, resource, accounts, pbar=None):
    """Yield profile info and rating history for each user.

    The settings page is fetched first (submitting the login form when one
    is found) to build a mapping from the numeric country option values to
    their option labels. Each user's profile page and rating API response
    are then fetched by a pool of 8 workers through a rate-limited helper.

    Args:
        users: Iterable of account names to look up.
        resource: Object whose ``profile_url`` is used both as the base for
            the settings URL and as a template formatted with ``account``.
        accounts: Not used by this implementation.
        pbar: Optional progress bar; ``update()`` is called once per user.

    Yields:
        ``{'info': None}`` when the collected info is ``None``,
        ``{'skip': True}`` when it is empty, otherwise a dict with ``info``
        and ``contest_addition_update_params`` carrying the per-contest
        ratings keyed by contest id.
    """
    settings_page = REQ.get(urljoin(resource.profile_url, Statistic.SETTINGS_URL_))
    login_form = REQ.form(action=r'login.php\?action=login')
    if login_form:
        credentials = {
            'username': conf.BESTCODER_AUTHORID,
            'password': conf.BESTCODER_PASSWORD,
            'remember': 'on',
        }
        settings_page = REQ.submit_form(data=credentials, form=login_form)

    country_select = re.search('<select[^>]*id="country"[^>]*>.*?</select>', settings_page, re.DOTALL)
    country_options = re.findall('<option[^>]*value="([0-9]+)"[^>]*>([^<]*)</option>', country_select.group(0))
    countries = dict(country_options)

    @RateLimiter(max_calls=5, period=1)
    def fetch_user(user):
        """Collect ``(user, info, ratings)`` for a single account."""
        profile_url = resource.profile_url.format(account=user)
        profile_page = REQ.get(profile_url)

        # Pairs of adjacent spans: an upper-case label followed by a number.
        counters = re.findall(
            r'<span[^>]*>([A-Z]+)</span>\s*<span[^>]*>([0-9]+)</span>', profile_page)
        info = {label.lower(): int(number) for label, number in counters}

        country_img = re.search(
            '<img[^>]*src="[^"]*country[^"]*([0-9]+)[^"]*"[^>]*alt="country"[^>]*>', profile_page)
        if country_img:
            info['country'] = countries.get(country_img.group(1))

        avatar_img = re.search(
            '<img[^>]*class="img-circle"[^>]*src="([^"]*getAvatar.php[^"]*)"[^>]*>', profile_page)
        if avatar_img:
            info['avatar_url'] = urljoin(profile_url, avatar_img.group(1))

        history = json.loads(REQ.get(Statistic.USER_RATING_API_URL_.format(user)))
        ratings = {}
        previous = None
        for entry in history:
            contest = ratings.setdefault(entry['contestid'], collections.OrderedDict())
            current = int(entry['rating'])
            if previous is not None:
                contest['old_rating'] = previous
                contest['rating_change'] = current - previous
            contest['new_rating'] = current
            previous = current
            info['rating'] = current

        # Without any rating history, drop a 'rating' value if one was
        # picked up from the profile page counters.
        if not ratings:
            info.pop('rating', None)

        return user, info, ratings

    with PoolExecutor(max_workers=8) as executor:
        for user, info, ratings in executor.map(fetch_user, users):
            if pbar:
                pbar.update()

            if info is None:
                yield {'info': None}
            elif not info:
                yield {'skip': True}
            else:
                yield {
                    'info': info,
                    'contest_addition_update_params': {
                        'update': ratings,
                        'by': 'key',
                    },
                }