def handle(self):
    """Read one raw HTTP request, extract the User-Agent's OS family,
    and feed it to learn() (mode 0) or protect() (any other mode).

    NOTE(review): assumes the whole header fits in one 1024-byte recv and
    that a 'User-Agent:' header is present — confirm against callers.
    """
    self.header = self.request.recv(1024).strip()
    # Slice out everything from 'User-Agent:' onward, then keep the text
    # between the first space and the first CR (the header value).
    raw_ua = self.header[self.header.find('User-Agent:'):]
    raw_ua = raw_ua[raw_ua.find(' ') + 1:raw_ua.find('\r')]
    parsed = user_agent_parser.Parse(raw_ua)
    client_ip = self.client_address[0]
    os_family = parsed['os']['family']
    if mode == 0:
        learn(client_ip, os_family)
    else:
        protect(client_ip, os_family)
def _render_error_page(status_code):
    """Render the default-theme error page for *status_code*.

    Returns a ``(body, status_code)`` tuple suitable as a Flask response.
    """
    tx_id = get_tx_id()
    parsed_ua = user_agent_parser.Parse(request.headers.get('User-Agent', ''))
    body = render_theme_template(
        'default',
        'errors/error.html',
        status_code=status_code,
        analytics_ua_id=current_app.config['EQ_UA_ID'],
        account_service_url=cookie_session.get('account_service_url'),
        ua=parsed_ua,
        tx_id=tx_id)
    return body, status_code
def is_safari(request):
    """Return whether the request's browser is Safari (or any iOS browser).

    Returns ``None`` when the optional ``ua_parser`` dependency is not
    installed, otherwise a boolean.
    """
    try:
        from ua_parser import user_agent_parser
    except ImportError:  # pragma: no cover
        return None
    ua_string = request.META.get("HTTP_USER_AGENT", "")
    parsed = user_agent_parser.Parse(ua_string)
    if parsed["user_agent"]["family"] == "Safari":
        return True
    return parsed["os"]["family"] == "iOS"
def new_refresh_token(uci):
    """Authorisation point - used to get a refresh token.

    Creates a refresh-token record in UCI config, sets it as an HttpOnly
    cookie scoped to the access-token endpoint, and returns a summary dict.
    Returns a 400 string response on UCI failure or a missing/unparseable
    User-Agent header (TypeError from Parse(None)).
    """
    try:
        client_ip = request.environ.get("REMOTE_ADDR")
        host = request.environ.get("HTTP_HOST")
        devices = get_devices(uci, True)
        # Best-effort match of the caller's IP to a known device id.
        try:
            device_id = next(
                device["id"] for device in devices
                if "ipAddress" in device and device["ipAddress"] == client_ip)
        except StopIteration:
            device_id = None
        # May receive None if the header is absent; Parse(None) raises
        # TypeError, which is handled by the outer except below.
        ua_dict = user_agent_parser.Parse(
            request.environ.get("HTTP_USER_AGENT"))
        created_token = {
            ".type": "refresh_token",
            "id": uuid4().hex,
            "user": str(request.get_user()["id"]),
            "token": uuid4().hex,
            # ua-parser returns None for unknown fields; store "Other".
            "ua_brand": ua_dict["device"]["brand"] or "Other",
            "ua_family": ua_dict["device"]["family"] or "Other",
            "ua_model": ua_dict["device"]["model"] or "Other",
            "ua_os": ua_dict["os"]["family"] or "Other",
            "ua_agent": ua_dict["user_agent"]["family"] or "Other",
            "device_id": device_id if device_id else "None"
        }
        uci.add_config(REST_API_PKG, created_token)
        uci.persist(REST_API_PKG)
        # next line - see https://github.com/bottlepy/bottle/pull/983 until 0.13 release
        Morsel._reserved["same-site"] = "SameSite"  # pylint: disable=protected-access
        response.set_cookie(f"refresh_{AUTH_APP.ib_identifier}",
                            created_token["token"],
                            path="/api/auth/access_token",
                            domain=host,
                            httponly=True,
                            same_site="strict")
        return {
            "id": created_token["id"],
            "user": created_token["user"],
            "userAgent": {
                "device": {
                    "brand": created_token["ua_brand"],
                    "family": created_token["ua_family"],
                    "model": created_token["ua_model"]
                },
                "os": created_token["ua_os"],
                "agent": created_token["ua_agent"]
            },
            "deviceId": device_id
        }
    except (UciException, TypeError):
        response.status = 400
        return "Error creating a refresh token"
def get_contents(self, path, version):
    """Fetch *path*/*version* from S3 and return it as a Flask response.

    Falls back to a ``.j2`` (Jinja2 template) variant of the path when the
    plain object is missing; templates are rendered with the parsed
    request User-Agent available as ``userAgent``. Returns ('', 304) on an
    ETag match, a redirect when the helper supplies a URL, or the raw
    object body otherwise.
    """
    s3 = self.get_s3()
    prefix = self.get_prefix()
    file = prefix + path + '/' + version
    obj = None
    file_obj = None
    # NOTE(review): never read after initialization — looks vestigial.
    should_translate = False
    # Probe for the plain (non-template) object; any failure means
    # "not found" and triggers the .j2 fallback below.
    try:
        file_obj = self.get_file(s3, file)
    except:
        pass
    if file_obj is None:
        file = prefix + path + ".j2/" + version
        try:
            file_obj = self.get_file(s3, file, max_size=None)
            obj = file_obj.get('file', None)
            if file_obj.get('match', False):
                # Conditional request hit (ETag match) — not modified.
                return '', 304
            else:
                contents = obj['Body'].read(
                    obj['ContentLength']).decode('UTF-8')
                user_agent_string = request.headers.get('User-Agent')
                parsed_user_agent = user_agent_parser.Parse(
                    user_agent_string)
                j2_dict = {}
                j2_dict['userAgent'] = parsed_user_agent
                jinja_env = Environment(loader=BaseLoader)
                template = jinja_env.from_string(contents)
                parsed_contents = template.render(**j2_dict)
                r = make_response(parsed_contents)
                r.headers["Content-Type"] = obj['ContentType']
                r.headers['ETag'] = obj['ETag']
                return r
        except Exception as e:
            # NOTE(review): returning None from a view yields a server
            # error and discards `e` — consider logging / a 404 instead.
            return None
    elif file_obj.get('match', False):
        return '', 304
    elif file_obj.get('url', None) is not None:
        # Helper provided a direct (e.g. presigned) URL — redirect to it.
        return redirect(file_obj.get('url'), 302)
    else:
        obj = file_obj.get('file')
        contents = obj['Body'].read(obj['ContentLength'])
        r = make_response(contents)
        r.headers["Content-Type"] = obj['ContentType']
        r.headers['ETag'] = obj['ETag']
        return r
def get_segments(uid, ua):
    """Map a raw user-agent string to a list of ``(segment_name, uid)``
    pairs for iPhone devices, Firefox browsers and Windows OSes."""
    parsed = user_agent_parser.Parse(ua)
    checks = (
        ('seg_iphone', parsed['device']['family'] == 'iPhone'),
        ('seg_firefox', parsed['user_agent']['family'] == 'Firefox'),
        ('seg_windows', parsed['os']['family'] == 'Windows'),
    )
    return [(segment, uid) for segment, hit in checks if hit]
def parse_ua(line):
    """Print device, OS and browser info extracted from a user agent.

    :param line: city_id and user_agent, separated by a tab
    :return: None; prints ``city_id  device  os  browser`` tab-separated
    """
    city_id, ua = line.split('\t')
    parsed = user_agent_parser.Parse(ua)
    fields = (
        city_id,
        parsed['device']['family'],
        parsed['os']['family'],
        parsed['user_agent']['family'],
    )
    print('\t'.join(fields))
def featurize(fingerprint, target):
    """Compare two fingerprint dicts and return (matches, feature_vector).

    Both inputs are shallow-copied, enriched with parsed ``os``/``browser``
    sections from their ``ua`` strings, then compared field by field.
    ``feature_vector`` is the 0/1 encoding of ``matches`` values, in
    insertion order.
    """
    fingerprint = copy(fingerprint)
    target = copy(target)
    for record in (fingerprint, target):
        parsed = user_agent_parser.Parse(record["ua"])
        record["os"] = parsed["os"]
        record["browser"] = parsed["user_agent"]

    def same(section, field):
        # True when both records agree on one parsed-UA field.
        return fingerprint[section][field] == target[section][field]

    def ip_prefix(ip, n):
        # First n dotted octets of an IPv4 address.
        return ".".join(ip.split(".")[:n])

    matches = {}
    matches["uid"] = fingerprint["uid"] == target["uid"]
    # Version matches are cumulative: a major match requires the family
    # to match too, and so on down to the patch level.
    matches["os_family"] = same("os", "family")
    matches["os_major"] = same("os", "major") and matches["os_family"]
    matches["os_minor"] = same("os", "minor") and matches["os_major"]
    matches["os_exact"] = same("os", "patch") and matches["os_minor"]
    matches["browser_family"] = same("browser", "family")
    matches["browser_major"] = same("browser", "major") and matches["browser_family"]
    matches["browser_minor"] = same("browser", "minor") and matches["browser_major"]
    matches["browser_exact"] = same("browser", "patch") and matches["browser_minor"]
    matches["ip_exact"] = fingerprint["ip"] == target["ip"]
    matches["ip_3"] = ip_prefix(fingerprint["ip"], 3) == ip_prefix(target["ip"], 3)
    matches["ip_2"] = ip_prefix(fingerprint["ip"], 2) == ip_prefix(target["ip"], 2)
    matches["screen"] = (fingerprint["swidth"] == target["swidth"]) and \
        (fingerprint["sheight"] == target["sheight"])
    feature_vector = [1 if hit else 0 for hit in matches.values()]
    return matches, feature_vector
def xiamiplayer(id):
    """Render the Xiami music player page for song *id*.

    Old IE (< 9) is redirected to a Flash player; otherwise the song
    metadata and lyrics are fetched from Xiami's playlist XML endpoint
    and passed to the ``xiamiplayer.html`` template.
    """
    ua = user_agent_parser.Parse(request.headers.get('User-Agent'))
    try:
        version = int(ua['user_agent'].get('major'))
    except (TypeError, ValueError):
        # Missing or non-numeric major version — assume a modern browser.
        version = 9
    if ua['user_agent'].get('family') == 'IE' and version < 9:
        flashurl = 'http://www.xiami.com/res/app/img/swf/weibo.swf?dataUrl=http://www.xiami.com/app/player/song/id/{0}/type/7/uid/0'.format(
            id)
        return redirect(flashurl, code=303)
    # Xiami blocks desktop UAs on this endpoint; impersonate an iPhone.
    headers = {
        'User-Agent':
        'Mozilla/5.0 (iPhone; CPU iPhone OS 7_0 like Mac OS X; en-us) AppleWebKit/537.51.1 (KHTML, like Gecko) Version/7.0 Mobile/11A465 Safari/9537.53',
        'Referer': 'http://www.xiami.com/song/playlist/id/' + id
    }
    url = 'http://www.xiami.com/song/playlist/id/' + id
    try:
        r = requests.get(url, headers=headers)
        # BUG FIX: was `r.encode = 'uft-8'`, which set a nonexistent
        # attribute (and typo'd the codec); requests uses `.encoding`.
        r.encoding = 'utf-8'
    except Exception:
        return '连接虾米服务器失败'
    try:
        info = xmltodict.parse(r.text)
        track = info['playlist']['trackList']['track']
        songurl = xiamidecode(track.get('location'))
        songpic = track.get('album_pic').replace('.jpg', '_2.jpg')
        title = track.get('title')
        singer = track.get('artist')
        lyricurl = track.get('lyric')
    except Exception:
        return '获取歌词信息失败,请检查是否有该歌曲ID'
    try:
        # BUG FIX: `lyric` was the Response object itself, so the `in`
        # test and `.replace` chain always raised and the fallback lyric
        # was used; `.text` gives the actual lyric string.
        lyric = requests.get(lyricurl, headers=headers).text
        if '[' not in lyric:
            lyric = "[00:00.00]" + title
        lyric = lyric.replace('''\'''', ''' \\\' ''').replace(
            '\n', '#').replace('\r', '').replace('######', '#').replace(
                '######', '#').replace('####', '#').replace(
                    '###', '#').replace('##', '#')
    except Exception:
        lyric = "[00:00.00]" + title
    return render_template('xiamiplayer.html',
                           songurl=songurl,
                           songpic=songpic,
                           title=title,
                           singer=singer,
                           lyric=lyric,
                           id=id)
def apply_ua(row):
    """Append shrunken-hash UA features from *row* into the global
    ``ua_dic`` columns, plus the row's ``IsBot`` label.

    Rows whose user agent cannot be parsed are skipped entirely.
    """
    try:
        parsed = user_agent_parser.Parse(row["Request_UserAgent"])
    except:  # noqa: E722 — NOTE(review): intentionally best-effort; skips any bad row
        return
    for section in parsed:
        for field in parsed[section]:
            column = section + "_" + field
            # Only collect columns that ua_dic was initialized with.
            if ua_dic.get(column) is None:
                continue
            ua_dic[column].append(shrink_hash(parsed[section][field]))
    ua_dic["IsBot"].append(row["IsBot"])
def render_template(template_name):
    """Render *template_name* with the session theme, parsed User-Agent,
    transaction id and survey metadata from the cookie session."""
    tx_id = get_tx_id()
    parsed_ua = user_agent_parser.Parse(request.headers.get('User-Agent', ''))
    theme = cookie_session.get('theme', 'default')
    safe_title = TemplateRenderer.safe_content(
        cookie_session.get('survey_title', ''))
    return render_theme_template(
        theme,
        template_name=template_name,
        analytics_ua_id=current_app.config['EQ_UA_ID'],
        ua=parsed_ua,
        tx_id=tx_id,
        account_service_url=cookie_session.get('account_service_url'),
        survey_title=safe_title)
def get_browser_name(user_agent_string):
    '''Return just the browser name.

    unknown user agents will be reported as "other". The Firefox OS
    built-in browser is also reported as "other"; tablets get a
    " tablet" suffix.
    '''
    parsed = user_agent_parser.Parse(user_agent_string)
    name = parsed.get('user_agent').get('family').lower()
    device = parsed.get('device').get('model')
    is_fxos_builtin = (name == 'firefox mobile'
                       and parsed.get('os').get('family') == 'Firefox OS')
    if is_fxos_builtin:
        name = 'other'
    if device == 'Tablet':
        name += " " + device.lower()
    return name
def check_redirect_needed(original_action, *args, **kw):
    """Wrap a controller action with mobile/light-site redirect logic.

    Works under both Flask and Pylons (detected via the presence of
    ``request.blueprint``). Mobile OSes (Android/iOS) may be redirected
    to the light URL; otherwise the original action runs and a force-
    redirect cookie may be set on the response.
    """
    if hasattr(request, 'blueprint'):
        # flask controller
        ua_dict = useragent.Parse(request.user_agent.string)
        is_flask = True
    else:
        # pylons controller
        ua_dict = useragent.Parse(
            request.user_agent if request.user_agent else '')
        is_flask = False
    os = ua_dict.get('os', {}).get('family')  # type: str
    # Flask exposes the query string via full_path; Pylons via path_qs.
    path = request.full_path if is_flask else request.path_qs
    ua_is_mobile = os and os.lower() in {'android', 'ios'}
    should_redirect = __should_redirect(path, ua_is_mobile)
    if should_redirect:
        light_url = switch_url_path(path, False)
        return redirect(light_url)
    else:
        result = original_action(*args, **kw)
        new_cookie_value = __cookie_value_to_set()
        if is_flask and new_cookie_value:
            # Flask actions may return bare strings/tuples; wrap so we
            # can attach the cookie.
            result = make_response(result)
            result.set_cookie(FORCE_REDIRECT_COOKIE, new_cookie_value)
        if not is_flask and new_cookie_value:
            # Pylons sets cookies on the thread-local response object.
            response.set_cookie(FORCE_REDIRECT_COOKIE, new_cookie_value)
        return result
def is_ancient_browser(request):
    """Return True unless the request's browser family and major version
    meet the minimums in ``MinimumFamilyVersion``.

    Unknown families, missing User-Agent headers and unparseable major
    versions are all treated as ancient (True).
    """
    ua = request.headers.get('User-Agent')
    if ua is None:
        return True
    ua = user_agent_parser.Parse(ua)
    ua = ua.get('user_agent')
    if ua is None:
        return True
    family = ua.get('family')
    if family in MinimumFamilyVersion:
        # BUG FIX: ua-parser may return None (or a non-numeric string)
        # for 'major'; int() then raised instead of reporting "ancient".
        try:
            major = int(ua['major'])
        except (TypeError, ValueError):
            return True
        if major >= MinimumFamilyVersion[family]:
            return False
    return True
def handle_device_record(stats, row):
    """Accumulate one log row into the mutable *stats* dict and return it.

    Rows are keyed by "OS family[ major.minor]" and "UA family major.minor".
    API-supported rows update per-(os, ua) counters and sets; unsupported
    rows only record the merged key in stats['unsupported'].
    """
    stats_data = stats['data']
    device_data = user_agent_parser.Parse(row['log_agent'])
    os_key = device_data['os']['family']
    # Only append a version when the parser produced one.
    if device_data['os']['major']:
        os_key += ' {major}.{minor}'.format(**device_data['os'])
    ua_key = '{family} {major}.{minor}'.format(**device_data['user_agent'])
    merged_key = os_key + ' ' + ua_key
    if row['device_model']:
        stats['devices'].add(row['device_model'])
    if row['api_supported']:
        # Lazily create the nested per-OS / per-UA buckets.
        if os_key not in stats_data:
            stats_data[os_key] = OrderedDict()
        if ua_key not in stats_data[os_key]:
            stats_data[os_key][ua_key] = {
                'api_supported': 0,
                'supported_devices': set(),
                'connection_type': set(),
                'downlink_max': set()
            }
        stats['supported'].add(merged_key)
        stats['connection_types'].add(row['connection_type'])
        downlink_max = row['downlink_max']
        # Sentinel values: -1 means undefined, -2 means infinity.
        if downlink_max == -1:
            downlink_max = 'undefined'
        elif downlink_max == -2:
            downlink_max = 'infinity'
        stats['downlink_max'].add(downlink_max)
        stats_data[os_key][ua_key]['api_supported'] += row['api_supported']
        stats_data[os_key][ua_key]['connection_type'].add(
            row['connection_type'])
        stats_data[os_key][ua_key]['downlink_max'].add(downlink_max)
        if row['device_model']:
            stats_data[os_key][ua_key]['supported_devices'].add(
                row['device_model'])
        return stats
    stats['unsupported'].add(merged_key)
    return stats
def process_input(datafile, archive_path, database):
    """Reads files into a dataframe and extract and add country, city &
    user agent parsing. Write data frame to Postgres DB.

    Parameters:
    datafile (str): data file to ingest and process
    archive_path (str): full filepath drop location for processed file(s)
    database (str): name of the local Postgres database to write to

    Returns:
    None

    Side effect: write to Postgres DB; move datafile to archive_path
    """
    df = pd.read_csv(
        datafile,
        compression='gzip',
        names=['date', 'time', 'user_id', 'url', 'ip', 'user_agent'],
        header=None,
        sep='\t')
    # NOTE(review): only the first 1000 rows are processed — confirm this
    # is intentional and not a leftover debugging cap.
    df = df[:1000]
    df['parsed'] = df['user_agent'].apply(lambda x: uap.Parse(x))
    df_parsed = (pd.json_normalize(df['parsed']))
    df_parsed[['country', 'city']] = pd.DataFrame(
        df['ip'].apply(lambda x: get_country_city(x)).to_list())
    # combine data frames
    df_combined = df.join(df_parsed)
    # Normalise column names, e.g. 'user_agent.family' -> 'user_agent_family'.
    # BUG FIX: without regex=False the '.' pattern is a regex wildcard and
    # Series.str.replace rewrites EVERY character of every column name.
    df_combined.columns = df_combined.columns.str.replace(
        ".", "_", regex=False)
    log_df = df_combined[[
        'date', 'time', 'user_id', 'url', 'ip', 'user_agent_family',
        'os_family', 'device_family', 'device_brand', 'device_model',
        'country', 'city'
    ]]
    engine = create_engine(f'postgresql+psycopg2://localhost:5432/{database}')
    log_df.to_sql('logs',
                  con=engine,
                  if_exists='append',
                  chunksize=10000,
                  index=False)
    shutil.move(datafile, archive_path)
    print('\n')
    print(f'"{datafile}" >>> ./archive')
    return None
def parse_ua(user_agent, max_length=800):
    """
    Returns a dict containing the parsed User Agent data from a request's
    UA string. Uses the following format:
    {
        "device_family": "Other",
        "browser_family": "IE",
        "browser_major": "11",
        "browser_minor": "0",
        "os_family": "Windows Vista",
        "os_major": null,
        "os_minor": null,
        "wmf_app_version": "-"
    }

    Raises RuntimeError when the UA string exceeds *max_length* chars.

    App version in user agents is parsed as follows:
    WikipediaApp/5.3.1.1011 (iOS 10.0.2; Phone)
        "wmf_app_version":"5.3.1.1011"
    WikipediaApp/2.4.160-r-2016-10-14 (Android 4.4.2; Phone) Google Play
        "wmf_app_version":"2.4.160-r-2016-10-14"
    """
    if len(user_agent) > max_length:
        raise RuntimeError("User Agent string length ({}) longer "
                           "than the allowed {} chars".format(
                               len(user_agent), max_length))
    parsed_ua = user_agent_parser.Parse(user_agent)
    formatted_ua = {
        'device_family': parsed_ua['device']['family'],
        'browser_family': parsed_ua['user_agent']['family'],
        'browser_major': parsed_ua['user_agent']['major'],
        'browser_minor': parsed_ua['user_agent']['minor'],
        'os_family': parsed_ua['os']['family'],
        'os_major': parsed_ua['os']['major'],
        'os_minor': parsed_ua['os']['minor'],
        # default wmf_app_version is '-'
        'wmf_app_version': '-',
    }
    # is request a bot/spider?
    formatted_ua['is_bot'] = is_bot(formatted_ua['device_family'], user_agent)
    # does the request come from MediaWiki?
    formatted_ua['is_mediawiki'] = is_mediawiki(user_agent)
    marker = 'WikipediaApp/'
    if marker in user_agent:
        first_token = user_agent.split()[0]
        formatted_ua['wmf_app_version'] = first_token.split("/")[1]
    return formatted_ua
def user_agent_information():
    """Function to get user agent information.

    :returns: Dictionary with user agent information (os, browser, device).
    :rtype: dict
    """
    parsed = user_agent_parser.Parse(str(request.user_agent))
    os = format_uap_info(parsed.get('os'))
    browser = format_uap_info(parsed.get('user_agent'))
    device_parts = (
        parsed.get('device').get('family'),
        parsed.get('device').get('brand'),
        parsed.get('device').get('model'),
    )
    # Join only the truthy device fields into one display string.
    device = ' '.join([part for part in device_parts if part])
    return dict(os=os, browser=browser, device=device)
def analyse_client(url, ip, ua, collection):
    """Record one visit (time, geo-location, parsed client) for *url*
    in the given Mongo collection, creating the document on first visit.

    NOTE(review): ``collection.update``/``insert`` are the pre-3.0 pymongo
    APIs — confirm the driver version before migrating to
    ``update_one``/``insert_one``.
    """
    visit = {
        'time': datetime.now(),
        'location': get_loc_from_ip(ip),
        'client': user_agent_parser.Parse(ua),
    }
    existing = collection.find_one({'url': url})
    if existing:
        collection.update({'url': url}, {'$push': {'visit': visit}})
    else:
        collection.insert({
            'url': url,
            'name': '',
            'visit': [visit],
        })
def mapper(self, _, line):
    """Yield ``(city_id<SEP>os, 1)`` for well-formed lines whose bid price
    exceeds PRICE_LIMIT.

    The composite key feeds KeyFieldBasedPartitioner so the reducer can
    be chosen by OS name. Malformed lines bump the 'Incorrect data'
    counter instead of yielding.
    """
    matched = re.match(regex, line)
    if not matched:
        self.increment_counter('Incorrect data', 'Incorrect input line', 1)
        return
    groups = matched.groups()
    city_id = groups[7]
    bid_price = int(groups[19])
    os = user_agent_parser.Parse(groups[4])['os']['family']
    if bid_price > PRICE_LIMIT:
        yield self.KEY_FIELD_SEPARATOR.join((city_id, os)), 1
def get(self):
    """Handle the LinkedIn OAuth callback.

    Requires a 'code' in the stored args; authenticates against the
    backend, then either logs the user in, sends a registered-but-
    unauthenticated user to sign-in, or forwards a new user to
    registration. Always responds with a redirect.
    """
    retval = {}
    # No OAuth code means the provider denied/aborted the flow.
    if self.__args.get('code') is None:
        url = url_for('login', src='linkedin', status='failed')
        return redirect(url)
    if self.__args.get('ip', None) is None:
        self.__args['ip'] = request.remote_addr
    # Record the client's OS family alongside the auth args.
    device = user_agent_parser.Parse(self.__args['ua'])
    self.__args['device'] = device['os']['family']
    auth = Auth().authenticate(self.__args)
    access_token = auth.get('access_token')
    if access_token is not None:
        profile = Profile(access_token)
        user = profile.get()
        user['src'] = 'linkedin'
        if current_user.is_authenticated:
            # Already logged in — just go home.
            retval = url_for('index')
        else:
            login_params = {
                'acct_code': current_app.config['GROUP_CODE'],
                'user_type': 'WEB'
            }
            # Strip the one-shot OAuth params before reusing the args
            # as UserAuth credentials.
            self.__args.pop('code')
            self.__args.pop('state')
            self.__args['id'] = profile.id
            self.__args['accountcode'] = current_app.config['GROUP_CODE']
            self.__args['company_code'] = 'PONOS'
            self.__args['user_type'] = 'WEB'
            self.__args['login_params'] = login_params
            this_user = UserAuth.authenticate(**self.__args)
            if this_user.is_authenticated:
                login_user(this_user)
                retval = url_for('index', src='linkedin', status='success')
            else:
                is_registered = this_user.check_is_registered(user['email'])
                if is_registered:
                    # Known account that failed auth — point at sign-in.
                    retval = url_for('signin', src='linkedin',
                                     status='used', email=user['email'])
                else:
                    user['code'] = request.args.get('code')
                    retval = url_for('register', **user)
    else:
        retval = url_for('signin', src='linkedin', status='failed')
    return redirect(retval)
def add_ua_features(request):
    """Return a deep copy of *request* whose device['ua'] string has been
    replaced by a flattened dict of parsed user-agent fields.

    Missing 'device'/'ua' keys are created (ua defaults to None, in which
    case the copy is returned unchanged). The raw UA string and the
    low-signal minor/patch version fields are dropped from the result.
    """
    request = copy.deepcopy(request)
    device = request.setdefault('device', {})
    ua = device.setdefault('ua', None)
    if ua is None:
        return request
    flat_ua = flatten(user_agent_parser.Parse(ua))
    for dropped in ('string', 'user_agent_minor', 'user_agent_patch',
                    'os_patch', 'os_patch_minor'):
        flat_ua.pop(dropped, None)
    device['ua'] = flat_ua
    return request