def parse_pyua(self):
    try:
        # list comprehensions instead of map(): on Python 3 map() is lazy and
        # would store map objects in the DataFrame columns
        self.df_p['pyua_browser'] = [parse(x).browser.family for x in self.ua_in]
        self.df_p['pyua_device'] = [parse(x).device.family for x in self.ua_in]
        self.df_p['pyua_pc'] = [parse(x).is_pc for x in self.ua_in]
        self.df_p['pyua_mob'] = [parse(x).is_mobile for x in self.ua_in]
        self.df_p['pyua_tab'] = [parse(x).is_tablet for x in self.ua_in]
    except Exception as err:
        print("pyua can't parse this string", self.ua_in)
        print(err)
    return self.df_p
def get_user_agent(request):
    # Tries to get UserAgent objects from cache before constructing a UserAgent
    # from scratch because parsing regexes.yaml/json (ua-parser) is slow
    ua_string = request.META.get('HTTP_USER_AGENT', '')
    if cache:
        key = get_cache_key(ua_string)
        user_agent = cache.get(key)
        if user_agent is None:
            user_agent = parse(ua_string)
            cache.set(key, user_agent)
    else:
        user_agent = parse(ua_string)
    return user_agent
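# get_cache_key() is not shown in the snippet above; a minimal sketch of what
# it might look like, assuming the key only needs to be short and safe for the
# cache backend (the prefix and hash choice here are hypothetical):
import hashlib

def get_cache_key(ua_string):
    # hash the raw UA string so the cache key stays short and has no
    # characters that some cache backends reject
    return 'user_agent.%s' % hashlib.md5(ua_string.encode('utf-8')).hexdigest()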
def test(request):
    reg_b = re.compile(r"android|avantgo|blackberry|blazer|compal|elaine|fennec|hiptop|iemobile|ip(hone|od)|iris|kindle|lge |maemo|midp|mmp|opera m(ob|in)i|palm( os)?|phone|p(ixi|re)\/|plucker|pocket|psp|symbian|treo|up\.(browser|link)|vodafone|wap|windows (ce|phone)|xda|xiino", re.I | re.M)
    reg_v = re.compile(r"1207|6310|6590|3gso|4thp|50[1-6]i|770s|802s|a wa|abac|ac(er|oo|s\-)|ai(ko|rn)|al(av|ca|co)|amoi|an(ex|ny|yw)|aptu|ar(ch|go)|as(te|us)|attw|au(di|\-m|r |s )|avan|be(ck|ll|nq)|bi(lb|rd)|bl(ac|az)|br(e|v)w|bumb|bw\-(n|u)|c55\/|capi|ccwa|cdm\-|cell|chtm|cldc|cmd\-|co(mp|nd)|craw|da(it|ll|ng)|dbte|dc\-s|devi|dica|dmob|do(c|p)o|ds(12|\-d)|el(49|ai)|em(l2|ul)|er(ic|k0)|esl8|ez([4-7]0|os|wa|ze)|fetc|fly(\-|_)|g1 u|g560|gene|gf\-5|g\-mo|go(\.w|od)|gr(ad|un)|haie|hcit|hd\-(m|p|t)|hei\-|hi(pt|ta)|hp( i|ip)|hs\-c|ht(c(\-| |_|a|g|p|s|t)|tp)|hu(aw|tc)|i\-(20|go|ma)|i230|iac( |\-|\/)|ibro|idea|ig01|ikom|im1k|inno|ipaq|iris|ja(t|v)a|jbro|jemu|jigs|kddi|keji|kgt( |\/)|klon|kpt |kwc\-|kyo(c|k)|le(no|xi)|lg( g|\/(k|l|u)|50|54|e\-|e\/|\-[a-w])|libw|lynx|m1\-w|m3ga|m50\/|ma(te|ui|xo)|mc(01|21|ca)|m\-cr|me(di|rc|ri)|mi(o8|oa|ts)|mmef|mo(01|02|bi|de|do|t(\-| |o|v)|zz)|mt(50|p1|v )|mwbp|mywa|n10[0-2]|n20[2-3]|n30(0|2)|n50(0|2|5)|n7(0(0|1)|10)|ne((c|m)\-|on|tf|wf|wg|wt)|nok(6|i)|nzph|o2im|op(ti|wv)|oran|owg1|p800|pan(a|d|t)|pdxg|pg(13|\-([1-8]|c))|phil|pire|pl(ay|uc)|pn\-2|po(ck|rt|se)|prox|psio|pt\-g|qa\-a|qc(07|12|21|32|60|\-[2-7]|i\-)|qtek|r380|r600|raks|rim9|ro(ve|zo)|s55\/|sa(ge|ma|mm|ms|ny|va)|sc(01|h\-|oo|p\-)|sdk\/|se(c(\-|0|1)|47|mc|nd|ri)|sgh\-|shar|sie(\-|m)|sk\-0|sl(45|id)|sm(al|ar|b3|it|t5)|so(ft|ny)|sp(01|h\-|v\-|v )|sy(01|mb)|t2(18|50)|t6(00|10|18)|ta(gt|lk)|tcl\-|tdg\-|tel(i|m)|tim\-|t\-mo|to(pl|sh)|ts(70|m\-|m3|m5)|tx\-9|up(\.b|g1|si)|utst|v400|v750|veri|vi(rg|te)|vk(40|5[0-3]|\-v)|vm40|voda|vulc|vx(52|53|60|61|70|80|81|83|85|98)|w3c(\-| )|webc|whit|wi(g |nc|nw)|wmlb|wonu|x700|xda(\-|2|g)|yas\-|your|zeto|zte\-", re.I | re.M)
    ua_string = request.META['HTTP_USER_AGENT']
    user_agent = parse(ua_string)
    # Accessing user agent's browser attributes
    browser = user_agent.browser.family + ' ' + user_agent.browser.version_string
    # Accessing user agent's operating system properties
    os = user_agent.os.family + ' ' + user_agent.os.version_string
    # Accessing user agent's device properties
    device = user_agent.device.family  # e.g. 'iPhone'
    # Checking if it's mobile
    b = reg_b.search(ua_string)
    v = reg_v.search(ua_string[0:4])
    is_mobile = False
    if b or v:
        is_mobile = True
    return HttpResponse("hello " + str(user_agent))
def register_device(request):
    registration_id = request.POST.get('registration_id')
    android_id = request.POST.get('android_id')
    version = request.POST.get('version', '0.3.0')
    user_agent = parse(request.META['HTTP_USER_AGENT'])
    model = user_agent.device.family
    os_version = user_agent.os.version_string
    device_type = 'Mobile' if user_agent.is_mobile else 'Tablet'
    ip = get_client_ip(request)
    logger.debug(
        'register request from %s\nandroid_id = %s\napp_version = %s\n'
        'user_agent = %s\nmodel = %s\nos_version = %s',
        ip, android_id, version, user_agent, model, os_version)
    try:
        device = Device.objects.get(android_id=android_id)
        device.registration_id = registration_id
        device.version = version
        device.model = model
        device.os_version = os_version
        device.type = device_type
        device.active = True
        device.last_seen = timezone.localtime(timezone.now())
        device.save()
        logger.debug('Device %s marked as last seen on %s',
                     device.android_id, device.last_seen)
    except Device.DoesNotExist:
        Device.objects.create(registration_id=registration_id,
                              android_id=android_id,
                              version=version,
                              model=model,
                              os_version=os_version,
                              type=device_type)
        logger.info('Registered new device: %s', android_id)
    return HttpResponse(status=200)
def emailOpen(e):
    d = {}
    if request.cookies.get('LATrackingID'):
        a = modules.getModel(models.App, appid=request.cookies.get('LATrackingID'))
        d['app_id'] = a.id
    d['private_ip'] = request.environ.get('REMOTE_ADDR')
    d['public_ip'] = request.environ.get('HTTP_X_FORWARDED_FOR')
    d['full_url'] = request.environ.get('HTTP_REFERER', '').strip().lower()
    email = db.session.query(models.Email).filter_by(emailid=e).first()
    if email:
        d['email_id'] = email.id
    else:
        return jsonify(**{'status': 'failure', 'description': 'no such email found'})
    if d['public_ip']:
        g = geocoder.ip(d['public_ip'])
        d['lat'], d['lng'] = g.latlng
        d['city'] = g.city
        d['country'] = g.country
        d['state'] = g.state
    d['user_agent'] = request.environ.get('HTTP_USER_AGENT')
    if d['user_agent']:
        user_agent = parse(d['user_agent'])
        d['browser'] = user_agent.browser.family
        d['is_bot'], d['is_mobile'], d['is_tablet'], d['is_pc'] = \
            user_agent.is_bot, user_agent.is_mobile, user_agent.is_tablet, user_agent.is_pc
    p = models.Visit(**d)
    p.date = datetime.now()
    db.session.add(p)
    db.session.commit()
    return jsonify(success=True, description='successfully tracked email')
def getBrowser(userAgent):
    # return None for an empty or missing user agent string
    if userAgent:
        user_agent = parse(userAgent)
        return user_agent.browser.family.lower()
    else:
        return None
def default():
    user_agent_string = request.user_agent.string
    user_agent = parse(user_agent_string)
    if user_agent.is_bot:
        return "Bot query"
    if not session.get('userdata'):
        return redirect("https://apps.facebook.com/mytoptenapp" + url_for('index', _external=False))
    fbdata = session['userdata']
    fb = facebook.GraphAPI(session['token'])
    new_user = False
    user = pg.query(TopTenUser).filter(TopTenUser.facebook_id == str(fbdata['id'])).first()
    if not user:
        user = createUser(fbdata)
        new_user = True
    user.last_login = datetime.now()
    pg.commit()
    topten = (pg.query(TopTen).join(TopTenUser)
              .filter(TopTenUser.facebook_id == user.facebook_id)
              .filter(TopTen.active == True).first())
    if not topten:
        topten = createTopTen(fbdata)
    songlist = topten.songs
    if len(songlist) < NUMSONGS:
        if user_agent.is_mobile:
            return redirect(url_for('makeSongsMob', facebook_id=user.facebook_id, new_user=new_user))
        else:
            return redirect(url_for('makeSongs', facebook_id=user.facebook_id, new_user=new_user))
    else:
        if user_agent.is_mobile:
            return redirect(url_for('showSongsMob', facebook_id=user.facebook_id))
        else:
            return redirect(url_for('showSongs', facebook_id=user.facebook_id))
    return 'Hello World!'
def update_browser_num(list_br, packet):
    if packet is None:
        return [0, 0, 0, 0, 0]
    if len(list_br) == 0:
        list_br = [0, 0, 0, 0, 0]
    if not is_http_get(packet):
        return list_br
    raw_ua = packet[get_http_layer(packet)].user_agent
    parse_ua = parse(raw_ua)
    ua_browser = parse_ua.browser.family
    if "Firefox" in ua_browser:
        list_br[0] = 1
    elif "Chrome" in ua_browser:
        list_br[1] = 1
    elif "Safari" in ua_browser:
        list_br[2] = 1
    elif "Internet Explorer" in ua_browser:
        list_br[3] = 1
    else:
        list_br[4] = 1
    return list_br
def serve(self):
    """Handle the request and serve the response"""
    super(self.__class__, self).serve()
    contexts = (
        ('General INI', self._gather_from_ini(FILE_CONFIGURATION, 'APPLICATION', None)),
        ('Sys', self._gather_members(sys, (
            'builtin_module_names', 'copyright', 'modules', 'meta_path',
            'path_importer_cache'))),
        ('App', self._gather_members(bottle.request.app, ('routes', 'plugins'))),
        ('Routes', self._gather_from_list(bottle.request.app.routes, None)),
        ('Browser', self._gather_members(
            user_agents.parse(bottle.request.environ['HTTP_USER_AGENT']), None)),
        ('Request', self._gather_members(bottle.request, (
            'environ', 'headers', 'route', 'urlparts'))),
        ('Environment', self._gather_from_dict(bottle.request.environ, (
            'beaker.sessions', 'bottle.request', 'bottle.request.json',
            'bottle.request.urlparts', 'bottle.route', 'route.handle',
            'wsgi.errors'))),
        ('Session', self._gather_from_dict(self.session, None)),
        ('Session Object', self._gather_members(self.session.session._sess, (
            'accessed_dict', 'cookie', 'request', 'namespace'))),
        ('Headers', self._gather_from_dict(bottle.request.headers, None)),
        ('Cookies', self._gather_from_dict(bottle.request.cookies, None)),
        ('Query', self._gather_from_dict(bottle.request.query, None)),
        ('Forms', self._gather_from_dict(bottle.request.forms, None)),
    )
    return self.get_template('info.tpl', CONTEXTS=contexts)
def addUserAgentInfo(db):
    uaCompleted = set()
    for ua in list(db.execute("""select useragent from access
                                 except
                                 select useragent from uainfo""")):
        userAgent = ua[0]
        if userAgent in uaCompleted:
            continue
        uaRec = user_agents.parse(userAgent)
        if not uaRec:
            continue
        if uaRec.is_pc:
            browserType = 'Browser'
        elif uaRec.is_mobile:
            browserType = 'Mobile Browser'
        elif uaRec.is_bot:
            browserType = 'Robot'
        else:
            browserType = 'unknown'
        db.execute("""insert or replace into
                      uainfo(useragent, browser_type, ua_name, os_name, os_family)
                      values(?, ?, ?, ?, ?)""",
                   (userAgent, browserType, uaRec.browser.family,
                    '%s %s' % (uaRec.os.family, uaRec.os.version_string),
                    uaRec.os.family))
        uaCompleted.add(userAgent)
        db.commit()  # commit per record in case we exit
    return
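# The uainfo table written above is not defined in the snippet; a minimal
# SQLite schema consistent with the insert statement (table and column names
# come from the queries, types and the primary key are assumptions):
import sqlite3

def create_uainfo_table(db: sqlite3.Connection):
    db.execute("""create table if not exists uainfo(
                      useragent    text primary key,
                      browser_type text,
                      ua_name      text,
                      os_name      text,
                      os_family    text)""")
    db.commit()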
def parse_data(self, data):
    # Get user agent details and parse them
    ua = user_agents.parse(data['user_agent'])
    data['browser'] = ua.browser.family + ' ' + ua.browser.version_string
    data['os'] = ua.os.family
    data['user_agent_name'] = data['user_agent'].split()[0]
    # Get the request name separately from the request
    data['request_name'] = data['request'].split()[0]
    # Convert time into a datetime
    time = data['time'].split()[0]
    try:
        date = datetime.strptime(time, "%d/%b/%Y:%H:%M:%S")
    except ValueError:
        date = "NULL"
    data['time'] = date
    if data["user"] == "-":
        data["user"] = None
    data["status"] = int(data["status"])
    if data["size"] == "-":
        data["size"] = 0
    else:
        data["size"] = int(data["size"])
    if data["referer"] == "-":
        data["referer"] = None
    return data
def connectionMade(self):
    log.debug("HTTP connection made.")
    try:
        user_agent = parse(self.headers['user-agent'])
        self.clientInfo["clientos"] = user_agent.os.family
        self.clientInfo["browser"] = user_agent.browser.family
        try:
            self.clientInfo["browserv"] = user_agent.browser.version[0]
        except IndexError:
            self.clientInfo["browserv"] = "Other"
    except KeyError:
        self.clientInfo["clientos"] = "Other"
        self.clientInfo["browser"] = "Other"
        self.clientInfo["browserv"] = "Other"
    self.clientInfo["clientip"] = self.client.getClientIP()
    self.plugins.hook()
    self.sendRequest()
    self.sendHeaders()
    if self.command == 'POST':
        self.sendPostData()
def is_ios(request):
    ua = request.META.get('HTTP_USER_AGENT')
    user_agent = parse(ua)
    return user_agent.os.family == 'iOS'
def format_record(r, recursive):
    path = r.req["path"]
    if "twostream" in path:
        try:
            path = r.req["referrer"].replace("https://www.govtrack.us", "")
        except:
            pass
    if "?" in path:
        path = path[:path.index("?")]  # ensure no qsargs
    if r.req.get("query"):
        path += "?" + urllib.parse.urlencode({
            k.encode("utf8"): v.encode("utf8")
            for k, v in list(r.req["query"].items())})
    if r.req['agent']:
        ua = str(user_agents.parse(r.req['agent']))
        if ua == "Other / Other / Other":
            ua = "bot"
        ua = re.sub(r"(\d+)(\.[\d\.]+)", r"\1", ua)  # remove minor version numbers
    else:
        ua = "unknown"
    ret = {
        "reqid": r.id,
        "when": r.when.strftime("%b %-d, %Y %-I:%M:%S %p"),
        "netblock": get_netblock_label(r.req['ip']) if r.req['ip'] else None,
        "path": path,
        "query": r.req.get('query', {}),
        "ua": ua,
    }
    if recursive:
        ret["netblock"] = ", ".join(sorted(set(
            get_netblock_label(rr.req["ip"])
            for rr in Sousveillance.objects.filter(subject=r.subject)
            if rr.req["ip"])))
        ret["recent"] = [format_record(rr, False)
                         for rr in Sousveillance.objects.filter(
                             subject=r.subject, id__lt=r.id).order_by('-when')[0:15]]
    return ret
def allItems(request, page):
    itemCount = Item.objects.raw('select id, count(*) count from Item')[0].count
    # if page is a number
    try:
        page = int(request.GET['page'])
        page = pager.pagerInt(page, itemCount)['page']
        start_id = pager.pagerInt(page, itemCount)['start_id']
        prevNext = pager.pagerInt(page, itemCount)['prevNext']
    # if page is not a number
    except:
        page = pager.pagerNotInt()['page']
        start_id = pager.pagerNotInt()['start_id']
        prevNext = pager.pagerNotInt()['prevNext']
    itemList = Item.objects.raw(
        'select id, itemName, itemPrice, mediumImageUrls from Item '
        'ORDER BY itemPrice DESC limit %s, 30;' % start_id)
    c = Context({'typicalCategories': typicalCategories,
                 'searchForm': searchForm,
                 'itemList': itemList,
                 'itemCount': itemCount / 30 + 1,
                 'prevNext': prevNext,
                 'page': page})
    if parse(request.META['HTTP_USER_AGENT']).is_mobile:
        t = loader.get_template('rakuten/spAllItem.html')
    else:
        t = loader.get_template('rakuten/allItem.html')
    return HttpResponse(t.render(c))
def check_cookie_present(request):
    # TODO we need to check the number of redirects in case we end up in a loop for some reason
    if HTTPS_IFRAME_COOKIESETTER_URL_TO_CHECK in request.path \
            and not ((settings.MEDIA_URL and request.path.startswith(settings.MEDIA_URL))
                     or request.path.startswith(settings.STATIC_URL)) \
            and (not HTTPS_IFRAME_COOKIESETTER_ONLY_HTTPS
                 or (HTTPS_IFRAME_COOKIESETTER_ONLY_HTTPS and request.is_secure())) \
            and call_additional_checks():
        # get the url to the cookiesetter view
        cookiesetter_view_path = urlpath()
        user_agent = parse(request.META.get('HTTP_USER_AGENT', ''))
        # these checks come after the initial ones as the browser lookup is expensive
        if user_agent.browser.family in HTTPS_IFRAME_COOKIESETTER_BROWSERS \
                and cookiesetter_view_path not in request.path:
            current_absolute_url = urllib2.quote(request.build_absolute_uri().encode("utf8"))
            cookies_present = True
            for cookie_string in HTTPS_IFRAME_COOKIESETTER_COOKIES:
                try:
                    cookie_token = request.COOKIES[cookie_string]
                except KeyError:
                    cookies_present = False
            if not cookies_present:
                # force the url scheme to http
                redirect_url = '%s?absurl=%s' % (cookiesetter_view_path, current_absolute_url)
                redirect_url = request.build_absolute_uri(redirect_url)
                parsed = urlparse(redirect_url)
                redirect_url = '%s://%s%s?%s' % ('http', parsed.netloc, parsed.path, parsed.query)
                return False, redirect_url
    requested_url = request.build_absolute_uri()
    return True, requested_url
def supported_browser(request):
    if 'supported_browser' not in request.session:
        user_agent = parse(request.META.get('HTTP_USER_AGENT', ''))
        request.session['supported_browser'] = all(
            pass_browser_entry(user_agent, entry)
            for entry in browser_requirements)
    return {'supported_browser': request.session['supported_browser']}
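# pass_browser_entry() and browser_requirements are not shown above; a minimal
# sketch under the assumption that each entry is a (family, minimum major
# version) pair -- both the entry format and the values are hypothetical:
browser_requirements = [('Chrome', 40), ('Firefox', 38)]  # hypothetical values

def pass_browser_entry(user_agent, entry):
    family, min_version = entry
    if user_agent.browser.family != family:
        return True  # a requirement only constrains its own browser family
    version = user_agent.browser.version
    major = version[0] if version else 0
    return major >= min_version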
def index(request, page):
    # build pagination
    # count the items (the same genre id list is used by both queries below)
    genre_ids = '510914, 294456, 101480,204122,506438,511121,101483,563353,200164,511068,204030,101467,511073,511018,563381,204064,506475,510927,204049,563350,204078,204262,204119,204088,204260,563379,511191,302804,563490,563378,101479,204081,204055,563455,563355,200045,563477,563373,204069,204084,101469,553329,204072,511248,204279,511026,563377,101484,511009,563349,101478,511046,204283,302801,510930,563380,563451,563364,563481,204263,200044,507977,204120,101476,553328,506498,506443,204130,204091,200048,510943,563406,201318,563461,204027,204086,204068,511199,204121,563359,101481,302800,510923,563463,506439,204071,204034,510973,563452,563387,204060,204061,563391,511182,511025,204080,563358,563474,563402,511155,511013,563356,563469,511072,563457,563352,204085,563489,563454,511048,200041,563375,204118,204275,204089,563398,563362,510993,510947,204039,563460,204036,511156,563482,563401,511111,511091,204048,563371,511134,510989,553310,510974,563465,511104,511187,200040,563464,101477,563486,101472,506454,204135,204092,204259,511069,204131,506497,510985,563483,563383,563466,510939,563470,204037,510938,200049,563453,563476,563366,511001,204065,302803,204265,506484,511021,204052,563456,204271,511178,563475,204083,563405,511129,204042,511057,563478,511249,302802,204063,204075,511194,563473,510922,563365,563497,553315,511163,511198,204051,511122,563479,511125,511160,563462,563459,511171,511110,511060,553314,511190,510931,511168,563392,204074,563496,563390,511099,510977,204266,506449,511130,510935,204126,511183,511233,563360,511076,563363,563393,511002,563385,563399,510997,506468,101468,511092,204123,511222,511221,506494,563374,511038,511042,563351,204127,510994,563416,563492,563491,563415,563369,510981,511037,101473,302807,511041,510986,506495,563493,204046,510982,510978,511022,511077,511264,204133,204066,563495,563376,511195,563446,506453,511126,511214,511283,511164,511159,511179,563494,511049,563458,511252,511237,511279,511151,506444,563370,511238,563386,302806,511053,511251,204134,506458,302805,511271,511089,506485,511066,510942,511229,511065,511005,506459,563372,511276,563357,506480,563346,511218,511100,511140,563485,510946,553319,563421,563480,510971,511056,204128,511275,511288,553299,563472,563394,204136,511259,510926,553309,563354,563467,563445,511209,511210,506469,511230,563468,511203,204077,506489,506479,511010,511267,511272,511136,511061,511284,511170,510918,511225,553321,563471,510934,511268,553320,204132,511186,511119,563345,563361'
    itemCount = Item.objects.raw(
        'select id, count(*) count from Item where genreId in (%s);' % genre_ids)[0].count
    # if page is a number
    try:
        page = int(request.GET['page'])
        page = pager.pagerInt(page, itemCount)['page']
        start_id = pager.pagerInt(page, itemCount)['start_id']
        prevNext = pager.pagerInt(page, itemCount)['prevNext']
    # if page is not a number
    except:
        page = pager.pagerNotInt()['page']
        start_id = pager.pagerNotInt()['start_id']
        prevNext = pager.pagerNotInt()['prevNext']
    itemList = Item.objects.raw(
        'select id, itemName, itemPrice, mediumImageUrls from Item '
        'where genreId in (%s) ORDER BY itemPrice DESC limit %s, 30;' % (genre_ids, start_id))
    c = Context({'typicalCategories': typicalCategories,
                 'searchForm': searchForm,
                 'itemList': itemList,
                 'itemCount': itemCount / 30 + 1,
                 'prevNext': prevNext,
                 'page': page})
    if parse(request.META['HTTP_USER_AGENT']).is_mobile:
        t = loader.get_template('rakuten/spIndex.html')
    else:
        t = loader.get_template('rakuten/index.html')
    return HttpResponse(t.render(c))
def get(self):
    results = []
    agents = Counter()
    categories = Counter()
    total = 0
    for req in _requests:
        total += 1
        agent = user_agents.parse(req.get('agent', ['-'])[0].replace('"', ''))
        agents[agent.browser.family] += 1
        if agent.is_mobile:
            categories['mobile'] += 1
        elif agent.is_tablet:
            categories['tablet'] += 1
        elif agent.is_pc:
            categories['pc'] += 1
        elif agent.is_bot:
            categories['bot'] += 1
    for key, val in agents.items():
        results.append({'name': key, 'count': val})
    return {
        'data': {
            'count': sum(categories.values()),
            'categories': categories,
            'agents': results
        }
    }
def search(request):
    query = request.GET['query']
    # build pagination
    # count the items
    sql = 'select id, count(*) count from Item where itemName like "%%{}%%"'.format(query.encode('utf-8'))
    itemCount = Item.objects.raw(sql)[0].count
    # if page is a number
    try:
        page = int(request.GET['page'])
        page = pager.pagerInt(page, itemCount)['page']
        start_id = pager.pagerInt(page, itemCount)['start_id']
        prevNext = pager.pagerInt(page, itemCount)['prevNext']
    # if page is not a number
    except:
        page = pager.pagerNotInt()['page']
        start_id = pager.pagerNotInt()['start_id']
        prevNext = pager.pagerNotInt()['prevNext']
    sql = 'select id, itemName, mediumImageUrls, itemPrice from Item where itemName like "%%{}%%"'.format(query.encode('utf-8'))
    searchedItems = Item.objects.raw(sql + ' limit %s, 30;' % start_id)
    if parse(request.META['HTTP_USER_AGENT']).is_mobile:
        t = loader.get_template('rakuten/spSearch.html')
    else:
        t = loader.get_template('rakuten/search.html')
    c = Context({'typicalCategories': typicalCategories,
                 'query': query,
                 'searchedItems': searchedItems,
                 'searchForm': searchForm,
                 'itemCount': itemCount / 30 + 1,
                 'page': page,
                 'start_id': start_id,
                 'prevNext': prevNext})
    return HttpResponse(t.render(c))
def detail(request, item_id):
    # item detail
    item = Item.objects.raw(
        'select id, itemName, itemCaption, mediumImageUrls, format(itemPrice, 0) itemPrice, '
        'affiliateUrl, genreId, getTime, price_sentence, alchol_sentence, capacity_sentence, '
        'twenty_sentence, tweetSentence from Item where id = %s;' % item_id)[0]
    # items in the same category
    category_id = item.genreid
    relatedItems = Item.objects.raw(
        'select id, itemName, format(itemPrice,0) itemPrice, affiliateUrl, mediumImageUrls '
        'from Item where genreId = %s limit 30;' % category_id)
    # determine the current category level
    nowCategory = Rakutencategory.objects.raw(
        'select * from RakutenCategory where Category_id = %s' % category_id)
    # build breadcrumb and related categories
    try:
        breadCrumb = createBreadCrumb.makeBreadCrumb(nowCategory)['breadCrumb']
        bottomCategories = createBreadCrumb.makeBreadCrumb(nowCategory)['bottomCategories']
    # if there is no such category, create empty breadcrumb and related-category lists
    except IndexError:
        bottomCategories = []
        breadCrumb = []
    if parse(request.META['HTTP_USER_AGENT']).is_mobile:
        t = loader.get_template('rakuten/spDetail.html')
    else:
        t = loader.get_template('rakuten/detail.html')
    c = Context({'typicalCategories': typicalCategories,
                 'item': item,
                 'bottomCategories': bottomCategories,
                 'relatedItems': relatedItems,
                 'breadCrumb': breadCrumb,
                 'searchForm': searchForm})
    return HttpResponse(t.render(c))
def parse(self, event):
    if self.field in event:
        ua_str = event[self.field]
        ua = user_agents.parse(ua_str)
        ua_data = {
            'browser': {
                'family': ua.browser.family,
                'version': ua.browser.version_string,
            },
            'os': {
                'family': ua.os.family,
                'version': ua.os.version_string,
            },
            'device': {
                'family': ua.device.family,
                'brand': ua.device.brand,
                'model': ua.device.model,
            },
            'is_mobile': ua.is_mobile,
            'is_tablet': ua.is_tablet,
            'is_touch_capable': ua.is_touch_capable,
            'is_pc': ua.is_pc,
            'is_bot': ua.is_bot,
        }
        if self.out_field:
            event[self.out_field] = ua_data
        else:
            event.update(ua_data)
    return event
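# A quick usage sketch of the user_agents attributes the parser above reads,
# using the standard library API; the printed values are illustrative:
import user_agents

ua = user_agents.parse(
    'Mozilla/5.0 (iPhone; CPU iPhone OS 12_0 like Mac OS X) '
    'AppleWebKit/605.1.15 (KHTML, like Gecko) Version/12.0 Mobile/15E148 Safari/604.1')
print(ua.browser.family, ua.browser.version_string)  # e.g. "Mobile Safari 12.0"
print(ua.device.brand, ua.device.model)              # e.g. "Apple iPhone"
print(ua.is_mobile, ua.is_touch_capable)             # e.g. True True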
def Analytics(REQ):
    from datetime import datetime
    user_agent = parse(REQ.META.get('HTTP_USER_AGENT'))
    Code = REQ.GET.get("sn", "")
    inurl = REQ.get_full_path()
    os = user_agent.os.family
    browser = user_agent.browser.family
    ip = (REQ.META.get('HTTP_X_FORWARDED_FOR')
          and REQ.META.get('HTTP_X_FORWARDED_FOR')
          or REQ.META.get('REMOTE_ADDR'))
    indata = {'ip': ip, 'browser': browser, 'os': os}
    try:
        ebusiness = ebusiness_members.objects.get(code=Code)
        isIn = ebusiness.flow_analytics_set.filter(**indata).order_by('-intime')
        ebusiness_flow = ebusiness.flow_analytics_set
    except ObjectDoesNotExist:
        isIn = flow_analytics.objects.filter(**indata).filter(ebusiness=None).order_by('-intime')
        ebusiness_flow = flow_analytics.objects
    if isIn.count() < 1:
        indata.update({"inurl": inurl, "endurl": inurl})
        CreateIn = ebusiness_flow.create(**indata)
        CreateIn.save()
    else:
        oldTime = isIn.values()[0]['intime'].strftime('%Y%m%d')
        newTime = datetime.now().strftime('%Y%m%d')
        if newTime == oldTime:
            oldID = isIn.values()[0]['id']
            UpdataIn = ebusiness_flow.get(id=oldID)
            UpdataIn.num = UpdataIn.num + 1
            UpdataIn.endurl = inurl
            UpdataIn.save()
        else:
            indata.update({"inurl": inurl, "endurl": inurl})
            CreateIn = ebusiness_flow.create(**indata)
            CreateIn.save()
    return ''
def make_activity_log_entry(user, correct, request):
    try:
        ip_address = request.remote_addr
        user_agent = parse(request.user_agent.string)
        request_json = request.json
        if user_agent.is_mobile:
            device_type = 0
        elif user_agent.is_tablet:
            device_type = 1
        elif user_agent.is_pc:
            device_type = 2
        else:
            device_type = -1
        new_activity_log_entry = ActivityLogEntry(
            correct=correct,
            current_word_index=user.current_word_index,
            datetime=datetime.datetime.now(),
            datetime_quest_started=user.datetime_quest_started,
            datetime_question_started=user.datetime_question_started,
            device_family=user_agent.device.family,
            device_model=user_agent.device.model,
            device_type=device_type,
            ip_address=ip_address,
            is_daily=user.is_on_daily,
            is_timed=user.is_timed,
            latitude=request_json['latitude'],
            longitude=request_json['longitude'],
            number_of_questions=user.number_of_questions,
            user_id=user.user_id
        )
        new_activity_log_entry.save()
    except Exception as ex:
        print(ex)
        print("Failed to make activity log entry.")
def make_quest_log_entry(user, request):
    try:
        ip_address = request.remote_addr
        user_agent = parse(request.user_agent.string)
        request_json = request.json
        if user_agent.is_mobile:
            device_type = 0
        elif user_agent.is_tablet:
            device_type = 1
        elif user_agent.is_pc:
            device_type = 2
        else:
            device_type = -1
        new_quest_log_entry = QuestLogEntry(
            chapter_index=user.chapter_index_id,
            cumulative=user.cumulative,
            datetime_quest_completed=datetime.datetime.now(),
            datetime_quest_started=user.datetime_quest_started,
            device_family=user_agent.device.family,
            device_model=user_agent.device.model,
            device_type=device_type,
            ip_address=ip_address,
            is_daily=user.is_on_daily,
            is_timed=user.is_timed,
            latitude=request_json['latitude'],
            longitude=request_json['longitude'],
            number_of_questions=user.number_of_questions,
            user_id=user.user_id,
            number_correct=user.number_correct
        )
        new_quest_log_entry.save()
    except Exception as ex:
        print(ex)
        print("Failed to make quest log entry.")
def index(groupKey=None):
    if groupKey == 'favicon.ico':
        abort(404)
    # reset the session
    session.clear()
    # ip = socket.gethostbyname(socket.gethostname())
    ip = request.remote_addr
    # https://pypi.python.org/pypi/user-agents/
    # pip install user-agents
    from user_agents import parse
    user_agent = parse(request.user_agent.string)
    if user_agent.is_mobile:
        platform = "mobile"
    else:
        platform = "web"
    accessLog = AccessLog(ip, platform)
    db_session.add(accessLog)
    try:
        db_session.commit()
    except exc.IntegrityError:
        db_session.rollback()
    debug = request.args.get('debug', '')
    email = request.args.get('email', '')
    resp = make_response(render_template('index.html', groupKey=groupKey))
    if email != '':
        resp.set_cookie('email', email)
    if debug != '':
        resp.set_cookie('debug', debug)
    if groupKey is not None:
        resp.set_cookie('groupKey', groupKey)
    return resp
def redirect(request, shorturl):
    # send a 500 error if the shortURL doesn't exist
    try:
        urlObject = ShortUrl.objects.get(shortid=shorturl)
    except:
        return HttpResponseServerError()
    # check the UA of the request
    # user_agents is awesome! (pip install pyyaml ua-parser user-agents)
    userAgent = parse(request.META.get('HTTP_USER_AGENT', ''))
    if urlObject:
        if userAgent.is_mobile:
            urlObject.mobileRedirectCount += 1
            urlObject.save()
            return HttpResponseRedirect(urlObject.fullMobileUrl)
        elif userAgent.is_tablet:
            urlObject.tabletRedirectCount += 1
            urlObject.save()
            return HttpResponseRedirect(urlObject.fullTabletUrl)
        elif userAgent.is_pc:
            urlObject.desktopRedirectCount += 1
            urlObject.save()
            return HttpResponseRedirect(urlObject.fullDesktopUrl)
def index():
    ua = request.headers.get('User-Agent')
    user_agent = parse(ua)
    user = g.user.nickname
    image = g.user.image
    user_id = g.user.id
    return render_template('index.html', title='home', user=user, image=image, id=user_id)
def verify_browser(request, min_versions=None):
    """
    Accepts a request and tags it with browser support info.

    If `min_versions` is passed in, it should be a dictionary of
    'browser': version.
    """
    if not min_versions:
        min_versions = settings.MIN_BROWSER_VERSIONS
    request.user_agent = parse(request.META.get('HTTP_USER_AGENT', ''))
    request.browser_unknown = True
    request.browser_unsupported = False
    for family, version in min_versions.items():
        if request.user_agent.browser.family == family:
            request.browser_unknown = False
            bversion = request.user_agent.browser.version
            if isinstance(bversion, (list, tuple)):
                bversion = bversion[0] if len(bversion) > 0 else 0
            if bversion < version:
                request.browser_unsupported = True
            break
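# A usage sketch for verify_browser(); this MIN_BROWSER_VERSIONS mapping is a
# hypothetical example of the 'browser': version format the docstring
# describes (major version numbers, compared against browser.version[0]):
MIN_BROWSER_VERSIONS = {
    'Chrome': 40,
    'Firefox': 38,
    'IE': 11,
}

# Inside a view or middleware:
#     verify_browser(request, MIN_BROWSER_VERSIONS)
#     if request.browser_unsupported:
#         ...render an upgrade notice...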
def publish(request, dispatcher):
    "initialise req from the Twisted request"
    # First we need to transform the request into our own format.
    # Our format is {key: value, .., cookies: {key: value}, request: request}
    req = Req()
    # retain multiple value args as a list
    req.update(dict([(i[0], len(i[1]) > 1 and i[1] or i[1][0])
                     for i in request.__dict__['args'].items()]))
    req.cookies = request.__dict__['received_cookies'] or {}
    # if we have the relevant modules then add user agent information
    if user_agents:
        ua_string = request.getHeader('user-agent')
        req.user_agent = user_agents.parse(ua_string or '')
    else:
        req.user_agent = None
    req.request = request
    # set up Session cache
    session = request.getSession()
    req.cache = ISessionCache(session)
    # get the domain and port
    req._v_domain = req.get_host().split(":")[0]  # excludes port
    # Now process the request
    path = request.__dict__['path']
    try:
        result = dispatcher.request(path, req)
    except:
        raise
        sys.stderr.write(DATE().time())
        sys.stderr.write(path + '\n')
        result = "request error..."
    return result
def get_browser(ua: str):
    return parse(ua).browser.family
def get_phone(ua: str):
    return parse(ua).device.family
def is_mobile(user_agent):
    user_agent = parse(user_agent)
    return user_agent.is_mobile
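# Usage sketch for the three small helpers above, with an illustrative
# Android UA string (the printed families are examples, not asserted output):
ua = ('Mozilla/5.0 (Linux; Android 9; SM-G960F) AppleWebKit/537.36 '
      '(KHTML, like Gecko) Chrome/74.0.3729.157 Mobile Safari/537.36')
print(get_browser(ua))  # e.g. "Chrome Mobile"
print(get_phone(ua))    # e.g. "Samsung SM-G960F"
print(is_mobile(ua))    # True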
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from user_agents import parse

ua_string = ('Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 '
             '(KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36')
user_agent = parse(ua_string)
bw = user_agent.browser.family    # browser
s = user_agent.os.family          # operating system
juge_pc = user_agent.is_pc        # whether this is a desktop system
phone = user_agent.device.family
print(bw, s, phone)
def ua_parse(ua):
    user_agent = parse(ua)
    return str(user_agent).split(' / ')
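# A usage sketch: user_agents renders a UserAgent as "device / os / browser",
# so ua_parse() returns those three parts (example from the library's docs):
parts = ua_parse('Mozilla/5.0 (iPhone; CPU iPhone OS 5_1 like Mac OS X) '
                 'AppleWebKit/534.46 (KHTML, like Gecko) Version/5.1 '
                 'Mobile/9B179 Safari/7534.48.3')
print(parts)  # ['iPhone', 'iOS 5.1', 'Mobile Safari 5.1']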
def user_agent_info():
    return str(parse(request.headers.get('User-Agent')))
revgc.search([df.latitude[0], df.longitude[0]])
df['dow'] = df['created'].apply(lambda x: pd.to_datetime(x).weekday())
df['is_weekend'] = df['created'].apply(lambda x: 1 if pd.to_datetime(x).weekday() in (5, 6) else 0)

def make_harmonic_features(value, period=24):
    value *= 2 * np.pi / period
    return np.cos(value), np.sin(value)

from scipy.spatial.distance import euclidean

euclidean(make_harmonic_features(23), make_harmonic_features(1))
euclidean(make_harmonic_features(9), make_harmonic_features(11))
euclidean(make_harmonic_features(9), make_harmonic_features(21))

### pip install -q pyyaml ua-parser user-agents
import user_agents

ua = ('Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) '
      'Ubuntu Chromium/56.0.2924.76 Chrome/56.0.2924.76 Safari/537.36')
ua = user_agents.parse(ua)
print('Is a bot? ', ua.is_bot)
print('Is mobile? ', ua.is_mobile)
print('Is PC? ', ua.is_pc)
print('OS Family: ', ua.os.family)
print('OS Version: ', ua.os.version)
print('Browser Family: ', ua.browser.family)
print('Browser Version: ', ua.browser.version)

from sklearn.preprocessing import StandardScaler
from scipy.stats import beta, shapiro
import numpy as np

data = beta(1, 10).rvs(1000).reshape(-1, 1)
shapiro(data)
def parse_useragent(x):
    ua_string = x
    user_agent = parse(ua_string)
    return user_agent
def __init__(self, request, placements, publisher, **kwargs):
    """
    Initialize an ad decision based on the request data.

    :param request: the HttpRequest object with geo data attached from
        GeolocationMiddleware
    :param placements: possible positions for the ad to go
    :param publisher: the publisher requesting an ad
    :param kwargs: Any additional possible arguments for the backend
    """
    self.request = request
    self.user_agent = parse(get_client_user_agent(request))
    self.placements = placements
    self.publisher = publisher
    self.ad_types = [p["ad_type"] for p in self.placements]

    self.country_code = request.geo.country_code
    self.region_code = request.geo.region_code
    self.metro_code = request.geo.metro_code

    # Optional parameters
    self.keywords = kwargs.get("keywords", []) or []
    requested_campaign_types = kwargs.get("campaign_types", []) or []
    if not requested_campaign_types:
        requested_campaign_types = ALL_CAMPAIGN_TYPES

    # Add default keywords from publisher
    if self.publisher.keywords:
        log.debug(
            "Adding default keywords: publisher=%s keywords=%s",
            self.publisher.slug,
            self.publisher.keywords,
        )
        merged_keywords = set(self.keywords) | set(self.publisher.keywords)
        self.keywords = list(merged_keywords)

    # Publishers can request certain campaign types
    # But only if those types are allowed by database settings
    self.campaign_types = []
    if (self.publisher.allow_paid_campaigns
            and PAID_CAMPAIGN in requested_campaign_types):
        self.campaign_types.append(PAID_CAMPAIGN)
    if (self.publisher.allow_affiliate_campaigns
            and AFFILIATE_CAMPAIGN in requested_campaign_types):
        self.campaign_types.append(AFFILIATE_CAMPAIGN)
    if (self.publisher.allow_community_campaigns
            and COMMUNITY_CAMPAIGN in requested_campaign_types):
        self.campaign_types.append(COMMUNITY_CAMPAIGN)
    if (self.publisher.allow_house_campaigns
            and HOUSE_CAMPAIGN in requested_campaign_types):
        self.campaign_types.append(HOUSE_CAMPAIGN)

    # When set, only return a specific ad or ads from a campaign
    self.ad_slug = kwargs.get("ad_slug")
    self.campaign_slug = kwargs.get("campaign_slug")
def form_valid(self, form):
    obj = form.save(commit=False)
    if self.request.user.is_authenticated():
        obj.user = self.request.user
    domain, created = Domain.objects.get_or_create(
        name=obj.domain_name.replace("www.", ""),
        defaults={'url': "http://" + obj.domain_name.replace("www.", "")})
    obj.domain = domain
    if created and self.request.user.is_authenticated():
        p = Points.objects.create(user=self.request.user, domain=domain, score=1)
        messages.success(self.request, 'Domain added! + 1')
    if self.request.POST.get('screenshot-hash'):
        reopen = default_storage.open(
            'uploads/' + self.request.POST.get('screenshot-hash') + '.png', 'rb')
        django_file = File(reopen)
        obj.screenshot.save(self.request.POST.get('screenshot-hash') + '.png',
                            django_file, save=True)
    obj.user_agent = self.request.META.get('HTTP_USER_AGENT')
    obj.save()
    if self.request.user.is_authenticated():
        total_issues = Issue.objects.filter(user=self.request.user).count()
        user_prof = UserProfile.objects.get(user=self.request.user)
        if total_issues <= 10:
            user_prof.title = 1
        elif total_issues <= 50:
            user_prof.title = 2
        elif total_issues <= 200:
            user_prof.title = 3
        else:
            user_prof.title = 4
        user_prof.save()
    if domain.github and os.environ.get("GITHUB_PASSWORD"):
        from giturlparse import parse
        from requests.auth import HTTPBasicAuth
        import json
        import requests
        github_url = domain.github.replace("https", "git").replace("http", "git") + ".git"
        p = parse(github_url)
        url = 'https://api.github.com/repos/%s/%s/issues' % (p.owner, p.repo)
        auth = HTTPBasicAuth(os.environ.get("GITHUB_USERNAME"),
                             os.environ.get("GITHUB_PASSWORD"))
        issue = {
            'title': obj.description,
            'body': "![0](" + obj.screenshot.url + ") http://bugheist.com/issue/" + str(obj.id),
            'labels': ['bug', 'bugheist']
        }
        r = requests.post(url, json.dumps(issue), auth=auth)
        response = r.json()
        obj.github_url = response['html_url']
        obj.save()
    redirect_url = '/report'
    # redirect users to login if not authenticated
    if not self.request.user.is_authenticated():
        # we store the issue id on the user session to assign it as soon as they log in/register
        self.request.session['issue'] = obj.id
        self.request.session['created'] = created
        self.request.session['domain'] = domain.id
        login_url = reverse('account_login')
        return HttpResponseRedirect(u'{}?next={}'.format(login_url, redirect_url))
    # assign issue
    self.process_issue(self.request.user, obj, created, domain)
    return HttpResponseRedirect(self.request.META.get('HTTP_REFERER'))
def lambda_handler(event, context):
    error_count = 0

    ########################
    #### Downloading the new RAW events from S3
    bucket_name = event['Records'][0]['s3']['bucket']['name']
    file_key = event['Records'][0]['s3']['object']['key']
    logger.info('Reading {} from {}'.format(file_key, bucket_name))
    s3.download_file(bucket_name, file_key, '/tmp/file.zip')

    ########################
    #### Getting geolocation DB file
    geoippath = '/tmp/GeoLite2-City.mmdb'
    try:
        s3.download_file(bucket_name, 'GeoLite2-City.mmdb', '/tmp/GeoLite2-City.mmdb')
    except:
        # This link no longer works. TODO: Check alternatives
        url = "https://geolite.maxmind.com/download/geoip/database/GeoLite2-City.tar.gz"
        response = get(url)
        with open('/tmp/GeoLite2-City.tar.gz', 'wb') as file:
            file.write(response.content)
        geofilename = re.compile("GeoLite2-City.mmdb")
        tar = tarfile.open("/tmp/GeoLite2-City.tar.gz")
        for member in tar.getmembers():
            if geofilename.search(member.name):
                geoippath = '/tmp/' + member.name
                tar.extract(member, path='/tmp/')
        tar.close()
        s3.upload_file(geoippath, bucket_name, 'GeoLite2-City.mmdb')

    ########################
    #### Getting column names for all tables in atomic schema
    conn = psycopg2.connect(host=os.environ['POSTGRES_HOST'],
                            database=os.environ['POSTGRES_DATABASE'],
                            user=os.environ['POSTGRES_USER'],
                            password=os.environ['POSTGRES_PASSWORD'])
    sql = ("SELECT t.table_schema, t.table_name, c.column_name "
           "FROM information_schema.tables t "
           "JOIN INFORMATION_SCHEMA.COLUMNS c ON c.table_name = t.table_name "
           "WHERE t.table_schema='atomic' "
           "ORDER BY t.table_name, c.ordinal_position;")
    cur = conn.cursor()
    cur.execute(sql)
    table_list = cur.fetchall()
    cur.close()
    conn.close()
    table_columns = {}
    for e in table_list:
        if e[1] not in table_columns.keys():
            table_columns[e[1]] = []
        table_columns[e[1]].append(e[2])

    ########################
    #### Loading raw events content
    archgz = gzip.open('/tmp/file.zip')
    file_content = archgz.read()
    lines = file_content.split(b'\n')

    ########################
    #### Processing entries and storing them
    header = re.search('#Fields: (.*)', lines[1].decode("utf-8"))
    header = header.group(1).split()
    tprint(file_key, "Processing and enriching raw entries")
    try:
        datvalues = ""
        all_events = []
        geoipdbreader = geoip2.database.Reader(geoippath)
        i = 0
        for l in lines[2:-1]:
            r = re.compile(r'([^\t]*)\t*')
            l = r.findall(l.decode("utf-8"))[:-1]
            collector_tstamp = l[0] + ' ' + l[1]
            refersplitter = re.compile(r'([^/]*)/*')
            referer = refersplitter.findall(l[9])[:-1]
            refr_urlscheme = referer[0][:-1]
            try:
                refr_urlhost = referer[1]
            except:
                refr_urlhost = '-'
            try:
                refr_urlpath = '/' + '/'.join(referer[2:])
            except:
                refr_urlpath = '-'
            querysplitter = re.compile(r'([^\?]*)\?*')
            qryurl = querysplitter.findall(referer[-1])[:-1]
            try:
                refr_urlquery = qryurl[1]
            except IndexError:
                refr_urlquery = '-'
            userag = l[10].replace("%2520", " ")
            useragent = userag
            userag = parse(userag)
            br_name = userag.browser.family + ' ' + userag.browser.version_string
            br_family = userag.browser.family
            br_version = userag.browser.version
            os_family = userag.os.family
            dvce_type = userag.device.family
            dvce_ismobile = userag.is_mobile
            user_ipaddress = l[4]
            #### We determine geolocation info based on user IP.
            #### Set to NULL if no info available on DB
            try:
                geoipdbresult = geoipdbreader.city(l[4])
                geo_country = geoipdbresult.registered_country.iso_code
                if geo_country is None:
                    geo_country = ''
                try:
                    geo_city = geoipdbresult.city.names['en']
                except:
                    geo_city = '-'
                geo_zipcode = geoipdbresult.postal.code
                geo_latitude = geoipdbresult.location.latitude
                geo_longitude = geoipdbresult.location.longitude
                try:
                    geo_region_name = geoipdbresult.subdivisions[0].names['en']
                except:
                    geo_region_name = '-'
                geo_timezone = geoipdbresult.location.time_zone
            except:
                geo_country = ''
                geo_city = ''
                geo_zipcode = ''
                geo_latitude = ''
                geo_longitude = ''
                geo_region_name = ''
                geo_timezone = ''
            # In the rare case latitudes and longitudes are set to None, we reset
            # them to '' (later NULL) to avoid insertion errors
            if geo_latitude is None:
                geo_latitude = ''
            if geo_longitude is None:
                geo_longitude = ''
            urisplt = re.compile(r'([^&]*)&*')
            urispltnodes = urisplt.findall(l[11])[:-1]
            # We store the IP as a hash for privacy
            user_ipaddress = hashlib.sha224(user_ipaddress.encode('utf-8')).hexdigest()
            spvalues = {
                'app_id': '-', 'platform': '-', 'collector_tstamp': collector_tstamp,
                'dvce_created_tstamp': '-', 'event': '-', 'event_id': '-',
                'txn_id': '-', 'name_tracker': '-', 'v_tracker': '-',
                'user_id': '-', 'user_ipaddress': user_ipaddress,
                'user_fingerprint': '-', 'domain_userid': '-',
                'domain_sessionidx': '-', 'network_userid': '-',
                'geo_country': geo_country, 'geo_city': geo_city,
                'geo_zipcode': geo_zipcode, 'geo_latitude': geo_latitude,
                'geo_longitude': geo_longitude, 'geo_region_name': geo_region_name,
                'page_url': '-', 'page_title': '-', 'page_referrer': '-',
                'refr_urlscheme': refr_urlscheme, 'refr_urlhost': refr_urlhost,
                'refr_urlpath': refr_urlpath, 'refr_urlquery': refr_urlquery,
                'se_category': '-', 'se_action': '-', 'se_label': '-',
                'se_property': '-', 'se_value': '-', 'unstruct_event': '-',
                'tr_orderid': '-', 'tr_affiliation': '-', 'tr_total': '-',
                'tr_tax': '-', 'tr_shipping': '-', 'tr_city': '-',
                'tr_state': '-', 'tr_country': '-', 'ti_orderid': '-',
                'ti_sku': '-', 'ti_name': '-', 'ti_category': '-',
                'ti_price': '-', 'ti_quantity': '-', 'pp_xoffset_min': '-',
                'pp_xoffset_max': '-', 'pp_yoffset_min': '-', 'pp_yoffset_max': '-',
                'useragent': unquote(unquote(useragent)), 'br_name': br_name,
                'br_family': br_family, 'br_version': br_version, 'br_lang': '-',
                'br_features_pdf': '-', 'br_features_flash': '-',
                'br_features_java': '-', 'br_features_director': '-',
                'br_features_quicktime': '-', 'br_features_realplayer': '-',
                'br_features_windowsmedia': '-', 'br_features_gears': '-',
                'br_features_silverlight': '-', 'br_cookies': '-',
                'br_colordepth': '-', 'br_viewwidth': '-', 'br_viewheight': '-',
                'os_family': os_family, 'os_timezone': '-',
                'dvce_type': dvce_type, 'dvce_ismobile': dvce_ismobile,
                'dvce_screenwidth': '-', 'dvce_screenheight': '-',
                'doc_charset': '-', 'doc_width': '-', 'doc_height': '-',
                'tr_currency': '-', 'ti_currency': '-',
                'geo_timezone': geo_timezone, 'dvce_sent_tstamp': '-',
                'domain_sessionid': '-', 'event_vendor': '-'
            }
            if len(urispltnodes[0]) > 3:
                for spparams in urispltnodes:
                    spsplitter = re.compile(r'([^=]*)=*')
                    sp = spsplitter.findall(spparams)[:-1]
                    if sp[0] == 'stm':
                        spvalues['dvce_sent_tstamp'] = sp[1]
                    if sp[0] == 'e':
                        spvalues['event'] = sp[1]
                    if sp[0] == 'url':
                        spvalues['page_url'] = unquote(unquote(sp[1]))
                    if sp[0] == 'page':
                        spvalues['page_title'] = sp[1]
                    if sp[0] == 'pp_mix':
                        spvalues['pp_xoffset_min'] = sp[1]
                    if sp[0] == 'pp_max':
                        spvalues['pp_xoffset_max'] = sp[1]
                    if sp[0] == 'pp_miy':
                        spvalues['pp_yoffset_min'] = sp[1]
                    if sp[0] == 'pp_may':
                        spvalues['pp_yoffset_max'] = sp[1]
                    if sp[0] == 'tv':
                        spvalues['v_tracker'] = sp[1]
                    if sp[0] == 'tna':
                        spvalues['name_tracker'] = sp[1]
                    if sp[0] == 'aid':
                        spvalues['app_id'] = sp[1]
                    if sp[0] == 'p':
                        spvalues['platform'] = sp[1]
                    if sp[0] == 'tz':
                        spvalues['os_timezone'] = unquote(unquote(sp[1]))
                    if sp[0] == 'lang':
                        spvalues['br_lang'] = sp[1]
                    if sp[0] == 'cs':
                        spvalues['doc_charset'] = sp[1]
                    if sp[0] == 'f_pdf':
                        spvalues['br_features_pdf'] = sp[1]
                    if sp[0] == 'f_qt':
                        spvalues['br_features_quicktime'] = sp[1]
                    if sp[0] == 'f_realp':
                        spvalues['br_features_realplayer'] = sp[1]
                    if sp[0] == 'f_wma':
                        spvalues['br_features_windowsmedia'] = sp[1]
                    if sp[0] == 'f_dir':
                        spvalues['br_features_director'] = sp[1]
                    if sp[0] == 'f_fla':
                        spvalues['br_features_flash'] = sp[1]
                    if sp[0] == 'f_java':
                        spvalues['br_features_java'] = sp[1]
                    if sp[0] == 'f_gears':
                        spvalues['br_features_gears'] = sp[1]
                    if sp[0] == 'f_ag':
                        spvalues['br_features_silverlight'] = sp[1]
                    if sp[0] == 'res':
                        ressplitter = re.compile(r'([^x]*)x*')
                        res = ressplitter.findall(sp[1])[:-1]
                        spvalues['dvce_screenheight'] = res[1]
                        spvalues['dvce_screenwidth'] = res[0]
                        continue
                    if sp[0] == 'cd':
                        spvalues['br_colordepth'] = sp[1]
                    if sp[0] == 'cookie':
                        spvalues['br_cookies'] = sp[1]
                    if sp[0] == 'eid':
                        spvalues['event_id'] = sp[1]
                    if sp[0] == 'dtm':
                        spvalues['dvce_created_tstamp'] = sp[1]
                    if sp[0] == 'vp':
                        ressplitter = re.compile(r'([^x]*)x*')
                        brdim = ressplitter.findall(sp[1])[:-1]
                        spvalues['br_viewwidth'] = brdim[1]
                        spvalues['br_viewheight'] = brdim[0]
                        continue
                    if sp[0] == 'ds':
                        ressplitter = re.compile(r'([^x]*)x*')
                        docdim = ressplitter.findall(sp[1])[:-1]
                        spvalues['doc_width'] = docdim[1]
                        spvalues['doc_height'] = docdim[0]
                        continue
                    if sp[0] == 'vid':
                        spvalues['domain_sessionidx'] = sp[1]
                    if sp[0] == 'sid':
                        spvalues['domain_sessionid'] = sp[1]
                    if sp[0] == 'duid':
                        spvalues['domain_userid'] = sp[1]
                    if sp[0] == 'fp':
                        spvalues['user_fingerprint'] = sp[1]
                    if sp[0] == 'ue_px':
                        spvalues['unstruct_event'] = sp[1]
                    if sp[0] == 'refr':
                        spvalues['page_referrer'] = unquote(unquote(sp[1]))
                    if sp[0] == 'tid':
                        spvalues['txn_id'] = sp[1]
                    if sp[0] == 'uid':
                        spvalues['user_id'] = sp[1]
                    if (sp[0] == 'nuid') or (sp[0] == 'tnuid'):
                        spvalues['network_userid'] = sp[1]
                    if sp[0] == 'se_ca':
                        spvalues['se_category'] = sp[1]
                    if sp[0] == 'se_ac':
                        spvalues['se_action'] = sp[1]
                    if sp[0] == 'se_la':
                        spvalues['se_label'] = sp[1]
                    if sp[0] == 'se_pr':
                        spvalues['se_property'] = sp[1]
                    if sp[0] == 'se_va':
                        spvalues['se_value'] = sp[1]
                    if sp[0] == 'tr_id':
                        spvalues['tr_orderid'] = sp[1]
                    if sp[0] == 'tr_af':
                        spvalues['tr_affiliation'] = sp[1]
                    if sp[0] == 'tr_tt':
                        spvalues['tr_total'] = sp[1]
                    if sp[0] == 'tr_tx':
                        spvalues['tr_tax'] = sp[1]
                    if sp[0] == 'tr_sh':
                        spvalues['tr_shipping'] = sp[1]
                    if sp[0] == 'tr_ci':
                        spvalues['tr_city'] = sp[1]
                    if sp[0] == 'tr_st':
                        spvalues['tr_state'] = sp[1]
                    if sp[0] == 'tr_co':
                        spvalues['tr_country'] = sp[1]
                    if sp[0] == 'ti_id':
                        spvalues['ti_orderid'] = sp[1]
                    if sp[0] == 'ti_sk':
                        spvalues['ti_sku'] = sp[1]
                    if sp[0] == 'ti_na':
                        spvalues['ti_name'] = sp[1]
                    if sp[0] == 'ti_ca':
                        spvalues['ti_category'] = sp[1]
                    if sp[0] == 'ti_pr':
                        spvalues['ti_price'] = sp[1]
                    if sp[0] == 'ti_qu':
                        spvalues['ti_quantity'] = sp[1]
                    if sp[0] == 'tr_cu':
                        spvalues['tr_currency'] = sp[1]
                    if sp[0] == 'ti_cu':
                        spvalues['ti_currency'] = sp[1]
                    if sp[0] == 'evn':
                        spvalues['event_vendor'] = sp[1]
                    if sp[0] == 'ue_pr':
                        spvalues['unstruct_event_unencoded'] = sp[1]
                    if sp[0] == 'cx':
                        spvalues['context'] = sp[1]
                    # new_line = ''
                    # for key, val in spvalues.items():
                    #     new_line += str(val) + '\t'
                    # datvalues += new_line + '\n'
            all_events.append(spvalues)
            i += 1
    except Exception as e:
        tprint(file_key, "Error: " + str(e))
        error_count += 1
    tprint(file_key, "Processed " + str(i) + " entries")

    ########################
    #### Sorting events by destination and storing corresponding CSV files
    j = 0
    csvs = {}  ## Dictionary to store all CSVs
    tprint(file_key, "Sorting events per destination and storing to CSV")
    for spvalues in all_events:
        try:
            unstruct_event_bool = False
            context_present = False
            custom_schema_str = ''
            j += 1
            for key, val in copy.deepcopy(spvalues).items():
                if val == '-' or val == ():
                    del spvalues[key]
            if 'dvce_created_tstamp' in spvalues:
                try:
                    spvalues['dvce_created_tstamp'] = datetime.datetime.fromtimestamp(
                        int(spvalues['dvce_created_tstamp']) / 1000).strftime('%Y-%m-%d %H:%M:%S')
                except:
                    pass
            if 'dvce_sent_tstamp' in spvalues:
                try:
                    spvalues['dvce_sent_tstamp'] = datetime.datetime.fromtimestamp(
                        int(spvalues['dvce_sent_tstamp']) / 1000).strftime('%Y-%m-%d %H:%M:%S')
                except:
                    pass
            if 'unstruct_event' in spvalues:
                unstruct_event_bool = True
                # decode from base64 and parse into dictionary
                params = base64.urlsafe_b64decode(spvalues['unstruct_event'] + '===').decode("utf-8")
                unstruct_event = json.loads(params)
                del spvalues['unstruct_event']
            elif 'unstruct_event_unencoded' in spvalues:
                # parse into dictionary
                unstruct_event_bool = True
                params = urllib.parse.unquote(
                    urllib.parse.unquote(spvalues['unstruct_event_unencoded']))
                unstruct_event = json.loads(params)
                del spvalues['unstruct_event_unencoded']
            # assign context to a variable
            if 'context' in spvalues:
                context_present = True
                # decode from base64 and parse into dictionary
                context_decoded = base64.urlsafe_b64decode(spvalues['context'] + '===').decode("utf-8")
                context = json.loads(context_decoded)
                del spvalues['context']
            ## In any event we store an atomic.events entry
            columns_names = list(spvalues.keys())
            columns_names_str = ', '.join(columns_names)
            binds_str = ', '.join('%s' for _ in range(len(columns_names)))
            values = [spvalues[column_name] for column_name in columns_names]
            ### Generating CSV for the atomic event
            event_new_line = ''
            for column in table_columns['events']:
                if column in spvalues.keys():
                    event_new_line += str(spvalues[column]).replace("'", r"\'") + '\t'
                else:
                    event_new_line += '\t'
            event_new_line = re.sub('\t$', '\n', event_new_line)
            if "events" not in csvs.keys():
                csvs["events"] = ""
            csvs["events"] += event_new_line
            if unstruct_event_bool:
                unstruct_event['data']['data']['root_id'] = spvalues['event_id']
                # define the corresponding schema name
                if re.search(r'achievement_gui_interaction', unstruct_event['data']['schema']):
                    custom_schema_str = 'io_azarus_achievement_gui_interaction_1'
                if re.search(r'achievement_unlocked', unstruct_event['data']['schema']):
                    custom_schema_str = 'io_azarus_achievement_unlocked_1'
                if re.search(r'email_click', unstruct_event['data']['schema']):
                    custom_schema_str = 'io_azarus_email_click_1'
                if re.search(r'email_opened', unstruct_event['data']['schema']):
                    custom_schema_str = 'io_azarus_email_opened_1'
                if re.search(r'email_sent', unstruct_event['data']['schema']):
                    custom_schema_str = 'io_azarus_email_sent_1'
                if re.search(r'landing_from_email', unstruct_event['data']['schema']):
                    custom_schema_str = 'io_azarus_landing_from_email_1'
                if re.search(r'user_creation', unstruct_event['data']['schema']):
                    custom_schema_str = 'io_azarus_user_creation_1'
                if re.search(r'blockchain_account_creation', unstruct_event['data']['schema']):
                    custom_schema_str = 'io_azarus_blockchain_account_creation_1'
                if re.search(r'user_new_identity', unstruct_event['data']['schema']):
                    custom_schema_str = 'io_azarus_user_new_identity_1'
                if re.search(r'link_click', unstruct_event['data']['schema']):
                    custom_schema_str = 'com_snowplowanalytics_snowplow_link_click_1'
                    # convert camel case fields to snake case
                    for key in copy.deepcopy(unstruct_event['data']['data']).keys():
                        newKey = camel_to_snake(key)
                        unstruct_event['data']['data'][newKey] = unstruct_event['data']['data'].pop(key)
                if re.search(r'stream_watch', unstruct_event['data']['schema']):
                    custom_schema_str = 'io_azarus_stream_watch_1'
                if re.search(r'new_creator_account', unstruct_event['data']['schema']):
                    custom_schema_str = 'io_azarus_new_creator_account_1'
                if re.search(r'stream_session_started', unstruct_event['data']['schema']):
                    custom_schema_str = 'io_azarus_stream_session_started_1'
                if re.search(r'stream_session_ended', unstruct_event['data']['schema']):
                    custom_schema_str = 'io_azarus_stream_session_ended_1'
                if re.search(r'challenge_sent_to_live_channel', unstruct_event['data']['schema']):
                    custom_schema_str = 'io_azarus_challenge_sent_to_live_channel_1'
                if re.search(r'cvp_challenge_started', unstruct_event['data']['schema']):
                    custom_schema_str = 'io_azarus_cvp_challenge_started_1'
                if re.search(r'cvp_challenge_joined_by_player', unstruct_event['data']['schema']):
                    custom_schema_str = 'io_azarus_cvp_challenge_joined_by_player_1'
                if re.search(r'cvp_challenge_resolved', unstruct_event['data']['schema']):
                    custom_schema_str = 'io_azarus_cvp_challenge_resolved_1'
                if len(custom_schema_str) > 0:
                    unstruct_event_data = flatten(unstruct_event['data']['data'])
                    columns_names_custom = list(unstruct_event_data.keys())
                    columns_names_custom_str = ', '.join('"{0}"'.format(c) for c in columns_names_custom)
                    binds_custom_str = ', '.join('%s' for _ in range(len(columns_names_custom)))
                    values_custom = [unstruct_event_data[column_name_custom]
                                     for column_name_custom in columns_names_custom]
                    ##### Generating a CSV file for the corresponding custom event
                    custom_event_new_line = ''
                    if custom_schema_str in table_columns.keys():
                        for column in table_columns[custom_schema_str]:
                            if column in unstruct_event_data.keys():
                                custom_event_new_line += str(unstruct_event_data[column]).replace("'", r"\'") + '\t'
                            else:
                                custom_event_new_line += '\t'
                        custom_event_new_line = re.sub('\t$', '\n', custom_event_new_line)
                        if custom_schema_str not in csvs.keys():
                            csvs[custom_schema_str] = ""
                        csvs[custom_schema_str] += custom_event_new_line
                    else:
                        tprint(file_key, "ERROR " + str(custom_schema_str) + " not in table columns")
            # process context and prepare sql if custom context is found
            custom_cx_sqls = []
            if context_present:
                # iterate over all contexts and check for custom ones
                for cx in context['data']:
                    custom_cx_schema_str = ''
                    # define the corresponding custom context schema name
                    if re.search(r'twitch_user_context', cx['schema']):
                        custom_cx_schema_str = 'io_azarus_twitch_user_context_1'
                    if len(custom_cx_schema_str) > 0:
                        cx['data']['root_id'] = spvalues['event_id']
                        custom_cx_data = flatten(cx['data'])
                        columns_names_custom_cx = list(custom_cx_data.keys())
                        columns_names_custom_cx_str = ', '.join('"{0}"'.format(c) for c in columns_names_custom_cx)
                        binds_cx_custom_str = ', '.join('%s' for _ in range(len(columns_names_custom_cx)))
                        values_custom_cx = [custom_cx_data[column_name_custom_cx]
                                            for column_name_custom_cx in columns_names_custom_cx]
                        ### Generating CSV for the corresponding context events
                        context_event_new_line = ''
                        if custom_cx_schema_str in table_columns.keys():
                            for column in table_columns[custom_cx_schema_str]:
                                if column in custom_cx_data.keys():
                                    context_event_new_line += str(custom_cx_data[column]).replace("'", r"\'") + '\t'
                                else:
                                    context_event_new_line += '\t'
                            context_event_new_line = re.sub('\t$', '\n', context_event_new_line)
                            if custom_cx_schema_str not in csvs.keys():
                                csvs[custom_cx_schema_str] = ""
                            csvs[custom_cx_schema_str] += context_event_new_line
                        else:
                            tprint(file_key, "ERROR " + str(custom_cx_schema_str) + " not in table columns")
        except Exception as e:
            tprint(file_key,
                   "EventError. One event was not processed due to the following error: " + str(e))

    ########################
    #### Connecting to the database
    conn = psycopg2.connect(host=os.environ['POSTGRES_HOST'],
                            database=os.environ['POSTGRES_DATABASE'],
                            user=os.environ['POSTGRES_USER'],
                            password=os.environ['POSTGRES_PASSWORD'])

    ########################
    #### Inserting CSV contents to corresponding tables
    tprint(file_key, "Inserting events into corresponding table")
    insertion_error = False
    if len(csvs.keys()):
        for k in csvs.keys():
            tprint(file_key, "Events " + str(k))
            inserts_file_name = "/tmp/" + k + "_data_" + str(
                hashlib.sha224(csvs[k].encode('utf-8')).hexdigest()) + '.tsv'
            try:
                f = open(inserts_file_name, "w")
                f.write(csvs[k])
                f.close()
                ### INSERTING
                cur = conn.cursor()
                cur.copy_from(open(inserts_file_name, 'r'), "atomic." + str(k), null='', sep='\t')
            except Exception as e:
                tprint(file_key, "Insertion failed for table " + str(k) + ". Error : " + str(e))
                error_count += 1
                insertion_error = True
        if not insertion_error:
            conn.commit()
        else:
            conn.rollback()
        cur.close()
    conn.close()

    ########################
    #### Dumping enriched CSV file to S3
    # tprint(file_key, "Writing enriched data to S3")
    # try:
    #     if len(urispltnodes[0]) > 5:
    #         gz_body = BytesIO()
    #         gz = gzip.GzipFile(None, 'wb', 9, gz_body)
    #         gz.write(datvalues.encode('utf-8'))
    #         gz.close()
    #         s3.put_object(Bucket=bucket_name, Key=file_key.replace("RAW", "Converted"),
    #                       ContentType='text/plain', ContentEncoding='gzip',
    #                       Body=gz_body.getvalue())
    # except Exception as e:
    #     tprint(file_key, "Error: " + str(e))
    #     error_count += 1

    ########################
    #### Writing log file to keep track of processed files
    if not insertion_error:
        tprint(file_key, "Writing log file to S3")
        try:
            if len(urispltnodes[0]) > 5:
                gz_body = BytesIO()
                gz = gzip.GzipFile(None, 'wb', 9, gz_body)
                gz.write("".encode('utf-8'))
                gz.close()
                s3.put_object(Bucket=bucket_name, Key=file_key.replace("RAW", "Processed"),
                              ContentType='text/plain', ContentEncoding='gzip',
                              Body=gz_body.getvalue())
        except Exception as e:
            tprint(file_key, "Error: " + str(e))
            error_count += 1
    if error_count:
        tprint(file_key, "NbErrors: " + str(error_count))
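# camel_to_snake(), flatten() and tprint() are referenced by the handler above
# but not shown; minimal sketches consistent with how they are called (names
# are from the source, the implementations are assumptions):
import re

def camel_to_snake(name):
    # e.g. 'targetUrl' -> 'target_url'
    return re.sub(r'(?<!^)(?=[A-Z])', '_', name).lower()

def flatten(d, parent_key='', sep='_'):
    # collapse a nested dict into a single level: {'a': {'b': 1}} -> {'a_b': 1}
    items = {}
    for k, v in d.items():
        key = parent_key + sep + k if parent_key else k
        if isinstance(v, dict):
            items.update(flatten(v, key, sep))
        else:
            items[key] = v
    return items

def tprint(file_key, message):
    # log line tagged with the file being processed
    print('[%s] %s' % (file_key, message))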
def parse_user_agent():
    """Search for and parse all user agents received via telemetry data that
    are not currently in the analytics.web.user_agent_registry table."""
    engine = snowflake_engine_factory(os.environ, "TRANSFORMER", "util")

    # Create user_agent_registry if it does not already exist.
    query = """
        CREATE TABLE IF NOT EXISTS analytics.WEB.user_agent_registry (
            context_useragent VARCHAR,
            browser           VARCHAR,
            browser_version   VARCHAR,
            operating_system  VARCHAR,
            os_version        VARCHAR,
            device_type       VARCHAR,
            device_brand      VARCHAR,
            device_model      VARCHAR
        );"""
    execute_query(engine, query)

    # Union all sources of context_useragent data that are not currently in
    # the user_agent_registry table.
    query = """
        SELECT * FROM (
            SELECT CONTEXT_USER_AGENT AS CONTEXT_USERAGENT
            FROM RAW.MATTERMOST2.EVENT
            WHERE CONTEXT_USER_AGENT IS NOT NULL
              AND CONTEXT_USER_AGENT NOT IN
                  (SELECT CONTEXT_USERAGENT FROM analytics.WEB.user_agent_registry GROUP BY 1)
              AND TIMESTAMP >= '2020-05-20'
            GROUP BY 1

            UNION ALL

            SELECT CONTEXT_USERAGENT
            FROM RAW.MM_TELEMETRY_PROD.EVENT
            WHERE CONTEXT_USERAGENT IS NOT NULL
              AND CONTEXT_USERAGENT NOT IN
                  (SELECT CONTEXT_USERAGENT FROM analytics.WEB.user_agent_registry GROUP BY 1)
              AND TIMESTAMP >= '2020-05-20'
            GROUP BY 1

            UNION ALL

            SELECT USERAGENT AS CONTEXT_USERAGENT
            FROM RAW.RELEASES.LOG_ENTRIES
            WHERE USERAGENT IS NOT NULL
              AND USERAGENT NOT IN
                  (SELECT CONTEXT_USERAGENT FROM analytics.WEB.user_agent_registry GROUP BY 1)
              AND LOGDATE::date >= '2020-05-19'
            GROUP BY 1

            UNION ALL

            SELECT USERAGENT AS CONTEXT_USERAGENT
            FROM RAW.DIAGNOSTICS.LOG_ENTRIES
            WHERE USERAGENT IS NOT NULL
              AND USERAGENT NOT IN
                  (SELECT CONTEXT_USERAGENT FROM analytics.WEB.user_agent_registry GROUP BY 1)
              AND LOGDATE::date >= '2020-05-19'
            GROUP BY 1

            UNION ALL

            SELECT CONTEXT_USERAGENT
            FROM raw.mattermostcom.pages
            WHERE CONTEXT_USERAGENT IS NOT NULL
              AND CONTEXT_USERAGENT NOT IN
                  (SELECT CONTEXT_USERAGENT FROM analytics.WEB.user_agent_registry GROUP BY 1)
              AND TIMESTAMP >= '2020-05-20'
            GROUP BY 1
        )
        WHERE CONTEXT_USERAGENT NOT IN
              (SELECT CONTEXT_USERAGENT FROM analytics.WEB.user_agent_registry GROUP BY 1)
        GROUP BY 1;
        """
    df = execute_dataframe(engine, query)

    # Check whether there are any new context_useragents to insert.
    if len(df) == 0:
        print("Nothing to do.")
    else:
        # Parse the user agent components and append each component as a
        # column to the existing dataframe.
        browser = []
        browser_family = []
        browser_version = []
        browser_version_string = []
        operating_system = []
        os_family = []
        os_version = []
        os_version_string = []
        device = []
        device_family = []
        device_brand = []
        device_model = []

        for index, row in df.iterrows():
            ua_string = row["CONTEXT_USERAGENT"]
            user_agent = parse(ua_string)

            # Browser attributes.
            browser.append(user_agent.browser)
            browser_family.append(user_agent.browser.family)
            browser_version.append(user_agent.browser.version)
            browser_version_string.append(user_agent.browser.version_string)

            # Operating system properties.
            operating_system.append(user_agent.os)
            os_family.append(user_agent.os.family)
            os_version.append(user_agent.os.version)
            os_version_string.append(user_agent.os.version_string)

            # Device properties.
            device.append(user_agent.device)
            device_family.append(user_agent.device.family)
            device_brand.append(user_agent.device.brand)
            device_model.append(user_agent.device.model)

        browser = pd.Series(browser_family, name="browser")
        browser_version = pd.Series(browser_version_string, name="browser_version")
        op_sys = pd.Series(os_family, name="operating_system")
        os_version = pd.Series(os_version_string, name="os_version")
        device_type = pd.Series(device_family, name="device_type")
        device_brand = pd.Series(device_brand, name="device_brand")
        device_model = pd.Series(device_model, name="device_model")

        agent_lists = [
            browser,
            browser_version,
            op_sys,
            os_version,
            device_type,
            device_brand,
            device_model,
        ]
        for item in agent_lists:
            df = df.join(item)

        connection = engine.connect()

        # 16,384 is Snowflake's insert-statement row limit. To ensure the job
        # executes successfully, check that the data being inserted does not
        # exceed the allowed row limit; if it does, load the dataframe
        # incrementally.
        df[0:16384 if len(df) > 16384 else len(df)].to_sql(
            "user_agent_registry",
            con=connection,
            index=False,
            schema="WEB",
            if_exists="append",
        )

        i = 2  # Default number of inserts; autoincrements if more than 2 are required.
        if i <= math.ceil(len(df) / 16384):
            x = 16384      # Start row of the next dataframe slice to insert.
            y = 16384 * 2  # End row of the next dataframe slice to insert.
            # Loop through the remaining inserts required to finish the job,
            # i.e. load all new user agents found in the source tables.
            for n in range(math.ceil(len(df) / 16384) - 1):
                df[x:y if y < len(df) else len(df)].to_sql(
                    "user_agent_registry",
                    con=connection,
                    index=False,
                    schema="WEB",
                    if_exists="append",
                )
                x = y
                y += 16384
                i += 1

        return print(
            f"Successfully uploaded {len(df)} records to mattermost.user_agent_registry!"
        )
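# Note: the chunked-insert bookkeeping above can be written more directly.
# A minimal equivalent sketch under the same assumption (16,384-row Snowflake
# insert limit; df and connection as in parse_user_agent). pandas' to_sql
# also accepts a chunksize argument that achieves much the same thing.
CHUNK = 16384  # Snowflake insert-statement row limit.

for start in range(0, len(df), CHUNK):
    # Slicing past the end of a dataframe is safe; the last chunk is short.
    df[start:start + CHUNK].to_sql(
        "user_agent_registry",
        con=connection,
        index=False,
        schema="WEB",
        if_exists="append",
    )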
def get_user_agent(request, max_length=200):
    """Return user agent for request."""
    uaobj = user_agents.parse(
        force_text(request.META.get('HTTP_USER_AGENT', ''), errors='replace'))
    return force_text(uaobj)[:max_length]
def hello():
    user_agent = parse(request.headers.get('User-Agent'))
    qmobile = user_agent.is_mobile
    return render_template("index.html", qmobile=qmobile)
def main():
    # LB type picker
    lb_type = st.sidebar.radio(
        "LB Type",
        (
            "AWS Classic",
            "AWS Application",
        ),
    )
    st.markdown(
        "<h1 style='text-align: center;'>AWS LB Log Story</h1>",
        unsafe_allow_html=True,
    )
    uploaded_file = st.file_uploader("Upload your AWS LB log here.")

    # If a file was uploaded, call the matching parse function and show the charts.
    if uploaded_file is not None:
        if lb_type == "AWS Classic":
            df = parse_clb_log_file(uploaded_file.read().decode("utf-8"))
        else:
            df = parse_alb_log_file(uploaded_file.read().decode("utf-8"))

        # If it is an empty or incompatible file, show an error...
        if df.empty:
            st.error("Invalid Format or empty file")
        # ...otherwise show the charts.
        else:
            # Transformations
            # New column with the user agent classification (see function below).
            df["ua"] = df.apply(user_agent_definition, axis=1)
            # This column holds just the browser from the user agent field.
            df["browser"] = df.apply(
                lambda x: parse(x.user_agent).browser.family, axis=1
            )
            # This column holds just the device from the user agent field.
            df["device"] = df.apply(lambda x: parse(x.user_agent).device.family, axis=1)
            # This column holds just the OS from the user agent field.
            df["os"] = df.apply(lambda x: parse(x.user_agent).os.family, axis=1)

            # Plot bots vs devices
            # TODO: pull out the slice which contains the bots
            st.header("Bot vs Devices")
            fig_ua = px.pie(
                df, names="ua", color_discrete_sequence=px.colors.sequential.RdBu
            )
            st.plotly_chart(fig_ua, use_container_width=True)

            # Plot browsers
            st.header("Browser")
            fig_browser = px.pie(
                df,
                names="browser",
                color_discrete_sequence=px.colors.sequential.RdBu,
            )
            st.plotly_chart(fig_browser, use_container_width=True)

            # Plot devices
            st.header("Devices")
            fig_device = px.pie(
                df,
                names="device",
                color_discrete_sequence=px.colors.sequential.RdBu,
            )
            st.plotly_chart(fig_device, use_container_width=True)

            # Plot OS
            st.header("OS")
            fig_os = px.pie(
                df, names="os", color_discrete_sequence=px.colors.sequential.RdBu
            )
            st.plotly_chart(fig_os, use_container_width=True)

            # AWS CLB and ALB have different fields for the backend response
            # code, so only render these charts for Classic LBs.
            if lb_type == "AWS Classic":
                # Backend response codes
                st.header("Backend Response Codes")
                fig_brc = px.pie(
                    df,
                    names="backend_response_code",
                    color_discrete_sequence=px.colors.sequential.RdBu,
                )
                st.plotly_chart(fig_brc, use_container_width=True)

                # LB response codes
                st.header("LB Response Codes")
                fig_erc = px.pie(
                    df,
                    names="elb_response_code",
                    color_discrete_sequence=px.colors.sequential.RdBu,
                )
                st.plotly_chart(fig_erc, use_container_width=True)
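# Note: user_agent_definition is referenced above but not shown. A plausible
# minimal sketch, assuming it buckets each log row as a bot or a coarse
# device type (the function body and the exact labels are assumptions):
from user_agents import parse


def user_agent_definition(row):
    # Classify a log row by its user agent string.
    ua = parse(row.user_agent)
    if ua.is_bot:
        return "bot"
    if ua.is_mobile:
        return "mobile"
    if ua.is_tablet:
        return "tablet"
    if ua.is_pc:
        return "pc"
    return "other"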
def handle(self, *args, **options):
    logger.info('Starting the processing of raw tracks')
    t0 = time.time()
    total_raw_trackers_analysed = 0
    total_beat_trackers_analysed = 0
    admins_warned = False
    running_time = 0

    while True:
        t1 = time.time()
        raw_trackers = RawTracker.objects.filter(processed=False)
        total_raw_trackers_analysed += raw_trackers.count()

        for raw_tracker in raw_trackers:
            # Let's verify the account first.
            try:
                profile = Profile.objects.get(account_id=raw_tracker.account_id)
            except Profile.DoesNotExist:
                raw_tracker.ip = None
                raw_tracker.wrong_account_id = True
                raw_tracker.processed = True
                raw_tracker.save()
                continue

            parsed_url = urlparse(raw_tracker.url)
            queries = QueryDict(parsed_url.query, mutable=False)
            website_url = normalize_website(parsed_url.hostname)
            page = parsed_url.path
            if not page:
                page = '/'
            utm_source = queries.get('utm_source')

            try:
                website = Website.objects.get(website_url=website_url)
            except Website.DoesNotExist:
                raw_tracker.ip = None
                raw_tracker.website_does_not_exist = True
                raw_tracker.processed = True
                raw_tracker.save()
                continue

            if website.owner != profile.user:
                raw_tracker.ip = None
                raw_tracker.wrong_owner = True
                raw_tracker.processed = True
                raw_tracker.save()
                continue  # Do not record a Tracker for a mismatched owner.

            referrer_url = None
            referrer_page = '/'
            if raw_tracker.referrer:
                parsed_referrer = urlparse(raw_tracker.referrer)
                referrer_url = normalize_referrer(
                    normalize_website(parsed_referrer.hostname))
                if 'google' in referrer_url:
                    referrer_url = 'Google'
                if 'bing' in referrer_url:
                    referrer_url = 'Bing'
                referrer_page = parsed_referrer.path

            tracker = Tracker.objects.create(
                url=website_url,
                page=page,
                website=website,
                referrer_url=referrer_url,
                referrer_page=referrer_page,
                timestamp=raw_tracker.timestamp,
                utm_source=utm_source,
                raw_tracker=raw_tracker,
            )

            type_device = None
            if not raw_tracker.dnt:
                try:
                    user_agent = parse(raw_tracker.user_agent)
                except Exception:
                    logger.error('Problem parsing user agent Raw Tracker {}'
                                 .format(raw_tracker.id))
                    user_agent = None
                    type_device = Tracker.UNKNOWN

                if user_agent:
                    operating_system = user_agent.os.family
                    device_family = user_agent.device.family
                    browser = user_agent.browser.family
                    if user_agent.is_mobile:
                        type_device = Tracker.MOBILE
                    elif user_agent.is_tablet:
                        type_device = Tracker.TABLET
                    elif user_agent.is_pc:
                        type_device = Tracker.PC
                    elif user_agent.is_bot:
                        type_device = Tracker.BOT
                    else:
                        type_device = Tracker.UNKNOWN
                    tracker.operating_system = operating_system
                    tracker.device_family = device_family
                    tracker.browser = browser

                tracker.type_device = type_device
                tracker.screen_height = raw_tracker.screen_height
                tracker.screen_width = raw_tracker.screen_width
                tracker.save()

                if profile.can_geolocation and not type_device == Tracker.BOT:
                    if raw_tracker.ip:
                        geo = GeoIP2()
                        try:
                            location_data = geo.city(raw_tracker.ip)
                            tracker.country = location_data.get('country_code', '') or ''
                            tracker.region = location_data.get('region', '') or ''
                        except Exception:
                            pass

            raw_tracker.ip = None
            tracker.save()
            raw_tracker.processed = True
            raw_tracker.save()

        beats = BeatTracker.objects.filter(processed=False)
        qs = beats.values('raw_tracker').annotate(Count('pk'))
        for beat_tracker in qs:
            try:
                tracker = Tracker.objects.get(
                    raw_tracker__id=beat_tracker['raw_tracker'])
                tracker.session_length += 20 * beat_tracker['pk__count']
                tracker.save()
                total_beat_trackers_analysed += beat_tracker['pk__count']
            except Tracker.DoesNotExist:
                logger.warning(
                    f"Processing beat for a non-existing tracker: "
                    f"(id: {beat_tracker['raw_tracker']})")
        beats.update(processed=True)

        t2 = time.time()
        running_time += t2 - t1

        # When one loop finishes, check the Raw Trackers that are already
        # available. If there are more than when the loop started, we are
        # lagging behind the incoming data.
        new_raw_trackers = RawTracker.objects.filter(processed=False).count()
        if new_raw_trackers > raw_trackers.count():
            logger.warning("The raw tracker is lagging behind")
            if not admins_warned:
                message = """
                Dear admin,

                The raw tracker process task has just analysed {} Raw Tracks
                in {} seconds but there are already {} new tracks to analyse.
                The task is running behind and something has to be done.
                """.format(raw_trackers.count(), t2 - t1, new_raw_trackers)
                subject = "[WARNING] Raw tracks processing lagging behind"
                try:
                    mail_admins(subject, message, fail_silently=False)
                    admins_warned = True
                except Exception:
                    logger.error('Failed sending warning e-mail to admins')

        if t2 - t0 > 12 * 60 * 60:
            # Log the statistics every 12 hours.
            logger.info('Processed {} tracks in {}s at a rate of {} tracks/s'
                        .format(total_raw_trackers_analysed, running_time,
                                total_raw_trackers_analysed / running_time))
            running_time = 0
            total_raw_trackers_analysed = 0
            t0 = time.time()

        if t2 - t1 < 60 * 30:
            # Sleep for whatever is left of the 30-minute cycle.
            time.sleep(60 * 30 - (t2 - t1))
        else:
            logger.error('Processing the batch takes longer than 30 minutes')
            # Only sleep 5 seconds and continue; this just releases the CPU.
            time.sleep(5)
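# Note: normalize_website and normalize_referrer are used above but not
# included in this snippet. A hypothetical sketch of what such helpers might
# do (both bodies are assumptions):
def normalize_website(hostname):
    # Canonicalise a hostname for matching against Website rows.
    if not hostname:
        return ''
    hostname = hostname.lower()
    if hostname.startswith('www.'):
        hostname = hostname[len('www.'):]
    return hostname


def normalize_referrer(referrer_host):
    # Referrers are assumed to be matched on the same canonical form.
    return normalize_website(referrer_host)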
def ingress_request(service_uuid, tracker, time, payload, ip, location,
                    user_agent, dnt=False, identifier=""):
    try:
        service = Service.objects.get(pk=service_uuid, status=Service.ACTIVE)
        log.debug(f"Linked to service {service}")

        if dnt and service.respect_dnt:
            return

        ip_data = _geoip2_lookup(ip)
        log.debug("Found geoip2 data")

        # Validate payload
        if payload.get("loadTime", 1) <= 0:
            payload["loadTime"] = None

        # Create or update session. We used to check for identifiers, but
        # that can cause issues when people re-open the page in a new tab,
        # for example. It's better to match sessions solely based on IP and
        # user agent.
        session = Session.objects.filter(
            service=service,
            last_seen__gt=timezone.now() - timezone.timedelta(minutes=10),
            ip=ip,
            user_agent=user_agent,
        ).first()

        if session is None:
            log.debug("Cannot link to existing session; creating a new one...")
            ua = user_agents.parse(user_agent)
            initial = True

            device_type = "OTHER"
            if (
                ua.is_bot
                or (ua.browser.family or "").strip().lower() == "googlebot"
                or (ua.device.family or ua.device.model or "").strip().lower() == "spider"
            ):
                device_type = "ROBOT"
            elif ua.is_mobile:
                device_type = "PHONE"
            elif ua.is_tablet:
                device_type = "TABLET"
            elif ua.is_pc:
                device_type = "DESKTOP"

            session = Session.objects.create(
                service=service,
                ip=ip,
                user_agent=user_agent,
                identifier=identifier.strip(),
                browser=ua.browser.family or "",
                device=ua.device.family or ua.device.model or "",
                device_type=device_type,
                os=ua.os.family or "",
                asn=ip_data.get("asn", ""),
                country=ip_data.get("country", ""),
                longitude=ip_data.get("longitude"),
                latitude=ip_data.get("latitude"),
                time_zone=ip_data.get("time_zone", ""),
            )
        else:
            log.debug("Updating old session with new data...")
            initial = False
            # Update last seen time
            session.last_seen = timezone.now()
            if session.identifier == "" and identifier.strip() != "":
                session.identifier = identifier.strip()
            session.save()

        # Create or update hit
        idempotency = payload.get("idempotency")
        idempotency_path = f"hit_idempotency_{idempotency}"
        hit = None

        if idempotency is not None:
            if cache.get(idempotency_path) is not None:
                cache.touch(idempotency_path, 10 * 60)
                hit = Hit.objects.filter(
                    pk=cache.get(idempotency_path), session=session
                ).first()
                if hit is not None:
                    # There is an existing hit with an identical idempotency
                    # key. That means this is a heartbeat.
                    log.debug("Hit is a heartbeat; updating old hit with new data...")
                    hit.heartbeats += 1
                    hit.last_seen = timezone.now()
                    hit.save()

        if hit is None:
            log.debug("Hit is a page load; creating new hit...")
            # There is no existing hit; create a new one.
            hit = Hit.objects.create(
                session=session,
                initial=initial,
                tracker=tracker,
                # At first, location is given by the HTTP referrer. Some
                # browsers will send the source of the script, however, so we
                # allow JS payloads to include the location.
                location=payload.get("location", location),
                referrer=payload.get("referrer", ""),
                load_time=payload.get("loadTime"),
            )
            # Set idempotency (if applicable)
            if idempotency is not None:
                cache.set(idempotency_path, hit.pk, timeout=10 * 60)
    except Exception as e:
        log.exception(e)
        raise e
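# Note: _geoip2_lookup is an internal helper not shown in this snippet. The
# code above expects it to return a dict with the keys asn, country,
# longitude, latitude and time_zone. A minimal sketch against the geoip2
# reader, assuming local GeoLite2 databases (the paths are assumptions):
import geoip2.database

_city_reader = geoip2.database.Reader("GeoLite2-City.mmdb")
_asn_reader = geoip2.database.Reader("GeoLite2-ASN.mmdb")


def _geoip2_lookup(ip):
    # Return the fields ingress_request reads; empty dict on lookup failure.
    try:
        city = _city_reader.city(ip)
        asn = _asn_reader.asn(ip)
        return {
            "asn": asn.autonomous_system_organization or "",
            "country": city.country.iso_code or "",
            "longitude": city.location.longitude,
            "latitude": city.location.latitude,
            "time_zone": city.location.time_zone or "",
        }
    except Exception:
        return {}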
def group_by_device_type(uas_list):
    '''Group user agents by device type; only "desktop", "mobile" and
    "tablet" are supported.'''
    browsers = ('chrome', 'safari', 'firefox', 'opera', 'ie')
    ud = {
        'desktop': {b: dict() for b in browsers},
        'mobile': {b: dict() for b in browsers},
        'tablet': {b: dict() for b in browsers},
    }

    for ua in uas_list:
        parsed_ua = user_agents.parse(ua)
        os_family = parsed_ua.os.family

        if parsed_ua.is_mobile:
            device_dict = ud['mobile']
        elif parsed_ua.is_tablet:
            device_dict = ud['tablet']
        elif parsed_ua.is_pc:
            device_dict = ud['desktop']
        else:
            logger.warning(
                '[UnsupportedDeviceType] Family: %s, Brand: %s, Model: %s',
                parsed_ua.device.family, parsed_ua.device.brand,
                parsed_ua.device.model)
            continue

        raw_browser_family = parsed_ua.browser.family.lower()
        if 'safari' in raw_browser_family and 'chrome' not in raw_browser_family:
            browser_dict = device_dict['safari']
        elif 'chrome' in raw_browser_family:
            browser_dict = device_dict['chrome']
        elif 'firefox' in raw_browser_family:
            browser_dict = device_dict['firefox']
        elif 'opera' in raw_browser_family or 'opr' in raw_browser_family:
            browser_dict = device_dict['opera']
        elif 'msie' in raw_browser_family or 'ie' in raw_browser_family:
            browser_dict = device_dict['ie']
        else:
            logger.warning('[UnsupportedBrowserType] Family: %s',
                           parsed_ua.browser.family)
            continue

        if os_family in browser_dict:
            browser_dict[os_family].append(ua)
        else:
            browser_dict[os_family] = [ua]

    return ud
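# Quick usage sketch for group_by_device_type (arbitrary example UA strings;
# the exact OS family labels depend on the installed ua-parser regexes):
uas = [
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
    "(KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
    "Mozilla/5.0 (iPhone; CPU iPhone OS 14_6 like Mac OS X) AppleWebKit/605.1.15 "
    "(KHTML, like Gecko) Version/14.1.1 Mobile/15E148 Safari/604.1",
]
grouped = group_by_device_type(uas)
print(grouped['desktop']['chrome'])  # e.g. {'Windows': [<first UA>]}
print(grouped['mobile']['safari'])   # e.g. {'iOS': [<second UA>]}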
def do_GET(self):
    # Load the configuration.
    config = load_yaml_config('./config.yml')['setting']
    # Default test data.
    data = ''
    # Time zone.
    zone = config['zone']
    # Number of days to search back.
    search_time_limit = config['search_time_limit']
    # Maximum number of entries to return.
    search_time_limit_num = config['search_time_limit_num']
    # Current UTC time.
    now = datetime.utcnow()
    # Read the user info.
    user_info = {
        "token": os.environ["DAODAO_TOKEN"],
        "user": config['user'],
        "source": config['repo']
    }
    # Title string in the configured local time zone.
    now_time = time_zone_reset(now, zone, '%Y-%m-%d')
    # Query range.
    since = search_time(search_time_limit)
    print('Local time:', now_time)

    user_agent = user_agents.parse(self.headers['User-Agent'])
    o = parse.urlparse(self.path)
    query = parse.parse_qs(o.query)

    if 'k' in query:
        data = query['k'][0]
        if data == os.environ["DAODAO_PASSWORD"]:
            if 'g' in query:
                data = query['g'][0]
                text = change_data_handle(int(data), '', 'combine',
                                          search_time_limit,
                                          search_time_limit_num, zone,
                                          now_time, user_info, since,
                                          user_agent)
            elif 'a' in query:
                data = query['a'][0].split(',', 1)
                text = change_data_handle(int(data[0]), data[1], 'append',
                                          search_time_limit,
                                          search_time_limit_num, zone,
                                          now_time, user_info, since,
                                          user_agent)
            elif 'e' in query:
                data = query['e'][0].split(',', 1)
                text = change_data_handle(int(data[0]), data[1], 'edit',
                                          search_time_limit,
                                          search_time_limit_num, zone,
                                          now_time, user_info, since,
                                          user_agent)
            elif 'c' in query:
                data = query['c'][0]
                text = creat_data(
                    now_time, user_info,
                    '{"content":"' + data + '",\n"user_agents":"' +
                    str(user_agent) + '"}', since)
            elif 'dn' in query:
                num = query['dn'][0]
                text = delete_data_single(num, user_info, search_time_limit,
                                          search_time_limit_num)
            elif 'd' in query:
                num = query['d'][0]
                text = delete_data_muti(num, user_info, search_time_limit,
                                        search_time_limit_num)
            else:
                text = 'please check!'
        else:
            text = 'Please enter the correct password'
    elif 'q' in query:
        num = int(query['q'][0])
        if num == 0:
            num = search_time_limit_num
        if 't' in query:
            limit = int(query['t'][0])
        else:
            limit = search_time_limit
        text = json.dumps(search_daodao_lite(user_info, limit, num))
    else:
        text = 'Please enter the correct password'

    self.send_response(200)
    # Set the headers required for CORS.
    self.send_header('Content-type', 'application/json')
    self.send_header("Access-Control-Allow-Origin", "*")
    self.send_header("Access-Control-Allow-Methods", "*")
    self.send_header("Access-Control-Allow-Headers", "Authorization, Content-Type")
    self.end_headers()
    self.wfile.write(text.encode())
import datetime
import re

from user_agents import parse

# Sample access-log line:
# 183.69.210.164 - - [07/Apr/2017:09:32:53 +0800] "GET /app/include/authcode.inc.php HTTP/1.1" 200 384
# "http://job.magedu.com/index.php?m=login" "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36
# (KHTML, like Gecko) Chrome/49.0.2623.221 Safari/537.36 SE 2.X MetaSr 1.0"

# Per-field conversion functions; fields without an entry are kept as strings.
ops = {
    'datetime': lambda timestr: datetime.datetime.strptime(
        timestr, '%d/%b/%Y:%H:%M:%S %z'),
    'status': int,
    'length': int,
    # Only used if the whole request line is captured as a single group.
    'request': lambda request: dict(
        zip(('method', 'url', 'protocol'), request.split())),
    'useragent': lambda useragent: parse(useragent),
}

pattern = r'(?P<remote>[\d.]{7,}) - - \[(?P<datetime>[/\w +:]+)\] ' \
          r'"(?P<method>\w+) (?P<url>[/\w.]+) (?P<protocol>[/\w.]+)" ' \
          r'(?P<status>\d+) (?P<length>\d+) .+ "(?P<useragent>.*)"'
regex = re.compile(pattern)


def extract(line) -> dict:
    matcher = regex.match(line)
    if matcher:
        return {
            k: ops.get(k, lambda x: x)(v)
            for k, v in matcher.groupdict().items()
        }
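# Quick check of extract() against the sample line from the comment above:
line = ('183.69.210.164 - - [07/Apr/2017:09:32:53 +0800] '
        '"GET /app/include/authcode.inc.php HTTP/1.1" 200 384 '
        '"http://job.magedu.com/index.php?m=login" '
        '"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 '
        '(KHTML, like Gecko) Chrome/49.0.2623.221 Safari/537.36 SE 2.X MetaSr 1.0"')

fields = extract(line)
print(fields['status'], fields['length'])  # 200 384
print(fields['useragent'].os.family)       # as parsed by user_agents, e.g. 'Windows 7'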
def is_spider():
    return user_agents.parse(flask.request.headers.get('User-Agent')).is_bot
def expand(self, link, request, edit):
    '''Expand a Toush link.'''
    try:
        url = ToushLink.objects.get(short_url__exact=link)
    except ToushLink.DoesNotExist:
        raise KeyError("invalid shortlink")

    # Ensure we are within usage counts.
    if url.max_count != -1:
        if url.max_count <= url.usage_count:
            raise PermissionError("max usages for link reached")

    # Ensure we are within the allowed datetime window.
    if timezone.now() > url.expired:
        raise PermissionError("shortlink expired")

    if not edit:
        url.usage_count += 1
        url.save()

    # To conserve resources, create the Visitor object after the checks:
    # a Visitor is only recorded if the visitor "sees" the end page.
    user = request.user if request.user.username else None
    ip_addr_v4 = request.META['REMOTE_ADDR']
    ua_string = request.META['HTTP_USER_AGENT']
    user_agent = parse(ua_string)

    if user is not None and not edit:
        Visitor.objects.create(
            url=url,
            user=user,
            ip_addr_v4=ip_addr_v4,
            is_mobile=user_agent.is_mobile,
            is_tablet=user_agent.is_tablet,
            is_touch_capable=user_agent.is_touch_capable,
            is_pc=user_agent.is_pc,
            is_bot=user_agent.is_bot,
            browser=user_agent.browser.family,
            browser_version=user_agent.browser.version_string,
            os=user_agent.os.family,
            os_version=user_agent.os.version_string,
            device=user_agent.device.family)
    elif not edit:
        Visitor.objects.create(
            url=url,
            ip_addr_v4=ip_addr_v4,
            is_mobile=user_agent.is_mobile,
            is_tablet=user_agent.is_tablet,
            is_touch_capable=user_agent.is_touch_capable,
            is_pc=user_agent.is_pc,
            is_bot=user_agent.is_bot,
            browser=user_agent.browser.family,
            browser_version=user_agent.browser.version_string,
            os=user_agent.os.family,
            os_version=user_agent.os.version_string,
            device=user_agent.device.family)

    # In the object type definition, exclude the data/stats fields so
    # analytics do not leave the server.
    return url
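# Note: the two Visitor.objects.create calls above differ only in whether
# user is passed. A common refactor builds the shared kwargs once (a sketch,
# not the project's code):
if not edit:
    visitor_kwargs = dict(
        url=url,
        ip_addr_v4=ip_addr_v4,
        is_mobile=user_agent.is_mobile,
        is_tablet=user_agent.is_tablet,
        is_touch_capable=user_agent.is_touch_capable,
        is_pc=user_agent.is_pc,
        is_bot=user_agent.is_bot,
        browser=user_agent.browser.family,
        browser_version=user_agent.browser.version_string,
        os=user_agent.os.family,
        os_version=user_agent.os.version_string,
        device=user_agent.device.family,
    )
    if user is not None:
        visitor_kwargs["user"] = user
    Visitor.objects.create(**visitor_kwargs)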
async def signin(
    self,
    request: Request,
    password: str,
    identifier: Union[str, EmailStr],
) -> str:
    location: str = "Unknown"
    if "x-real-ip" in request.headers:
        real_ip: str = request.headers["x-real-ip"]
        self.logger.debug("requester's IP address is %s", real_ip)
        try:
            ipstack_res: Dict = self.ip2geo.get_location(real_ip)
            location = (
                f"{ipstack_res['location']['country_flag_emoji']} "
                f"{ipstack_res['city']} / "
                f"{ipstack_res['region_name']} / "
                f"{ipstack_res['country_name']}"
            )
        except Exception as exception:
            self.logger.error(
                "an error occurred when requesting ipstack: %s", exception
            )
            location = "Unknown"
    self.logger.debug("requester's geolocation is %s", location)

    device: str = "Unknown"
    if "user-agent" in request.headers:
        ua_str: str = request.headers["user-agent"]
        try:
            ua = parse(ua_str)
            device = str(ua)
        except Exception:
            device = "Unknown"
    self.logger.debug("requester's device is %s", device)

    email: Optional[EmailStr] = None
    user_id: Optional[str] = None
    try:
        email = validate_email(identifier).email
    except EmailNotValidError:
        user_id = identifier
        try:
            user_id = custom_charset(None, user_id)
        except Exception as exception:
            self.logger.error(exception)
            raise HTTPException(
                status_code=status.HTTP_409_CONFLICT,
                detail={
                    "eng": "incorrect identifier",
                    "rus": "неправильный идентификатор",
                },
            )

    try:
        if email:
            user = await self.database.fetch_one(
                sa.sql.select(
                    [self.users.c.hashed_password, self.users.c.user_id]
                ).where(self.users.c.email == email)
            )
            user_id = user["user_id"]
        else:
            user = await self.database.fetch_one(
                sa.sql.select([self.users.c.hashed_password]).where(
                    self.users.c.user_id == user_id
                )
            )
    except Exception as exception:
        self.logger.error(exception)
        raise HTTPException(
            status_code=status.HTTP_409_CONFLICT,
            detail={
                "eng": "An error occurred when working with Auth DB",
                "rus": "Произошла ошибка при обращении к базе данных "
                       "модуля авторизации",
            },
        )

    hashed_password = user["hashed_password"]
    if not crypto_context.verify(password, hashed_password):
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail={
                "eng": "Incorrect password",
                "rus": "Неправильный пароль",
            },
        )

    now: datetime.datetime = datetime.datetime.now()
    token_uuid: bytes = uuid.uuid4().bytes
    insert = self.tokens.insert().values(
        token_uuid=token_uuid,
        location=location,
        device=device,
        issued_by=user_id,
        issued_at=str(now),
    )
    try:
        await self.database.execute(insert)
    except Exception as exception:
        self.logger.error(exception)
        raise HTTPException(
            status_code=status.HTTP_409_CONFLICT,
            detail={
                "eng": "An error occurred when working with Auth DB",
                "rus": "Произошла ошибка при обращении к базе данных "
                       "модуля авторизации",
            },
        )

    header: Dict[str, str] = {"alg": "ES384", "typ": "JWT"}
    payload: Dict[str, Any] = {
        "iss": "paperback",
        "sub": str(user_id),
        "exp": int(round((now + datetime.timedelta(days=2)).timestamp(), 0)),
        "iat": int(round(now.timestamp(), 0)),
        "jti": str(uuid.UUID(bytes=token_uuid)),
    }
    self.logger.debug("created token %s for user %s", payload, user_id)
    return jwt.encode(header, payload, self.private_key)
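# Note: the jwt.encode(header, payload, key) signature above matches
# Authlib's JOSE API. Assuming Authlib is the library in use, the issued
# token could be verified like this (token is the string returned by
# signin(); public_key is assumed to be the ES384 key matching
# self.private_key):
from authlib.jose import jwt

claims = jwt.decode(token, public_key)
claims.validate()  # raises if registered claims such as exp are invalid
print(claims["sub"], claims["jti"])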
def setup(self, request, *args, **kwargs):
    super().setup(request, *args, **kwargs)
    user_agent = request.META.get('HTTP_USER_AGENT', '')
    self.request.user_agent = parse(user_agent)
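# With this setup() override on a mixin, any view in the hierarchy can read
# the parsed agent directly. A hypothetical sketch (class names are
# illustrative; UserAgentMixin is assumed to carry the setup() override above):
from django.http import HttpResponse
from django.views import View


class MobileAwareView(UserAgentMixin, View):
    def get(self, request):
        if request.user_agent.is_mobile:
            return HttpResponse("mobile layout")
        return HttpResponse("desktop layout")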
def terms_of_use():
    user_agent = parse(request.headers.get('User-Agent'))
    return render_template('terms_of_use.html',
                           mobile=user_agent.is_mobile,
                           subtitle=gettext(u'Nutzungsbedingungen'))
def is_outdated_browser(self):
    ua_string = self.request.get("HTTP_USER_AGENT", "")
    ua = parse(ua_string)
    return ua.browser.family == "IE"
def advertisement():
    """
    This is the URL we give for the ad for our 'external question'. The ad
    has to display two different things. This page will be called from within
    Mechanical Turk, with URL arguments hitId, assignmentId, and workerId.

    If the worker has not yet accepted the HIT:
        These arguments will have null values; we should just show an ad for
        the experiment.
    If the worker has accepted the HIT:
        These arguments will have appropriate values, and we should enter the
        person in the database and provide a link to the experiment popup.
    """
    user_agent_string = request.user_agent.string
    user_agent_obj = user_agents.parse(user_agent_string)
    browser_ok = True
    browser_exclude_rule = CONFIG.get('HIT Configuration',
                                      'browser_exclude_rule')
    for rule in browser_exclude_rule.split(','):
        myrule = rule.strip()
        if myrule in ["mobile", "tablet", "touchcapable", "pc", "bot"]:
            if (myrule == "mobile" and user_agent_obj.is_mobile) or \
               (myrule == "tablet" and user_agent_obj.is_tablet) or \
               (myrule == "touchcapable" and user_agent_obj.is_touch_capable) or \
               (myrule == "pc" and user_agent_obj.is_pc) or \
               (myrule == "bot" and user_agent_obj.is_bot):
                browser_ok = False
        elif myrule in ("Safari", "safari"):
            if "Chrome" in user_agent_string and "Safari" in user_agent_string:
                pass
            elif "Safari" in user_agent_string:
                browser_ok = False
        elif myrule in user_agent_string:
            browser_ok = False

    if not browser_ok:
        # Handler for excluded browsers, e.g. IE users when IE is not supported.
        raise ExperimentError('browser_type_not_allowed')

    if not ('hitId' in request.args and 'assignmentId' in request.args):
        raise ExperimentError('hit_assign_worker_id_not_set_in_mturk')

    hit_id = request.args['hitId']
    assignment_id = request.args['assignmentId']
    mode = request.args['mode']
    debug_mode = hit_id[:5] == "debug"

    already_in_db = False
    if 'workerId' in request.args:
        worker_id = request.args['workerId']
        # First check if this workerId has completed the task before (v1).
        nrecords = Participant.query.\
            filter(Participant.assignmentid != assignment_id).\
            filter(Participant.workerid == worker_id).\
            count()
        if nrecords > 0:  # Already completed the task
            already_in_db = True
    else:  # The worker has not accepted the HIT
        worker_id = None

    try:
        part = Participant.query.\
            filter(Participant.hitid == hit_id).\
            filter(Participant.assignmentid == assignment_id).\
            filter(Participant.workerid == worker_id).\
            one()
        status = part.status
    except exc.SQLAlchemyError:
        status = None

    allow_repeats = CONFIG.getboolean('HIT Configuration', 'allow_repeats')
    if (status == STARTED or status == QUITEARLY) and not debug_mode:
        # Once participants have finished the instructions, we do not allow
        # them to start the task again.
        raise ExperimentError('already_started_exp_mturk')
    elif status == COMPLETED or (status == SUBMITTED and not already_in_db):
        # 'or status == SUBMITTED' because we suspect that sometimes the post
        # to MTurk fails after we've set status to SUBMITTED, so really they
        # have not successfully submitted. This gives another chance for the
        # submit to work when not using the psiTurk ad server.
        use_psiturk_ad_server = CONFIG.getboolean('Shell Parameters',
                                                  'use_psiturk_ad_server')
        if not use_psiturk_ad_server:
            # They've finished the experiment but haven't successfully
            # submitted the HIT yet.
            return render_template(
                'thanks-mturksubmit.html',
                using_sandbox=(mode == "sandbox"),
                hitid=hit_id,
                assignmentid=assignment_id,
                workerid=worker_id)
        else:
            # Show them a thanks message and tell them to go away.
            return render_template('thanks.html')
    elif already_in_db and not (debug_mode or allow_repeats):
        raise ExperimentError('already_did_exp_hit')
    elif status == ALLOCATED or not status or debug_mode:
        # Participant has not yet agreed to the consent. They might not even
        # have accepted the HIT.
        with open('templates/ad.html', 'r') as temp_file:
            ad_string = temp_file.read()
        ad_string = insert_mode(ad_string, mode)
        return render_template_string(
            ad_string,
            hitid=hit_id,
            assignmentid=assignment_id,
            workerid=worker_id)
    else:
        raise ExperimentError('status_incorrectly_set')
from user_agents import parse

u = ('Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 '
     '(KHTML, like Gecko) Chrome/57.0.2987.133 Safari/537.36')
ua = parse(u)
print(ua)          # e.g. "PC / Windows 7 / Chrome 57.0.2987"
print(ua.browser)  # e.g. Browser(family='Chrome', version=(57, 0, 2987), version_string='57.0.2987')
def advertisement():
    """
    This is the URL we give for the ad for our 'external question'. The ad
    has to display two different things. This page will be called from within
    Mechanical Turk, with URL arguments hitId, assignmentId, and workerId.

    If the worker has not yet accepted the HIT:
        These arguments will have null values; we should just show an ad for
        the experiment.
    If the worker has accepted the HIT:
        These arguments will have appropriate values, and we should enter the
        person in the database and provide a link to the experiment popup.
    """
    user_agent_string = request.user_agent.string
    user_agent_obj = user_agents.parse(user_agent_string)
    browser_ok = True
    for rule in CONFIG.get('HIT Configuration',
                           'browser_exclude_rule').split(','):
        myrule = rule.strip()
        if myrule in ["mobile", "tablet", "touchcapable", "pc", "bot"]:
            if (myrule == "mobile" and user_agent_obj.is_mobile) or \
               (myrule == "tablet" and user_agent_obj.is_tablet) or \
               (myrule == "touchcapable" and user_agent_obj.is_touch_capable) or \
               (myrule == "pc" and user_agent_obj.is_pc) or \
               (myrule == "bot" and user_agent_obj.is_bot):
                browser_ok = False
        elif myrule in user_agent_string:
            browser_ok = False

    if not browser_ok:
        # Handler for excluded browsers, e.g. IE users when IE is not supported.
        raise ExperimentError('browser_type_not_allowed')

    if not ('hitId' in request.args and 'assignmentId' in request.args):
        raise ExperimentError('hit_assign_worker_id_not_set_in_mturk')

    hit_id = request.args['hitId']
    assignment_id = request.args['assignmentId']
    mode = request.args['mode']
    debug_mode = hit_id[:5] == "debug"

    already_in_db = False
    if 'workerId' in request.args:
        worker_id = request.args['workerId']
        # First check if this workerId has completed the task before (v1).
        nrecords = Participant.query.\
            filter(Participant.assignmentid != assignment_id).\
            filter(Participant.workerid == worker_id).\
            count()
        if nrecords > 0:  # Already completed the task
            already_in_db = True
    else:  # The worker has not accepted the HIT
        worker_id = None

    try:
        part = Participant.query.\
            filter(Participant.hitid == hit_id).\
            filter(Participant.assignmentid == assignment_id).\
            filter(Participant.workerid == worker_id).\
            one()
        status = part.status
    except exc.SQLAlchemyError:
        status = None

    if status == STARTED and not debug_mode:
        # Once participants have finished the instructions, we do not allow
        # them to start the task again.
        raise ExperimentError('already_started_exp_mturk')
    elif status == COMPLETED:
        # They've done the debriefing but perhaps haven't submitted the HIT
        # yet. Turn assignmentId into the original assignment ID before
        # sending it back to AMT.
        return render_template(
            'thanks.html',
            is_sandbox=(mode == "sandbox"),
            hitid=hit_id,
            assignmentid=assignment_id,
            workerid=worker_id
        )
    elif already_in_db and not debug_mode:
        raise ExperimentError('already_did_exp_hit')
    elif status == ALLOCATED or not status or debug_mode:
        # Participant has not yet agreed to the consent. They might not even
        # have accepted the HIT.
        with open('templates/ad.html', 'r') as temp_file:
            ad_string = temp_file.read()
        ad_string = insert_mode(ad_string, mode)
        return render_template_string(
            ad_string,
            hitid=hit_id,
            assignmentid=assignment_id,
            workerid=worker_id
        )
    else:
        raise ExperimentError('status_incorrectly_set')
def privacy_policy():
    user_agent = parse(request.headers.get('User-Agent'))
    return render_template('privacy_policy.html',
                           mobile=user_agent.is_mobile,
                           subtitle=gettext(u'Datenschutz'))