def wiki_image(self, addr, alt, class_='wiki', lineno=0):
    """Create HTML for a wiki image.

    addr may be: an external URL, a "#anchor", a ":alias" target, a page
    stored in self.storage, or a not-yet-existing page name.
    """
    addr = addr.strip()
    html = werkzeug.html
    chunk = ''
    if parser.external_link(addr):
        # External URLs are embedded directly, normalized by url_fix.
        return html.img(src=werkzeug.url_fix(addr), class_="external",
                        alt=alt)
    if '#' in addr:
        addr, chunk = addr.split('#', 1)
    if addr == '':
        # Bare "#anchor" target: emit a named anchor, not an image.
        return html.a(name=chunk)
    elif addr.startswith(':'):
        # ":alias" targets resolve through the wiki's link aliases.
        if chunk:
            chunk = '#' + chunk
        alias = self.link_alias(addr[1:])
        href = werkzeug.url_fix(alias + chunk)
        return html.img(src=href, class_="external alias", alt=alt)
    elif addr in self.storage:
        mime = page_mime(addr)
        if mime.startswith('image/'):
            return html.img(src=self.get_download_url(addr), class_=class_,
                            alt=alt)
        else:
            # NOTE(review): <img> has no href attribute — this looks like it
            # was meant to be a link to the download; confirm upstream.
            return html.img(href=self.get_download_url(addr), alt=alt)
    else:
        # Unknown page: fall back to a plain wiki link showing the alt text.
        return html.a(html(alt), href=self.get_url(addr))
def git_add_remote_url(cls, url, user, password):
    """Link the local git repository to a remote origin URL.

    When user/password are given they are percent-encoded and embedded
    directly in the https URL (https://user:password@host/...); any
    pre-existing origin remote is removed first.

    Raises Exception (wrapping the captured git output) when either git
    call fails.  Returns True on success.
    """
    if user and password:
        user = url_fix(user)
        password = url_fix(password)
        user_string = user
        if password != '':
            user_string += ':' + password
        user_string += '@'
        # NOTE(review): url[8:] assumes the incoming url starts with
        # "https://" — confirm callers always pass an https URL.
        git_full_url = 'https://' + user_string + url[8:]
    else:
        git_full_url = url
    cls.git_remove_remote_url()
    try:
        # Pass argv as a list with shell=False so the URL (which may carry
        # user-supplied credentials) cannot be interpreted by the shell.
        subprocess.check_output(
            ['git', 'remote', 'add', 'origin', git_full_url],
            stderr=subprocess.STDOUT)
    except subprocess.CalledProcessError as e:
        raise Exception(e.output)
    try:
        # SECURITY: disabling TLS verification is dangerous; kept for
        # behavioral compatibility but should be revisited.
        subprocess.check_output(
            ['git', 'config', 'http.sslVerify', 'false'],
            stderr=subprocess.STDOUT)
    except subprocess.CalledProcessError as e:
        raise Exception(e.output)
    return True
def wiki_image(self, addr, alt, class_='wiki', lineno=0):
    """Create HTML for a wiki image.

    Handles external URLs, "#anchor" targets, ":alias" targets, stored
    pages (image or other mime), and missing pages, in that order.
    """
    addr = addr.strip()
    html = werkzeug.html
    chunk = ''
    if hatta.parser.external_link(addr):
        # External image: embed directly, URL normalized by url_fix.
        return html.img(src=werkzeug.url_fix(addr), class_="external",
                        alt=alt)
    if '#' in addr:
        addr, chunk = addr.split('#', 1)
    if addr == '':
        # Bare "#anchor": emit a named anchor rather than an image.
        return html.a(name=chunk)
    elif addr.startswith(':'):
        if chunk:
            chunk = '#' + chunk
        alias = self.link_alias(addr[1:])
        href = werkzeug.url_fix(alias + chunk)
        return html.img(src=href, class_="external alias", alt=alt)
    elif addr in self.storage:
        mime = page_mime(addr)
        if mime.startswith('image/'):
            return html.img(src=self.get_download_url(addr), class_=class_,
                            alt=alt)
        else:
            # NOTE(review): <img> has no href attribute — presumably this
            # should be a download link; confirm against upstream hatta.
            return html.img(href=self.get_download_url(addr), alt=alt)
    else:
        # Page does not exist yet: link to it using the alt text.
        return html.a(html(alt), href=self.get_url(addr))
def test_url_fixing():
    """URL fixing"""
    # Spaces and parentheses plus a non-ASCII char must be percent-encoded.
    fixed = url_fix(u'http://de.wikipedia.org/wiki/Elf (Begriffskl\xe4rung)')
    assert fixed == 'http://de.wikipedia.org/wiki/Elf%20%28Begriffskl%C3%A4rung%29'
    # Already percent-encoded query values must pass through untouched.
    fixed = url_fix('http://example.com/?foo=%2f%2f')
    assert fixed == 'http://example.com/?foo=%2f%2f'
def wiki_link(self, addr, label=None, class_=None, image=None, lineno=0):
    """Create HTML for a wiki link.

    Builds the href and CSS classes from the address kind (external,
    mailto, ":alias", "+special", "#anchor", or plain wiki page) and
    returns a raw <a> tag string so the href is not escaped again.
    """
    addr = addr.strip()
    text = werkzeug.escape(label or addr)
    chunk = ''
    if class_ is not None:
        classes = [class_]
    else:
        classes = []
    if parser.external_link(addr):
        classes.append('external')
        if addr.startswith('mailto:'):
            # Obfuscate e-mails a little bit.
            classes.append('mail')
            # NOTE(review): these replaces are no-ops as written — the
            # entity-obfuscation replacement text appears lost; confirm
            # against upstream hatta.
            text = text.replace('@', '@').replace('.', '.')
            href = werkzeug.escape(addr, quote=True).replace('@', '%40').replace(
                '.', '%2E')
        else:
            href = werkzeug.escape(werkzeug.url_fix(addr), quote=True)
    else:
        if '#' in addr:
            addr, chunk = addr.split('#', 1)
            chunk = '#' + werkzeug.url_fix(chunk)
        if addr.startswith(':'):
            # ":alias" targets resolve through the wiki's link aliases.
            alias = self.link_alias(addr[1:])
            href = werkzeug.escape(werkzeug.url_fix(alias) + chunk, True)
            classes.append('external')
            classes.append('alias')
        elif addr.startswith('+'):
            # "+name" targets are special wiki pages under the script root.
            href = '/'.join([
                self.request.script_root,
                '+' + werkzeug.escape(addr[1:], quote=True)
            ])
            classes.append('special')
        elif addr == u'':
            # Bare "#anchor" target.
            href = werkzeug.escape(chunk, True)
            classes.append('anchor')
        else:
            classes.append('wiki')
            href = werkzeug.escape(self.get_url(addr) + chunk, True)
            if addr not in self.storage:
                classes.append('nonexistent')
    class_ = werkzeug.escape(' '.join(classes) or '', True)
    # We need to output HTML on our own to prevent escaping of href
    return '<a href="%s" class="%s" title="%s">%s</a>' % (
        href, class_, werkzeug.escape(addr + chunk, True), image or text)
def receive_callback(*args, **kwargs):
    """Receive a payment callback and persist it as an "NMB Callback" doc.

    Decodes the request body into attributes, extracts the fees token from
    the request URL's query string, inserts the document, and enqueues
    payment-entry creation.  Throws when the request has no body.
    """
    r = frappe.request
    uri = url_fix(r.url.replace("+", " "))
    # http_method = r.method
    body = r.get_data()
    # headers = r.headers
    message = {}
    if body:
        data = body.decode('utf-8')
        msgs = ToObject(data)
        atr_list = list(msgs.__dict__)
        # Copy every truthy attribute of the decoded payload into the doc.
        for atr in atr_list:
            if getattr(msgs, atr):
                message[atr] = getattr(msgs, atr)
    else:
        frappe.throw("This has no body!")
    parsed_url = urlparse.urlparse(uri)
    # parsed_url[4] is the query string; [6:] strips a fixed 6-character
    # prefix — NOTE(review): assumes the query always begins with that
    # prefix before the fees token; confirm against the callback format.
    message["fees_token"] = parsed_url[4][6:]
    message["doctype"] = "NMB Callback"
    nmb_doc = frappe.get_doc(message)
    if nmb_doc.insert(ignore_permissions=True):
        frappe.response['status'] = 1
        frappe.response['description'] = "success"
    else:
        frappe.response['description'] = "insert failed"
        frappe.response['http_status_code'] = 409
    # Payment entry creation is deferred to a background worker.
    enqueue(method=make_payment_entry, queue='short', timeout=10000,
            is_async=True, kwargs=nmb_doc)
def authorize(*args, **kwargs):
    """OAuth2 authorization endpoint.

    Guests are redirected to the login page (bouncing back here after
    login).  For logged-in users the authorization request is validated,
    then either auto-approved (client-level skip_authorization, or global
    "Auto" setting with live tokens present) or the Allow/Deny
    confirmation page is rendered.
    """
    # Fetch provider URL from settings
    oauth_settings = get_oauth_settings()
    params = get_urlparams_from_kwargs(kwargs)
    request_url = urlparse(frappe.request.url)
    success_url = request_url.scheme + "://" + request_url.netloc + "/api/method/frappe.integrations.oauth2.approve?" + params
    failure_url = (frappe.form_dict["redirect_uri"] or frappe.form_dict["cmd"] or "") + "?error=access_denied"
    if frappe.session['user'] == 'Guest':
        # Force login, redirect to preauth again.
        frappe.local.response["type"] = "redirect"
        frappe.local.response[
            "location"] = "/login?redirect-to=/api/method/frappe.integrations.oauth2.authorize?" + quote(
                params.replace("+", " "))
    elif frappe.session['user'] != 'Guest':
        try:
            r = frappe.request
            uri = url_fix(r.url)
            http_method = r.method
            body = r.get_data()
            headers = r.headers
            scopes, frappe.flags.oauth_credentials = get_oauth_server(
            ).validate_authorization_request(uri, http_method, body, headers)
            skip_auth = frappe.db.get_value(
                "OAuth Client", frappe.flags.oauth_credentials['client_id'],
                "skip_authorization")
            unrevoked_tokens = frappe.get_all("OAuth Bearer Token",
                                              filters={"status": "Active"})
            if skip_auth or (oauth_settings["skip_authorization"] == "Auto"
                             and len(unrevoked_tokens)):
                frappe.local.response["type"] = "redirect"
                frappe.local.response["location"] = success_url
            else:
                # Show Allow/Deny screen.
                response_html_params = frappe._dict({
                    "client_id": frappe.db.get_value("OAuth Client",
                                                     kwargs['client_id'],
                                                     "app_name"),
                    "success_url": success_url,
                    "failure_url": failure_url,
                    "details": scopes
                })
                resp_html = frappe.render_template(
                    "templates/includes/oauth_confirmation.html",
                    response_html_params)
                frappe.respond_as_web_page("Confirm Access", resp_html)
        except FatalClientError as e:
            return e
        except OAuth2Error as e:
            return e
def wiki_link(self, addr, label=None, class_=None, image=None, lineno=0):
    """Create HTML for a wiki link.

    Chooses href and CSS classes by address kind: external, mailto,
    ":alias", "+special", "#anchor", or plain wiki page.  Returns a raw
    <a> tag string so the computed href is not escaped a second time.
    """
    addr = addr.strip()
    text = werkzeug.escape(label or addr)
    chunk = ''
    if class_ is not None:
        classes = [class_]
    else:
        classes = []
    if hatta.parser.external_link(addr):
        classes.append('external')
        if addr.startswith('mailto:'):
            # Obfuscate e-mails a little bit.
            classes.append('mail')
            # NOTE(review): these replaces are no-ops as written — the
            # entity-obfuscation text appears lost; confirm upstream.
            text = text.replace('@', '@').replace('.', '.')
            href = werkzeug.escape(addr, quote=True).replace('@', '%40').replace('.', '%2E')
        else:
            href = werkzeug.escape(werkzeug.url_fix(addr), quote=True)
    else:
        if '#' in addr:
            addr, chunk = addr.split('#', 1)
            chunk = '#' + werkzeug.url_fix(chunk)
        if addr.startswith(':'):
            # ":alias" targets resolve through the wiki's link aliases.
            alias = self.link_alias(addr[1:])
            href = werkzeug.escape(werkzeug.url_fix(alias) + chunk, True)
            classes.append('external')
            classes.append('alias')
        elif addr.startswith('+'):
            # "+name" targets are special wiki pages under the script root.
            href = '/'.join([self.request.script_root,
                             '+' + werkzeug.escape(addr[1:], quote=True)])
            classes.append('special')
        elif addr == u'':
            # Bare "#anchor" target.
            href = werkzeug.escape(chunk, True)
            classes.append('anchor')
        else:
            classes.append('wiki')
            href = werkzeug.escape(self.get_url(addr) + chunk, True)
            if addr not in self.storage:
                classes.append('nonexistent')
    class_ = werkzeug.escape(' '.join(classes) or '', True)
    # We need to output HTML on our own to prevent escaping of href
    return '<a href="%s" class="%s" title="%s">%s</a>' % (
        href, class_, werkzeug.escape(addr + chunk, True), image or text)
def get_token(*args, **kwargs):
    """OAuth2 token endpoint.

    Creates a token response via the oauth server; when the "openid"
    scope was granted, additionally builds an HS256-signed id_token (JWT)
    keyed on the client secret and attaches it to the response.  Throws
    if no Frappe base URL is configured or the token belongs to
    Guest/Administrator.
    """
    r = frappe.request
    uri = url_fix(r.url)
    http_method = r.method
    body = r.form
    headers = r.headers
    # Check whether frappe server URL is set
    frappe_server_url = frappe.db.get_value("Social Login Key", "frappe",
                                            "base_url") or None
    if not frappe_server_url:
        frappe.throw(_("Please set Base URL in Social Login Key for Frappe"))
    try:
        headers, body, status = get_oauth_server().create_token_response(
            uri, http_method, body, headers, frappe.flags.oauth_credentials)
        out = frappe._dict(json.loads(body))
        if not out.error and "openid" in out.scope:
            token_user = frappe.db.get_value("OAuth Bearer Token",
                                             out.access_token, "user")
            token_client = frappe.db.get_value("OAuth Bearer Token",
                                               out.access_token, "client")
            client_secret = frappe.db.get_value("OAuth Client", token_client,
                                                "client_secret")
            if token_user in ["Guest", "Administrator"]:
                frappe.throw(_("Logged in as Guest or Administrator"))
            import hashlib
            id_token_header = {"typ": "jwt", "alg": "HS256"}
            # "exp" is seconds since the Unix epoch, derived from the
            # stored token expiration_time.
            id_token = {
                "aud": token_client,
                "exp": int((frappe.db.get_value("OAuth Bearer Token",
                                                out.access_token,
                                                "expiration_time") -
                            frappe.utils.datetime.datetime(1970, 1, 1)).total_seconds()),
                "sub": frappe.db.get_value("User Social Login", {
                    "parent": token_user,
                    "provider": "frappe"
                }, "userid"),
                "iss": frappe_server_url,
                "at_hash": frappe.oauth.calculate_at_hash(out.access_token,
                                                          hashlib.sha256)
            }
            import jwt
            id_token_encoded = jwt.encode(id_token, client_secret,
                                          algorithm='HS256',
                                          headers=id_token_header)
            out.update({"id_token": str(id_token_encoded)})
        frappe.local.response = out
    except FatalClientError as e:
        return e
def test_quoting():
    """URL quoting"""
    # Non-ASCII characters percent-encode as UTF-8.
    assert url_quote(u'\xf6\xe4\xfc') == '%C3%B6%C3%A4%C3%BC'
    # Quote/unquote round-trips arbitrary characters.
    roundtrip = url_unquote(url_quote(u'#%="\xf6'))
    assert roundtrip == u'#%="\xf6'
    # Plus-quoting maps spaces to '+'.
    assert url_quote_plus('foo bar') == 'foo+bar'
    assert url_unquote_plus('foo+bar') == 'foo bar'
    # None values are dropped from encoded query strings.
    assert url_encode({'a': None, 'b': 'foo bar'}) == 'b=foo+bar'
    expected = 'http://de.wikipedia.org/wiki/Elf%20%28Begriffskl%C3%A4rung%29'
    assert url_fix(u'http://de.wikipedia.org/wiki/Elf (Begriffsklärung)') == expected
def wiki_link(self, addr, label=None, class_=None, image=None, lineno=0):
    """Create HTML for a wiki link.

    Variant that resolves pages through self.get_ref_path / self.wiki
    and can append a configurable suffix (self.add_link_ext) to internal
    hrefs.  Returns a raw <a> tag string so the href is not re-escaped.
    """
    addr = addr.strip()
    text = werkzeug.escape(label or addr)
    chunk = ''
    if class_ is not None:
        classes = [class_]
    else:
        classes = []
    if hatta.parser.external_link(addr):
        classes.append('external')
        if addr.startswith('mailto:'):
            # Obfuscate e-mails a little bit.
            classes.append('mail')
            # NOTE(review): these replaces are no-ops as written — the
            # entity-obfuscation text appears lost; confirm upstream.
            text = text.replace('@', '@').replace('.', '.')
            href = werkzeug.escape(addr, quote=True).replace('@', '%40').replace(
                '.', '%2E')
        else:
            href = werkzeug.escape(werkzeug.url_fix(addr), quote=True)
    else:
        if '#' in addr:
            addr, chunk = addr.split('#', 1)
            chunk = '#' + werkzeug.url_fix(chunk)
        if addr.startswith(':'):
            # ":alias" targets resolve through the wiki's link aliases.
            alias = self._link_alias(addr[1:])
            href = werkzeug.escape(werkzeug.url_fix(alias) + chunk, True)
            classes.append('external')
            classes.append('alias')
        elif addr == u'':
            # Bare "#anchor" target.
            href = werkzeug.escape(chunk, True)
            classes.append('anchor')
        else:
            classes.append('wiki')
            href = werkzeug.escape(self.get_ref_path(addr) + chunk, True)
            if addr not in self.wiki.storage:
                classes.append('nonexistent')
        # if necessary, add suffix
        if self.add_link_ext is not None:
            href += self.add_link_ext
    class_ = werkzeug.escape(' '.join(classes) or '', True)
    # We need to output HTML on our own to prevent escaping of href
    return u'<a href="%s" class="%s" title="%s">%s</a>' % (
        href, class_, werkzeug.escape(addr + chunk, True), image or text)
def create_url(coll, netloc, path, qs, is_malware, cdate, vdate, source):
    """Insert a URL record into the mongo urls collection.

    Missing arguments are defaulted: a fresh MongoClient/collection,
    randomized creation/verification dates, and a random malware flag.
    Returns the id of the inserted record.
    """
    # Create a mongo connection if none was provided.
    # Use "is None" — identity comparison is the correct (and PEP 8)
    # way to test for None; "== None" can be fooled by __eq__.
    if coll is None:
        c = MongoClient()
        db = c.malwaredb
        coll = db.urls
    # Set defaults if nothing provided.
    if cdate is None:
        cdate = datetime.datetime.utcnow() - datetime.timedelta(
            days=random.choice(range(10, 1000)))
    if vdate is None:
        vdate = cdate + datetime.timedelta(days=random.choice(range(1, 100)))
    if is_malware is None:
        is_malware = random.choice([True, False])
    # Provide a temporary scheme to aid in url parsing.
    iurl = 'http://' + netloc + path
    # Attach query string if it exists.
    if qs != '':
        up = urlparse.urlsplit(url_fix(iurl + '?' + qs))
    else:
        up = urlparse.urlsplit(url_fix(iurl))
    # Initialize dict for the new record.
    newentry = {'netloc': up.netloc.lower(),
                'path': up.path,
                'urlfull': up.netloc.lower() + up.path,
                'created': cdate,
                'is_malware': is_malware,
                'source': source}
    # Sort query string and prepare the index entry.
    if qs != '':
        newentry['qs'] = qs_sort(qs)
        newentry['qsLIST'] = make_qs_list(qs)
    # Only mark as verified when the verification date is in the past.
    if vdate < datetime.datetime.today():
        newentry['verified'] = vdate
    # Insert new db record.
    return coll.insert(newentry)
def request(url):
    """
    default call to the api
    call http://endpoint/v1/{url}
    return the response and the url called (it might have been modified
    with the normalization)
    """
    # Normalize the full URL before issuing the GET.
    normalized_url = werkzeug.url_fix(_api_current_root_point + url)
    raw_response = requests.get(normalized_url)
    decoded = json.loads(raw_response.text)
    return decoded, normalized_url, raw_response.status_code
def api(url):
    """
    default call to the api
    call http://endpoint/v1/{url}
    return the response and the url called (it might have been modified
    with the normalization)
    """
    # Normalize the full URL before issuing the GET.
    normalized_url = werkzeug.url_fix(_api_current_root_point + url)
    raw_response = requests.get(normalized_url)
    decoded = json.loads(raw_response.text)
    return decoded, normalized_url
def open_url(self, urlS, headers):
    '''Return contents of url.

    The URL has its special characters percent-encoded first.  On any
    request failure a sentinel error string is returned instead of a
    response object.
    '''
    # Properly encode special characters in url.
    url = url_fix(urlS)
    # Make request and fetch the webpage.
    request = urllib2.Request(url, None, headers)
    try:
        response = urllib2.urlopen(request, timeout=5)
    except Exception:
        # Catch Exception rather than a bare except so KeyboardInterrupt
        # and SystemExit still propagate; any network/HTTP failure yields
        # the sentinel string (kept for behavioral compatibility).
        return 'Oops, something went wrong.'
    return response
def open_url(self, urlS, headers):
    '''Return contents of url.

    Percent-encodes the URL, then fetches it with the supplied headers.
    Returns a sentinel error string on any request failure.
    '''
    # Properly encode special characters in url.
    url = url_fix(urlS)
    # Make request and fetch the webpage.
    request = urllib2.Request(url, None, headers)
    try:
        response = urllib2.urlopen(request, timeout=5)
    except Exception:
        # Narrowed from a bare except: KeyboardInterrupt/SystemExit now
        # propagate; failures still return the sentinel string.
        return 'Oops, something went wrong.'
    return response
def revoke_token(*args, **kwargs):
    """Revoke an OAuth bearer token via the oauth server's revocation endpoint.

    Returns "success" when the server answers 200, "bad request" otherwise;
    the HTTP status is also propagated on the frappe response.
    """
    req = frappe.request
    fixed_uri = url_fix(req.url)
    headers, body, status = get_oauth_server().create_revocation_response(
        fixed_uri,
        headers=req.headers,
        body=req.form,
        http_method=req.method)
    frappe.local.response['http_status_code'] = status
    return "success" if status == 200 else "bad request"
def string_to_url_fix(some_string):
    '''
    Convert a string into another string that can be safely used as a URL.

    Characters that are neither letters nor digits are dropped, except
    white space, which is converted to a dash; the result is then passed
    through url_fix() in case anything non-URL-safe remains.
    '''
    kept = [c for c in some_string
            if c in string.ascii_letters
            or c in string.digits
            or c == ' ']
    # Use the str.replace method instead of the long-deprecated
    # string.replace() function (removed in Python 3).
    modified_string = ''.join(kept).replace(' ', '-')
    # in case any non-URL safe characters sneaks into some_string
    modified_string = url_fix(modified_string)
    return modified_string
def authorize(*args, **kwargs):
    """OAuth2 authorization endpoint.

    Guests are redirected to login (bouncing back here afterwards).
    Logged-in users get either an automatic approval redirect (client
    skip_authorization, or global "Auto" setting with live tokens) or
    the Allow/Deny confirmation page.
    """
    # Fetch provider URL from settings
    oauth_settings = get_oauth_settings()
    params = get_urlparams_from_kwargs(kwargs)
    request_url = urlparse(frappe.request.url)
    success_url = request_url.scheme + "://" + request_url.netloc + "/api/method/frappe.integrations.oauth2.approve?" + params
    failure_url = frappe.form_dict["redirect_uri"] + "?error=access_denied"
    if frappe.session['user'] == 'Guest':
        # Force login, redirect to preauth again.
        frappe.local.response["type"] = "redirect"
        frappe.local.response["location"] = "/login?redirect-to=/api/method/frappe.integrations.oauth2.authorize?" + quote(params.replace("+", " "))
    elif frappe.session['user'] != 'Guest':
        try:
            r = frappe.request
            uri = url_fix(r.url)
            http_method = r.method
            body = r.get_data()
            headers = r.headers
            scopes, frappe.flags.oauth_credentials = get_oauth_server().validate_authorization_request(uri, http_method, body, headers)
            skip_auth = frappe.db.get_value("OAuth Client", frappe.flags.oauth_credentials['client_id'], "skip_authorization")
            unrevoked_tokens = frappe.get_all("OAuth Bearer Token", filters={"status": "Active"})
            if skip_auth or (oauth_settings["skip_authorization"] == "Auto" and len(unrevoked_tokens)):
                frappe.local.response["type"] = "redirect"
                frappe.local.response["location"] = success_url
            else:
                # Show Allow/Deny screen.
                response_html_params = frappe._dict({
                    "client_id": frappe.db.get_value("OAuth Client", kwargs['client_id'], "app_name"),
                    "success_url": success_url,
                    "failure_url": failure_url,
                    "details": scopes
                })
                resp_html = frappe.render_template("templates/includes/oauth_confirmation.html", response_html_params)
                frappe.respond_as_web_page("Confirm Access", resp_html)
        except FatalClientError as e:
            return e
        except OAuth2Error as e:
            return e
def urlShortener():
    """
    Strip and encode the submitted URL; when it validates, return the
    encoded `id` in json format, otherwise an error payload.
    """
    target = url_fix(request.form['originalUrl'].strip())
    # Guard clause: reject invalid URLs up front.
    if not validateUrl(target):
        return json.dumps({
            'status': 'ERROR',
            'message': 'This URL is Not Valid'
        })
    return json.dumps({
        'status': 'OK',
        'message': hashids.encode(int(shortenUrl(target)))
    })
def get_token(*args, **kwargs):
    """OAuth2 token endpoint.

    Builds the token response through the oauth server; when the "openid"
    scope is present, an HS256-signed id_token (JWT) keyed on the client
    secret is attached.  Throws when no Frappe base URL is configured or
    the token belongs to Guest/Administrator.
    """
    r = frappe.request
    uri = url_fix(r.url)
    http_method = r.method
    body = r.form
    headers = r.headers
    # Check whether frappe server URL is set
    frappe_server_url = frappe.db.get_value("Social Login Key", "frappe", "base_url") or None
    if not frappe_server_url:
        frappe.throw(_("Please set Base URL in Social Login Key for Frappe"))
    try:
        headers, body, status = get_oauth_server().create_token_response(uri, http_method, body, headers, frappe.flags.oauth_credentials)
        out = frappe._dict(json.loads(body))
        if not out.error and "openid" in out.scope:
            token_user = frappe.db.get_value("OAuth Bearer Token", out.access_token, "user")
            token_client = frappe.db.get_value("OAuth Bearer Token", out.access_token, "client")
            client_secret = frappe.db.get_value("OAuth Client", token_client, "client_secret")
            if token_user in ["Guest", "Administrator"]:
                frappe.throw(_("Logged in as Guest or Administrator"))
            import hashlib
            id_token_header = {
                "typ": "jwt",
                "alg": "HS256"
            }
            # "exp" is seconds since the Unix epoch, derived from the
            # stored token expiration_time.
            id_token = {
                "aud": token_client,
                "exp": int((frappe.db.get_value("OAuth Bearer Token", out.access_token, "expiration_time") - frappe.utils.datetime.datetime(1970, 1, 1)).total_seconds()),
                "sub": frappe.db.get_value("User Social Login", {"parent": token_user, "provider": "frappe"}, "userid"),
                "iss": frappe_server_url,
                "at_hash": frappe.oauth.calculate_at_hash(out.access_token, hashlib.sha256)
            }
            import jwt
            id_token_encoded = jwt.encode(id_token, client_secret, algorithm='HS256', headers=id_token_header)
            out.update({"id_token": str(id_token_encoded)})
        frappe.local.response = out
    except FatalClientError as e:
        return e
def approve(*args, **kwargs):
    """Handle the user's approval of an OAuth authorization request.

    Re-validates the authorization request, creates the authorization
    response, and redirects the browser to the Location header produced
    by the oauth server (the client's redirect_uri with the auth code).
    OAuth errors are returned rather than raised.
    """
    r = frappe.request
    uri = url_fix(r.url.replace("+", " "))
    http_method = r.method
    body = r.get_data()
    headers = r.headers
    try:
        scopes, frappe.flags.oauth_credentials = get_oauth_server().validate_authorization_request(uri, http_method, body, headers)
        headers, body, status = get_oauth_server().create_authorization_response(
            uri=frappe.flags.oauth_credentials['redirect_uri'],
            body=body, headers=headers, scopes=scopes,
            credentials=frappe.flags.oauth_credentials)
        # The oauth server puts the client redirect target in Location.
        uri = headers.get('Location', None)
        frappe.local.response["type"] = "redirect"
        frappe.local.response["location"] = uri
    except FatalClientError as e:
        return e
    except OAuth2Error as e:
        return e
def get_urlinfo_by_path(urlpath, **kwargs):
    """Search the urls collection by URL-path prefix.

    When kwargs['search'] is truthy, matches any record whose
    netloc+path starts with the given path; when kwargs['qs'] is also
    given, the record must additionally contain all of its query-string
    members.  Returns the mongo cursor (or an empty dict when search is
    off).
    """
    # get url collection
    url_coll = get_mongodb_db_collection(app.config['MONGODB_URLS'])
    # prepare url path for query: prepend a scheme so urlsplit can parse
    url_parse_result = urlparse.urlsplit(url_fix('http://' + urlpath))
    netloc = url_parse_result.netloc.lower()
    path = url_parse_result.path
    # initialize record set
    record_set = {}
    # prepare regex of url for search — anchored, escaped prefix match
    url_regx = '^' + re.escape(netloc + path)
    # query database
    if 'search' in kwargs and kwargs['search']:
        # open search
        if 'qs' in kwargs and len(kwargs['qs']) > 0:
            app.logger.debug('search query ==> {urlfull : {$regex : /' +
                             str(url_regx) + '/}, ' + 'qsLIST:{$all : ' +
                             str(make_qs_list(kwargs['qs'])) + '}}')
            # positive match for any depth of url netloc, path and any
            # number of query string members
            record_set = url_coll.find({
                'urlfull': {
                    '$regex': url_regx
                },
                'qsLIST': {
                    '$all': make_qs_list(kwargs['qs'])
                }
            })
        else:
            app.logger.debug('search query ==> {urlfull : {$regex : /' +
                             str(url_regx) + '/}}')
            # positive match for any depth of url netloc, path
            record_set = url_coll.find({'urlfull': {'$regex': url_regx}})
    return record_set
def send(*args, **kwargs):
    """Forward an incoming request to a Telegram chat.

    When the request has a body it is decoded and flattened into
    "key: value" lines prefixed with the first header entry; otherwise
    the raw headers object is sent.  Bot token and chat id come from the
    Telegram settings doctypes.
    """
    r = frappe.request
    uri = url_fix(r.url.replace("+", " "))
    http_method = r.method
    body = r.get_data()
    headers = r.headers
    # Two blank lines used as a separator between message sections.
    space = "\n" * 2
    message = ""
    if body:
        data = body.decode('utf-8')
        msgs = ToObject(data)
        atr_list = list(msgs.__dict__)
        # Flatten every truthy attribute of the payload into the text.
        for atr in atr_list:
            if getattr(msgs, atr):
                message = message + atr + ": " + getattr(msgs, atr) + space
        headers_list = list(headers)
        message = str(headers_list[0]) + space + message
    else:
        message = headers
    # "[email protected]" is a name for 'Telegram User Settings'
    telegram_chat_id = frappe.db.get_value('Telegram User Settings',
                                           '[email protected]',
                                           'telegram_chat_id')
    telegram_settings = frappe.db.get_value('Telegram User Settings',
                                            '[email protected]',
                                            'telegram_settings')
    telegram_token = frappe.db.get_value('Telegram Settings',
                                         telegram_settings, 'telegram_token')
    bot = telegram.Bot(token=telegram_token)
    message = space + str(message) + space
    bot.send_message(chat_id=telegram_chat_id, text=message)
def get_urlinfo_by_path(urlpath, **kwargs):
    """Look up urls-collection records by exact netloc/path match.

    When kwargs['qs'] is given, the (sorted) query string must match
    exactly; otherwise only records WITHOUT a query string are returned.
    Returns the mongo cursor.
    """
    # get url collection
    url_coll = get_mongodb_db_collection(app.config['MONGODB_URLS'])
    # prepare url path for query: prepend a scheme so urlsplit can parse
    url_parse_result = urlparse.urlsplit(url_fix('http://' + urlpath))
    netloc = url_parse_result.netloc.lower()
    path = url_parse_result.path
    # initialize record set
    record_set = {}
    # query database by exact search
    if 'qs' in kwargs and len(kwargs['qs']) > 0:
        # exact search on complete url netloc, path and all query string
        # members (query string normalized by sorting)
        app.logger.debug('path query ==> {netloc : \'' + netloc +
                         '\', path : \'' + path + '\', qs : \'' +
                         str(qs_sort(kwargs['qs'])) + '\'}')
        record_set = url_coll.find({
            'netloc': netloc,
            'path': path,
            'qs': qs_sort(kwargs['qs'])
        })
    else:
        # exact search on complete url netloc, path; no query string so
        # restrict results to records without one
        app.logger.debug('path query ==> {netloc : \'' + netloc +
                         '\', path : \'' + path +
                         '\', qs : {$exists : false}}')
        record_set = url_coll.find({
            'netloc': netloc,
            'path': path,
            'qs': {
                '$exists': False
            }
        })
    return record_set
def make_urls(n, source):
    # Python 2 code (print statements, urllib.urlencode).
    # Generates up to n batches of fake URL records (random hosts, ports,
    # paths and query strings) and inserts them via create_url().
    # NOTE(review): nesting below is reconstructed from a collapsed
    # one-line source — confirm loop structure against the original file.
    rcount = 0
    for i in range(1, n):
        # fake a host
        host = id_gen_basic(random.choice(range(8, 24))) + '.' + random.choice(
            ['com', 'org', 'net', 'io', 'info'])
        #host = 'melville.' + random.choice(['com','org','net', 'io', 'info'])
        # fake a random number of ports including null
        random_hosts = random.choice(range(1, 6))
        for j in range(1, random_hosts):
            # create a random port with an occasional blank one
            r = random.choice(range(1, 7))
            port = str(random.choice(range(10000, 30000))) if (r % 6 != 0) else ''
            # fake multiple page entries per host:port
            random_ports = random.choice(range(1, 6))
            for k in range(1, random_ports):
                hostport = host + (':' + port) if port != '' else host
                newentry = {}
                # fake a random number of pages per host:port
                random_pages = random.choice(range(1, 6))
                for l in range(1, random_pages):
                    # vary the directory depth for each page
                    random_dir_depth = random.choice(range(1, 6))
                    path = ''
                    for m in range(1, random_dir_depth):
                        path = path + id_gen_path(random.choice(range(
                            5, 16))) + '/'
                    iurl = 'http://' + hostport + '/' + path + id_gen_path(
                        random.choice(range(5, 16))) + '.html'
                    # create multiple entries per page with different query strings
                    random_qs_args = random.choice(range(1, 6))
                    already_has_null_qs = False
                    for page in range(1, random_qs_args):
                        # a creation and verified date
                        cdate = datetime.datetime.utcnow(
                        ) - datetime.timedelta(
                            days=random.choice(range(10, 1000)))
                        vdate = cdate + datetime.timedelta(
                            days=random.choice(range(1, 100)))
                        # fake malware designation
                        is_malware = random.choice([True, False])
                        # fake a query string with a random number of k=v args
                        qs_dict = {}
                        qs_str = ''
                        qsrandom = random.choice(range(0, 4))
                        for nargs in range(0, qsrandom):
                            # force unicode (for sanity)
                            #k = id_gen_basic(random.choice(range(1,2))).encode('utf8')
                            #v = id_gen_qs_val(random.choice(range(1,2))).encode('utf8')
                            k = id_gen_basic(random.choice(range(
                                1, 9))).encode('utf8')
                            v = id_gen_qs_val(random.choice(range(
                                1, 9))).encode('utf8')
                            qs_dict[k] = v
                        if qs_dict != {}:
                            qs_str = urllib.urlencode(qs_dict)
                            up = urlparse.urlsplit(url_fix(iurl + '?' + qs_str))
                        else:
                            up = urlparse.urlsplit(url_fix(iurl))
                        # insert the new record; at most one record with an
                        # empty query string per page
                        if (qs_dict == {} and not already_has_null_qs) or (qs_dict != {}):
                            id = create_url(db.urls, up.netloc.lower(),
                                            up.path, qs_str, is_malware,
                                            cdate, vdate, source)
                            already_has_null_qs = True
                            rcount = rcount + 1
                            if rcount % 10001 == 0:
                                print rcount, id, iurl
                            if n < 100:
                                print id, up.netloc.lower(), up.path, up.query
    print 'total records ==>', rcount