def extract_feed_rss(self):
    """Collect candidate RSS/Atom feed URLs from the backup HTML tree.

    Returns a dict mapping feed href -> title ('' when no title is known).
    Tries three strategies in order, stopping at the first that yields
    any result: <link rel="alternate"> declarations, anchors pointing at
    .xml files, then anchors whose href merely mentions 'rss' or 'feed'.
    """
    # Strategy 1: standard <link rel="alternate"> feed declarations.
    tmp = self._tree_explorer.xpath(
        self._backup_html_tree,
        '//link[@type="application/rss+xml" and @rel="alternate"] | //link[@type="application/atom+xml" and @rel="alternate"]'
    )
    feeds = dict()
    for t in tmp:
        feeds[self._tree_explorer.get_attribute(
            t, attr='href')] = self._tree_explorer.get_attribute(t, attr='title')
    if len(feeds) == 0:
        # Strategy 2: anchors whose href points at an .xml file.
        tmp = self._tree_explorer.xpath(self._backup_html_tree,
                                        "//a[contains(@href, '.xml')]")
        for t in tmp:
            href = self._tree_explorer.get_attribute(t, attr='href')
            file_type = utils.get_filetype_from_url(href)
            if file_type and file_type == 'xml':
                feeds[href] = ''
    if len(feeds) == 0:
        # Strategy 3: anchors whose href mentions 'rss' or 'feed'.
        tmp = self._tree_explorer.xpath(
            self._backup_html_tree,
            "//a[contains(@href, 'rss')] | //a[contains(@href, 'feed')]")
        for t in tmp:
            href = self._tree_explorer.get_attribute(t, attr='href')
            if not utils.is_valid_url(href):
                # Relative link: resolve against the page URL and keep it
                # only if the joined form is a valid URL.
                final_url = '%s/%s' % (self._url, href)
                if utils.is_valid_url(final_url):
                    feeds[final_url] = ''
            elif not utils.are_equals_urls(href, self._url):
                feeds[href] = ''
    return feeds
def find_rss_path(html, url):
    """Find an RSS/Atom feed URL in *html*, resolved against *url*.

    Looks first for <link type=...rss|atom...> elements, then for anchors
    whose href mentions 'feed' or 'rss'. Returns the first candidate href
    (with spaces percent-encoded, made absolute if needed), or None.
    """
    soup = BeautifulSoup(html, 'html.parser')
    raw_rss_paths = soup.find_all('link', {'type': re.compile('rss|atom')})
    if not raw_rss_paths:
        raw_rss_paths = soup.find_all('a', {'href': re.compile('feed|rss')})
    rss_paths = []
    # Bug fix: the dots were unescaped, so '.pdf' matched ANY character
    # followed by 'pdf' (e.g. 'xpdf'), wrongly excluding such links.
    invalid_rss_path = re.compile(r'(feedback)|(\.(pdf|xlsx|xls|doc|docx)$)',
                                  re.IGNORECASE)
    for path in raw_rss_paths:
        if invalid_rss_path.search(path['href']) is None:
            rss_paths.append(path['href'])
    if rss_paths:
        href = rss_paths[0].replace(' ', '%20')
        if is_valid_url(href):
            full_path = href
        elif href[0] != '/':
            full_path = f'{url}/{href}'
        else:
            full_path = url + href
        return full_path
    return None
def post(self):
    """Create a new Buffr from the submitted form and enqueue verification."""
    user = users.get_current_user()
    if not user or 'user_id' not in dir(user):
        self.redirect(users.create_login_url('/addbuffr'))
        # Bug fix: without this return the handler kept executing (and
        # crashed on user.user_id()) for unauthenticated requests.
        return
    apiAddress = self.request.get('apiAddress')
    to_console = {}
    to_console["apiAddress"] = apiAddress
    to_console["is_valid_url(apiAddress)"] = (is_valid_url(apiAddress) != None)
    buffr_instance = Buffr()
    buffr_instance.apiName = self.request.get('apiName')
    buffr_instance.apiAddress = apiAddress
    APIUnstable = self.request.get('APIUnstable')
    # Bug fix: request parameters are strings, so the old check
    # `APIUnstable not in [True, False]` was always true and the flag was
    # unconditionally stored as False. Parse the usual truthy form values.
    buffr_instance.APIUnstable = APIUnstable in ('on', 'true', 'True', '1', True)
    buffr_instance.user_id = user.user_id()
    buffr_instance.user_email = user.email()
    buffr_instance.update_interval = int(self.request.get('updateInterval'))
    for possibility in user_readable_convertion_table:
        logging.info(str((possibility[0], buffr_instance.update_interval)))
        if int(possibility[0]) == buffr_instance.update_interval:
            buffr_instance.user_readable_update_interval = possibility[2]
    # End point is a stable hash of (user, API address).
    buffr_instance.end_point = hashlib.md5(
        '%s:%s' % (user.user_id(), apiAddress)).hexdigest()
    buffr_instance.last_known_data = None
    buffr_instance.buffr_version = current_api_version
    buffr_instance.put()
    memcache.flush_all()
    logging.info('Added new Buffr to datastore')
    taskqueue.add(url='/confirm_working_url',
                  params={'key': buffr_instance.key()})
    render(self, 'addbuffer.html', {'to_console': to_console,
                                    'submitted': True,
                                    'apiAddress': apiAddress})
def svg_to_any(elem, doc):
    """ Convert a svg to supported formats """
    if not isinstance(elem, Image):
        return
    # We don't want urls, you have to download them first
    if is_valid_url(elem.url):
        return
    mimet, _ = mimetypes.guess_type(elem.url)
    # Bug fix: .get() returned None for output formats missing from
    # FMT_OPTIONS, making the tuple unpacking raise TypeError; fall back
    # to (None, None) so the format check below simply skips conversion.
    flag, file_ext = FMT_OPTIONS.get(doc.format, (None, None))
    if mimet == 'image/svg+xml' and flag:
        base_name, _ = os.path.splitext(elem.url)
        target_name = base_name + "." + file_ext
        try:
            mtime = os.path.getmtime(target_name)
        except OSError:
            mtime = -1
        # Re-run inkscape only when the target is older than the source.
        if mtime < os.path.getmtime(elem.url):
            cmd_line = ['inkscape', flag, target_name, elem.url]
            sys.stderr.write("Running %s\n" % " ".join(cmd_line))
            subprocess.call(cmd_line, stdout=sys.stderr.fileno())
        elem.url = target_name
def download_image(elem, _):
    """Fetch the remote image referenced by *elem* into IMAGEDIR and
    rewrite elem.url to the local copy. Cached files are reused."""
    if not isinstance(elem, Image):
        return
    parsed = is_valid_url(elem.url)
    if not parsed:
        # Not a remote URL — nothing to download.
        return
    file_name = unquote(parsed.path).split('/')[-1]
    full_path = os.path.join(IMAGEDIR, file_name)
    if os.path.isfile(full_path):
        elem.url = full_path
        return
    try:
        os.mkdir(IMAGEDIR)
        sys.stderr.write('Created directory ' + IMAGEDIR + '\n')
    except OSError:
        # Directory already exists (or cannot be created) — proceed.
        pass
    try:
        with urlopen(elem.url) as response, open(full_path, 'wb') as out_file:
            shutil.copyfileobj(response, out_file)
        elem.url = full_path
    except HTTPError as err:
        logging.warning('HTTP error %s %d %s', elem.url, err.code, err.reason)
def pdf_to_svg(elem, doc):
    """ Convert a pdf to svg """
    if not isinstance(elem, Image):
        return
    # We don't want urls, you have to download them first
    if is_valid_url(elem.url):
        return
    mimet, _ = mimetypes.guess_type(elem.url)
    if mimet != 'application/pdf':
        return
    flag, file_ext = '--export-plain-svg', 'svg'
    base_name, _ = os.path.splitext(elem.url)
    target_name = base_name + "." + file_ext
    try:
        mtime = os.path.getmtime(target_name)
    except OSError:
        # Target does not exist yet — force a conversion.
        mtime = -1
    if mtime < os.path.getmtime(elem.url):
        cmd_line = ['inkscape', flag, target_name, elem.url]
        sys.stderr.write("Running %s\n" % " ".join(cmd_line))
        subprocess.call(cmd_line, stdout=sys.stderr.fileno())
    elem.url = target_name
def get_har(
        self,
        remove_domain_request=True,
        domains_to_remove={
            'facebook.com', 'facebook.it', 'youtube.it', 'youtube.com',
            'twitter.it', 'twitter.com'
        },
        file_type_to_remove={'jpg', 'png', 'jpeg'}):
    """Return the unique network-response URLs captured in the driver logs.

    Skips URLs on the current page's own domain (when
    remove_domain_request is true), URLs on any domain listed in
    domains_to_remove, and URLs whose file type is in file_type_to_remove.
    """
    # NOTE: the mutable set defaults are shared between calls but never
    # mutated here, so sharing is safe.
    result = list()
    if self.logging and self.logs:
        domain = None
        if remove_domain_request:
            domain = utils.get_domain(self.current_url)
        for log in self.logs:
            # Idiom fix: json.loads parses the string directly; the old
            # json.load(StringIO(...)) round-trip was unnecessary.
            message = json.loads(log['message'])['message']
            if 'method' in message:
                method = message['method']
                if method and method == 'Network.responseReceived':
                    url = message['params']['response']['url']
                    if utils.is_valid_url(url):
                        to_insert = (domain and not utils.is_domain_link(
                            url, domain)) or domain is None
                        to_insert = to_insert and utils.get_filetype_from_url(
                            url) not in file_type_to_remove
                        if to_insert:
                            for d in domains_to_remove:
                                if utils.is_domain_link(url, d):
                                    to_insert = False
                                    break
                        if to_insert:
                            result.append(url)
        # De-duplicate while returning a plain list.
        result = list(set(result))
    #print('har len: ' + str(len(result)))
    return result
def download_album(host, url, name, dest=".", delim=" - ", digits=3, number=1): if not is_valid_url(url): sys.exit(1) host = host.lower() name = name.lower() if host == "imagebam": imagebam(url, name, dest, delim, digits, number) elif host == "imagevenue": imagevenue(url, name, dest, delim, digits, number) elif host == "imgbox": imgbox(url, name, dest, delim, digits, number) elif host == "imgur": imgur(url, name, dest, delim, digits, number) elif host == "someimage": someimage(url, name, dest, delim, digits, number) elif host == "upix": upix(url, name, dest, delim, digits, number) elif host == "hotflick": hotflick(url, name, dest, delim, digits, number) elif host == "myceleb": myceleb(url, name, dest, delim, digits, number) elif host == "mangastream": mangastream(url, name, dest, delim, digits, number) else: print "ERROR: Unsupported image host '{}'".format(host)
def crawl(url_list):
    # BFS crawl of ourcampaigns.com starting from url_list (Python 2).
    # Each page's tables are dumped as JSON files named after a
    # human-readable description derived from the page category.
    q = deque(url_list)
    processed = set(url_list)
    domain = 'http://www.ourcampaigns.com/'
    count = 0
    sw = StopWatch()
    while q:
        current_url = q.popleft()
        result = html_to_json(domain + current_url)
        if result is None:
            print ' skip', current_url
            continue
        category, uid = tokenize(current_url)
        if category == 'race':
            # Breadcrumb like 'World > United States > ...'; keep only
            # US races with a plausible term-start year.
            components = result['RACE DETAILS']['Parents'][0]['text'].split(
                '>')
            if len(components) <= 2:
                print ' Bad', components, current_url
                continue
            if components[1].strip() != 'United States':
                continue
            position = campactify(components[-2] + components[-1])
            year = int(result['RACE DETAILS']['Term Start'][0]['text'].split(
                '-')[0].split(',')[-1].strip())
            if year > 2017 or year < 1900:
                continue
            description = 'race_{}_{}'.format(position, year)
        elif category == 'candidate':
            name = campactify(result['CANDIDATE DETAILS']['Name'][0]['text'])
            description = 'candidate_{}'.format(name)
        elif category == 'container':
            name = campactify(result['INCUMBENT']['Name'][0]['text'])
            year = result['INCUMBENT']['Won'][0]['text'].split('/')[-1].strip()
            description = 'container_{}_{}'.format(name, year)
        count += 1
        if count % 500 == 0:
            print '{}, crawling {}'.format(count, description)
        for table_title, table in result.iteritems():
            camel_title = to_camel(table_title)
            if camel_title not in [
                    'LastGeneralElection', 'PrimaryOtherSchedule'
            ]:
                with open(
                        os.path.join(
                            JSON_DIR,
                            '{}_{}_{}.json'.format(description, uid,
                                                   camel_title)),
                        'wb') as fp:
                    json.dump(table, fp)
            # For race pages, only Governor/Mayor races have their links
            # followed; other categories always expand links.
            if category == 'race' and 'Governor' not in description and 'Mayor' not in description:
                continue
            for row_title, row in table.iteritems():
                for cell in row:
                    link = cell['link']
                    if link not in processed and is_valid_url(link):
                        q.append(link)
                        processed.add(link)
    sw.tic('crawl {} urls'.format(count))
async def remove_award(self, ctx, user: UserConverter, url: str):
    '''
    !remove_award @user url [Admin only] Removes an award from a user
    '''
    # Docstring fixed: it previously showed the !add_award usage line.
    if not is_valid_url(url):
        raise InvalidUrl()
    await self.bot.remove_award(ctx, user, url)
    await ctx.send('Done.')
async def set_award(self, ctx, url: str):
    '''
    !set_award url [Admin only] Sets an award for current challenge
    '''
    if is_valid_url(url):
        await self.bot.set_award(ctx, url)
        await ctx.send('Done.')
        return
    raise InvalidUrl()
def generate_features(self, url_info, svm_feature=False): # preprocess the url_info self.features = [] if is_valid_url(url_info[0]): print url_info[0], len(url_info[0]) for feature_obj in self.pipeline: self.features.append( feature_obj.extract(url_info, svm_feature) ) else: self.features = []
def crawl(url):
    # BFS crawl resuming from processed.txt (Python 2); table JSON is
    # written under data/ and the processed set is persisted at the end.
    q = deque([url])
    with open('processed.txt', 'rb') as fp:
        processed = set(fp.read().split())
    processed.add(url)
    domain = 'http://www.ourcampaigns.com/'
    while q:
        current_url = q.popleft()
        # Normalize absolute links back to site-relative form.
        if current_url.startswith(domain):
            current_url = current_url[len(domain):]
        result = html_to_json(domain + current_url)
        if result is None:
            print ' skip', current_url
            continue
        category, uid = tokenize(current_url)
        if category == 'race':
            # Breadcrumb like 'World > United States > ...'; keep only
            # US races within the year window.
            components = result['RACE DETAILS']['Parents'][0]['text'].split(
                '>')
            if len(components) <= 2:
                print ' Bad', components, current_url
                continue
            if components[1].strip() != 'United States':
                continue
            position = campactify(components[-2] + components[-1])
            year = int(result['RACE DETAILS']['Term Start'][0]['text'].split(
                '-')[0].split(',')[-1].strip())
            if year > 2016 or year < 1950:
                continue
            description = 'race_{}_{}'.format(position, year)
        elif category == 'candidate':
            name = campactify(result['CANDIDATE DETAILS']['Name'][0]['text'])
            description = 'candidate_{}'.format(name)
        elif category == 'container':
            name = campactify(result['INCUMBENT']['Name'][0]['text'])
            year = result['INCUMBENT']['Won'][0]['text'].split('/')[-1].strip()
            description = 'container_{}_{}'.format(name, year)
        # print ' ' + description, current_url
        for table_title, table in result.iteritems():
            camel_title = to_camel(table_title)
            if camel_title not in [
                    'LastGeneralElection', 'PrimaryOtherSchedule'
            ]:
                with open(
                        'data/{}_{}_{}.json'.format(description, uid,
                                                    camel_title),
                        'wb') as fp:
                    json.dump(table, fp)
            # For race pages, only Governor races get their links followed.
            if category == 'race' and 'Governor' not in description:
                continue
            for row_title, row in table.iteritems():
                for cell in row:
                    link = cell['link']
                    if is_valid_url(link) and link not in processed:
                        q.append(link)
                        processed.add(link)
    with open('processed.txt', 'wb') as fp:
        fp.write('\n'.join(processed))
def on_new_url(self, request):
    """Handle the new-URL form: validate, store, redirect to details."""
    error = None
    if request.method == "POST":
        url = request.form["url"]
        if not is_valid_url(url):
            # Fixed typo in the user-facing message ("valide" -> "valid").
            error = "URL is not valid"
        else:
            url_id = insert_url(self.redis, url)
            return redirect("/%s_details" % url_id)
    return self.render_template("new_url.html", error=error)
def test_is_valid_url(self):
    """Table-driven check of is_valid_url over accept/reject cases."""
    cases = [
        ('https://google.com', True),
        ('https://www.google.com/search?q=the+simpsons', True),
        ('https://google.co', False),
        ('//google.com', False),
        ('google.com', False),
        ('google', False),
        ('', False),
    ]
    for candidate, expected in cases:
        self.assertEqual(is_valid_url(candidate), expected)
def test_is_valid_url(self):
    """Verify is_valid_url against known valid and invalid inputs."""
    valid = [
        'https://google.com',
        'https://www.google.com/search?q=the+simpsons',
    ]
    invalid = ['https://google.co', '//google.com', 'google.com',
               'google', '']
    for candidate in valid:
        self.assertEqual(is_valid_url(candidate), True)
    for candidate in invalid:
        self.assertEqual(is_valid_url(candidate), False)
def _retrieve_outbound_links(self):
    """Return unique, cleaned hrefs pointing outside the principal domain."""
    principal_domain = utils.get_principal_domain(self._url)
    xpath_query = "//*[@href and not(@href [contains(., '%s')])]" % principal_domain
    # A dict is used as an ordered set of hrefs.
    seen = dict()
    for node in self._tree_explorer.xpath(self.body_node, xpath_query):
        href = node.attrib['href']
        if not utils.is_valid_url(href):
            continue
        cleaned = utils.clean_url(href)
        if cleaned not in seen:
            seen[cleaned] = ''
    return list(seen.keys())
def add_url(self, request):
    """Render the new-URL form; on a valid POST, shorten and redirect."""
    error = None
    url = None
    if request.method == 'POST':
        url = request.form['url']
        if is_valid_url(url):
            short_id = self.insert_url(url)
            return redirect(f'/{short_id}+')
        error = 'Please enter a valid URL'
    return self.render_template('new_url.html', error=error, url=url)
def on_new_url(self, request):
    """Show the new-URL form; store a valid POSTed URL and redirect."""
    error = None
    url = ""
    if request.method == "POST":
        url = request.form["url"]
        if is_valid_url(url):
            short_id = insert_url(self.redis, url)
            return redirect('/%s_details' % short_id)
        error = "Invalid url"
    return self.render_template("new_url.html", error=error, url=url)
def on_new_url(self, request):
    """Handle the new-URL form: validate, store, redirect to details."""
    error = None
    url = ""
    if request.method == 'POST':
        url = request.form['url']
        if not is_valid_url(url):
            error = 'invalid url'
        else:
            short_id = insert_url(self.redis, url)
            # Bug fix: the original built the redirect path as a bytes
            # literal (b'/%s_details' % id); bytes %-interpolation raises
            # TypeError for non-bytes values such as an int id, and a
            # Location path should be str anyway.
            return redirect('/%s_details' % short_id)
    return self.render_template("new_url.html", error=error, url=url)
def on_new_url(self, request):
    """Handle the new-URL form: validate, store, redirect to details."""
    error = None
    url = ""
    if request.method == "POST":
        url = request.form['url']
        if not is_valid_url(url):
            error = 'invalid url'
        else:
            new_id = insert_url(self.redis, url)
            # Redis may hand back a bytes id — normalize to str.
            if isinstance(new_id, bytes):
                new_id = new_id.decode('utf-8')
            # Bug fix: the bytes branch previously redirected to
            # '%s_details' without the leading '/', unlike the other
            # branch; both now use the same absolute path.
            return redirect('/%s_details' % new_id)
    return self.render_template("new_url.html", error=error, url=url)
def on_new_url(self, request):
    """Show the new-URL form; store a valid POSTed URL and redirect."""
    error = None
    url = ""
    if request.method == 'POST':
        url = request.form['url']
        if not is_valid_url(url):
            error = 'URL is not valid'
        else:
            url_id = insert_url(self.redis, url)
            return redirect('%s/detail' % url_id.decode('utf-8'))
    return self.render_template("new_url.html", error=error, url=url)
async def handle_shortify(request):
    """Handler processing link-shortening requests."""
    data = await request.post()
    db = request.app["db"]
    url = data.get("url")
    user_url = data.get("user_url")
    if not url:
        return aiohttp_jinja2.render_template("index.html", request,
                                              {"error": ERRORS["without_url"]})
    if not utils.is_valid_url(url):
        return aiohttp_jinja2.render_template("index.html", request,
                                              {"error": ERRORS["invalid_url"]})
    if user_url:
        # Custom short link requested — reject if already taken.
        exists = await db.get(user_url)
        if exists:
            return aiohttp_jinja2.render_template(
                "index.html", request, {"error": ERRORS["busy_url"]})
        short_url = user_url
    else:
        # If the link is already in the database, do not mint a new one.
        short_url = await db.get(url)
        if short_url:
            return aiohttp_jinja2.render_template(
                "index.html", request, {
                    "shortened_url":
                        "{}:{}/{}".format(settings.HOST, settings.PORT,
                                          short_url.decode("UTF-8"))
                })
        link_count = await db.incr(settings.DB_LINKS_COUNT_KEY)
        short_url = utils.encode(link_count)
        exists = await db.get(short_url)
        while exists:
            link_count = await db.incr(settings.DB_LINKS_COUNT_KEY)
            # Bug fix: the retry loop called Shortener.encode while the
            # rest of the handler uses utils.encode — use one encoder.
            short_url = utils.encode(link_count)
            exists = await db.get(short_url)
    # Store both directions so the full URL's presence can be checked,
    # which avoids duplicate short links. Bug fix: the custom user_url
    # path previously returned before these writes (and crashed calling
    # .decode on a str), so custom links were never persisted.
    await db.set(short_url, url)
    await db.set(url, short_url)
    return aiohttp_jinja2.render_template(
        "index.html", request, {
            "shortened_url": "{}:{}/{}".format(settings.HOST, settings.PORT,
                                               short_url)
        })
def on_new_url(self, request):
    """Show the new-URL form; store a valid POSTed URL and redirect."""
    error = None
    url = ''
    if request.method == 'POST':
        url = request.form['url']
        if not is_valid_url(url):
            error = 'Not valid url'
        else:
            new_id = insert_url(self.redis, url)
            return redirect('%s' % new_id)
    return self.render_template("new_url.html", error=error, url=url)
def generate_links(link, base_link):
    """Fetch *link* and return sanitized, valid anchor URLs found in it.

    Best-effort: on any failure the URLs collected so far are returned.
    """
    a_url_list = []
    try:
        response = requests.get(link)
        # BeautifulSoup builds a DOM tree for searching the document.
        dom_tree = BeautifulSoup.BeautifulSoup(response.text)
        for a_element in dom_tree.fetch('a'):  # all <a> elements
            a_url = a_element.get('href')
            if utils.is_valid_url(a_url, base_link):
                a_url_list.append(utils.sanitize_url(a_url, base_link))
    except Exception:
        # Bug fix: the bare `except:` also swallowed KeyboardInterrupt
        # and SystemExit; keep the best-effort behavior for real errors.
        return a_url_list
    return a_url_list
def __crawl(self, page, level):
    # Depth-limited recursive crawl: fetch the page into the
    # self.__page_content cache, then follow every link found on it
    # until self.__deep levels have been visited.
    if page not in self.__page_content:
        self.read(page)
    if self.__progress_bar:
        print(".", end="")
    if level < self.__deep:
        result = []
        # FindLink fills `result` with the links present on the page.
        searchers.FindLink("").do(page, self.__page_content[page], result)
        for link in result:
            # Relative links are resolved against the crawl root URL.
            new_page = link if utils.is_valid_url(
                link) else urllib.parse.urljoin(self.__url, link)
            self.__crawl(new_page, level + 1)
def get(self, url):
    '''deliver pages to bots'''
    # Serves pre-rendered page snapshots to crawlers: 400 for an invalid
    # URL, 502 when the datastore has no snapshot for it. (Python 2.)
    try:
        logging.info('Serving %s to %s', url,
                     self.request.headers['User-Agent'])
        self.set_header('content-type', 'text/html')
        if not utils.is_valid_url(url):
            self.send_error(400)
            return
        content = datastore.get_page(utils.to_pretty_url(url))
        if content is None:
            self.send_error(502)
        else:
            self.write(content)
    except Exception, e:  # Python 2 syntax; errors are logged, not raised
        logging.error('Error getting page for crawler', exc_info=True)
def download_album(host, url, name, dest=".", delim=" - ", digits=3, number=1): if not is_valid_url(url): sys.exit(1) host = host.lower() name = name.lower() if host == "imagebam": imagebam(url, name, dest, delim, digits, number) elif host == "imagevenue": imagevenue(url, name, dest, delim, digits, number) elif host == "imgbox": imgbox(url, name, dest, delim, digits, number) elif host == "imgur": imgur(url, name, dest, delim, digits, number) else: print "ERROR: Unsupported image host '{}'".format(host)
def on_new_url(self, request):
    """POST: validate the URL, store it, then redirect to its details
    page; otherwise (or on error) re-render the form."""
    error = None
    url = ""
    if request.method == "POST":
        url = request.form['url']
        if is_valid_url(url):
            new_id = insert_url(self.redis, url)
            return redirect('/%s_details' % str(new_id))
        error = 'invalid url'
    return self.render_template("new_url.html", error=error, url=url)
def _add_job(self, update: telegram.Update, context: telegram.ext.CallbackContext):
    f"""
    Callback for the update of a job.

    Message must be:
    ```
    {Bot.ADD_USAGE}
    ```
    """
    # NOTE(review): the f-string above is evaluated and discarded, not
    # stored as __doc__ (f-strings cannot be docstrings) — confirm intent.
    user = update.effective_chat.id
    try:
        # Extract info.
        url = context.args[0]
        # Check url validity.
        if not utils.is_valid_url(url):
            update.message.reply_text(f"{url} is not a valid url.",
                                      disable_web_page_preview=True)
            logging.warning(f"Invalid url from user {user}.")
            return
        # Check minimum time; too-small values are clamped, not rejected.
        freq = int(context.args[1])
        if freq < self._minimum_interval:
            update.message.reply_text(
                f"{self._minimum_interval} minutes is the minimum time. I'll just set it for you."
            )
            freq = self._minimum_interval
        # Optional keywords follow the url and frequency arguments.
        keywords = context.args[2::] if len(context.args) > 2 else list()
        # Update database.
        job = Job(user, url, freq, keywords)
        Database(self._database_file).add_job(job)
        # Schedule job.
        self._schedule(job)
        # Send back a response as a confirmation.
        response = f"Will start searching {url} for links containing {', '.join(keywords)} every {freq} minutes."
        update.message.reply_text(response, disable_web_page_preview=True)
        logging.info(f"/add command received by user: {user}. {response}")
    except (IndexError, ValueError):
        # Missing arguments (IndexError) or a non-integer frequency
        # (ValueError) — reply with the usage text.
        update.message.reply_text(f"Usage: {Bot.ADD_USAGE}")
        logging.warning(f"Inappropriate /add command from user {user}.")
def set_webhook(url: str):
    """
    Accepts a url string and checks to see if it meets HTTP/s
    specifications. If it does, it returns the string. If it does not,
    it raises an ArgumentError.

    :url: A string representing the webhook's url
    """
    if url is None:
        return None
    try:
        valid = utils.is_valid_url(url=url)
    except Exception as e:  # e.g. a TypeError from a non-string argument
        raise errors.ArgumentError(
            {'webhook': (e.__class__, url, e.__str__())})
    if valid:
        return url
    # URL was well-typed but did not meet the HTTP/s format.
    raise errors.ArgumentError({
        'webhook':
            (ValueError, url, 'Webhook URL not meet HTTP/s specifications.')
    })
async def parse_title_args(self, ctx, guild_id, *_args):
    """Parse positional title arguments into a params dict.

    Recognizes, in any order: a URL, a user mention, and a pool name.
    At most one leftover argument is taken as the title name.

    Raises BotErr when more than one unrecognized argument remains.
    """
    args = list(_args)
    params = {}
    params['user'] = ctx.message.author
    # todo: make so default pool isn't main but the first pool in a challenge
    params['pool'] = 'main'
    params['guild_id'] = guild_id
    for i, arg in enumerate(args):
        if is_valid_url(arg):
            params['url'] = arg
            args[i] = None
        usr = await user_or_none(ctx, arg)
        if usr:
            params['user'] = usr
            args[i] = None
        if await self.bot.has_pool(ctx, arg, params['guild_id']):
            params['pool'] = arg
            args[i] = None
    args = [arg for arg in args if arg is not None]
    # Bug fix: params['url'] raised KeyError whenever no URL argument was
    # supplied; .get() makes the URL genuinely optional.
    if params.get('url'):
        title_info = self.bot.get_api_title_info(params['url'])
        params['title_name'] = title_info.name
        params['score'] = title_info.score
        params['duration'] = title_info.duration
        params['num_of_episodes'] = title_info.num_of_episodes
        params['difficulty'] = title_info.difficulty
    if len(args) > 1:
        # Typo fixed in the error message ("argumnets" -> "arguments").
        raise BotErr('Bad arguments')
    if len(args) == 1:
        params['title_name'] = args[0]
    return params
def url_post():
    """Saves URLs if ok, else wont (?)"""
    data = request.json.copy()
    if "url" not in data:
        return jsonify(error="URL not provided"), 400
    # Use the caller-supplied code when present, otherwise generate one.
    code = data.get('code') if "code" in data else utils.gen_code()
    if not utils.is_valid_code(code):
        return jsonify(error="Code not valid"), 409
    _, exists = utils.code_exists(code)
    if exists:
        return jsonify(error="Code in use"), 422
    if not utils.is_valid_url(data.get('url')):
        return jsonify(error="URL not valid"), 409
    utils.insert_url(data.get('url'), code)
    return jsonify(code=code), 201
def fetch_reviews(self):
    """Scrape every review page for self.url.

    Returns a list of review dicts, or None when the URL is invalid.
    """
    print('Fetching reviews...', flush=True)
    self.lookup = {}
    reviews = []
    has_next = True
    set_locale(self.url)
    if not is_valid_url(self.url):
        print('[ERROR] URL is not valid: ' + self.url, flush=True)
        return None
    self.driver.get(self.url)
    try:
        # Select "all languages" when the filter control is present.
        self.driver.find_element_by_id(
            'taplc_location_review_filter_controls_0_filterLang_ALL').click()
    except Exception:
        # Bug fix: was a bare `except:`, which also swallowed
        # KeyboardInterrupt/SystemExit. Missing control is non-fatal.
        pass
    while has_next:
        time.sleep(SECONDS_BETWEEN_REQUEST + 0.5)
        reviews_parsed = self.__parse_page()
        if len(reviews_parsed) == 0:
            break
        reviews += reviews_parsed
        print('Fetched reviews: ' + str(len(reviews)), flush=True)
        try:
            has_next = self.driver.execute_script(
                'return !document.querySelector(".ui_pagination>.next").classList.contains("disabled")')
        except Exception:
            # No pagination control — assume this was the last page.
            has_next = False
        if has_next:
            self.driver.execute_script(
                'document.querySelector(".ui_pagination>.next").click()')
    return [r.__dict__ for r in reviews]
def post(self, username):
    # PubSubHubbub subscribe/unsubscribe endpoint (Python 2 / GAE):
    # validates all hub.* parameters, then either confirms the
    # subscription synchronously or queues an async verification task.
    self.response.headers['Content-Type'] = 'text/plain'
    callback = self.request.get('hub.callback', '')
    topic = self.request.get('hub.topic', '')
    verify_type_list = [s.lower() for s in self.request.get_all('hub.verify')]
    verify_token = unicode(self.request.get('hub.verify_token', ''))
    secret = unicode(self.request.get('hub.secret', '')) or None
    lease_seconds = self.request.get('hub.lease_seconds',
                                     str(constants.DEFAULT_LEASE_SECONDS))
    mode = self.request.get('hub.mode', '').lower()
    error_message = None
    # Each validation overwrites error_message; only the last failure is
    # reported back to the caller.
    if not callback or not utils.is_valid_url(callback):
        error_message = 'Invalid parameter: hub.callback'
    else:
        callback = utils.unicode_to_iri(callback)
    if not topic or not utils.is_valid_url(topic):
        error_message = 'Invalid parameter: hub.topic'
    else:
        topic = utils.unicode_to_iri(topic)
    enabled_types = [vt for vt in verify_type_list if vt in ('async', 'sync')]
    if not enabled_types:
        error_message = 'Invalid values for hub.verify: %s' % (verify_type_list,)
    else:
        verify_type = enabled_types[0]
    if mode not in ('subscribe', 'unsubscribe'):
        error_message = 'Invalid value for hub.mode: %s' % mode
    if lease_seconds:
        try:
            old_lease_seconds = lease_seconds
            lease_seconds = int(old_lease_seconds)
            # Reject values like '1.5' or ' 10': round-tripping through
            # int() must reproduce the original string exactly.
            if not old_lease_seconds == str(lease_seconds):
                raise ValueError
        except ValueError:
            error_message = ('Invalid value for hub.lease_seconds: %s'
                             % old_lease_seconds)
    if error_message:
        logging.debug('Bad request for mode = %s, topic = %s, '
                      'callback = %s, verify_token = %s, lease_seconds = %s: %s',
                      mode, topic, callback, verify_token, lease_seconds,
                      error_message)
        self.response.out.write(error_message)
        return self.response.set_status(400)
    try:
        # Retrieve any existing subscription for this callback.
        sub = hubmodel.HubSubscription.get_by_key_name(
            hubmodel.HubSubscription.create_key_name(callback, topic))
        # Deletions for non-existant subscriptions will be ignored.
        if mode == 'unsubscribe' and not sub:
            return self.response.set_status(204)
        # Enqueue a background verification task, or immediately confirm.
        # We prefer synchronous confirmation.
        if verify_type == 'sync':
            if hooks.execute(confirm_subscription, mode, topic, callback,
                             verify_token, secret, lease_seconds):
                return self.response.set_status(204)
            else:
                self.response.out.write('Error trying to confirm subscription')
                return self.response.set_status(409)
        else:
            if mode == 'subscribe':
                hubmodel.HubSubscription.request_insert(
                    callback, topic, verify_token, secret,
                    lease_seconds=lease_seconds)
            else:
                hubmodel.HubSubscription.request_remove(callback, topic,
                                                        verify_token)
            logging.debug('Queued %s request for callback = %s, '
                          'topic = %s, verify_token = "%s", lease_seconds= %s',
                          mode, callback, topic, verify_token, lease_seconds)
            return self.response.set_status(202)
    except (apiproxy_errors.Error, db.Error, runtime.DeadlineExceededError,
            taskqueue.Error):
        # Transient datastore/taskqueue failure: ask the client to retry.
        logging.exception('Could not verify subscription request')
        self.response.headers['Retry-After'] = '120'
        return self.response.set_status(503)
engine.execute( text(""" UPDATE posts_rssfeed SET updated_at=:updated_at WHERE id=:rss_id """), { 'updated_at': updated_at, 'rss_id': parser['rss_id'] }) internal_entries = [] raw_external_entries = [] for entry in parser['entries']: if entry.get('link'): link = entry['link'] else: if entry.get('id') and is_valid_url(entry['id']): link = entry['id'] else: print("Unfortunately this news doesn\'t have a link.") print(entry) continue content = None if entry.get('summary'): text_to_search_in = entry['summary'] elif entry.get('content') and entry['content'][0]['value']: content = remove_html_tags(entry['content'][0]['value']) text_to_search_in = content else: text_to_search_in = ''
class Sms:
    """Authentication method plugin: log users in with an SMS code.

    Exposes census/register/authenticate/resend_auth_code entry points
    driven by JSON request bodies; presumably plugged into an auth-event
    framework (``ae``) — verify against the registry that consumes
    DESCRIPTION / CONFIG / PIPELINES.
    """

    DESCRIPTION = 'Provides authentication using an SMS code.'

    # Default message template and per-action behavior. '__URL__' and
    # '__CODE__' are placeholder tokens substituted elsewhere (outside
    # this excerpt) — TODO confirm.
    CONFIG = {
        'msg': 'Enter in __URL__ and put this code __CODE__',
        'registration-action': {
            'mode': 'vote',
            'mode-config': None,
        },
        'authentication-action': {
            'mode': 'vote',
            'mode-config': None,
        }
    }

    # Declarative pipelines evaluated by check_pipeline/give_perms.
    PIPELINES = {
        'give_perms': [{
            'object_type': 'UserData',
            'perms': [
                'edit',
            ],
            'object_id': 'UserDataId'
        }, {
            'object_type': 'AuthEvent',
            'perms': [
                'vote',
            ],
            'object_id': 'AuthEventId'
        }],
        # Rate limits on registration, keyed by phone ('tlf') and ip.
        "register-pipeline": [
            ["check_whitelisted", {
                "field": "tlf"
            }],
            ["check_whitelisted", {
                "field": "ip"
            }],
            ["check_blacklisted", {
                "field": "ip"
            }],
            ["check_blacklisted", {
                "field": "tlf"
            }],
            ["check_total_max", {
                "field": "ip",
                "max": 8
            }],
            ["check_total_max", {
                "field": "tlf",
                "max": 7
            }],
            ["check_total_max", {
                "field": "tlf",
                "period": 1440,
                "max": 5
            }],
        ],
        "authenticate-pipeline": [
            #['check_total_connection', {'times': 5 }],
            #['check_sms_code', {'timestamp': 5 }]
        ],
        # Rate limits on re-sending auth codes.
        "resend-auth-pipeline": [
            ["check_whitelisted", {
                "field": "tlf"
            }],
            ["check_whitelisted", {
                "field": "ip"
            }],
            ["check_blacklisted", {
                "field": "ip"
            }],
            ["check_blacklisted", {
                "field": "tlf"
            }],
            ["check_total_max", {
                "field": "tlf",
                "period": 3600,
                "max": 5
            }],
            [
                "check_total_max", {
                    "field": "tlf",
                    "period": 3600 * 24,
                    "max": 15
                }
            ],
            ["check_total_max", {
                "field": "ip",
                "period": 3600,
                "max": 10
            }],
            [
                "check_total_max", {
                    "field": "ip",
                    "period": 3600 * 24,
                    "max": 20
                }
            ],
        ]
    }

    USED_TYPE_FIELDS = ['tlf']

    # Field schema for the phone-number ('tlf') input.
    tlf_definition = {
        "name": "tlf",
        "type": "text",
        "required": True,
        "min": 4,
        "max": 20,
        "required_on_authentication": True
    }
    # Field schema for the SMS code input.
    code_definition = {
        "name": "code",
        "type": "text",
        "required": True,
        "min": 6,
        "max": 255,
        "required_on_authentication": True
    }

    # Declarative contract checked by check_contract() in check_config().
    # Both '*-action' entries accept mode 'vote' (mode-config must be
    # None) or 'go-to-url' (mode-config must be {'url': <https URL>}).
    CONFIG_CONTRACT = [{
        'check': 'isinstance',
        'type': dict
    }, {
        'check': 'dict-keys-exact',
        'keys': ['msg', 'registration-action', 'authentication-action']
    }, {
        'check': 'index-check-list',
        'index': 'msg',
        'check-list': [{
            'check': 'isinstance',
            'type': str
        }, {
            'check': 'length',
            'range': [1, 200]
        }]
    }, {
        'check': 'index-check-list',
        'index': 'registration-action',
        'check-list': [{
            'check': 'isinstance',
            'type': dict
        }, {
            'check': 'dict-keys-exact',
            'keys': ['mode', 'mode-config']
        }, {
            'check': 'index-check-list',
            'index': 'mode',
            'check-list': [{
                'check': 'isinstance',
                'type': str
            }, {
                'check': 'lambda',
                'lambda': lambda d: d in ['vote', 'go-to-url']
            }]
        }, {
            'check': 'switch-contract-by-dict-key',
            'switch-key': 'mode',
            'contract-key': 'mode-config',
            'contracts': {
                'vote': [{
                    'check': 'lambda',
                    'lambda': lambda d: d is None
                }],
                'go-to-url': [{
                    'check': 'isinstance',
                    'type': dict
                }, {
                    'check': 'dict-keys-exact',
                    'keys': ['url']
                }, {
                    'check': 'index-check-list',
                    'index': 'url',
                    'check-list': [{
                        'check': 'isinstance',
                        'type': str
                    }, {
                        'check': 'length',
                        'range': [1, 400]
                    }, {
                        'check': 'lambda',
                        'lambda': lambda d: is_valid_url(d, schemes=['https'])
                    }]
                }]
            }
        }]
    }, {
        'check': 'index-check-list',
        'index': 'authentication-action',
        'check-list': [{
            'check': 'isinstance',
            'type': dict
        }, {
            'check': 'dict-keys-exact',
            'keys': ['mode', 'mode-config']
        }, {
            'check': 'index-check-list',
            'index': 'mode',
            'check-list': [{
                'check': 'isinstance',
                'type': str
            }, {
                'check': 'lambda',
                'lambda': lambda d: d in ['vote', 'go-to-url']
            }]
        }, {
            'check': 'switch-contract-by-dict-key',
            'switch-key': 'mode',
            'contract-key': 'mode-config',
            'contracts': {
                'vote': [{
                    'check': 'lambda',
                    'lambda': lambda d: d is None
                }],
                'go-to-url': [{
                    'check': 'isinstance',
                    'type': dict
                }, {
                    'check': 'dict-keys-exact',
                    'keys': ['url']
                }, {
                    'check': 'index-check-list',
                    'index': 'url',
                    'check-list': [{
                        'check': 'isinstance',
                        'type': str
                    }, {
                        'check': 'length',
                        'range': [1, 400]
                    }, {
                        'check': 'lambda',
                        'lambda': lambda d: is_valid_url(d, schemes=['https'])
                    }]
                }]
            }
        }]
    }]

    def error(self, msg, error_codename):
        """Build the standard failure payload returned by this plugin."""
        d = {'status': 'nok', 'msg': msg, 'error_codename': error_codename}
        return d

    def check_config(self, config):
        """Validate an auth-event config against CONFIG_CONTRACT.

        Returns '' on success, or a JSON-encoded description of the
        contract violation.
        """
        # NOTE(review): `msg` is assigned but never used here.
        msg = ''
        try:
            check_contract(self.CONFIG_CONTRACT, config)
            return ''
        except CheckException as e:
            return json.dumps(e.data, cls=JsonTypeEncoder)

    def census(self, ae, request):
        """Bulk-add census entries (phone numbers) to auth-event `ae`.

        With field-validation 'enabled' (the default) the whole batch is
        validated first and users are only created afterwards, if every
        entry passed; otherwise each valid entry is created immediately
        and invalid ones are skipped.
        """
        req = json.loads(request.body.decode('utf-8'))
        validation = req.get('field-validation', 'enabled') == 'enabled'
        data = {'status': 'ok'}
        msg = ''
        current_tlfs = []
        for r in req.get('census'):
            # Normalize the phone number before any checks.
            if r.get('tlf'):
                r['tlf'] = get_cannonical_tlf(r.get('tlf'))
            tlf = r.get('tlf')
            if isinstance(tlf, str):
                tlf = tlf.strip()
            msg += check_field_type(self.tlf_definition, tlf)
            if validation:
                msg += check_field_value(self.tlf_definition, tlf)
            msg += check_fields_in_request(r, ae, 'census', validation=validation)
            if validation:
                msg += exist_user(r, ae)
                # Reject duplicate phone numbers within the same batch.
                if tlf in current_tlfs:
                    msg += "Tlf %s repeat." % tlf
                current_tlfs.append(tlf)
            else:
                # Best-effort mode: drop the accumulated errors for this
                # entry and skip it instead of failing the batch.
                if msg:
                    msg = ''
                    continue
                exist = exist_user(r, ae)
                if exist and not exist.count('None'):
                    continue
                # By default we create the user as active; the pipeline
                # checks are deliberately skipped here.
                u = create_user(r, ae, True)
                give_perms(u, ae)
        if msg and validation:
            return self.error("Incorrect data", error_codename="invalid_credentials")
        if validation:
            # Whole batch validated OK: now actually create the users.
            # NOTE(review): entries are iterated twice in this mode;
            # create_user is only reached here, not in the first loop.
            for r in req.get('census'):
                # By default we create the user as active; the pipeline
                # checks are deliberately skipped here.
                u = create_user(r, ae, True)
                give_perms(u, ae)
        return data

    def register(self, ae, request):
        """Register a single user by phone number and send an SMS code."""
        req = json.loads(request.body.decode('utf-8'))
        msg = check_pipeline(request, ae)
        if msg:
            return self.error("Incorrect data", error_codename="invalid_credentials")

        # Create the user as active? Usually yes, but the execute_pipeline
        # call inside check_fields_in_request might modify this.
        req['active'] = True
        msg = ''
        if req.get('tlf'):
            req['tlf'] = get_cannonical_tlf(req.get('tlf'))
        tlf = req.get('tlf')
        if isinstance(tlf, str):
            tlf = tlf.strip()
        msg += check_field_type(self.tlf_definition, tlf)
        msg += check_field_value(self.tlf_definition, tlf)
        msg += check_fields_in_request(req, ae)
        if msg:
            return self.error("Incorrect data", error_codename="invalid_credentials")
        # Get active from req; this value might have changed in
        # check_fields_in_request (via its pipeline).
        active = req.pop('active')

        msg_exist = exist_user(req, ae, get_repeated=True)
        if msg_exist:
            # Phone already registered: only an inactive existing user may
            # proceed (to re-send their code below).
            u = msg_exist.get('user')
            if u.is_active:
                return self.error("Incorrect data", error_codename="invalid_credentials")
        else:
            u = create_user(req, ae, active)
            msg += give_perms(u, ae)

        if msg:
            return self.error("Incorrect data", error_codename="invalid_credentials")
        elif not active:
            # Note, we are not calling extend_send_sms because we are not
            # sending the code in here.
            return {'status': 'ok'}

        result = plugins.call("extend_send_sms", ae, 1)
        if result:
            return self.error("Incorrect data", error_codename="invalid_credentials")
        # Deliver the SMS code asynchronously (celery-style task).
        send_codes.apply_async(args=[[
            u.id,
        ], get_client_ip(request)])
        return {'status': 'ok'}

    def authenticate(self, ae, request):
        """Authenticate a user from a phone number + SMS code pair.

        On success returns status/username/auth-token, plus a
        'redirect-to-url' when the event's authentication-action mode is
        'go-to-url'.
        """
        req = json.loads(request.body.decode('utf-8'))
        msg = ''
        if req.get('tlf'):
            req['tlf'] = get_cannonical_tlf(req.get('tlf'))
        tlf = req.get('tlf')
        if isinstance(tlf, str):
            tlf = tlf.strip()
        msg += check_field_type(self.tlf_definition, tlf, 'authenticate')
        msg += check_field_value(self.tlf_definition, tlf, 'authenticate')
        msg += check_field_type(self.code_definition, req.get('code'), 'authenticate')
        msg += check_field_value(self.code_definition, req.get('code'), 'authenticate')
        msg += check_fields_in_request(req, ae, 'authenticate')
        if msg:
            return self.error("Incorrect data", error_codename="invalid_credentials")

        try:
            u = User.objects.get(userdata__tlf=tlf, userdata__event=ae,
                                 is_active=True)
        # NOTE(review): bare except hides everything (including
        # MultipleObjectsReturned); should catch User.DoesNotExist.
        except:
            return self.error("Incorrect data", error_codename="invalid_credentials")

        # Most recent code for this user; comparison is case-insensitive
        # via upper().
        code = Code.objects.filter(
            user=u.userdata,
            code=req.get('code').upper()).order_by('-created').first()
        if not code:
            return self.error("Incorrect data", error_codename="invalid_credentials")

        msg = check_pipeline(request, ae, 'authenticate')
        if msg:
            return self.error("Incorrect data", error_codename="invalid_credentials")
        msg = check_metadata(req, u)
        if msg:
            return self.error("Incorrect data", error_codename="invalid_credentials")
        u.save()

        data = {'status': 'ok'}
        data['username'] = u.username
        data['auth-token'] = genhmac(settings.SHARED_SECRET, u.username)

        # Add redirection when the event is configured for 'go-to-url'.
        auth_action = ae.auth_method_config['config']['authentication-action']
        if auth_action['mode'] == 'go-to-url':
            data['redirect-to-url'] = auth_action['mode-config']['url']
        return data

    def resend_auth_code(self, ae, request):
        """Re-send the SMS auth code to an active user, rate-limited by
        the 'resend-auth-pipeline'."""
        req = json.loads(request.body.decode('utf-8'))
        msg = ''
        if req.get('tlf'):
            req['tlf'] = get_cannonical_tlf(req.get('tlf'))
        tlf = req.get('tlf')
        if isinstance(tlf, str):
            tlf = tlf.strip()
        msg += check_field_type(self.tlf_definition, tlf, 'authenticate')
        msg += check_field_value(self.tlf_definition, tlf, 'authenticate')
        if msg:
            return self.error("Incorrect data", error_codename="invalid_credentials")

        try:
            u = User.objects.get(userdata__tlf=tlf, userdata__event=ae,
                                 is_active=True)
        # NOTE(review): bare except — same concern as in authenticate().
        except:
            return self.error("Incorrect data", error_codename="invalid_credentials")

        msg = check_pipeline(request, ae, 'resend-auth-pipeline',
                             Sms.PIPELINES['resend-auth-pipeline'])
        if msg:
            return self.error("Incorrect data", error_codename="invalid_credentials")
        result = plugins.call("extend_send_sms", ae, 1)
        if result:
            return self.error("Incorrect data", error_codename="invalid_credentials")
        # Deliver the SMS code asynchronously (celery-style task).
        send_codes.apply_async(args=[[
            u.id,
        ], get_client_ip(request)])
        return {'status': 'ok'}