def snippet(self, length=100):
    """Return the first *length* characters of the content with HTML tags removed.

    Args:
        length: maximum snippet size in characters (default 100).

    Returns:
        The tag-stripped content, truncated to at most ``length`` characters.
    """
    # Strip tags first so the cap applies to the visible text rather than the
    # raw markup: the original computed the cap from len(self.content) (which
    # counts tag characters) and then redundantly re-stripped the slice.
    plain_text = strip_tags(self.content)
    return plain_text[:length]
def page_mod_shortcodes(page, shortcodes):
    """Apply shortcodes to a page dict and return the modified page.

    Each shortcode becomes a key/value pair on the page, e.g.
    [[template sidebar-left.html]] sets page['template'].  A shortcode whose
    first word is "page" pulls in another page's content by slug.  After
    processing, theme/template defaults are filled in and the shortcode
    markers are removed from the displayed content.
    """
    shortcode_tag = app.config['SHORTCODE_TAG']
    shortcode_endtag = app.config['SHORTCODE_ENDTAG']
    for shortcode in shortcodes:
        # remove tag markers and split on whitespace
        scs = shortcode.replace(shortcode_tag, '').replace(shortcode_endtag, '')
        sclist = scs.split()
        if len(sclist) > 1:
            if sclist[0].lower() == "page":
                # [[page <key> <slug>]]: embed the referenced page's content.
                # NOTE(review): assumes a slug token is present and the slug
                # exists in the database (raises otherwise, as before).
                key = strip_tags(sclist[1].lower())
                slug = strip_tags(sclist[2].strip())
                cpage = g.db.pages.find_one({'slug': slug})
                page[key] = cpage['content']
            else:
                # have to strip_tags because of multiline shortcode
                key = strip_tags(sclist[0]).strip()
                # everything after the key token is the value
                value = scs[scs.find(key) + len(key):].strip()
                page[key] = value
    # ensure that a default page-template/theme is set (if not set by shortcodes)
    page['template'] = page.get('template', 'page.html')
    page['theme'] = page.get('theme', 'default')
    if page['theme'] in bootswatch_themes:
        # bootswatch_themes are default theme set
        # slight weakness in design that we are dependent on SiteMeta navbackground setting
        g.theme = page['theme']
        # BUG FIX: this previously read `page['theme'] == 'default'` — a
        # comparison (no-op) where an assignment was clearly intended, so the
        # theme directory below was resolved against the bootswatch name.
        page['theme'] = 'default'
    theme_path = os.path.join(BASE_DIR, 'templates', app.config['THEME_DIR'], page['theme'])
    if not os.path.exists(theme_path) or page['theme'] == '':
        # in case user selected a non-existent theme
        page['theme'] = 'default'
    # note: add some error trapping here — a non-existent template should
    # flash an error and fall back to the default page template.
    page['template'] = "{}/{}/{}".format(app.config['THEME_DIR'], page['theme'], page['template'])
    # remove shortcodes from display content
    for shortcode in shortcodes:
        page['content'] = page['content'].replace(shortcode, '')
    return page
def getSurveyQuestions(surveyId, dataCenter, apiToken): print("GETTING SURVEY QUESTIONS...\n") # Step 1: Set API call parameters baseUrl = "https://{0}.qualtrics.com/API/v3/survey-definitions/{1}/questions/".format( dataCenter, surveyId) headers = { "content-type": "application/json", "x-api-token": apiToken, } # Step 2: Make the API CALL questionRequestUrl = baseUrl downloadRequestResponse = requests.request("GET", questionRequestUrl, headers=headers, stream=True) surveyQuestionData = json.loads( downloadRequestResponse.content)['result']['elements'] # Step 3: Run through json object to get needed information # set questionData dict questionData = dict() # set counter counter = 0 # set question type tracker questionTypes = [] for question in surveyQuestionData: # increment counter counter += 1 # conditionally set choices dict choices = "none" if "Choices" in question: choices = question['Choices'] # set new question data dict questionData[question['QuestionID']] = { "ListPosition": counter, "QuestionText": strip_tags(question['QuestionText']), "QuestionDescription": strip_tags(question['QuestionDescription']), "QuestionType": strip_tags(question['QuestionType']), "QuestionChoices": choices } # check to see if question type has been captured before if question['QuestionType'] not in questionTypes: questionTypes.append(question['QuestionType']) # overview of question analysis and processing print("THERE ARE " + str(len(questionData)) + " QUESTIONS LOADED..\n") print("THERE ARE " + str(len(questionTypes)) + " QUESTION TYPES: \n") return questionData
def main(args):
    """Load train/test datasets, run each configured tagger, and print
    accuracy statistics for every run."""
    print("Loading dataset...")
    train_set = utils.load_dataset(args.training_file)
    print("FILE: ", args.training_file)
    test_set = utils.load_dataset(args.test_file)
    print("Loaded dataset")
    print()
    # (tagger function, display label) pairs to evaluate
    taggers = [(viterbi_p2, 'Viterbi_p2'), (extra, 'extra')]
    for tagger, label in taggers:
        print("Running {}...".format(label))
        predictions = tagger(train_set, utils.strip_tags(test_set))
        accuracy, correct_counts, wrong_counts = utils.evaluate_accuracies(test_set, predictions)
        multitag_accuracy, unseen_accuracy = utils.specialword_accuracies(train_set, test_set, predictions)
        print("Accuracy: {:.2f}%".format(accuracy * 100))
        print("\tTop K Wrong Word-Tag Predictions: {}".format(utils.topk_wordtagcounter(wrong_counts, k=4)))
        print("\tTop K Correct Word-Tag Predictions: {}".format(utils.topk_wordtagcounter(correct_counts, k=4)))
        print("\tMultitags Accuracy: {:.2f}%".format(multitag_accuracy * 100))
        print("\tUnseen words Accuracy: {:.2f}%".format(unseen_accuracy * 100))
        print()
def process_item(self, article, spider):
    """Scrapy pipeline step: clean the article body and attach a content hash.

    Runs readability extraction over article['text'], strips the remaining
    HTML tags, and stores a SHA-256 hex digest of the URL under 'hash'.
    """
    readable = Document(article['text']).summary()
    article['text'] = strip_tags(readable)
    # NOTE(review): sha256 requires bytes — this assumes article['url'] is a
    # byte string here; confirm if this pipeline runs under Python 3.
    article['hash'] = hashlib.sha256(article['url']).hexdigest()
    return article
def snippet(self, length=250):
    """Return up to *length* characters of the content with HTML tags removed.

    Args:
        length: maximum snippet size in characters (default 250 — the value
            that was previously hard-coded; existing callers are unaffected).

    Returns:
        The tag-stripped content, truncated to at most ``length`` characters.
    """
    text = strip_tags(self.content)
    # Slicing clamps automatically, so no explicit length check is needed.
    return text[:length]
def extract_google_query_results(html):
    """Parse a Google results page into a list of QueryResult objects.

    Each <div class="g"> result block yields (title, href, description);
    hrefs wrapped in Google's "/url?q=..." redirect are unwrapped and the
    trailing "&sa..." tracking suffix is removed.
    """
    soup = BeautifulSoup(html, 'html.parser')
    # result block from google
    result_blocks = soup.find_all("div", class_="g")
    query_results = []
    for result_block in result_blocks:
        a = result_block.find("a")
        em = result_block.find("span", "st")
        href = extract_info(lambda: a['href'])
        title = extract_info(lambda: "".join(list(a.strings)))
        if not href or not href.startswith("http"):
            try:
                href = a['data-href']
            except (KeyError, TypeError):
                # no data-href attribute (or no anchor at all): fall back
                href = a['href']
        if href.startswith("/url"):
            # BUG FIX: this previously used href.strip("/url?q="), but
            # str.strip() treats its argument as a character *set* and also
            # eats matching characters from the END of the URL (e.g. a
            # trailing '/' or 'q').  Remove the literal redirect prefix.
            if href.startswith("/url?q="):
                href = href[len("/url?q="):]
            i = href.find("&sa")
            if i != -1:
                href = href[:i]
        description = extract_info(lambda: strip_tags("".join(list(em.strings))))
        query_results.append(QueryResult(unicode(title), unicode(href), unicode(description)))
    return query_results
def main(args):
    """Load train/test datasets, run the tagger selected by args.algorithm,
    and print its accuracy statistics."""
    print("Loading dataset...")
    train_set = utils.load_dataset(args.training_file)
    test_set = utils.load_dataset(args.test_file)
    print("Loaded dataset")
    print()
    # dispatch table from command-line name to tagger implementation
    dispatch = {
        "baseline": baseline,
        "viterbi_1": viterbi_1,
        "viterbi_2": viterbi_2,
        "viterbi_ec": viterbi_ec,
    }
    tagger = dispatch[args.algorithm]
    print("Running {}...".format(args.algorithm))
    predictions = tagger(train_set, utils.strip_tags(test_set))
    accuracy, correct_counts, wrong_counts = utils.evaluate_accuracies(predictions, test_set)
    multitag_accuracy, unseen_accuracy = utils.specialword_accuracies(train_set, predictions, test_set)
    print("Accuracy: {:.2f}%".format(accuracy * 100))
    print("\tMultitags Accuracy: {:.2f}%".format(multitag_accuracy * 100))
    print("\tUnseen words Accuracy: {:.2f}%".format(unseen_accuracy * 100))
    print("\tTop K Wrong Word-Tag Predictions: {}".format(utils.topk_wordtagcounter(wrong_counts, k=4)))
    print("\tTop K Correct Word-Tag Predictions: {}".format(utils.topk_wordtagcounter(correct_counts, k=4)))
    print()
def process_feed_item(current):
    """Extract plain text from a feed entry dict.

    Prefers 'content' over 'summary'; unwraps feedparser-style list/dict
    payloads; returns None when neither field is present (or is None).
    """
    payload = current.get('content', current.get('summary', None))
    if payload is None:
        return None
    # feedparser may wrap the body as [ {...} ] or {'value': ...}
    if isinstance(payload, list):
        payload = payload[0]
    if isinstance(payload, dict):
        payload = payload['value']
    return utils.strip_tags(payload).strip()
def _book_markdown(book):
    """Render a book dict as Markdown: title, authors, optional truncated
    description, and a link to the book's page; tags are stripped from the
    final string."""
    parts = [
        f"*{book['title']}* \n",
        f"{', '.join(book['authors'])}\n\n",
    ]
    # description is optional; truncate it to 200 characters when present
    if book.get('description'):
        parts.append(f"{book['description'][:200]}... ")
    parts.append(f"[На сайте 🌎]({book['link']})\n")
    return strip_tags("".join(parts))
def post_html(contents, title, permalink, taglist, stream_only, metadata, scrutinize = True, allow_comments = True, Patreon_type = "blog"):
    """Render one blog post as HTML.

    Returns a 2-tuple: (post body HTML including the metadata/comments
    section, extra <head> markup accumulated while rendering).

    Args (as used below):
        contents: raw post source; run through
            blog_server_shared.postprocess_post_string first.
        title/permalink/taglist/metadata: post attributes; metadata["id"]
            is used as a unique suffix in generated element ids.
        stream_only: when True the post is rendered for a stream view —
            it is cut at the first <cut> marker and no readability
            statistics are appended.
        scrutinize/allow_comments/Patreon_type: passed through to the
            postprocessing / comments-section helpers.
    """
    head = []
    post_content = blog_server_shared.postprocess_post_string(contents, metadata["id"], title, False, scrutinize)[0]
    head.append ("<script>window.elidupree.handle_content_warnings ('"+ metadata ["id"]+"', false)</script>" )
    # Replace each <transcript ...> marker with a show/hide transcript widget;
    # each widget gets a unique id of the form "<n>_<post id>".
    next_transcript_number = 1
    while True:
        transcript_generator = re.search(r"<transcript"+ blog_server_shared.grouped_string_regex("transcript_text")+">", post_content, re.DOTALL)
        if transcript_generator is None:
            break
        transcript_identifier_string = str(next_transcript_number)+'_'+ metadata ["id"]
        post_content = post_content [0: transcript_generator.start(0)]+'<div id="transcript_'+ transcript_identifier_string+'" class="transcript_block"><div class="transcript_header">Transcript: <a id="show_transcript_button_'+ transcript_identifier_string+'" href="javascript:;">(show)</a><a id="hide_transcript_button_'+ transcript_identifier_string+'" href="javascript:;">(hide)</a></div><div class="transcript_content id'+ transcript_identifier_string+'">'+ transcript_generator.group("transcript_text")+'</div></div>' + post_content [transcript_generator.end(0):]
        # Per-transcript CSS/JS: the html element's transcript_hidden_<id>
        # class (toggled by handle_transcript) controls widget visibility.
        head.append('''<style> html.transcript_hidden_'''+ transcript_identifier_string +''' div.transcript_content.id'''+ transcript_identifier_string +''' {display: none;} #show_transcript_button_'''+ transcript_identifier_string +''' {display: none;} html.transcript_hidden_'''+ transcript_identifier_string +''' #show_transcript_button_'''+ transcript_identifier_string +''' {display: inline;} html.transcript_hidden_'''+ transcript_identifier_string +''' #hide_transcript_button_'''+ transcript_identifier_string +''' {display: none;} </style> <script> window.elidupree.handle_transcript ("'''+ transcript_identifier_string +'''"); </script>''')
        next_transcript_number = next_transcript_number + 1
    if stream_only == True:
        # Truncate at the first <cut> marker, closing the paragraph and
        # adding a "Continue reading" link to the full post.
        cutter = re.compile ( r"<cut>.*?</p>.*$", re.DOTALL)
        post_content = cutter.sub ('''[...]</p> <a class="continue_reading" href="'''+ permalink +'''">Continue reading<span class="invisible"> '''+ title +'''</span>...</a>''', post_content)
        #this sometimes cuts off anchors, so make sure fragments point at the canonical URL
        post_content = re.sub ('href="#','href="' + permalink + '#', post_content)
    else:
        post_content = re.sub ("<cut>", "", post_content)
    calculate_readability = (stream_only != True)
    if calculate_readability:
        #using the automated readability index
        reference = re.sub(r"\s+", " ", html.unescape (utils.strip_tags (post_content)))
        sentences = len(re.findall (r"\w\w\w.*?[.?!]", reference))
        words = utils.word_count (reference)
        characters = len(re.findall (r"\w", reference))
        if words >0 and sentences >0:
            # Automated Readability Index formula; shown as a debug footer.
            readability = 4.71*characters/words +0.5 *words/sentences -21.43
            post_content = post_content + '<em class="debug"> Approximate readability: ' + "{:.2f}".format (readability) + " ("+ str (characters) + " characters, " + str (words) + " words, " + str (sentences) + " sentences, " + "{:.2f}".format (characters/words) + " characters per word, " + "{:.2f}".format (words/sentences) + " words per sentence)</em>"
    # Split into sections at <bigbreak> markers, prepend the linked title to
    # the first section, and wrap every section in its own div.
    post_content_sections = post_content.split("<bigbreak>")
    id_str = ''
    if title:
        id_str = 'id="'+utils.format_for_url(title)+'"'
    post_content_sections[0] = '<h1><a class="post_title_link" href="'''+permalink+'">'+title+'</a></h1>'+post_content_sections[0]
    for i in range(0, len(post_content_sections)):
        post_content_sections[i] = '<div class="post_content_section">'+post_content_sections[i]+'</div>'
    return (''' <div '''+id_str+''' class="blog_post"> '''+(''.join(post_content_sections))+''' </div>'''+metadata_and_comments_section_html(title, permalink, taglist, stream_only, metadata, allow_comments = allow_comments, Patreon_type = Patreon_type), "".join (head))
def league(league):
    """Handle a team-selection POST for *league*.

    Records the chosen team as a new user and sets a year-long cookie
    remembering it, then redirects back to the league page.  A POST with
    no team selected just redirects back.
    """
    league_url = '/%s' % league
    team = request.params.get("team")
    if not team:
        # a malformed POST - didn't select a team.
        return redirect(league_url)
    team = strip_tags(team)
    new_user(team.decode('utf-8'), conn)
    # Remember the choice for one year.
    response.set_cookie(league, cookie_safe(team), max_age=3600*24*365, path=league_url)
    redirect(league_url)
def get_random_user(message):
    """Pick a random current member of the group chat and mention them.

    In a private chat there is nobody to pick, so the bot complains instead.
    """
    if message.chat.type == 'private':
        my_bot.reply_to(message, "КТО ВЫ ТО? Я ТУТ ОДИН!")
        return
    members = User.select().where(User.chat_id == message.chat.id, User.is_member).order_by(fn.Random())
    if members:
        chosen = members[0]
        reply = 'Вам выпал: <a href="tg://user?id={}">{}</a>'.format(chosen.user_id, strip_tags(chosen.first_name))
        my_bot.reply_to(message, reply, parse_mode='HTML')
    # TODO: log that we do not have user base
def league(league):
    """Process a league-page team selection.

    Strips markup from the submitted team name, registers it, stores it in
    a one-year cookie scoped to the league path, and redirects back.
    """
    selected = request.params.get("team")
    if not selected:
        # No team in the POST data — just send the user back.
        return redirect('/%s' % league)
    cleaned = strip_tags(selected)
    new_user(cleaned.decode('utf-8'), conn)
    one_year = 3600 * 24 * 365
    response.set_cookie(league, cookie_safe(cleaned), max_age=one_year, path='/%s' % league)
    redirect('/%s' % league)
def recent_feed():
    """Build and return the Atom feed of all published articles."""
    feed = AtomFeed('Recent Articles', feed_url=request.url, url=request.url_root)
    for page in pages:
        # Only pages explicitly marked as published appear in the feed.
        if 'published' not in page.meta:
            continue
        feed.add(strip_tags(page['title']),
                 unicode(page.html),
                 content_type='html',
                 url=page.path,
                 author='Cameron Maske',
                 published=page['date'],
                 updated=page['date'])
    return feed.get_response()
def add_pin(post_no, board, short_com, thumb_url, time_created, replies_count):
    """Persist a pinned post, storing a cleaned 100-character comment preview."""
    now = int(time.time())
    db = storage.Storage()
    # Crude HTML detection: only strip tags when a '<' is present.
    if "<" in short_com:
        short_com = strip_tags(short_com)
    # Cap the comment preview at 100 characters (slicing clamps shorter input).
    short_com = short_com[:100]
    # NOTE(review): this replace() maps an apostrophe to an apostrophe and is
    # a no-op as written — possibly an HTML entity was lost in transit; confirm.
    short_com = short_com.replace("'", "'")
    db.add_pin(post_no, board, short_com.encode(), thumb_url, now, time_created, replies_count)
def get_random_user(message):
    """Mention a random member of the current group chat.

    Private chats get a joking refusal since there is only one participant.
    """
    if message.chat.type != 'private':
        candidates = (User.select()
                      .where(User.chat_id == message.chat.id, User.is_member)
                      .order_by(fn.Random()))
        if candidates:
            winner = candidates[0]
            my_bot.reply_to(
                message,
                'Вам выпал: <a href="tg://user?id={}">{}</a>'.format(winner.user_id, strip_tags(winner.first_name)),
                parse_mode='HTML')
        else:
            # TODO: log that we do not have user base
            pass
    else:
        my_bot.reply_to(message, "КТО ВЫ ТО? Я ТУТ ОДИН!")
def search(self, pattern):
    """Search the document body, line by line, for a regex *pattern*.

    Each prettified line of the <body> is tag-stripped before matching.

    Args:
        pattern: regular expression string.

    Returns:
        list of FileParser.MatchObj (one per matching line); empty list
        when the document has no <body> element.
    """
    body = self.soup.find('body')
    # BUG FIX: a bare `except:` previously swallowed *every* exception here
    # (including KeyboardInterrupt); the only expected failure is find()
    # returning None, so test for that explicitly instead.
    if body is None:
        return []
    lines = body.prettify().split('\n')
    matches = []
    regex = re.compile(pattern)
    for line in lines:
        text = utils.strip_tags(line)
        m = regex.search(text)
        if m:
            # Record the match span within the tag-stripped line.
            start = m.start()
            matches.append(FileParser.MatchObj(text, start, start + len(m.group(0))))
    return matches
def inlinequery(update, context):
    """Telegram inline-query handler: search Goodreads and offer results.

    Pages through goodreads_api search results 20 at a time using the
    inline-query offset.  Unauthorised users instead get a single
    "start the bot" stub with a switch-to-PM button.
    """
    query = update.inline_query.query
    user_id = update.inline_query.from_user.id
    # offset is empty on the first call — treat it as page 1
    page = int(update.inline_query.offset or 1)
    logger.info(f"query: {query}, page: {page}")
    try:
        books = goodreads_api.get_search_books(user_id, query, page=page, per_page=20)
    except AuthError:
        # User has not authorised the bot with Goodreads yet.
        logger.error(f"AuthError: user_id {user_id}")
        result = [(InlineQueryResultArticle(
            id=uuid4(),
            title="Запустите бота!",
            description=
            "Для использования бота, нажмите на кнопку выше, и авторизуйтесь в Goodreads",
            input_message_content=InputTextMessageContent("None"),
        ))]
        return update.inline_query.answer(result,
                                          cache_time=0,
                                          switch_pm_text="Добавить бота",
                                          switch_pm_parameter="f")
    result = []
    for index, book in enumerate(books):
        # Markdown body shown when the user picks this result.
        book_md = (
            f"*{strip_tags(book['title'])}* \n"
            f"{', '.join(book['authors'])}\n"
            f"[На сайте 🌎](https://www.goodreads.com/book/show/{book['id']})")
        add_book_button = InlineKeyboardButton(
            "Добавить книгу 📚", callback_data=f"inlinebook {book['id']}")
        result.append(
            InlineQueryResultArticle(
                id=uuid4(),
                title=strip_tags(book["title"]),
                thumb_url=book["image_url"],
                description=f"{', '.join(book['authors'])}",
                input_message_content=InputTextMessageContent(
                    book_md, ParseMode.MARKDOWN),
                reply_markup=InlineKeyboardMarkup([[add_book_button]])))
    # Advance the offset so Telegram requests the next page on scroll.
    update.inline_query.answer(result, next_offset=page + 1)
def book(update, context):
    """Reply with a single book card for a "/book_<id>"-style command."""
    user_id = update.message.from_user.id
    # The command embeds the book id after an underscore.
    book_id = update.message.text.split('_')[1]
    logger.info(f"user_id: {user_id}, book_id:{book_id}")
    try:
        book_info = goodreads_api.get_book(user_id, book_id)
    except AuthError as ex:
        # User has not authorised the bot with Goodreads yet.
        logger.error(f"AuthError: user_id {user_id}")
        return context.bot.send_message(user_id, text=str(ex))
    buttons = _book_buttons(book_info.get('shelf'), book_id, user_id)
    update.message.reply_text(text=strip_tags(book_info['markdown']),
                              parse_mode=ParseMode.MARKDOWN,
                              reply_markup=buttons)
def make_page(title, head_stuff, body_stuff, extras = {}): jQuery ='''<script type="text/javascript" src="/media/jquery-3.0.0.min.js?rr"></script>''' jQuery_before = "" jQuery_after = "" if "jQuery_before" in extras: jQuery_before = jQuery else: jQuery_after = jQuery unbranded_title = re.sub(r"\s*⊂.*", "", title) image = (extras ["blurb_image"] if "blurb_image" in extras else "/media/colorful-background.jpg?rr") cooperation_stuff = [''' <meta name="twitter:card" content="summary"> <meta name="twitter:site" content="@EliDupree"> <meta property="og:site_name" content="Eli Dupree's website"> <meta property="og:title" content="'''+ title +'''"> <!--<link rel="image_src" href="'''+ image +'''">--> <meta property="og:image" content="'''+ utils.canonical_scheme_and_domain + image +'''"> '''] if "blurb" in extras: blurb = html.escape (utils.strip_tags (extras ["blurb"])) cooperation_stuff.append (''' <meta name="description" content="'''+ blurb +'''"> <meta property="og:description" content="'''+ blurb +'''"> ''') return '''<!DOCTYPE html> <html lang="en" class="javascript_disabled'''+(' '+extras["html_class"] if "html_class" in extras else '')+'''"> <head> <meta charset="utf-8" /> <title>'''+title+'''</title> <link rel="shortcut icon" href="/favicon.ico" /> <link rel="stylesheet" type="text/css" href="'''+css.domain_relative_url()+'''?rr"> <link rel="alternate" type="application/atom+xml" href="/atom.xml" title="RSS (Atom) feed" /> <meta name="viewport" content="width=device-width, initial-scale=1.0, user-scalable=yes"> '''+ "".join (cooperation_stuff) +''' <script type="text/javascript" src="/before-body.js?rr"></script> '''+ jQuery_before +''' '''+head_stuff+''' </head> <body> '''+body_stuff+''' '''+ jQuery_after +''' <script type="text/javascript" src="/after-body.js?rr"></script> '''+ (extras ["after_body"] if "after_body" in extras else "") +'''
def index(fname, index_name, keys_to_tag):
    # Python 2 code (print statements, pyes-style ES client).
    # Bulk-index a file of newline-delimited JSON posts into Elasticsearch.
    #   fname: path to the input file (one JSON object per line).
    #   index_name: ES index to create (if missing) and write into.
    #   keys_to_tag: JSON keys to copy into the indexed document; the
    #       'content' key is HTML-stripped before indexing.
    fptr = open(fname, 'rb')
    line_count = 0
    conn = ES(["localhost:9200"])
    if not conn.exists_index(index_name):
        conn.create_index(index_name)
    start = time.clock()
    numb_exceptions = 0
    for line in fptr:
        # Progress report every 10000 lines.
        if ((line_count % 10000) == 0):
            end = time.clock()
            minutes = (end - start) / 60.0
            print 'File: %s Done with %d took %f min. ' %(fname, line_count, minutes)
            print 'number of exceptions ', numb_exceptions
        line_count += 1
        data = json.loads(line)
        # Skip untagged posts entirely.
        if not data.get('tags'):
            continue
        post_id = int(data['post_id'])
        # Skip posts that have none of the requested keys populated.
        found_content = False
        for k in keys_to_tag:
            if data.get(k):
                found_content = True
        if not found_content:
            continue
        index_data = dict()
        for k in keys_to_tag:
            value = data.get(k)
            if (value and (k == 'content')):
                # Best effort: fall back to the raw value if stripping fails.
                try:
                    stripped_value = utils.strip_tags(value)
                except Exception:
                    stripped_value = value
                # NOTE(review): only the 'content' key is ever copied into
                # index_data here — other keys_to_tag are checked above but
                # never indexed; confirm whether that is intended.
                index_data[k] = stripped_value
        if post_id and data:
            # Count (rather than abort on) individual indexing failures.
            try:
                conn.index(index_data, index_name, "test-type", post_id)
            except Exception:
                numb_exceptions += 1
                continue
    print 'number of exceptions ', numb_exceptions
def migrate_one_comment(self, thread_id, comment_key, replies, parent_id=None):
    """Copy one stored comment to Disqus, then schedule its replies.

    Args:
        thread_id: Disqus thread to post into.
        comment_key: datastore key of the BloogBreakingMigration.Comment
            to migrate.
        replies: sequence of key-path tuples describing descendant comments,
            grouped below by their first element (the direct child's id).
        parent_id: Disqus post id to attach this comment under, if any.
    """
    comment = BloogBreakingMigration.Comment.get(comment_key)
    post_args = {
        'request_type': urlfetch.POST,
        'thread_id': thread_id,
        'message': utils.strip_tags(comment.body).encode('utf-8'),
        # Fall back to placeholders when the commenter left no name/email.
        'author_name': comment.name.encode('utf-8') if comment.name else 'Someone',
        'author_email': comment.email.encode('utf-8') if comment.email else '*****@*****.**',
        'forum_api_key': self.forum_key,
        'created_at': comment.published.strftime('%Y-%m-%dT%H:%M'),
    }
    if comment.homepage:
        post_args['author_url'] = comment.homepage.encode('utf-8')
    if parent_id:
        post_args['parent_post'] = parent_id
    post_id = disqus_request('create_post', **post_args)['message']['id']
    # Recurse (via the task queue) into each group of replies.  NOTE: the
    # loop variable shadows the parent_id parameter — from here on it is the
    # datastore id of the direct child comment, not the Disqus post id.
    for parent_id, replies in itertools.groupby(replies, lambda x:x[0]):
        parent_key = db.Key.from_path('Comment', parent_id, parent=comment_key)
        deferred.defer(self.migrate_one_comment, thread_id, parent_key, [x[1:] for x in replies if x[1:]], post_id)
def handle_remove_event(): """ Validate secret code and remove event, flash and redirect to frontpage """ # TODO Aikaperusteinen hidaste avainten brute force -spämmäyksen estoon if request.method == 'GET': flash(u'Käytä lomaketta') return redirect(url_for('handle_list_events')) # Get data from form remove_short_name = request.form['short_name'].strip().lower() remove_secret = request.form['secret'].strip().lower() ayah_session_secret = strip_tags(request.form['session_secret'].strip()) # Setup Are You A Human check ayah.configure(app.config['ARE_YOU_HUMAN_PUBLISHER_KEY'], app.config['ARE_YOU_HUMAN_SCORING_KEY']) # Validation if is_valid_short_name(remove_short_name) != True: flash(u'Lyhytnimi tai salainen avain ei täsmää. Tarkista tiedot ja yritä uudelleen.') return redirect(url_for('handle_list_events')) if is_valid_secret_key(remove_secret, app.config['SECRET_KEY_CHARACTERS']) != True: flash(u'Lyhytnimi tai salainen avain ei täsmää. Tarkista tiedot ja yritä uudelleen.') return redirect(url_for('handle_list_events')) if event_exists(remove_short_name) != True: flash(u'Lyhytnimi tai salainen avain ei täsmää. Tarkista tiedot ja yritä uudelleen.') return redirect(url_for('handle_list_events')) # Call Are You A Human scoring service if ayah.score_result(ayah_session_secret) == False: flash(u'Spämmitarkistus epäonnistui. Tarkista että et ole spämmirobotti ja/tai yritä uudelleen.') return redirect(url_for('handle_list_events')) if not remove_event_with_secret(remove_short_name, remove_secret): flash(u'Lyhytnimi tai salainen avain ei täsmää. Tarkista tiedot ja yritä uudelleen.') return redirect(url_for('handle_list_events')) else: flash(u'Tapahtuma poistettu onnistuneesti.') return redirect(url_for('handle_list_events'))
def on_left_clicked(self):
    """Show a desktop notification for the current feed entry (when the
    previous notification was closed), mark it read, and advance."""
    entry = reader.get_entry()
    headline = entry['title']
    body = entry['summary']
    if body is None:
        # some feeds only provide full content, not a summary
        body = entry['content']
    when = datetime.date.fromtimestamp(entry['updated']).isoformat()
    writer = entry['author']
    if notify.closed is True:
        notify.update(headline, '<b>%s</b> written at <i>%s</i>\n\n%s' % (writer, when, strip_tags(body)))
        notify.attach_to_widget(self)
        notify.closed = False
        notify.show()
    reader.set_read(entry)
    reader.iter_next()
def on_left_clicked(self):
    """Pop up a notification for the current entry if the previous one was
    closed, then mark the entry read and move to the next one."""
    entry = reader.get_entry()
    headline = entry['title']
    summary = entry['summary']
    if summary is None:
        summary = entry['content']
    timestamp = entry['updated']
    date_str = datetime.date.fromtimestamp(timestamp).isoformat()
    author = entry['author']
    if notify.closed is True:
        message = '<b>%s</b> written at <i>%s</i>\n\n%s' % (author, date_str, strip_tags(summary))
        notify.update(headline, message)
        notify.attach_to_widget(self)
        notify.closed = False
        notify.show()
    reader.set_read(entry)
    reader.iter_next()
story["pages"] = [post for post in story["pages"] if "don't deploy" not in post] for post_dict in story["pages"]: index = index + 1 #post_dict["path_prefix"] = story["url"]+"/" post_dict["long_story_name"] = name post_dict["long_story_index"] = index if "listed" in story: post_dict["listed"] = True posts ["stories"].append(post_dict) for cat,post_list in posts.items(): for post_dict in post_list: if "long_story_name" not in post_dict: post_dict["path_prefix"] = "/" if cat=="" else "/"+cat+"/" post_dict["category"] = cat post_dict["word_count"] = utils.word_count (html.unescape (utils.strip_tags (post_dict ["contents"]))) if "auto_paragraphs" in post_dict: post_dict ["contents"] = utils.auto_paragraphs (post_dict ["contents"]) if cat == 'blog': post_dict['contents'] += signature for name,story in long_stories.items(): story["word_count"] = 0 for post_dict in story["pages"]: story["word_count"] = story["word_count"] + post_dict["word_count"] css.insert (''' a.small_story {display: block; padding: 0.8em 0; color: black; text-decoration: none;} a.small_story h2 {font-weight: bold; color: black;} a.small_story .blurb {font-size:71%;}
def handle_add_event():
    """Validate and add an event; flash a status message and redirect to the
    event list in every case.  On success the flash includes the event's
    secret key (needed later e.g. to remove the event)."""
    # TODO: time-based throttle to prevent spamming event creation
    if request.method == 'GET':
        flash(u'Käytä lomaketta')
        return redirect(url_for('handle_list_events'))
    # Get data from form
    add_short_name = strip_tags(request.form['short_name'].strip())
    add_name = strip_tags(request.form['name'].strip())
    add_url = request.form['url'].strip().lower()
    add_start_time = strip_tags(request.form['start_time'].strip())
    add_end_time = strip_tags(request.form['end_time'].strip())
    ayah_session_secret = strip_tags(request.form['session_secret'].strip())
    # Setup Are You A Human check
    ayah.configure(app.config['ARE_YOU_HUMAN_PUBLISHER_KEY'],
                   app.config['ARE_YOU_HUMAN_SCORING_KEY'])
    # Validate short name
    if is_valid_short_name(add_short_name) != True:
        flash(u'Tapahtuman lyhytnimi ei ole kelvollinen. Tarkista tiedot ja yritä uudelleen.')
        return redirect(url_for('handle_list_events'))
    # Parse times and validate them (parse_date returns False on failure)
    add_start_time = parse_date(add_start_time)
    add_end_time = parse_date(add_end_time)
    if add_start_time == False:
        flash(u'Tapahtuman alkuaika ei ole kelvollinen. Tarkista tiedot ja yritä uudelleen.')
        return redirect(url_for('handle_list_events'))
    if add_end_time == False:
        flash(u'Tapahtuman päättymisaika ei ole kelvollinen. Tarkista tiedot ja yritä uudelleen.')
        return redirect(url_for('handle_list_events'))
    # Check that end time is later than start time
    if add_start_time >= add_end_time:
        flash(u'Tapahtuma ei voi päättyä ennen alkamistaan. Tarkista tiedot ja yritä uudelleen.')
        return redirect(url_for('handle_list_events'))
    # Check if event already exists with short name
    if event_exists(add_short_name):
        flash(u'Tapahtuman lyhytnimi on jo käytössä. Valitse toinen nimi tai poista olemassa oleva tapahtuma.')
        return redirect(url_for('handle_list_events'))
    # Call Are You A Human scoring service
    if ayah.score_result(ayah_session_secret) == False:
        flash(u'Spämmitarkistus epäonnistui. Tarkista että et ole spämmirobotti ja/tai yritä uudelleen.')
        return redirect(url_for('handle_list_events'))
    # Request URL and check it's valid (validate_url returns False on failure)
    add_url = validate_url(add_url)
    if add_url == False:
        flash(u'Tapahtuman URL-osoite ei ole kelvollinen. Tarkista tiedot ja yritä uudelleen.')
        return redirect(url_for('handle_list_events'))
    # TODO: spam checks for the short name, name and URL; score by
    # domain/word list.
    # Add event to system
    if add_event(short_name=add_short_name, name=add_name, url=add_url, start_time=add_start_time, end_time=add_end_time) == False:
        flash(
            u'Tapahtuman lisäämisessä tapahtui mystinen virhe. Tarkista tiedot ja yritä uudelleen. Ongelman jatkuessa ota yhteyttä.')
        return redirect(url_for('handle_list_events'))
    else:
        secret_key = get_event_secret_key(add_short_name)
        flash(
            u'Tapahtuma lisättiin onnistuneesti. Tapahtuman salainen avain on %s. Pidä se tallessa esimerkiksi poistoa varten.' % secret_key)
        return redirect(url_for('handle_list_events'))
def post_html(contents, title, permalink, taglist, stream_only, metadata, scrutinize = True, allow_comments = True, Patreon_type = "blog"):
    """Render one blog post as HTML (variant with content-warning support).

    Returns a 2-tuple: (post body HTML including the metadata/comments
    section, extra <head> markup accumulated while rendering).

    Args (as used below):
        contents: raw post source; run through
            blog_server_shared.postprocess_post_string first.
        title/permalink/taglist/metadata: post attributes; metadata["id"]
            is used as a unique suffix in generated element ids.
        stream_only: when True the post is rendered for a stream view —
            it is cut at the first <cut> marker and no readability
            statistics are prepended.
        scrutinize/allow_comments/Patreon_type: passed through to the
            postprocessing / comments-section helpers.
    """
    head = []
    post_content = blog_server_shared.postprocess_post_string(contents, metadata["id"], title, False, scrutinize)[0]
    # Expand <content_warning_header ...> / <content_warning_p ...> markers
    # into collapsible warning boxes; remember the pre-expansion text so we
    # only emit the warning-handling script when a warning was present.
    before_content_warnings = post_content
    content_warning_header_regex = re.compile(r"<content_warning_header"+blog_server_shared.grouped_string_regex("content_warning_header_contents")+">", re.DOTALL)
    post_content = content_warning_header_regex.sub(lambda match: (''' <div class="story_content_warning_header"> <p>This story contains:</p> '''+hidden_cw_box(''' <ul> '''+match.group("content_warning_header_contents")+''' </ul> <p>Notices will also appear in-context in the story, just before the material appears.</p> <p>If you see other material that should be marked (such as common triggers or phobias), '''+exmxaxixl.a('e-mail me')+'''. I am serious about web accessibility, and I will respond to your concerns as soon as I can manage.</p> ''')+''' </div>'''), post_content)
    content_warning_p_regex = re.compile(r"<content_warning_p"+blog_server_shared.grouped_string_regex("content_warning_p_contents")+">", re.DOTALL)
    post_content = content_warning_p_regex.sub(lambda match: secondary_hidden_cw_box('This section depicts '+match.group("content_warning_p_contents")+'.'), post_content)
    if post_content != before_content_warnings:
        head.append ("<script>window.elidupree.handle_content_warnings('"+ metadata ["id"]+"', false)</script>" )
    # Replace each <transcript ...> marker with a show/hide transcript widget;
    # each widget gets a unique id of the form "<n>_<post id>".
    next_transcript_number = 1
    while True:
        transcript_generator = re.search(r"<transcript"+ blog_server_shared.grouped_string_regex("transcript_text")+">", post_content, re.DOTALL)
        if transcript_generator is None:
            break
        transcript_identifier_string = str(next_transcript_number)+'_'+ metadata ["id"]
        post_content = post_content [0: transcript_generator.start(0)]+'<div id="transcript_'+ transcript_identifier_string+'" class="transcript_block"><div class="transcript_header">Transcript: <a id="show_transcript_button_'+ transcript_identifier_string+'" href="javascript:;">(show)</a><a id="hide_transcript_button_'+ transcript_identifier_string+'" href="javascript:;">(hide)</a></div><div class="transcript_content id'+ transcript_identifier_string+'">'+ transcript_generator.group("transcript_text")+'</div></div>' + post_content [transcript_generator.end(0):]
        # Per-transcript CSS/JS: the html element's transcript_hidden_<id>
        # class (toggled by handle_transcript) controls widget visibility.
        head.append('''<style> html.transcript_hidden_'''+ transcript_identifier_string +''' div.transcript_content.id'''+ transcript_identifier_string +''' {display: none;} #show_transcript_button_'''+ transcript_identifier_string +''' {display: none;} html.transcript_hidden_'''+ transcript_identifier_string +''' #show_transcript_button_'''+ transcript_identifier_string +''' {display: inline;} html.transcript_hidden_'''+ transcript_identifier_string +''' #hide_transcript_button_'''+ transcript_identifier_string +''' {display: none;} </style> <script> window.elidupree.handle_transcript ("'''+ transcript_identifier_string +'''"); </script>''')
        next_transcript_number = next_transcript_number + 1
    if stream_only == True:
        # Truncate at the first <cut> marker, closing the paragraph and
        # adding a "Continue reading" link to the full post.
        cutter = re.compile ( r"<cut>.*?</p>.*$", re.DOTALL)
        post_content = cutter.sub ('''[...]</p> <a class="continue_reading" href="'''+ permalink +'''">Continue reading<span class="invisible"> '''+ title +'''</span>...</a>''', post_content)
        #this sometimes cuts off anchors, so make sure fragments point at the canonical URL
        post_content = re.sub ('href="#','href="' + permalink + '#', post_content)
    else:
        post_content = re.sub ("<cut>", "", post_content)
    calculate_readability = (stream_only != True)
    if calculate_readability:
        #using the automated readability index
        reference = re.sub(r"\s+", " ", html.unescape (utils.strip_tags (post_content)))
        sentences = len(re.findall (r"\w\w\w.*?[.?!]", reference))
        words = utils.word_count (reference)
        characters = len(re.findall (r"\w", reference))
        if words >0 and sentences >0:
            # Automated Readability Index; prepended as a debug header here
            # (the other post_html variant appends it as a footer instead).
            readability = 4.71*characters/words +0.5 *words/sentences -21.43
            post_content = '<em class="debug"> Approximate readability: ' + "{:.2f}".format (readability) + " ("+ str (characters) + " characters, " + str (words) + " words, " + str (sentences) + " sentences, " + "{:.2f}".format (characters/words) + " characters per word, " + "{:.2f}".format (words/sentences) + " words per sentence)</em>" + post_content
    # Split into sections at <bigbreak> markers, prepend the linked title to
    # the first section, and wrap every section in its own div.
    post_content_sections = post_content.split("<bigbreak>")
    id_str = ''
    if title:
        id_str = 'id="'+utils.format_for_url(title)+'"'
    post_content_sections[0] = '<h1><a class="post_title_link" href="'''+permalink+'">'+title+'</a></h1>'+post_content_sections[0]
    for i in range(0, len(post_content_sections)):
        post_content_sections[i] = '<div class="post_content_section">'+post_content_sections[i]+'</div>'
    return (''' <div '''+id_str+''' class="blog_post"> '''+(''.join(post_content_sections))+''' </div>'''+metadata_and_comments_section_html(title, permalink, taglist, stream_only, metadata, allow_comments = allow_comments, Patreon_type = Patreon_type), "".join (head))
def short_description(self):
    """Return the explicit summary if one is set, otherwise the tag-stripped body."""
    return self.summary if self.summary else strip_tags(self.body)
rs[domain['_id']] = False print "%s no es valido(1)" % domain['_id'] else: test = test['test'] #actualiza el lastSeenDate col_domain.update({"_id": url}, {"$set": {"test.ls": now}}) #Se deja de comprobar si alguna validación ya ha dado negativo if not domain['_id'] in rs or rs[domain['_id']]: #Deben coincidir los metadatos for key, val in test['md'].items(): if key == "description": #La descripcion la deja pasar continue if not key in data[url] or strip_tags( data[url][key].lower()) != strip_tags( val.lower()): if changes_allowed: if not key in data[url]: print "eliminando %s de la url %s" % (key, url) col_domain.update( {"_id": url}, {"$unset": { "test.md." + key: "" }}) else: #save new data print "salvando como nuevo data %s con el valor %s en la url %s" % ( key, strip_tags(data[url][key]), url) col_domain.update({"_id": url}, {
def mapQuestionsToResponses(responseData, questionData, surveyId):
    """Flatten raw Qualtrics responses into a list of cleansed response dicts.

    Parameters:
        responseData: iterable of raw response dicts (Qualtrics export shape,
            each with 'values' and 'labels' sub-dicts -- assumed; confirm
            against the caller).
        questionData: dict mapping base question IDs ("QID<n>") to question
            metadata ('QuestionType', 'QuestionText', 'QuestionChoices').
        surveyId: identifier copied verbatim onto every cleansed response.

    Returns:
        list of dicts, one per fully completed, non-test response, carrying
        the response metadata, an 'answers' list, and derived worker
        attributes (age bin, gender, factory, survey number).
    """
    print("STARTING QUESTION TO RESPONSE MAPPING PROCESS...\n")

    # Step 1: create new array for all cleansed answers.
    cleanResponses = []

    # Human-readable labels for the Qualtrics question-type codes.
    # NOTE(review): "TC" is accepted in the type check below but has no entry
    # here, so a "TC" question would raise KeyError -- confirm whether "TC"
    # is a real type or a typo in the original.
    questionTypes = {
        "MC": "Multiple Choice",
        "TE": "Text Entry",
        "DB": "Descriptive Text or Graphics",
        "RO": "Ranked Order",
        "Slider": "Slider Question",
        "SBS": "Side by Side",
        "Matrix": "Matrix Table",
    }

    # Survey numbers reserved for test submissions; responses carrying one of
    # these are dropped. (00000, 11111, 12345, 99999, 10000, 1000, 5408)
    testSurveyNumbers = {"00000", "11111", "12345", "99999", "10000", "1000", "5408"}

    # Step 2: consolidate the multi-tier question format in the response data.
    for response in responseData:
        # Only fully completed responses are mapped.
        if float(response['values']['progress']) != 100:
            continue

        try:
            cleanResponse = dict()
            cleanResponse['responseId'] = response['responseId']
            cleanResponse['startDate'] = response['values']['startDate']
            cleanResponse['endDate'] = response['values']['endDate']
            cleanResponse['recordedDate'] = response['values']['recordedDate']
            cleanResponse['locationLongitude'] = response['values']['locationLongitude']
            cleanResponse['locationLatitude'] = response['values']['locationLatitude']
            cleanResponse['userLanguage'] = response['values']['userLanguage']
            cleanResponse['originalResponseString'] = str(response)
            cleanResponse['surveyId'] = surveyId
        except KeyError:
            # BUGFIX: the original used a bare `except:` and its error string
            # was broken across a physical line (unterminated literal); also
            # skip the malformed response instead of half-processing it -- it
            # would have been filtered out by the attribute check below anyway.
            print("Error: An exception occurred: One of the response keys is "
                  "missing or mispelled in the base code. "
                  "The response is the following: \n")
            print(response)
            continue

        # Read all question attributes in 'labels' and segregate answers by
        # question title; surveyNumber / gender / age are derived afterwards.
        responseLabelData = response['labels']
        answers = []
        for qid in responseLabelData:
            # Only keys that are QID answers are mapped.
            if "QID" not in qid:
                continue

            questionNumber = qid.split("ID")[1]
            subQuestionNumber = "none"
            print("PROCESSING ANSWER: " + qid + "\n")
            print("QUESTION NUMBER NOW: " + questionNumber + "\n")
            print("SUB QUESTION NUMBER NOW: " + subQuestionNumber + "\n")

            # Account for varying question key shapes:
            #   "QID8_1"   -> question 8, sub-question 1
            #   "QID8#1_2" -> side-by-side: base "QID8", section 1, statement 2
            if "_" in qid:
                subQuestionNumber = questionNumber.split("_")[1]
                questionNumber = questionNumber.split("_")[0]
            baseQuestionID = "QID" + questionNumber
            if "#" in qid:
                # Fix baseQuestionID to the proper string (everything before '#').
                baseQuestionID = qid.split("#")[0]

            # Communicate current values.
            print("QUESTION NUMBER NOW: " + questionNumber + "\n")
            print("SUB QUESTION NUMBER NOW: " + subQuestionNumber + "\n")
            print("BASE QUESTION ID: " + baseQuestionID + "\n")

            qType = questionData[baseQuestionID]['QuestionType']
            qText = questionData[baseQuestionID]['QuestionText']

            if qType in ("MC", "TC", "TE", "DB"):
                answer = responseLabelData[qid]
                # Map NPS bucket labels back to their score ranges.
                if "NPS_GROUP" in qid:
                    if responseLabelData[qid] == "Promoter":
                        answer = "9 or 10"
                    elif responseLabelData[qid] == "Passive":
                        answer = "7 or 8"
                    else:
                        answer = "0-6"
                answers.append({
                    'QuestionText': qText,
                    "QuestionType": questionTypes[qType],
                    "QuestionAnswer": answer,
                    "QuestionId": qid,
                })
            elif qType in ("RO", "Slider", "Matrix"):
                matrixQuestions = questionData[baseQuestionID]['QuestionChoices']
                # Only sub-question rows (numeric suffix) carry an answer;
                # the row's display text identifies which statement it is.
                if is_int(subQuestionNumber):
                    answers.append({
                        'QuestionText':
                            qText + " (" +
                            strip_tags(matrixQuestions[subQuestionNumber]['Display']) +
                            ")",
                        "QuestionType": questionTypes[qType],
                        "QuestionAnswer": responseLabelData[qid],
                        "QuestionId": qid,
                    })
            elif qType == "SBS":
                # Side-by-side: the part of the question number after '#'
                # selects the question section.
                questionSection = questionNumber.split("#")[1]
                matrixQuestions = questionData[baseQuestionID]['QuestionChoices']
                answers.append({
                    'QuestionText':
                        qText + " (" +
                        strip_tags(matrixQuestions[subQuestionNumber]['Display']) +
                        ": Question Section " + questionSection + ")",
                    "QuestionType": "Side by Side",
                    "QuestionAnswer": responseLabelData[qid],
                    "QuestionId": qid,
                })

        # Set answers on the clean response.
        cleanResponse['answers'] = answers

        # Derive respondent age, gender, factory and survey number from
        # well-known question texts.
        surveyNumber = ""
        for answer in cleanResponse['answers']:
            questionText = answer["QuestionText"].lower()
            if "how old are you" in questionText:
                try:
                    age = int(answer["QuestionAnswer"])
                    cleanResponse['workerAge'] = makeAgeBin(age)
                except ValueError:
                    # Non-integer free-text age: keep it verbatim.
                    cleanResponse['workerAge'] = answer["QuestionAnswer"]
            if "what is your gender" in questionText:
                cleanResponse['workerGender'] = answer["QuestionAnswer"]
            if "survey number" in questionText:
                surveyNumber += str(answer["QuestionAnswer"])
                print(surveyNumber)
            if "which factory do you work at" in questionText:
                cleanResponse['workerFactory'] = answer["QuestionAnswer"]
        cleanResponse['surveyNumber'] = str(surveyNumber)

        # Final cleanse: keep only responses that produced every attribute.
        attributes = ('surveyNumber', 'workerFactory', 'workerAge', 'answers',
                      'responseId', 'startDate', 'endDate', 'recordedDate',
                      'locationLongitude', 'locationLatitude', 'userLanguage',
                      'originalResponseString', 'surveyId', 'workerGender')
        if all(key in cleanResponse for key in attributes):
            print("CHECKING IF TEST RESPONSE..\n")
            # BUGFIX: the original chained the comparisons with `or`
            # (surveyNumber != "00000" or surveyNumber != "11111" or ...),
            # which is always true, so test responses were never filtered out.
            if cleanResponse['surveyNumber'] not in testSurveyNumbers:
                print("NOT TEST RESPONSE. ADDING TO LIST!..\n")
                cleanResponses.append(cleanResponse)

    # Step 3: return clean responses.
    print("COMPLETED ANSWER TO QUESTION MATCHING!...\n")
    return cleanResponses
def receive_tweet(self, tweet, word=None):
    """Feed one tweet into the incremental model if it passes the filters.

    The tweet text is tag-stripped and prefixed with a masked handle; when
    filters are configured, at least one must appear in the lowercased text.
    """
    handle = '******' + tweet['user']['screen_name']
    text = handle + ' ' + utils.strip_tags(tweet['text'])
    lowered = text.lower()
    # Guard clause (De Morgan of the original condition): with filters set,
    # bail out unless one of them matches.
    if self.filters and not any(f in lowered for f in self.filters):
        return
    self.ccipca_iter(text, text, extras=word)
def receive_tweet(self, tweet, word=None):
    """Process an incoming tweet: build its display text and, if it passes
    the configured filters (or none are set), run a CCIPCA iteration on it.

    NOTE(review): this duplicates an identical receive_tweet defined earlier
    in this file -- confirm which definition is intended to win.
    """
    text = '******%s %s' % (tweet['user']['screen_name'],
                            utils.strip_tags(tweet['text']))
    haystack = text.lower()
    accept = not self.filters
    if not accept:
        accept = any(needle in haystack for needle in self.filters)
    if accept:
        self.ccipca_iter(text, text, extras=word)
def notifcation(self, typ, msg):
    """Emit a desktop notification for *msg* when the 'notify' setting is on.

    NOTE(review): the method name is misspelled ("notification") but is kept
    as-is because external callers use it.
    """
    if not self.settings.is_true('notify'):
        return
    # Map the message type to a freedesktop icon name: errors get the error
    # icon, everything else the warning icon.
    icon = 'dialog-error' if typ == MESSAGE_ERROR else 'dialog-warning'
    self.main.notifier.add(('Atarashii', strip_tags(msg), icon, 'theme:' + icon))
def title_formatted_title(post_dict):
    """Return the post's title with any HTML tags stripped out."""
    raw_title = post_dict["title"]
    return utils.strip_tags(raw_title)