def testTextNotEmptyStrict(self):
    """Check tagging of a short sentence with default (strict) generation.

    Sets the text and a CSS class on a fresh ``AutoTagify`` and verifies
    both the extracted tag list and the markup from ``generate()`` called
    with no arguments.
    """
    tagger = AutoTagify()
    tagger.text = 'This is a test'
    tagger.css = 'taggable'

    expected_tags = ['test']
    expected_markup = 'This is a <a href="/test" class="taggable">test</a> '

    self.assertEqual(tagger.tag_list(), expected_tags)
    self.assertEqual(tagger.generate(), expected_markup)
def testTextNotEmptyNotStrict(self):
    """Check tagging of a short sentence with ``generate(strict=False)``.

    ``tag_list()`` is called with its defaults; only ``generate`` is asked
    for non-strict output, and its markup is compared verbatim.
    """
    tagger = AutoTagify()
    tagger.text = 'These are my tests'
    tagger.css = 'taggable'

    expected_tags = ['are', 'test']
    expected_markup = (
        'These <a href="/are" class="taggable">are</a> my '
        '<a href="/tests" class="taggable">tests</a> '
    )

    self.assertEqual(tagger.tag_list(), expected_tags)
    self.assertEqual(tagger.generate(strict=False), expected_markup)
def window():
    """Auto-tag and summarise an uploaded file, store the result, render it.

    The file name is taken from the ``type`` query argument and looked up
    under ``UPLOAD_FOLDER``. Files whose extension is ``.pdf`` (compared
    case-insensitively) have their text extracted with PyPDF2; anything else
    is read as a text file. The text is fed to ``AutoTagify`` and the
    resulting tags are de-duplicated in first-seen order. A summary is
    obtained from ``generate_summary`` and a row is inserted into the ``Tag``
    table of ``TAGS.db`` with an empty manual-tag list and status ``1``.

    Side effects: updates the module-level ``lastFilename``, prints the PDF
    page count, and writes to ``TAGS.db``.

    Returns:
        The rendered ``window.html`` template, given the file name as ``F``
        and the tag list as ``L``.

    Raises:
        TypeError: if the ``type`` query argument is missing.
        OSError: if the file cannot be opened.
    """
    global lastFilename

    # NOTE(review): this name comes straight from the query string and is
    # concatenated into a filesystem path below. Confirm that uploads are
    # stored under sanitised names, otherwise "../" segments escape
    # UPLOAD_FOLDER.
    filenamenew = request.args.get('type')
    ext = os.path.splitext(filenamenew)[1]
    lastFilename = filenamenew
    status = 1
    file_path = UPLOAD_FOLDER + "/" + filenamenew

    if ext.lower() == ".pdf":
        # The context manager closes the handle even if PyPDF2 raises.
        with open(file_path, 'rb') as pdf_file:
            pdf_reader = PyPDF2.PdfFileReader(pdf_file)
            print(pdf_reader.numPages)
            # PyPDF2 pages are zero-indexed: start at 0 so the first page is
            # included (starting at 1 silently dropped it).
            text = "".join(
                pdf_reader.getPage(page_index).extractText()
                for page_index in range(pdf_reader.numPages)
            )
    else:
        # Read-only access is enough; "r+" needlessly required write
        # permission on the upload.
        with open(file_path, "r") as text_file:
            text = text_file.read()

    # Auto tagging: dict.fromkeys drops duplicates while keeping tag order.
    tagger = AutoTagify()
    tagger.text = text
    e_words = list(dict.fromkeys(tagger.tag_list()))

    # Summarization (the second argument is presumably a sentence count;
    # confirm against generate_summary).
    summary = generate_summary(file_path, 5)

    conn = sqlite3.connect('TAGS.db')
    try:
        conn.execute(
            '''INSERT INTO Tag (Filename,Auto_tag,Manual_tag,Summary,status) VALUES (?,?,?,?,?)''',
            (filenamenew, str(e_words), str([]), str(summary), status),
        )
        conn.commit()
    finally:
        # Always release the connection, including when the insert fails.
        conn.close()

    return render_template('window.html', F=filenamenew, L=e_words)
# -*- coding: utf-8 -*-
import base64
import os
import random
import time

from auto_tagify import AutoTagify
from boto.s3.key import Key
from PIL import Image
from pymongo import DESCENDING
from pymongo.objectid import ObjectId

import settings

CONTENT_TYPE = 'image/jpeg'
ATAG = AutoTagify()
ATAG.link = "/tag"
RECENT_LIMIT = 12


class Snappy(object):
    """Snapshot functionality, backed by a database chosen from ``settings``."""

    def __init__(self):
        """Start with an empty token, the 'dev' environment and the default database."""
        self.token = ''
        self.env = 'dev'
        self.db = settings.DATABASE

    def set_environment(self, env='dev'):
        """Switch to the test database when ``env`` is ``'test'``.

        Any other value leaves the current environment and database as they are.
        """
        if env != 'test':
            return
        self.env = env
        self.db = settings.TEST_DATABASE
import urllib import math import re import web from web import session from datetime import datetime from redis import Redis from auto_tagify import AutoTagify tag = AutoTagify() tag.link = '/tags' clean_word = re.compile('[\[\],().:"\'?!*<>/\+={}`~\n\r\t]') clean_quotes = re.compile('(%27)|()') r = Redis() record_max_length = 50 class TapeChatTag(object): def __init__(self): self.all_entries = '' self.next_page = 1 self.prev_page = 0 self.page_value = 0 self.start_pos = 0 self.text_entries = '' def generate(self,user_id): if self.next_page < 0: self.next_page = 0 if self.prev_page < 0: self.prev_page = 0 if self.page_value < 0: self.page_value = 0 try:
# Feed import script: walks every registered feed, and for each entry whose
# GUID is not yet known to Redis, strips the summary markup, runs it through
# AutoTagify and reserves a new text id.
import feedparser
import re
import urllib

from redis import Redis
from BeautifulSoup import BeautifulSoup
from auto_tagify import AutoTagify
from tapechat_tag import TapeChatTag
from time import time

# Only tags named here are left untouched when an entry summary is cleaned.
VALID_TAGS = ['p']

# Shared tagger; generated links are rooted at /tags.
tag = AutoTagify()
tag.link = '/tags'

# NOTE(review): tapechat_tag, text_unique and circle_sym are not used in the
# visible portion of this script.
tapechat_tag = TapeChatTag()
r = Redis()
text_unique = []

# Feed ids from the "global:feeds" key, sorted in descending order.
feeds = r.sort("global:feeds",desc=True)
circle_sym = re.compile('(•)|(\xe2\x80\xa2)')
# Matches opening and closing <p> tags so they can be replaced by spaces.
p_tags = re.compile('(<p>)|(</p>)')

for feed_id in feeds:
    # The feed URL is stored under "fid:<id>:url".
    feed = r.get("fid:" + str(feed_id) + ":url")
    rss = feedparser.parse(feed)
    for entry in rss.entries:
        # Process only entries with no "guid:<guid>:fid" key in Redis yet.
        if not r.exists("guid:" + str(entry.guid) + ":fid"):
            clean_text = BeautifulSoup(entry.summary)
            # Mark every tag outside VALID_TAGS as hidden; presumably this
            # drops the tag markup but keeps its contents when rendered --
            # TODO confirm against the BeautifulSoup version in use.
            for t in clean_text.findAll(True):
                if t.name not in VALID_TAGS:
                    t.hidden = True
            # Replace the <p> markers with spaces, then let AutoTagify
            # produce the tagged text.
            tag.text = p_tags.sub(' ',clean_text.renderContents())
            sanitized_text = tag.generate()
            # Reserve the next text id from the global counter.
            text_id = r.incr("global:nextTextId")
def testTagsNotEmpty(self):
    """Check that ``tag_list()`` yields the expected tags for a plain sentence."""
    tagger = AutoTagify()
    tagger.text = 'This is a test with other valid tags'
    self.assertEqual(tagger.tag_list(), ['test', 'other', 'valid', 'tag'])
def testTextEmpty(self):
    """Check that ``generate()`` gives an empty string when no text was set."""
    output = AutoTagify().generate()
    self.assertEqual(output, '')