def index(request):
    """Render the blog home page with the active theme's template."""
    blog = common.get_blog()
    page_vars = {
        'blog': blog,
        'title': common.get_title(blog),
        'is_home': True,
    }
    # Templates are namespaced per theme, e.g. "mytheme/blog_home.html".
    template_path = '%s/blog_home.html' % (blog.theme,)
    template = loader.get_template(template_path)
    return HttpResponse(template.render(RequestContext(request, page_vars)))
def show_entry(request, slug=''):
    """Render the detail page for one published entry, looked up by slug.

    Unpublished or unknown slugs yield a 404.
    """
    blog = common.get_blog()
    entry = get_object_or_404(Entry, slug=slug, is_published=True)
    # NOTE(review): the page title is the generic "Entry list" even though
    # this is a single-entry detail view -- confirm whether it should use
    # the entry's own title instead.
    page_vars = {
        'blog': blog,
        'title': common.get_title(blog, u"Entry list"),
        'entry': entry,
    }
    template_path = '%s/blog_entry_detail.html' % (blog.theme,)
    template = loader.get_template(template_path)
    return HttpResponse(template.render(RequestContext(request, page_vars)))
def show_page(request, slug):
    """Render a static blog page identified by its slug (404 if missing)."""
    blog = common.get_blog()
    page = get_object_or_404(Page, slug=slug)
    page_vars = {
        'blog': blog,
        'title': common.get_title(blog, page.title),
        'page': page,
    }
    template_path = '%s/blog_page_detail.html' % (blog.theme,)
    template = loader.get_template(template_path)
    return HttpResponse(template.render(RequestContext(request, page_vars)))
def worker(work_queue, done_queue):
    """Consume URLs from work_queue until the 'STOP' sentinel is received.

    For each URL the page title is fetched and a status line is pushed onto
    done_queue. If anything raises, a single failure line is pushed instead
    and the worker stops. Always returns True.
    """
    try:
        while True:
            url = work_queue.get()
            if url == 'STOP':
                break
            url_title = common.get_title(url)
            done_queue.put("{} got {}.".format(current_process().name, url_title))
    except Exception as e:
        # Best-effort reporting: the failing URL and the error are sent back
        # through the result queue rather than propagated.
        done_queue.put("{} failed on {} with: {}".format(
            current_process().name, url, e))
    return True
def show_tag(request, tag_name):
    """List published entries carrying a tag, paginated by blog.display_count.

    Unknown tag names raise 404. A page number past the end falls back to the
    last page; a non-numeric page number falls back to page 1.
    """
    blog = common.get_blog()
    tag = get_object_or_404(Tag, tag_name=tag_name)
    # 'page' comes from the query string (untrusted): a bare int() here would
    # raise ValueError and produce a 500 on e.g. ?page=abc.
    try:
        page = int(request.GET.get('page', 1))
    except (TypeError, ValueError):
        page = 1
    entries = tag.entry_set.filter(is_published=True)
    for entry in entries:
        # SECURITY NOTE(review): eval() of DB-stored text executes arbitrary
        # code if Entry.location can ever be attacker-controlled; if location
        # holds a plain literal (tuple/dict), switch to ast.literal_eval.
        entry.location = eval(entry.location) if entry.location else None
    paginator = Paginator(entries, blog.display_count)
    try:
        contacts = paginator.page(page)
    except EmptyPage:
        # Out-of-range page numbers (including < 1) clamp to the last page.
        contacts = paginator.page(paginator.num_pages)
    context = RequestContext(request, {
        'blog': blog,
        'title': common.get_title(blog, u"Tag: {0}".format(tag_name)),
        'contacts': contacts,
    })
    template = loader.get_template('%s/blog_entry_list.html' % (blog.theme,))
    return HttpResponse(template.render(context))
def show_category(request, slug=''):
    """List published entries of a category, paginated by blog.display_count.

    Unknown category slugs raise 404. A page number past the end falls back
    to the last page; a non-numeric page number falls back to page 1.
    """
    blog = common.get_blog()
    category = get_object_or_404(Category, slug=slug)
    # 'page' comes from the query string (untrusted): a bare int() here would
    # raise ValueError and produce a 500 on e.g. ?page=abc.
    try:
        page = int(request.GET.get('page', 1))
    except (TypeError, ValueError):
        page = 1
    entries = category.entry_set.filter(is_published=True)
    for entry in entries:
        # SECURITY NOTE(review): eval() of DB-stored text executes arbitrary
        # code if Entry.location can ever be attacker-controlled; if location
        # holds a plain literal (tuple/dict), switch to ast.literal_eval.
        entry.location = eval(entry.location) if entry.location else None
    paginator = Paginator(entries, blog.display_count)
    try:
        contacts = paginator.page(page)
    except EmptyPage:
        # Out-of-range page numbers (including < 1) clamp to the last page.
        contacts = paginator.page(paginator.num_pages)
    context = RequestContext(request, {
        'blog': blog,
        'title': common.get_title(blog, category.title),
        'category': category,
        'contacts': contacts,
    })
    template = loader.get_template('%s/blog_entry_list.html' % (blog.theme,))
    return HttpResponse(template.render(context))
def show_archive(request, archive):
    """Show the paginated entry list for a monthly archive.

    `archive` carries the year in its first four characters and the month in
    its last two (e.g. "2013-01"). Malformed or out-of-range values raise
    Http404 rather than leaking a ValueError as a 500.
    """
    blog = common.get_blog()
    # A non-numeric slug (e.g. "abcd-ef") should be a 404, not a crash.
    try:
        year = int(archive[:4])
        month = int(archive[-2:])
    except (TypeError, ValueError):
        raise Http404("Archive does not exist")
    if (year > 0) and (month > 0) and (month <= 12):
        # 'page' comes from the query string (untrusted): a bare int() here
        # would raise ValueError and produce a 500 on e.g. ?page=abc.
        try:
            page = int(request.GET.get('page', 1))
        except (TypeError, ValueError):
            page = 1
        entries = blog.get_archive_entries(year, month)
        for entry in entries:
            # SECURITY NOTE(review): eval() of DB-stored text executes
            # arbitrary code if Entry.location can ever be attacker-controlled;
            # if it holds a plain literal, switch to ast.literal_eval.
            entry.location = eval(entry.location) if entry.location else None
        paginator = Paginator(entries, blog.display_count)
        try:
            contacts = paginator.page(page)
        except EmptyPage:
            # Out-of-range page numbers (including < 1) clamp to the last page.
            contacts = paginator.page(paginator.num_pages)
        context = RequestContext(request, {
            'blog': blog,
            'title': common.get_title(blog, u"{0}年{1}月".format(year, month)),
            'contacts': contacts,
        })
        template = loader.get_template('%s/blog_entry_list.html' % (blog.theme,))
        return HttpResponse(template.render(context))
    else:
        raise Http404("Archive does not exist")
raw_text = unspace(p.get_text()) pdf = None title = None if has_pdf(p): pdf = has_pdf(p) pdf = urljoin(URL, pdf) #print(pdf) presentation = None if "presentation" in p.text: presentation = has_pdf(p) presentation = urljoin(URL, presentation) try: # Remove the first text before ":" if pdf: title = get_title(p) except: continue authors = p.text.split(":")[0] authors = namify(authors) if len(authors) <= 2 and title == None: continue if title == None: try: title = p.text.split(":")[1] except: pass try: title = unspace(title) title = remove_parenthesised(title)
line = unspace(p.text) sliced_line = line.split(splitter) try: left = splitter.join(sliced_line[:-1]) right = sliced_line[-1] except: print(sliced_line) continue raw_text = p.get_text().replace("\n", " ") pdf = None if has_pdf(p): pdf = has_pdf(p) title = get_title(p) if has_pdf(p1): title2 = get_title(p1) title = title + " " + title2 p2 = page_lines[i + 2] j = 2 p1 = p2 if not pdf: continue while len(p1.text.strip()) <= 3: p1 = page_lines[i + j] j = j + 1 try:
def main_parse(data, state, myname, settings):
    """Main entry point: dispatch one incoming IRC message to a response.

    Returns an ircparser.Out_Messages (or None when no reply is warranted).
    NOTE(review): the original docstring claimed the return should be a raw
    IRC line minus the trailing CRLF and then retracted that claim; from this
    code the return is an Out_Messages object -- confirm against ircparser.
    """
    # Only parsed inbound messages are handled; anything else is ignored.
    if type(data) != ircparser.In_Message:
        return None
    msg = data.message
    channel = data.recipient
    sendernick = data.sender
    senderident = data.senderident
    command_prefix = settings['behaviour']['command_prefix']
    is_admin = common.is_admin(sendernick, senderident)
    # Lazily refill the per-channel markov sentence pool from the channel log.
    if not state['markov_sentences'][channel]:
        try:
            state['markov_sentences'][channel] = markov.run_cmarkov(myname, settings, 'log/{}.log'.format(channel))
        except ValueError as e:
            common.log(str(e), 'error')
    # Blacklisted senders get no response at all.
    if common.is_blacklisted(sendernick, senderident):
        return None
    # True when msg starts with "<prefix><cmd>" followed by whitespace or EOL.
    startswith_cp = lambda msg, cmd: re.match(r'[{}]{}(\s|$)'.format(command_prefix, cmd), msg)
    url_re = re.compile(r'https?://\S+') #(www[.]\S+?[.]\S+)
    spotify_url_re = re.compile(r'spotify(:\S+)+?')
    plugins = get_plugins()
    # .giveop
    if startswith_cp(msg, 'giveop') and is_admin:
        return giveop(msg, myname, channel, sendernick)
    # memery: addressed directly ("<botname>: ..."), answer with a markov
    # sentence roughly half the time.
    elif re.match('{}.? '.format(myname), msg):
        if random.randint(1, 2) == 1:
            try:
                sentence = state['markov_sentences'][channel].pop(0)
                return ircparser.Out_Messages(myname, channel, '{}: {}'.format(sendernick, sentence))
            except IndexError:
                # Sentence pool exhausted; stay silent.
                pass
    # .help
    elif startswith_cp(msg, 'help'):
        return ircparser.Out_Messages(myname, channel, get_command_help(msg, sendernick, myname, command_prefix, plugins))
    # plugins: first word (sans prefix) names a known, non-blacklisted plugin.
    elif msg.startswith(command_prefix)\
            and msg.split()[0][1:] in plugins\
            and msg.split()[0][1:] not in settings['plugins']['blacklist']:
        return ircparser.Out_Messages(myname, channel, run_plugin(sendernick, msg, msg.split()[0][1:]))
    # Title: echo the page titles of any URLs in the message (deduplicated).
    elif url_re.search(msg):
        titles = []
        for url in url_re.findall(msg):
            title = common.get_title(url)
            if title and title not in titles:
                titles.append(title)
        return ircparser.Out_Messages(myname, channel, titles)
    # spotify title: resolve spotify:... URIs via open.spotify.com and
    # reformat "<track> by <artist> on Spotify" as "Spotify: <track> (<artist>)".
    elif spotify_url_re.search(msg):
        titles = []
        for m in spotify_url_re.findall(msg):
            title = common.get_title('http://open.spotify.com' + m.replace(':', '/'))
            if title and title not in titles:
                titles.append(re.sub(r'(.+?) by (.+?) on Spotify', r'Spotify: \1 (\2)', title))
        return ircparser.Out_Messages(myname, channel, titles)
    # Rest of the commands
    else:
        output = get_output(msg, myname, sendernick, channel, command_prefix)
        if output:
            return ircparser.Out_Messages(myname, channel, output)
        # markov chain-style talking: unprompted chatter with probability
        # 1/frequency (0 disables it).
        else:
            if settings['markov']['frequency'] > 0 and \
                    random.randint(1, settings['markov']['frequency']) == 1:
                try:
                    sentence = state['markov_sentences'][channel].pop(0)
                    return ircparser.Out_Messages(myname, channel, sentence)
                except IndexError:
                    pass
for i in range(len(page_lines)): j = 1 p = page_lines[i] p1 = page_lines[i+1] raw_text = p.get_text().replace("\n", " ") pdf = None if has_pdf(p) : pdf = has_pdf(p) print(pdf) title = p.text if has_pdf(p1) : title2 = get_title(p1) title = title+" "+title2 p2 = page_lines[i+2] j = 2 p1 = p2 if not pdf: continue while len(p1.text.strip()) <= 3: p1 = page_lines[i+j] j = j+1 try: pages = extract_pages(p1)
def worker(url_queue, result_queue):
    """Run forever, resolving queued URLs to page titles.

    Each URL taken from url_queue is resolved via common.get_title, the
    title is pushed onto result_queue, and the item is acknowledged with
    task_done() so url_queue.join() can unblock.
    """
    while True:
        url = url_queue.get()
        title = common.get_title(url)
        result_queue.put(title)
        url_queue.task_done()