def test_update_page(mock_model, client, admin):
    """PUT on api.update_page persists the changes and echoes them back."""
    # Given: an existing page in the database
    existing = Page(name='Page', text='coucou')
    existing.create()
    payload = {
        "_id": "page",
        "name": "Super Page",
        "text": "<b>coucou</b> <script>lol()</script>",
    }

    # When: the page is updated through the API
    response = client.put(
        url_for('api.update_page', page_id='page'),
        data=json.dumps(payload),
        content_type='application/json',
    )

    # Then: the response reflects the updated fields
    body = response.json
    assert response.status_code == 200
    assert body['data']['name'] == 'Super Page'
    assert body['data']['_id'] == 'page'
    assert body['data']['text'] == '<b>coucou</b> <script>lol()</script>'
def get_pages_from_json(json) -> Dict[str, Page]:
    """Build a mapping of url -> Page from a search-engine JSON response.

    Hits whose ``_source`` lacks a valid ``raw_url`` are skipped, with a
    diagnostic printed for each one.
    """
    categorizer = Categorizer()
    result = {}

    for hit in get_hits(json):
        source = hit.get("_source")
        url = source.get("raw_url")
        if not is_url_valid(url):
            print("cannot find url for source: {}".format(source))
            continue

        content = source.get("content")
        # Round-trip through UTF-8, dropping invalid sequences, before
        # handing the text to the categorizer.
        sanitized = bytes(content, 'utf-8').decode('utf-8', 'ignore')
        category = Category(
            name=categorizer.categorize(sanitized),
            occurrence=1,
        )

        result[url] = Page(
            id=hit.get("_id"),
            url=url,
            title=source.get("title"),
            last_updated=source.get("updated_on"),
            links=get_links_from_json(source.get("links")),
            content=content,
            categories=[category],
        )

    return result
def create_pages(self, data: dict, uuid: str):
    """
    Take formatted JSON log data and save it to a log.

    :param self: Task instance, supplied by Celery.
    :param data: Formatted data.
    :param uuid: Log uuid.
    :return: Log uuid.
    """
    progress = WebSocketProgressRecorder(self)
    messages = data.pop('messages')

    # Split messages into chunks of 1000, reporting progress per chunk.
    offsets = range(0, len(messages), 1000)
    total = len(offsets)
    chunks = []
    for step, start in enumerate(offsets):
        progress.set_progress(step, total)
        chunks.append(messages[start:start + 1000])

    # Block until the Log row has been created elsewhere before attaching
    # pages to it.
    while not Log.objects.filter(uuid=uuid).exists():
        time.sleep(1)

    log = Log.objects.update_or_create(
        uuid=uuid, defaults={'users': data.pop('users')})[0]
    pages = Page.objects.bulk_create([
        Page(log=log, messages=chunk, index=position)
        for position, chunk in enumerate(chunks)
    ])
    log.pages.set(pages)
    progress.set_progress(total, total)
    return uuid
def make_custom_page(url: str, links: List[str] = None, category: str = 'Social', content: str = 'Social page content') -> Page:
    """Build a Page carrying one category and one Link per entry of *links*.

    :param url: URL of the page.
    :param links: Outgoing link URLs; defaults to no links. The previous
        signature used a mutable default (``links=[]``), which is shared
        between calls — replaced with a ``None`` sentinel (backward
        compatible: omitting the argument still yields an empty link list).
    :param category: Name of the single category attached to the page.
    :param content: Page body text.
    :return: The constructed Page.
    """
    if links is None:
        links = []
    return Page(
        url=url,
        categories=[Category(name=category, occurrence=1)],
        content=content,
        links=[Link(link=target, occurrences=1) for target in links],
    )
def create_page_document(db_session: Session, document_id: uuid.UUID):
    """Persist a brand-new page attached to the given document.

    :param db_session: Active SQLAlchemy session.
    :param document_id: Document the new page belongs to.
    :return: Id of the newly created page.
    """
    new_page = Page(document=document_id)
    db_session.add(new_page)
    db_session.commit()
    return new_page.id
def create_page_old(db_session: Session, page_id: uuid.UUID):
    """Clone an unprocessed page and record the clone as its processed version.

    :param db_session: Active SQLAlchemy session.
    :param page_id: Id of the source page; it must not have a processed page yet.
    :raises NotFoundException: If no matching unprocessed page exists.
    :return: Id of the newly created page.
    """
    source = (
        db_session.query(Page)
        .filter(Page.id == page_id)
        .filter(Page.processed_page == None)  # noqa: E711 — SQLAlchemy "IS NULL"
        .first()
    )
    if not source:
        raise NotFoundException

    clone = Page(rotation=source.rotation, document=source.document)
    db_session.add(clone)
    db_session.flush()  # populate clone.id before it is referenced below
    source.processed_page = clone.id
    db_session.commit()
    return clone.id
def guarantee_pages_for_links(pages: Dict[str, Page]) -> Dict[str, Page]:
    """
    :param pages: Original pages from the database with possible links to non-existent pages
    :type pages: Dict[Page]
    :return: Pages with links to guaranteed pages
    :rtype: Dict[Page]
    """
    guaranteed = dict(pages)
    for page in pages.values():
        for outgoing in page.links:
            target_url = outgoing.link
            if not guaranteed.get(target_url):
                # Fabricate a placeholder page so every link resolves.
                guaranteed[target_url] = Page(id=target_url, url=target_url)
    return guaranteed
def create_from(cls, procedure_id, display_index):
    """Construct a Page positioned at *display_index* within *procedure_id*."""
    return Page(
        display_index=display_index,
        procedure_id=procedure_id,
    )
def page():
    """Fixture: a clean page with simple HTML content."""
    attrs = {'name': 'Super Page', 'text': '<p>Youpi !</p>'}
    return Page(**attrs)
def dirty_page():
    """Fixture: a page whose name and text contain markup expected to be sanitized."""
    attrs = {
        'name': '<script>niark()</script> Super Page',
        'text': '<p>Youpi !</p> <abc>super</abc>',
    }
    return Page(**attrs)
def page_summary(request):
    """Read (GET) or update (POST) the stored summary for a page.

    GET params: ``url`` — page whose summary is fetched.
    POST params: ``url``, ``summary`` — page to update and its new summary;
    creates the Domain/Page/Summary rows on demand and records the previous
    summary in SummaryHistory.

    :return: dict with ``success``, ``errors`` and ``data`` keys.
    """
    success = False
    user = request.user
    data = {}
    errors = {'page_summary': []}
    data['summary'] = {'summary': ''}

    if request.GET:
        url = process_url(request.GET.get('url'))
        try:
            p = Page.objects.get(url=url)
            s, _ = Summary.objects.get_or_create(page=p)
            data['summary'] = {
                'summary': s.summary,
                'user': s.last_editor.username,
                'date': _format_summary_date(s.date),
            }
            success = True
        # Was a bare ``except:``; narrowed so system exits aren't swallowed.
        except Exception:
            errors['page_summary'].append('Could not get page ' + url)

    if request.POST:
        url = process_url(request.POST.get('url'))
        domain = '{uri.netloc}'.format(uri=urlparse(url))
        summary = request.POST.get('summary')
        try:
            d, _ = Domain.objects.get_or_create(url=domain)
            # (dropped redundant d.save(): get_or_create already persists)
            # .exists() avoids materializing the queryset just to count it.
            if not Page.objects.filter(url=url).exists():
                p = Page(url=url, domain=d)
                p.save()
            else:
                p = Page.objects.get(url=url)
            s, _ = Summary.objects.get_or_create(page=p)
            SummaryHistory(user=user, new_summary=summary,
                           previous_summary=s.summary, summary=s).save()
            s.summary = summary
            s.last_editor = user
            s.date = datetime.now()
            s.save()
            data['summary'] = {
                'summary': summary,
                'user': user.username,
                # Fix: the POST branch previously only tagged the timezone
                # (replace) without converting (astimezone) — now both
                # branches share the same UTC -> local conversion.
                'date': _format_summary_date(s.date),
            }
            success = True
        except Exception:  # was a bare except
            errors['page_summary'].append('Could not get page ' + url)

    return {
        'success': success,
        'errors': errors,
        'data': data,
    }


def _format_summary_date(dt):
    """Treat naive *dt* as UTC and render it in the server's local timezone.

    NOTE(review): the format '%b %m, %Y' prints the month twice and no day —
    '%b %d, %Y' was probably intended; kept as-is to avoid changing output.
    """
    local = dt.replace(tzinfo=tz.tzutc()).astimezone(tz.tzlocal())
    return local.strftime('%b %m, %Y, %I:%M %p')
def initialize_page(request):
    """Register/refresh a page on POST, returning its tags and highlight count.

    Creates the Domain and Page rows on demand, collects the page-level tags
    (optionally attaching them to the requesting user), and — for pages seen
    for the first time or with no tags yet — fetches the page content and
    auto-counts value tags against the user's subscribed tag-collection trie.

    :return: dict with ``success``, ``errors``, ``tags`` and ``highlights``.
    """
    tags = {}
    errors = {}
    user = request.user
    count_tags = False
    highlights = 0

    if request.POST:
        url = process_url(request.POST.get('url'))
        favIconUrl = request.POST.get('favIconUrl')
        domain_name = request.POST.get('domain_name')
        title = request.POST.get('title')
        add_usertags = request.POST.get('add_usertags')
        domain = '{uri.netloc}'.format(uri=urlparse(url))
        errors['add_page'] = []
        title = url if title == "" else title

        # Add domain
        d, d_created = Domain.objects.get_or_create(url=domain)
        if domain_name is not None:
            d.name = domain_name
        d.save()

        # Add (or refresh) page — narrowed from a bare ``except:``.
        try:
            p = Page.objects.get(url=url)
            p.title = title
            p.save()
        except Exception:
            # .exists() replaces len(queryset) == 0 — no full fetch needed.
            if not Page.objects.filter(url=url).exists():
                p = Page(url=url, domain=d)
                p.title = title
                p.save()
                count_tags = True
            else:
                errors['add_page'].append("More than one page exists")

        if len(errors['add_page']) == 0:
            highlights = len(Highlight.objects.filter(page__url=url))
            vts = Tag.objects.filter(page__url=url, highlight=None)
            if not vts:
                count_tags = True
            for vt in vts:
                tags[vt.common_tag.name] = {
                    'user_voted': False,
                    'name': vt.common_tag.name,
                    'color': vt.common_tag.color,
                    'description': vt.common_tag.description,
                    'is_private': vt.is_private,
                    'vote_count': len(Vote.objects.filter(tag=vt)),
                }
                # Add tag to user
                if add_usertags == "true":
                    uti, created = UserTagInfo.objects.get_or_create(
                        user=user, page=p, tag=vt)
                    uti.save()

        if count_tags:
            errors['get_tc'] = []
            try:
                tc = TagCollection.objects.get(subscribers=user)
                trie = json.loads(tc.trie_blob)
            except Exception:  # was a bare except
                errors['get_tc'].append('User not subscribed')

            if len(errors['get_tc']) == 0:
                # Count value tags for page.
                # SECURITY NOTE(review): verify=False disables TLS certificate
                # verification for this outbound fetch — confirm intentional.
                r = requests.get(url, verify=False)
                emotes = countEmote(r.text, trie)
                ts = [(e, emotes[e]) for e in emotes if e]
                # Fix: the result of sorted() was previously discarded, so the
                # "top 3" selection below ran over an unsorted list.
                ts.sort(key=lambda x: x[1], reverse=True)
                errors['add_valuetags'] = []
                if len(ts) == 0:
                    errors['add_valuetags'].append('No tags counted')
                count = 3
                for tag in ts:
                    # Keep at most 3 tags, each seen more than twice.
                    if tag[1] > 2 and count > 0:
                        count -= 1
                        name = tag[0]
                        # Add tag to page
                        try:
                            vt = Tag.objects.get(page__url=url,
                                                 common_tag__name=name,
                                                 highlight=None)
                        except Tag.DoesNotExist:
                            try:
                                common_tag = CommonTag.objects.get(name=name)
                                vt = Tag(page=p, common_tag=common_tag,
                                         word_count=tag[1])
                                vt.save()
                                # Add tag to user
                                if add_usertags == "true":
                                    uti, created = UserTagInfo.objects.get_or_create(
                                        user=user, page=p, tag=vt)
                                    uti.save()
                            except CommonTag.DoesNotExist:
                                errors['add_valuetags'].append(
                                    "Could not get base tag")
                        if len(errors['add_valuetags']) == 0:
                            tags[name] = {
                                'name': name,
                                'color': vt.common_tag.color,
                                'description': vt.common_tag.description,
                            }

    success = True
    for error_field in errors:
        if errors[error_field] != []:
            success = False
    return {
        'success': success,
        'errors': errors,
        'tags': tags,
        'highlights': highlights,
    }
def get_linked_groups(pages: Dict[str, Page], parent_group_id: str = None) -> List[Group]:
    """
    :param parent_group_id: id of the group of which pages the subgroups will be created
    :type parent_group_id: str
    :param pages: the original pages from db
    :type pages: Dict[str, Page]
    :return: a list of groups with links to other groups with all isolates as a separate group
    :rtype: List[Group]
    """
    # Working copy of the page graph; coarsened in place on each pass.
    mined_data = dict(pages)
    partition_count_last_run = -1
    # Start above the threshold so the loop runs at least once.
    partition_count = MAX_PARTITION_COUNT + 1
    # group id -> flat list of original pages collected so far.
    group_id_to_pages = {}
    number_of_runs = 0
    isolates = []
    linked_groups = []
    # NOTE(review): original_table_to_alias is never read after this call;
    # only original_table_to_original_keys is used below.
    original_table_to_alias, original_table_to_original_keys = create_hash_tables(
        mined_data)
    # Repeat until the partition is small enough, or a pass no longer
    # reduces the number of groups (fixed point).
    while partition_count > MAX_PARTITION_COUNT and partition_count_last_run != partition_count:
        number_of_runs += 1
        partition_count_last_run = partition_count
        table_to_alias, table_to_original = create_hash_tables(mined_data)
        page_originals_partition, isolated_nodes = get_groups_without_links_and_isolates(
            mined_data, table_to_alias, table_to_original)
        # Only the isolates found on the first pass that yields any are kept.
        if not isolates and isolated_nodes:
            isolates = isolated_nodes
        linked_groups = get_linked_groups_from_ids(mined_data,
                                                   page_originals_partition,
                                                   parent_group_id)
        new_mined_data = {}
        partition_count = 0
        new_group_ids_with_pages = {}
        for group in linked_groups:
            partition_count += 1
            # Represent the whole group as a single synthetic page so the
            # next pass can coarsen the graph further.
            new_mined_data[group.id] = Page(id=group.id,
                                            url=group.id,
                                            links=group.links,
                                            categories=group.categories)
            new_group_ids_with_pages[group.id] = []
            for subgroup_id in group.members:
                if subgroup_id in group_id_to_pages:
                    # Member is a group from a previous pass — inherit its
                    # already-flattened page list.
                    new_group_ids_with_pages[
                        group.id] += group_id_to_pages[subgroup_id]
                else:
                    # Member is still an original page.
                    new_group_ids_with_pages[group.id].append(
                        group.members[subgroup_id])
        group_id_to_pages = new_group_ids_with_pages
        mined_data = new_mined_data
    # Replace each group's member ids with the flattened url -> page mapping.
    for group in linked_groups:
        group.members = {x.url: x for x in group_id_to_pages[group.id]}
    # Isolates are only attached at the top level, not when building subgroups.
    if parent_group_id is None:
        linked_groups = insert_isolated_nodes_group(
            linked_groups, isolates, pages, original_table_to_original_keys)
    return linked_groups