Пример #1
0
    def get(self):
        """Render the first page of the article index.

        When loading the homepage posts raises, the request is redirected
        to ``/install`` and nothing is rendered.

        Returns:
            The rendered page (also written to the response), or ``None``
            after the redirect.
        """
        try:
            objs = Article.get_post_for_homepage()
        except Exception:
            # Any failure to load the posts sends the visitor to the installer.
            self.redirect('/install')
            return
        if objs:
            fromid = objs[0].id
            endid = objs[-1].id
        else:
            fromid = endid = ''

        # divmod keeps the page count an integer under true division too;
        # a partially filled last page still counts as a page.
        allpost = Article.count_all_post()
        allpage, remainder = divmod(allpost, EACH_PAGE_POST_NUM)
        if remainder:
            allpage += 1

        output = self.render('index.html', {
            'title': "%s - %s" % (SITE_TITLE, SITE_SUB_TITLE),
            'keywords': KEYWORDS,
            'description': SITE_DECR,
            'objs': objs,
            'cats': Category.get_all_cat_name(),
            'tags': Tag.get_hot_tag_name(),
            'page': 1,
            'allpage': allpage,
            'listtype': 'index',
            'fromid': fromid,
            'endid': endid,
            'comments': Comment.get_recent_comments(),
            'links': Link.get_all_links(),
        },
                             layout='_layout.html')
        self.write(output)
        return output
Пример #2
0
 def get(self, direction = 'next', page = '2', base_id = '1'):
     """Render one page of the article index other than the first.

     ``page`` arrives as a string; a request for page ``'1'`` is redirected
     to ``BASE_URL``. ``direction`` and ``base_id`` are passed through to
     ``Article.get_page_posts``.

     Returns:
         The rendered page (also written to the response), or ``None``
         after the redirect.
     """
     if page == '1':
         self.redirect(BASE_URL)
         return
     objs = Article.get_page_posts(direction, page, base_id)
     if objs:
         if direction == 'prev':
             # Posts fetched for 'prev' are flipped before display.
             objs.reverse()
         fromid = objs[0].id
         endid = objs[-1].id
     else:
         fromid = endid = ''

     # divmod keeps the page count an integer under true division too;
     # a partially filled last page still counts as a page.
     allpost = Article.count_all_post()
     allpage, remainder = divmod(allpost, EACH_PAGE_POST_NUM)
     if remainder:
         allpage += 1
     output = self.render('index.html', {
         'title': "%s - %s | Part %s" % (SITE_TITLE, SITE_SUB_TITLE, page),
         'keywords': KEYWORDS,
         'description': SITE_DECR,
         'objs': objs,
         'cats': Category.get_all_cat_name(),
         'tags': Tag.get_hot_tag_name(),
         'page': int(page),
         'allpage': allpage,
         'listtype': 'index',
         'fromid': fromid,
         'endid': endid,
         'comments': Comment.get_recent_comments(),
         'links': Link.get_all_links(),
     }, layout='_layout.html')
     self.write(output)
     return output
Пример #3
0
def run_decode():
    """Summarize the posted text with the pretrained model and store the pair.

    Reads the ``source`` field of the request's JSON body, runs
    ``summarizationModel.decode.run_`` on it and then tries to persist the
    text, the summary and a record linking the two. Persistence is
    best-effort: a storage failure is logged and the summary is still
    returned.

    Returns:
        A JSON string. ``{"sent_no": ..., "final": ...}`` on success, or
        ``{"message": ...}`` when the request cannot be read or the model
        fails.
    """
    logging.debug('decode your input by our pretrained model')
    try:
        source = request.get_json()['source']
        logging.debug('input: {}'.format(source))
        try:
            logging.debug('using the pretrained model.')
            sentNums, summary = summarizationModel.decode.run_(source)
        except Exception as e:
            # Without a summary there is nothing to store or return, so
            # report the model failure instead of falling through.
            logging.error(e)
            return json.dumps(
                {'message': 'Fail to decode the input by the pretrained model.'})

        logging.debug('The number of sentences is {}'.format(sentNums))
        logging.debug('The abstract is that {}'.format(summary))
        results = {'sent_no': sentNums, 'final': summary}

        try:
            article = Content(text=source)
            abstract = Summary(text=summary)
            # Save both rows before linking them so that their ids exist
            # when the pair record reads them.
            article.save()
            abstract.save()
            pair = Article(article=article.id, abstract=abstract.id)
            pair.save()
        except Exception as e:
            logging.error(e)

        return json.dumps(results)
    except Exception:
        message = {'message': 'Fail to catch the data from client.'}
        return json.dumps(message)
Пример #4
0
def test_create_article():
    """Insert one randomly generated article and link it to three catalogs.

    The article gets a random author id, title, digest and content. After
    the flush, three random catalog ids are attached through
    ``association_table_catalog_article`` and the session is committed.
    """
    author = random.randint(1, 20)

    post = Article()
    post.title = 'Test_Article_%s' % random.randint(100001, 999999)
    post.author_id = author
    now = datetime.datetime.now()
    post.published_datetime = now
    post.last_modified_datetime = now

    # One random seed feeds both hashes; each hex digest is repeated a
    # random number of times to vary the text length.
    seed = str(random.random())
    digest_body = random.randint(2, 5) * md5(seed).hexdigest()
    post.digest = 'digest - %s' % digest_body

    content_body = random.randint(10, 50) * sha224(seed).hexdigest()
    post.content = ArticleContent(content='content - %s' % content_body)

    db_session.add(post)  # @UndefinedVariable
    # Flush before inserting the association rows, which read ``post.id``.
    db_session.flush()  # @UndefinedVariable

    catalog_ids = [random.randint(1, 20) for _ in range(3)]
    for catalog_id in catalog_ids:
        link = association_table_catalog_article.insert().values({
            'catalog_id': catalog_id,
            'article_id': post.id,
        })
        db_session.execute(link)  # @UndefinedVariable
    db_session.commit()  # @UndefinedVariable
def run(input_filename, output_filename):
    """Group article ids that share an identifier while reading a CSV file.

    Every CSV row becomes an ``Article``; its non-empty fields whose names
    are in ``IDENTIFIERS`` become ``(field, value)`` keys. All keys found
    on one row end up pointing at the same set of article ids, so rows
    that share any identifier are merged into one group. Rows without
    identifiers are collected separately.

    Every 10000 rows the row index, ``getrusage(RUSAGE_SELF)[2]`` and the
    size of the largest merged group are printed; processing stops when
    that usage figure exceeds 1e7. Any exception ends the loop and is
    printed.

    NOTE(review): ``output_filename`` is never used and the groups are
    neither returned nor written -- presumably the rest of the function
    is missing from this excerpt; confirm.
    """
    articles = defaultdict(set)
    without_identifiers = set()

    # ``with`` closes the input file even when processing aborts.
    with open(input_filename, 'r') as source:
        reader = csv.reader(source)
        try:
            biggest = 0

            for i, article in enumerate(reader):
                article = Article(*article)
                identifiers = [(k, v) for k, v in article._asdict().items()
                               if k in IDENTIFIERS and v]
                if not identifiers:
                    without_identifiers.add(article.id)
                    continue

                first = identifiers[0]
                articles[first].add(article.id)
                for identifier in identifiers[1:]:
                    # Compare by identity: both keys may already share
                    # one set object from an earlier merge.
                    if articles[first] is not articles[identifier]:
                        articles[first] |= articles[identifier]
                        articles[identifier] = articles[first]
                        if len(articles[identifier]) > biggest:
                            biggest = len(articles[identifier])

                if i % 10000 == 0:
                    usage = resource.getrusage(resource.RUSAGE_SELF)[2]
                    print("%7d %s %s" % (i, usage, biggest))
                    if usage > 1e7:
                        print("Using too much memory")
                        raise Exception
        except Exception as e:
            print(e)
Пример #6
0
 def post(self, secret="", id=""):
     """Store the posted ``article`` text for post ``id``.

     The text is stored only when ``id``, the ``article`` argument and
     ``secret`` are all non-empty and ``secret`` equals the configured
     ``MOVE_SECRET``.

     Writes ``"1"`` on success and ``"Fail"`` otherwise.
     """
     # Keep the plain string (not a one-element tuple, which is always
     # truthy) so that an empty payload is actually rejected.
     article = self.get_argument("article", "")
     if id and article and secret:
         if secret == getAttr("MOVE_SECRET"):
             Article.set_article(id, encode_special_txt(article))
             return self.write("1")
     return self.write("Fail")
Пример #7
0
 def post(self, secret=''):
     """Store the posted ``articles`` payload.

     The payload is stored only when both the ``articles`` argument and
     ``secret`` are non-empty and ``secret`` equals the configured
     ``MOVE_SECRET``.

     Writes ``'1'`` on success and ``'Fail'`` otherwise.
     """
     # Keep the plain string (not a one-element tuple, which is always
     # truthy) so that an empty payload is actually rejected.
     articles = self.get_argument("articles", '')
     if articles and secret:
         if secret == getAttr('MOVE_SECRET'):
             Article.set_articles(articles)
             return self.write('1')
     return self.write('Fail')
Пример #8
0
 def get(self):
     """Render the first page of the article index.

     When loading the homepage posts raises, the request is redirected
     to ``/install`` and nothing is rendered.

     Returns:
         The rendered page (also written to the response), or ``None``
         after the redirect.
     """
     try:
         objs = Article.get_post_for_homepage()
     except Exception:
         # Any failure to load the posts sends the visitor to the installer.
         self.redirect('/install')
         return
     if objs:
         fromid = objs[0].id
         endid = objs[-1].id
     else:
         fromid = endid = ''

     # divmod keeps the page count an integer under true division too;
     # a partially filled last page still counts as a page.
     allpost = Article.count_all_post()
     allpage, remainder = divmod(allpost, EACH_PAGE_POST_NUM)
     if remainder:
         allpage += 1

     output = self.render('index.html', {
         'title': "%s - %s" % (SITE_TITLE, SITE_SUB_TITLE),
         'keywords': KEYWORDS,
         'description': SITE_DECR,
         'objs': objs,
         'cats': Category.get_all_cat_name(),
         'tags': Tag.get_hot_tag_name(),
         'page': 1,
         'allpage': allpage,
         'listtype': 'index',
         'fromid': fromid,
         'endid': endid,
         'comments': Comment.get_recent_comments(),
         'links': Link.get_all_links(),
     }, layout='_layout.html')
     self.write(output)
     return output
Пример #9
0
    def get(self, id=''):
        #try:
        if id:
            oldobj = Article.get_article_by_id_edit(id)
            print 'DelPost()', oldobj
            if not oldobj:
                return
            if MYSQL_TO_KVDB_SUPPORT:
                oldobj_category = oldobj['category']
                oldobj_archive = oldobj['archive']
                oldobj_tags = oldobj['tags']
            else:
                oldobj_category = oldobj.category
                oldobj_archive = oldobj.archive
                oldobj_tags = oldobj.tags

            Category.remove_postid_from_cat(oldobj_category, str(id))
            Archive.remove_postid_from_archive(oldobj_archive, str(id))
            Tag.remove_postid_from_tags(set(oldobj_tags.split(',')), str(id))
            Article.del_post_by_id(id)
            increment('Totalblog', NUM_SHARDS, -1)
            cache_key_list = [
                '/',
                'post:%s' % id,
                'cat:%s' % quoted_string(oldobj_category)
            ]
            clear_cache_by_pathlist(cache_key_list)
            clear_cache_by_pathlist(['post:%s' % id])
            self.redirect('%s/admin/edit_post/' % (BASE_URL))
Пример #10
0
 def post(self, secret='', id=''):
     """Store the posted ``article`` text for post ``id``.

     The text is stored only when ``id``, the ``article`` argument and
     ``secret`` are all non-empty and ``secret`` equals the configured
     ``MOVE_SECRET``.

     Writes ``'1'`` on success and ``'Fail'`` otherwise.
     """
     # Keep the plain string (not a one-element tuple, which is always
     # truthy) so that an empty payload is actually rejected.
     article = self.get_argument("article", '')
     if id and article and secret:
         if secret == getAttr('MOVE_SECRET'):
             Article.set_article(id, encode_special_txt(article))
             return self.write('1')
     return self.write('Fail')
Пример #11
0
def getArticle():
	"""Return every stored article as a JSON response.

	The response body has a single ``rows`` key holding the ``to_json()``
	form of each article returned by ``Article.query.all()``.
	"""
	# No throw-away ``Article()`` instance is needed to run the query.
	rows = [article.to_json() for article in Article.query.all()]
	return jsonify(rows=rows)
Пример #12
0
    def get(self, id=""):
        # try:
        if id:
            oldobj = Article.get_article_by_id_edit(id)
            print "DelPost()", oldobj
            if not oldobj:
                return
            if MYSQL_TO_KVDB_SUPPORT:
                oldobj_category = oldobj["category"]
                oldobj_archive = oldobj["archive"]
                oldobj_tags = oldobj["tags"]
            else:
                oldobj_category = oldobj.category
                oldobj_archive = oldobj.archive
                oldobj_tags = oldobj.tags

            Category.remove_postid_from_cat(oldobj_category, str(id))
            Archive.remove_postid_from_archive(oldobj_archive, str(id))
            Tag.remove_postid_from_tags(set(oldobj_tags.split(",")), str(id))
            Article.del_post_by_id(id)
            increment("Totalblog", NUM_SHARDS, -1)
            cache_key_list = ["/", "post:%s" % id, "cat:%s" % quoted_string(oldobj_category)]
            clear_cache_by_pathlist(cache_key_list)
            clear_cache_by_pathlist(["post:%s" % id])
            self.redirect("%s/admin/edit_post/" % (BASE_URL))
Пример #13
0
 def post(self, secret=""):
     """Store the posted ``articles`` payload.

     The payload is stored only when both the ``articles`` argument and
     ``secret`` are non-empty and ``secret`` equals the configured
     ``MOVE_SECRET``.

     Writes ``"1"`` on success and ``"Fail"`` otherwise.
     """
     # Keep the plain string (not a one-element tuple, which is always
     # truthy) so that an empty payload is actually rejected.
     articles = self.get_argument("articles", "")
     if articles and secret:
         if secret == getAttr("MOVE_SECRET"):
             Article.set_articles(articles)
             return self.write("1")
     return self.write("Fail")
def run(input_filename, output_filename):
    """Group article ids that share an identifier while reading a CSV file.

    Every CSV row becomes an ``Article``; its non-empty fields whose names
    are in ``IDENTIFIERS`` become ``(field, value)`` keys. All keys found
    on one row end up pointing at the same set of article ids, so rows
    that share any identifier are merged into one group. Rows without
    identifiers are collected separately.

    Every 10000 rows the row index, ``getrusage(RUSAGE_SELF)[2]`` and the
    size of the largest merged group are printed; processing stops when
    that usage figure exceeds 1e7. Any exception ends the loop and is
    printed.

    NOTE(review): ``output_filename`` is never used and the groups are
    neither returned nor written -- presumably the rest of the function
    is missing from this excerpt; confirm.
    """
    articles = defaultdict(set)
    without_identifiers = set()

    # ``with`` closes the input file even when processing aborts.
    with open(input_filename, 'r') as source:
        reader = csv.reader(source)
        try:
            biggest = 0

            for i, article in enumerate(reader):
                article = Article(*article)
                identifiers = [(k, v) for k, v in article._asdict().items()
                               if k in IDENTIFIERS and v]
                if not identifiers:
                    without_identifiers.add(article.id)
                    continue

                first = identifiers[0]
                articles[first].add(article.id)
                for identifier in identifiers[1:]:
                    # Compare by identity: both keys may already share
                    # one set object from an earlier merge.
                    if articles[first] is not articles[identifier]:
                        articles[first] |= articles[identifier]
                        articles[identifier] = articles[first]
                        if len(articles[identifier]) > biggest:
                            biggest = len(articles[identifier])

                if i % 10000 == 0:
                    usage = resource.getrusage(resource.RUSAGE_SELF)[2]
                    print("%7d %s %s" % (i, usage, biggest))
                    if usage > 1e7:
                        print("Using too much memory")
                        raise Exception
        except Exception as e:
            print(e)
Пример #15
0
def get_article_link(begin, publisher, fakeid):
    """Fetch one page of an article list and store the entries not yet known.

    Each entry of the response's ``app_msg_list`` gets a sanitized title,
    a ``filename`` (date prefix + ``-`` + title) and the given
    ``publisher``. Entries whose ``aid`` is not stored yet are created.
    The first entry that already exists is updated and ends the loop.

    Returns:
        The list of newly created ``Article`` objects.
    """
    # Fetch the JSON response and turn it into a dict.
    response = requests.get(article_list_url(begin, fakeid),
                            headers=headers.get_cookie())
    payload = json.loads(response.content)
    log('d', type(payload), payload)

    created = []
    for entry in payload['app_msg_list']:
        # Escape characters that are not allowed in file names.
        entry['title'] = validate_title(entry['title'])
        # File name is the date part of the creation time plus the title.
        date_part = formatted_time(entry['create_time']).split(' ', 1)[0]
        entry['filename'] = date_part.replace('/', '') + '-' + entry['title']
        entry['publisher'] = publisher

        # Insert the entry when it is not stored yet; otherwise update the
        # stored one and stop processing this page.
        existing = Article.one(aid=entry['aid'])
        if existing is not None:
            existing.update(existing.id, **entry)
            break
        created.append(Article.new(entry))

    return created
Пример #16
0
    def get(self, direction='next', page='2', base_id='1'):
        """Render one page of the article index other than the first.

        ``page`` arrives as a string; a request for page ``'1'`` is
        redirected to ``BASE_URL``. ``direction`` and ``base_id`` are passed
        through to ``Article.get_page_posts``.

        Returns:
            The rendered page (also written to the response), or ``None``
            after the redirect.
        """
        if page == '1':
            self.redirect(BASE_URL)
            return
        objs = Article.get_page_posts(direction, page, base_id)
        if objs:
            if direction == 'prev':
                # Posts fetched for 'prev' are flipped before display.
                objs.reverse()
            # Posts are mappings in KVDB mode and objects otherwise.
            if MYSQL_TO_KVDB_SUPPORT:
                fromid = objs[0]['id']
                endid = objs[-1]['id']
            else:
                fromid = objs[0].id
                endid = objs[-1].id
        else:
            fromid = endid = ''

        # divmod keeps the page count an integer under true division too;
        # a partially filled last page still counts as a page.
        allpost = Article.count_all_post()
        allpage, remainder = divmod(allpost, EACH_PAGE_POST_NUM)
        if remainder:
            allpage += 1
        output = self.render('index.html', {
            'title': "%s - %s | Part %s" %
            (getAttr('SITE_TITLE'), getAttr('SITE_SUB_TITLE'), page),
            'keywords': getAttr('KEYWORDS'),
            'description': getAttr('SITE_DECR'),
            'objs': objs,
            'cats': Category.get_all_cat_name(),
            'tags': Tag.get_hot_tag_name(),
            'archives': Archive.get_all_archive_name(),
            'page': int(page),
            'allpage': allpage,
            'listtype': 'index',
            'fromid': fromid,
            'endid': endid,
            'comments': Comment.get_recent_comments(),
            'links': Link.get_all_links(),
            'isauthor': self.isAuthor(),
            'Totalblog': get_count('Totalblog', NUM_SHARDS, 0),
        },
                             layout='_layout.html')
        self.write(output)
        return output
Пример #17
0
Файл: views.py Проект: zimuxh/pb
def editor(request):
    content = request.POST['content']
    title = request.POST['title']
    article = Article(title=title,content=content,group='1');
    article.save()
    result = "文章保存成功!"
    print result
    return HttpResponse(result, mimetype='application/javascript')
Пример #18
0
 def get(self):
     """Write the ``index.xml`` feed of the homepage posts.

     The response is sent with the ``application/atom+xml`` content type.
     """
     context = {
         'posts': Article.get_post_for_homepage(),
         'site_updated': Article.get_last_post_add_time(),
     }
     feed = self.render('index.xml', context)
     self.set_header('Content-Type', 'application/atom+xml')
     self.write(feed)
Пример #19
0
 def get(self):
     """Render ``index.xml`` for the homepage posts and send it.

     The body is written with the ``application/atom+xml`` content type.
     """
     latest_posts = Article.get_post_for_homepage()
     updated_at = Article.get_last_post_add_time()
     body = self.render(
         'index.xml', {'posts': latest_posts, 'site_updated': updated_at})
     self.set_header('Content-Type', 'application/atom+xml')
     self.write(body)
Пример #20
0
def _create_structure():
    """Build a linked category / section / article fixture.

    The section is appended to the category's ``sections`` and the article
    to the section's ``articles``. Each object receives a ``meta`` dict
    with an ``id`` and its ``webtranslateit_ids``.

    Returns:
        The ``(category, section, article)`` tuple.
    """
    top = Category('test category', 'category test', 'test_category')
    top.meta = dict(id=1, webtranslateit_ids=dict(content=1))

    middle = Section(top, 'test section', 'section test', 'test_section')
    middle.meta = dict(id=2, webtranslateit_ids=dict(content=2))
    top.sections.append(middle)

    leaf = Article(middle, 'test article', 'article body', 'test_article')
    leaf.meta = dict(id=3, webtranslateit_ids=dict(content=3, body=4))
    middle.articles.append(leaf)

    return top, middle, leaf
    def get(self):
        """Store one entity per row of the ``users`` and ``articles`` seeds.

        A user row supplies the name at index 0 and the age at index 1; an
        article row supplies the title at index 0.
        """
        logging.info("Seeding Datastore...")
        for row in users:
            User(name=row[0], age=row[1]).put()

        for row in articles:
            Article(title=row[0]).put()
Пример #22
0
 def _createArticle(self, row):
     """Build an ``Article`` from the ``article`` fields of ``row``.

     Title and pagination are copied from the row. The ``author`` and
     ``author2`` fields, when non-empty, are turned into author objects by
     ``self.authorFctry`` and appended in that order. The article is
     attached to ``self.periodique``.
     """
     article = Article()
     article.title = self._getFieldValue(row, 'article', 'title')
     article.pagination = self._getFieldValue(row, 'article', 'pagination')

     for field in ('author', 'author2'):
         fullname = self._getFieldValue(row, 'article', field)
         if not fullname:
             continue
         article.authors.append(self.authorFctry.fromFullName(fullname))

     article.periodique = self.periodique
     return article
Пример #23
0
def update_article():
  """Create an article from the request's query parameters.

  The ``Title``, ``Author``, ``Email``, ``Date``, ``URL``, ``Content`` and
  ``Status`` parameters are passed, in that order, to ``Article.create``.
  Missing parameters are passed as ``None``. Redirects to ``/blog``.
  """
  parameter_names = ('Title', 'Author', 'Email', 'Date', 'URL', 'Content',
                     'Status')
  values = [request.args.get(name) for name in parameter_names]
  Article().create(*values)
  return redirect('/blog', code=302)
Пример #24
0
def source_fetch(source):
    """Fetch ``source`` and store the articles that are not yet known.

    Runs ``_source_fetch`` and, when it yields a result, copies the feed
    title and brand onto ``source``, recomputes the shared title suffix
    and looks at the first 25 entries at most. Articles without a ``url``
    are filled in from their entry, saved in one batch and given a fetch
    task on the ``'articles'`` queue. The source is then added to the
    search index, stamped with the fetch time and saved, whether or not
    anything was fetched.
    """
    debug("SF: Doing fetch for source: {0}".format(source.url))
    result = _source_fetch(source)
    debug("SF: Done with source fetch for {0}; result type: {1}".format(source.url, (result.method if result else None)))
    added_any = False
    now = datetime.datetime.now()
    to_put = []
    tasks_to_enqueue = []
    if result:
        # Only overwrite the stored title/brand when the fetch supplied one.
        if result.feed_title:
            source.title = result.feed_title
        if result.brand:
            source.brand = result.brand
        
        titles = [entry['title'] for entry in result.entries if entry['title']]
        source.shared_title_suffix = shared_suffix(titles)
        
        # Consider at most the first 25 entries.
        entries = result.entries[:min(25, len(result.entries))]
        entry_ids = [Article.id_for_article(entry['url'], source.url) for entry in entries]
        print "ENTRY IDs:", entry_ids
        print "ENtry id lens: ", str(map(len, entry_ids))
        # Start all lookups/inserts first, then wait for each result.
        article_futures = [Article.get_or_insert_async(id) for id in entry_ids]
        articles = [future.get_result() for future in article_futures]
        print "ARTICLE_OBJECTS:", articles
        
        for i, (entry, article) in enumerate(zip(entries, articles)):
            # An article without a url has not been filled in yet --
            # presumably it was just inserted by get_or_insert_async.
            if not article.url:
                added_any = True
                article.added_date = now
                article.added_order = i
                article.source = source.key
                article.url = canonical_url(entry.get('url'))
                article.submission_url = canonical_url(entry.get('submission_url'))
                # Fall back to the current time when the entry has no
                # publication date.
                if entry['published']:
                    article.published = entry['published']
                else:
                    article.published = datetime.datetime.now()
                if not article.title:
                    article.title = entry['title']
                to_put.append(article)
                delay = (i+1) * 4 # stagger the fetch tasks: delay grows by 4 per entry
                tasks_to_enqueue.append(article.create_fetch_task(delay=delay))
    debug("SF: About to put {0} items".format(len(to_put)))
    if len(to_put):
        ndb.put_multi(to_put)
    debug("SF: About to enqueue")
    if len(tasks_to_enqueue):
        # The result of add_async is not waited on here.
        taskqueue.Queue('articles').add_async(tasks_to_enqueue)
    debug("SF: done enqueuing")
    if added_any:
        source.most_recent_article_added_date = now
    source_search.add_source_to_index(source)
    source.last_fetched = now
    source.put()
Пример #25
0
def publish():
    """Show the publish form, or store the submitted article on POST.

    On POST a new ``Article`` is created from the ``title`` and ``content``
    form fields (empty string when missing) for the user id held in
    ``session['uid']``, committed, and a confirmation text is returned.
    Any other method renders ``publish.html``.
    """
    if request.method == 'POST':
        form = request.form
        heading = form.get('title', '')
        body = form.get('content', '')

        post = Article()
        post.title = heading
        post.content = body
        post.uid = session['uid']

        db.session.add(post)  # insert
        db.session.commit()
        return '发布成功'

    return render_template('publish.html')
Пример #26
0
def add_article():
    """Adds new clothing article and redirects to the previous category page.

    Reads the category, description, image upload, selected tag ids, new
    tag string and purchase price from the submitted form. When the
    uploaded file's name is not allowed, a message is flashed and nothing
    is created. Otherwise the image is uploaded to Cloudinary, the article
    is created with all selected and newly parsed tags, and the session is
    committed.

    Returns:
        A redirect to ``/categories/<category_id>`` in every case.
    """
    category_id = request.form.get('category')
    description = request.form.get('article-description')
    file = request.files['article-image-upload']
    tag_ids = request.form.getlist('article-tags')
    new_tag_string = request.form.get('new-tags')
    purchase_price = request.form.get('purchase-price')

    category = Category.query.get(category_id)
    category_page = f'/categories/{category_id}'

    if not allowed_file(file.filename):
        # rpartition never raises, so a name without a dot (or an empty
        # name when no file was chosen) is reported instead of crashing.
        _, dot, extension = (file.filename or '').rpartition('.')
        if dot:
            flash(f'File extension .{extension} not allowed')
        else:
            flash('File has no extension and is not allowed')
        return redirect(category_page)

    if not file:
        return redirect(category_page)

    # Sanitizes user input
    filename = secure_filename(file.filename)

    # Cloudinary upload function: 1) folders by user and category name,
    # 2) unique filename is true,
    # 3) use cloudinary's AI to remove background
    # ^ (commented out b/c paid service)
    upload_file = upload(
        file,
        folder=f"user/{session['user_email']}/{category.name}",
        unique_filename=1,
        # background_removal = "cloudinary_ai",
    )

    # For purchase_price, an empty string not ok, but okay to pass None
    new_article = Article(user_id=session['user_id'],
                          category_id=category_id,
                          image=upload_file['secure_url'],
                          description=description,
                          purchase_price=purchase_price or None)

    all_tags = [Tag.query.filter_by(tag_id=tag_id).one()
                for tag_id in tag_ids]

    # Any newly created tags should be added to this as well
    all_tags += Tag.parse_str_to_tag(new_tag_string, session['user_id'])

    # Then create all the tag relationships
    for tag in all_tags:
        new_article.add_tag(tag)

    db.session.add(new_article)
    db.session.commit()
    flash(f"Created new item in {category.name}")

    return redirect(category_page)
Пример #27
0
 def get(self, id = ''):
     """Delete the post ``id`` and everything that refers to it.

     Does nothing when ``id`` is empty or no such post is found. Otherwise
     the post id is removed from its category, archive and tags, the post
     is deleted, the ``Totalblog`` counter is decremented, the related
     cache entries are cleared and the request is redirected to the admin
     edit page.

     A failure during deletion does not propagate to the caller, but it is
     logged with its traceback instead of being silently discarded.
     """
     import logging  # local import: the file's import block is not visible here

     if not id:
         return
     try:
         oldobj = Article.get_article_by_id_edit(id)
         if not oldobj:
             return
         Category.remove_postid_from_cat(oldobj.category, str(id))
         Archive.remove_postid_from_archive(oldobj.archive, str(id))
         Tag.remove_postid_from_tags(set(oldobj.tags.split(',')), str(id))
         Article.del_post_by_id(id)
         increment('Totalblog', NUM_SHARDS, -1)
         cache_key_list = ['/', 'post:%s' % id, 'cat:%s' % quoted_string(oldobj.category)]
         clear_cache_by_pathlist(cache_key_list)
         clear_cache_by_pathlist(['post:%s' % id])
         self.redirect('%s/admin/edit_post/' % (BASE_URL))
     except Exception:
         logging.exception('Failed to delete post %s', id)
Пример #28
0
    def wx_get_latest_articles(self):
        """Return the latest articles as an ``{'articles': [...]}`` message.

        A truthy value cached under ``wx_latest`` is returned as is.
        Otherwise one entry per post from ``Article.get_articles_by_latest``
        is built (title, description, picture URL, short URL), cached and
        returned.
        """
        k = 'wx_latest'
        cached = getMc(k)
        if cached:
            return cached

        entries = []
        for post in Article.get_articles_by_latest():
            title = post['title']
            # Cut the content by characters, not bytes, then drop whatever
            # HTML_REG matches.
            excerpt = post['content'].decode('utf-8')[:DESCRIPTION_CUT_WORDS].encode('utf-8')
            summary = HTML_REG.sub('', excerpt)
            link = '%s/t/%s' % (BASE_URL, post['id'])

            # Add the article entry.
            entries.append({
                'title': title,
                'description': summary,
                'picUrl': WX_DEFAULT_PIC,
                'url': link,
            })

        articles_msg = {'articles': entries}
        setMc(k, articles_msg)
        return articles_msg
Пример #29
0
    def wx_search_article(self, k):
        """Look up one article by keyword ``k`` and wrap it as a message.

        Returns:
            ``{'articles': [entry]}`` with the article's slug (as title),
            description, the default picture and its absolute URL, or
            ``''`` when nothing matches.
        """
        found = Article.get_article_by_keyword(k)
        if not found:
            return ''

        # The article is a mapping in KVDB mode and an object otherwise.
        if MYSQL_TO_KVDB_SUPPORT:
            title = found['slug']
            description = found['description']
            url = found['absolute_url']
        else:
            title = found.slug
            description = found.description
            url = found.absolute_url

        entry = {
            'title': title,
            'description': description,
            'picUrl': WX_DEFAULT_PIC,
            'url': url
        }
        return {'articles': [entry]}
Пример #30
0
    def wx_get_latest_articles(self):
        """Build (or fetch from cache) the message listing the latest posts.

        The cache key is ``wx_latest``; a truthy cached value short-circuits
        the lookup. A freshly built message is stored in the cache before
        it is returned.
        """
        k = 'wx_latest'
        hit = getMc(k)
        if hit:
            return hit

        # One entry per post: title, tag-stripped excerpt of the content
        # (cut by characters), default picture and short URL.
        articles_msg = {
            'articles': [
                {
                    'title': post['title'],
                    'description': HTML_REG.sub(
                        '', post['content'].decode('utf-8')
                        [:DESCRIPTION_CUT_WORDS].encode('utf-8')),
                    'picUrl': WX_DEFAULT_PIC,
                    'url': '%s/t/%s' % (BASE_URL, post['id']),
                }
                for post in Article.get_articles_by_latest()
            ]
        }

        setMc(k, articles_msg)
        return articles_msg
Пример #31
0
def get_articles(userid):
    """Return one page of a user's articles as JSON.

    Query parameters:
        page, per_page: required integers selecting the page.
        status: optional integer filter.
        channel_id: optional; filters by the matching ``Channel``.
        begin_pubdate, end_pubdate: optional bounds on ``created``.

    Returns:
        A JSON response whose ``data`` holds the total number of matching
        articles, the requested ``page`` / ``per_page`` and the articles
        of that page only.
    """
    user = User.objects(id=userid).first()
    page = int(request.args.get('page'))
    per_page = int(request.args.get('per_page'))

    kws = {'user': user}
    status = request.args.get("status")
    if status is not None:
        kws['status'] = int(status)
    channel_id = request.args.get("channel_id")
    if channel_id is not None:
        kws['channel'] = Channel.objects(id=channel_id).first()
    begin_pubdate = request.args.get("begin_pubdate")
    if begin_pubdate is not None:
        kws['created__gte'] = begin_pubdate
    end_pubdate = request.args.get("end_pubdate")
    if end_pubdate is not None:
        kws['created__lte'] = end_pubdate

    articles = Article.objects(**kws)
    paginated_articles = articles.skip((page - 1) * per_page).limit(per_page)
    return jsonify({
        "message": 'OK',
        "data": {
            # Count on the unpaginated query so clients can compute the
            # number of pages.
            "total_count": articles.count(),
            "page": page,
            "per_page": per_page,
            # Serialize only the requested page, not every match.
            "results": paginated_articles.to_public_json()
        }
    })
Пример #32
0
    def wx_get_article_by_id(self, post_id):
        """Return the message for post ``post_id``, using the cache if possible.

        A truthy value cached under ``wx_post_<post_id>`` is returned as
        is. Otherwise the article is looked up, wrapped as
        ``{'articles': [entry]}``, cached and returned.

        Returns:
            The message dict, or ``''`` when the article is not found.
        """
        k = 'wx_post_%s' % (str(post_id))
        cached = getMc(k)
        if cached:
            return cached

        found = Article.get_article_by_id_detail(post_id)
        if not found:
            return ''

        # The article is a mapping in KVDB mode and an object otherwise.
        field_names = ('slug', 'description', 'absolute_url')
        if MYSQL_TO_KVDB_SUPPORT:
            title, description, url = [found[name] for name in field_names]
        else:
            title, description, url = [getattr(found, name)
                                       for name in field_names]

        articles_msg = {
            'articles': [{
                'title': title,
                'description': description,
                'picUrl': WX_DEFAULT_PIC,
                'url': url
            }]
        }
        setMc(k, articles_msg)
        return articles_msg
Пример #33
0
    def _get_full_article(self, short_article):
        """Download ``short_article.url`` and build the full ``Article``.

        The page supplies the formatted text, the author (empty string when
        the author element is missing) and, when the page declares a
        Facebook app id, the Facebook comments for the URL.

        Returns:
            The ``Article``, or ``None`` when parsing raises
            ``AttributeError`` (logged as an invalid URL).
        """
        url = short_article.url
        page = BeautifulSoup(requests.get(url).content, 'html.parser')
        try:
            text = self.get_formatted_article(
                text=page.find('div', class_='itemFullText'),
                lead=page.find('h2', class_='itemSubtitle'))

            author_tag = page.find('span', class_='itemAuthor')
            if author_tag is not None:
                # Keep only what follows the 'Piše:' label.
                author = author_tag.text.split('Piše:')[-1].strip()
            else:
                author = ""

            app_id_tag = page.find('meta', {'property': 'fb:app_id'})
            if app_id_tag is None:
                comments = []
            else:
                app_id = app_id_tag['content']
                # Host part of the generic URL, e.g. 'example.com'.
                domain = self._generic_url.split('https://')[1].split('/')[0]
                comments = self._get_facebook_comments(url=url,
                                                       facebook_id=app_id,
                                                       domain=domain)

            if len(comments) > 0:
                logging.info('Total comments: %d' % len(comments))
            return Article(short_article, text, author, comments)
        except AttributeError:
            logging.error("Invalid URL: %s" % url)
            return None
Пример #34
0
def watcher():
    """Poll the inbox once for an "[article]" e-mail and store it.

    Looks at the last item of ``inbox.Items``. When its subject contains
    "[article]" (case-insensitively) and no ``Article`` with the same sender
    name and URL slug exists yet, a new ``Article`` is added and committed.
    A failed commit is rolled back so the shared session stays usable for
    the next poll.

    Any ``Exception`` is printed rather than raised. The function sleeps
    10 seconds before returning, so callers can simply invoke it in a loop.
    """
    try:
        print(">> Listening for new emails")
        message = inbox.Items.GetLast()
        subject = str.lower(message.subject)
        if "[article]" in subject:
            sender_name = str(message.Sender)
            slug = generate_slug(message.subject)

            # Skip messages that were already imported.
            existing = session.query(Article).filter(
                Article.name == sender_name,
                Article.urlslug == slug
            ).first()
            if existing is not None:
                print("   - Found existing article")
            else:
                new_article = Article(
                    # NOTE(review): the title comes from the lower-cased
                    # subject, so the original capitalisation is lost —
                    # confirm this is intended.
                    title=subject.split("[article]")[1].strip(),
                    name=sender_name,
                    body=message.HTMLBody,
                    alias=get_alias(message.Sender.GetExchangeUser().PrimarySmtpAddress),
                    time=dateutil.parser.parse(str(message.SentOn)),
                    urlslug=slug
                )
                try:
                    session.add(new_article)
                    session.commit()
                except Exception:
                    # Without a rollback the session rejects all later work.
                    session.rollback()
                    raise
    except Exception as e:
        print(e)
    # Check for email every 10 seconds, whether or not this poll succeeded.
    time.sleep(10)
Пример #35
0
    def wx_search_article(self, k):
        """Look up an article by keyword and wrap it as a news-message dict.

        Returns ``{'articles': [...]}`` holding one entry (title, description,
        picture URL, URL) for the article found by
        ``Article.get_article_by_keyword``, or ``''`` when nothing is found.
        """
        found = Article.get_article_by_keyword(k)
        if not found:
            return ''

        # With MYSQL_TO_KVDB_SUPPORT the record is indexed by key; otherwise
        # its fields are read as attributes.
        if MYSQL_TO_KVDB_SUPPORT:
            title = found['slug']
            description = found['description']
            url = found['absolute_url']
        else:
            title = found.slug
            description = found.description
            url = found.absolute_url

        entry_total = 1
        entries = [
            {
                'title': title,
                'description': description,
                'picUrl': WX_DEFAULT_PIC,
                'url': url,
            }
            for _ in range(0, entry_total)
        ]
        return {'articles': entries}
Пример #36
0
	def get_response_article(self, keyword):
		"""Build a news-message payload for the article matching ``keyword``.

		Returns ``{'articles': [...]}`` with a single entry, or ``None`` when
		``Article.get_article_by_keyword`` finds nothing.
		"""
		# Query the database for an article matching the keyword.
		match = Article.get_article_by_keyword(str(keyword))
		if not match:
			return None

		# Only one article is used for now; several may be possible later.
		entry = {
			'title': match.slug,
			'description': match.description,
			'picUrl': PIC_URL,
			'url': match.absolute_url,
		}
		# Assemble and return the picture-and-text message.
		return {'articles': [entry]}
Пример #37
0
	def get_response_article_by_id(self, post_id):
		"""Build a news-message payload for the article with id ``post_id``.

		Returns ``{'articles': [...]}`` with a single entry, or ``None`` when
		``Article.get_article_by_id_detail`` finds nothing.
		"""
		# Fetch the article from the database; post_id is the article id.
		found = Article.get_article_by_id_detail(post_id)
		if found:
			title, description = found.slug, found.description
			picture, link = PIC_URL, found.absolute_url
			# Assemble the picture-and-text message with its single entry.
			payload = {'articles': []}
			payload['articles'].append({
				'title': title,
				'description': description,
				'picUrl': picture,
				'url': link,
			})
			return payload
		return None
Пример #38
0
 def edit_view(self):
     """Render the editor page for the article named by the ``id`` query arg.

     Passes the first ``Article`` matching that id (as returned by
     ``.first()``) and all classifications ordered by name to ``write.html``.
     """
     wanted_id = request.args.get('id')
     current_article = Article.objects(id=wanted_id).first()
     ordered_classes = Classification.objects.order_by('+name')
     return self.render(
         'write.html',
         classifications=ordered_classes,
         article=current_article,
     )
Пример #39
0
    def _get_full_article(self, short_article):
        """Download an article page and turn it into a full ``Article``.

        Fetches ``short_article.url`` and extracts the formatted text, the
        author (empty string when the author meta tag or its ``content`` is
        missing) and the comments belonging to the page's comment form.

        Returns the new ``Article``. When parsing raises ``AttributeError``
        the URL is appended to ``log/politika_errors.txt``, an error is logged
        and ``None`` is returned.
        """
        url = short_article.url
        response = requests.get(url)
        soup = BeautifulSoup(response.content, 'html.parser')
        try:
            text = self.get_formatted_article(
                text=soup.find('div', class_='article-content mt3 mb3'),
                lead=soup.find('div', class_='h4 mt0 mb2 regular roboto-slab'))

            # A page without an author <meta> is still a valid article, so
            # fall back to "" instead of letting the lookup raise.
            author_tag = soup.find('meta', attrs={'name': 'author'})
            author = author_tag.get('content', "") if author_tag is not None else ""

            # The comment form carries the key used to fetch the comments.
            comment_form = soup.find('form', class_='clearfix mxn1 comment-form')
            if comment_form is not None:
                comments = self._get_comments(comment_form['data-foreign-key'])
            else:
                comments = []
                logging.warning("Foreign ID is None.")

            # Compare against the comment counter shown on the page.
            total_comments = int(soup.find('a', class_='px1 light-blue').text)
            if total_comments != len(comments) and len(comments) > 0:
                logging.warning("Scraped wrong number of comments")
            return Article(short_article, text, author, comments)
        except AttributeError:
            with open(r'log/politika_errors.txt', 'a') as f:
                f.write("%s\n" % url)
            logging.error("Invalid URL: %s" % url)
        return None
Пример #40
0
    def wx_get_article_by_id(self, post_id):
        """Return the news-message dict for one article, using the cache.

        The result is cached under ``wx_post_<post_id>`` through
        ``getMc``/``setMc``. Returns ``{'articles': [...]}`` with a single
        entry, or ``''`` when the article cannot be found.
        """
        cache_key = 'wx_post_%s' % (str(post_id))
        cached = getMc(cache_key)
        if cached:
            return cached

        record = Article.get_article_by_id_detail(post_id)
        if not record:
            return ''

        # With MYSQL_TO_KVDB_SUPPORT the record is indexed by key; otherwise
        # its fields are read as attributes.
        if MYSQL_TO_KVDB_SUPPORT:
            title = record['slug']
            description = record['description']
            url = record['absolute_url']
        else:
            title = record.slug
            description = record.description
            url = record.absolute_url

        entry = {
            'title': title,
            'description': description,
            'picUrl': WX_DEFAULT_PIC,
            'url': url,
        }
        payload = {'articles': [entry]}
        setMc(cache_key, payload)
        return payload
Пример #41
0
    def _get_full_article(self, short_article):
        """Download an article page and turn it into a full ``Article``.

        Pages whose body contains the marker ``article-lock`` are treated as
        pay-walled: an error is logged and ``None`` is returned. Otherwise the
        formatted text, the author (empty string when the source element is
        absent) and the comments from ``self._get_comments()`` are collected.

        Returns the new ``Article``, or ``None`` when the page is pay-walled
        or parsing raised ``AttributeError`` (logged as an invalid URL).
        """
        url = short_article.url
        response = requests.get(url)
        soup = BeautifulSoup(response.content, 'html.parser')

        # Search the raw bytes: the marker is plain ASCII, so this matches
        # exactly what a UTF-8 decode would, but cannot fail on pages that
        # contain invalid byte sequences.
        if b"article-lock" in response.content:
            logging.error("Pay-wall: %s" % url)
            return None

        try:
            article_text = soup.find('div', class_='article-body article-wrap')
            # Prefer the nested <article> element when the wrapper has one.
            inner_article = article_text.find('article')
            if inner_article is not None:
                article_text = inner_article
            text = self.get_formatted_article(text=article_text,
                                              lead=soup.find('p',
                                                             class_='lead'))
            author = soup.find('div', class_='article-source')
            if author is None:
                author = ""
            else:
                author = author.text

            comments = self._get_comments()
            full_article = Article(short_article, text, author, comments)
            return full_article
        except AttributeError:
            logging.error("Invalid URL: %s" % url)
        return None
Пример #42
0
    def _get_full_article(self, short_article: ShortArticle):
        """Download an article page and turn it into a full ``Article``.

        Fetches ``short_article.url`` and extracts the formatted text, the
        author (empty string when the author element is absent) and, when the
        page carries a ``fb:app_id`` meta tag, its Facebook comments. Pages
        without that tag get an empty comment list.

        Returns the new ``Article``, or ``None`` when parsing raised
        ``AttributeError`` (logged as an invalid URL).
        """
        url = short_article.url
        response = requests.get(url)
        soup = BeautifulSoup(response.content, 'html.parser')

        try:
            text = self.get_formatted_article(
                text=soup.find(
                    'div', class_='article-text article-video-scroll clearfix'),
                lead=soup.find('p', {'itemprop': 'description'}))
            author = soup.find('span', class_='inline-flex items-center')
            if author is None:
                author = ""
            else:
                author = author.text.strip()

            # soup.find() returns None when the tag is missing; subscripting
            # that would raise TypeError, which the handler below does not
            # catch, so check first.
            facebook_meta = soup.find('meta', {'property': 'fb:app_id'})
            if facebook_meta is not None:
                facebook_id = facebook_meta['content']
                # Host part of the site's generic URL, e.g. "example.com".
                domain = self._generic_url.split('https://')[1].split('/')[0]
                comments = self._get_facebook_comments(url=url,
                                                       facebook_id=facebook_id,
                                                       domain=domain)
            else:
                comments = []

            full_article = Article(short_article, text, author, comments)
            return full_article
        except AttributeError:
            logging.error("Invalid URL: %s" % url)
        return None
Пример #43
0
 def get(self, id = ''):
     """Redirect an article id to its topic URL.

     Issues a 301 redirect to ``<BASE_URL>/topic/<id>/<title>`` when the
     article exists, and a default redirect to ``BASE_URL`` otherwise.
     """
     obj = Article.get_article_by_id_simple(id)
     if not obj:
         self.redirect(BASE_URL)
         return
     target = '%s/topic/%d/%s'% (BASE_URL, obj.id, obj.title)
     self.redirect(target, 301)
     return
Пример #44
0
def put_article():
    '''
    Add new article for a user.

    The user is located from the ``x-koala-username`` and ``x-koala-key``
    request headers. The JSON body must be an object with a ``url``; a URL
    that only validates with an ``http://`` prefix is stored with that prefix.
    ``title`` is optional and defaults to the URL.

    Aborts with 400 when the body is missing or not an object, when ``url`` is
    absent or empty, or when the URL does not validate. On success returns
    ``{'id': <new article id>}`` with status 201.
    '''
    username = request.headers.get('x-koala-username')
    apikey = request.headers.get('x-koala-key')
    user = locate_user(username, apikey)

    reqjson = request.get_json()
    # Reject malformed bodies explicitly instead of failing later with a
    # TypeError/KeyError that would surface as a 500.
    if not isinstance(reqjson, dict) or not reqjson.get('url'):
        logging.info("Bad request body: 'url' is required")
        abort(400)

    url = reqjson['url']
    if not validators.url(url):
        # try again but with http://
        prefixed = 'http://' + url
        if not validators.url(prefixed):
            logging.info("Bad URL: %s", url)
            abort(400)
        url = prefixed

    article = Article.create(
        title=reqjson.get('title', url),
        url=url,
        date=str(datetime.now()),
        read=False,
        favorite=False,
        owner=user.id,
    )

    return jsonify({'id': article.id}), 201
Пример #45
0
    def post(self):
        """Create an article from the posted form fields and reply with JSON.

        Reads ``class_id``, ``title``, ``image_url``, ``note`` and ``content``
        from the request body (each defaults to ``None``), stamps the record
        with the current time and the fixed author name, and stores it.

        Finishes the request with ``{'code': 0, ...}`` on success or
        ``{'code': -1, ...}`` when storing raised an exception. The database
        session is closed in both cases.
        """
        field_names = ('class_id', 'title', 'image_url', 'note', 'content')
        data = {name: self.get_body_argument(name, None) for name in field_names}

        now = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        data.update({
            'author': 'LiJiaF',
            'create_date': now,
            'write_date': now,
        })

        log.info('添加文章:' + json.dumps(data))

        session = None
        try:
            session = DBSession()
            session.add(Article(**data))
            session.commit()
        except Exception as e:
            log.error(e)
            return self.finish(json.dumps({'code': -1, 'msg': '添加失败'}))
        finally:
            # Release the session even when add/commit failed; otherwise it
            # would stay open after an error.
            if session is not None:
                session.close()

        return self.finish(json.dumps({'code': 0, 'msg': '添加成功'}))
Пример #46
0
def combine_articles(group_id, articles):
    """Merge several records of one article into a single ``Article``.

    A one-element list is returned as its only element, unchanged. Otherwise
    the merged record gets:

    * ``keywords`` - the union of all comma-separated keywords, stripped;
    * ``title`` - the title with the highest ``count * len(title) ** 2``
      (ties go to the greater title string);
    * ``pmc`` / ``pmid`` / ``doi`` - for each, the value picked by ``max``
      over the swapped (value, count) pairs; the field is omitted when no
      article has it.

    ``group_id`` is accepted but not used. An empty ``articles`` list raises
    ``ValueError`` from ``max``.
    """
    if len(articles) == 1:
        return articles[0]

    # Constructor arguments for the merged Article, filled in step by step.
    new_article = {}

    keywords = set()
    for article in articles:
        keywords.update(kw.strip() for kw in article.keywords.split(','))
    new_article['keywords'] = ','.join(keywords)

    titles = defaultdict(int)
    for article in articles:
        titles[article.title] += 1

    # Favour titles that are both frequent and long.
    _, title = max(
        (count * len(title)**2, title) for title, count in titles.items())
    new_article['title'] = title

    for name in 'pmc pmid doi'.split():
        identifiers = defaultdict(int)
        for article in articles:
            value = getattr(article, name)
            if value:
                identifiers[value] += 1
        if identifiers:
            # `swap` is defined elsewhere; presumably it turns
            # (identifier, count) into (count, identifier) - verify.
            _, identifier = max(map(swap, identifiers.items()))
            new_article[name] = identifier

    return Article(**new_article)
Пример #47
0
def load_articles():
    """Load articles from seed/seed-article-2.txt into database.

    Every existing ``Article`` row is deleted first. Each remaining line of
    the seed file must hold eight ``|``-separated fields; blank lines and
    lines starting with ``--`` are skipped. All rows are committed together
    at the end.
    """

    print("Articles")

    # Delete all rows in table, so if we need to run this a second time,
    # we won't be trying to add duplicate articles
    Article.query.delete()

    # Read seed article file and insert data; the context manager closes the
    # file even if a row fails to parse.
    with open("seed/seed-article-2.txt") as seed_file:
        for row in seed_file:
            row = row.rstrip()

            # Skip blank lines and the metadata lines in the file
            if not row or row.startswith('--'):
                continue

            # NOTE(review): the sell price column is read but never passed
            # to Article - confirm whether it should be stored.
            (article_id, description, image, purchase_price, times_worn,
             _sell_price, user_id, category_id) = row.split("|")

            # Prevent passing an empty string into field expecting float
            if not purchase_price:
                purchase_price = None

            article = Article(
                article_id=int(article_id),
                description=description,
                image=image,
                purchase_price=purchase_price,
                times_worn=times_worn,
                user_id=int(user_id),
                category_id=int(category_id),
            )
            db.session.add(article)
    db.session.commit()
Пример #48
0
    def get_response_article(self, keyword):
        """Build a news-message payload for the article matching ``keyword``.

        Returns ``{'articles': [...]}`` with a single entry, or ``None`` when
        ``Article.get_article_by_keyword`` finds nothing.
        """
        # Query the database for an article matching the keyword.
        hit = Article.get_article_by_keyword(str(keyword))
        if not hit:
            return None

        # Only one article is used for now; several may be possible later.
        title, description = hit.slug, hit.description
        picture, link = PIC_URL, hit.absolute_url

        # Assemble and return the picture-and-text message.
        return {
            'articles': [
                {
                    'title': title,
                    'description': description,
                    'picUrl': picture,
                    'url': link,
                },
            ],
        }
Пример #49
0
    def get_response_article_by_id(self, post_id):
        """Build a news-message payload for the article with id ``post_id``.

        Returns ``{'articles': [...]}`` with a single entry, or ``None`` when
        ``Article.get_article_by_id_detail`` finds nothing.
        """
        # Fetch the article from the database; post_id is the article id.
        record = Article.get_article_by_id_detail(post_id)
        if not record:
            return None

        entry = dict([
            ('title', record.slug),
            ('description', record.description),
            ('picUrl', PIC_URL),
            ('url', record.absolute_url),
        ])
        # Assemble the picture-and-text message around the single entry.
        response = {'articles': []}
        response['articles'].append(entry)
        return response
Пример #50
0
    def _get_full_article(self, short_article):
        """Download an article page and turn it into a full ``Article``.

        The request is repeated every 5 seconds for as long as the server
        answers with HTTP 429. The formatted text, the author (empty string
        when the author element is absent) and, when the page carries a
        ``fb:app_id`` meta tag, its Facebook comments are then extracted.
        Pages without that tag get an empty comment list.

        Returns the new ``Article``, or ``None`` when parsing raised
        ``AttributeError`` (logged as an invalid URL).
        """
        url = short_article.url
        response = requests.get(url)

        # NOTE(review): this retries without limit while the server keeps
        # answering 429 - consider capping the number of attempts.
        while response.status_code == 429:
            time.sleep(5)
            print('Retry')
            response = requests.get(url)

        soup = BeautifulSoup(response.content, 'html.parser')
        try:
            text = self.get_formatted_article(
                text=soup.find('div', class_='itemFullText'),
                lead=soup.find('h2', class_='itemSubTitle'))
            author = soup.find('div', class_='col-authorname')
            if author is None:
                author = ""
            else:
                author = author.text

            # soup.find() returns None when the tag is missing; subscripting
            # that would raise TypeError, which the handler below does not
            # catch, so check first.
            facebook_meta = soup.find('meta', {'property': 'fb:app_id'})
            if facebook_meta is not None:
                facebook_id = facebook_meta['content']
                # Host part of the site's generic URL, e.g. "example.com".
                domain = self._generic_url.split('https://')[1].split('/')[0]
                comments = self._get_facebook_comments(url=url,
                                                       facebook_id=facebook_id,
                                                       domain=domain)
            else:
                comments = []

            if len(comments) > 0:
                logging.info('Total comments: %d' % len(comments))
            full_article = Article(short_article, text, author, comments)
            return full_article
        except AttributeError:
            logging.error("Invalid URL: %s" % url)
        return None
Пример #51
0
 def get(self, id):
     """Render the edit form for one article.

     Renders ``error.html`` with a 404 message when ``Article.get`` returns
     ``None``; otherwise renders ``editArticle.html`` with the article and its
     labels joined as ``[label] [label] ...``.
     """
     article = Article.get(self.db, id)
     if article is not None:
         bracketed = ('[' + item['detail'] + ']' for item in article['labels'])
         self.render('editArticle.html', article=article, labels=' '.join(bracketed))
         return
     self.render('error.html', error='404: Page Not Found', home_title=options.home_title)
Пример #52
0
 def get(self, pageId):
     """Render one page of the article index, five articles per page.

     ``pageId`` is converted with ``int`` and handed to the paginator; the
     template also receives the grouped labels, the admin flag and the site
     options (home title, user, photo).
     """
     paginator = Paginator(Article.all(self.db), 5)
     current_page = paginator.page(int(pageId))
     admin_flag = self.isAdmin()
     grouped_labels = Label.group(self.db)
     self.render(
         'index.html',
         articles=current_page.object_list,
         label_list=grouped_labels,
         isAdmin=admin_flag,
         page=current_page,
         home_title=options.home_title,
         user=options.user,
         photo=options.photo,
     )
Пример #53
0
def get_articles(count=10, force=False):
    """Fetch up to ``count`` articles and stamp them with the current time.

    Selects articles whose ``ml_service_time`` is set and not later than one
    hour ago (or not later than now when ``force`` is true), ordered by
    ``ml_service_time`` descending. Each fetched article gets a fresh
    ``ml_service_time`` and all of them are written back in one
    ``ndb.put_multi`` call before being returned.
    """
    cutoff = datetime.datetime.now()
    if not force:
        cutoff = cutoff - datetime.timedelta(hours=1)

    # `!= None` is part of the query expression passed to Article.query and
    # must not be rewritten as `is not None`.
    query = Article.query(
        Article.ml_service_time <= cutoff,
        Article.ml_service_time != None,
    ).order(-Article.ml_service_time)

    fetched = query.fetch(count)
    for entity in fetched:
        entity.ml_service_time = datetime.datetime.now()
    ndb.put_multi(fetched)
    return fetched
Пример #54
0
    def post(self):
        """Create a new article from the submitted form and reply with JSON.

        ``cat``, ``tit`` and ``con`` are read without a default; ``tag``,
        ``clo`` and ``password`` are optional. Tags are stripped,
        de-duplicated and stored comma-separated. ``clo`` accepts
        ``true``/``false`` (any case) as well as ``1``/``0``.

        The JSON reply carries ``status`` 200 and a link to the new post on
        success, or ``status`` 500 with an error message when the input could
        not be read or the article could not be stored. On success the post
        id is also registered with its category, archive and tags, the
        counters are updated and the affected cache entries are cleared.
        """
        self.set_header('Content-Type','application/json')
        rspd = {'status': 201, 'msg':'ok'}

        try:
            # '0'/'1' are accepted as well: '0' is the default used below for
            # a missing "clo", so it must map to a value.
            tf = {'true': 1, 'false': 0, '1': 1, '0': 0}
            timestamp = int(time())
            post_dic = {
                'category': self.get_argument("cat"),
                'title': self.get_argument("tit"),
                'content': self.get_argument("con"),
                # NOTE(review): as written this replace() swaps a comma for
                # the same comma; presumably a full-width comma was meant -
                # verify against the original source.
                'tags': self.get_argument("tag",'').replace(u',',','),
                'closecomment': self.get_argument("clo",'0'),
                'password': self.get_argument("password",''),
                'add_time': timestamp,
                'edit_time': timestamp,
                'archive': genArchive(),
            }
            if post_dic['tags']:
                tagslist = set([x.strip() for x in post_dic['tags'].split(',')])
                tagslist.discard('')
                # Always store the cleaned list, so input made only of
                # separators becomes '' rather than producing empty tags.
                post_dic['tags'] = ','.join(tagslist)
            post_dic['closecomment'] = tf[post_dic['closecomment'].lower()]
        except Exception:
            rspd['status'] = 500
            rspd['msg'] = '错误: 注意必填的三项'
            self.write(json.dumps(rspd))
            return

        postid = Article.add_new_article(post_dic)
        if postid:
            # Start the page-view counter for the new post at zero.
            keyname = 'pv_%s' % (str(postid))
            set_count(keyname,0,0)

            Category.add_postid_to_cat(post_dic['category'], str(postid))
            Archive.add_postid_to_archive(genArchive(), str(postid))
            increment('Totalblog')
            if post_dic['tags']:
                Tag.add_postid_to_tags(post_dic['tags'].split(','), str(postid))

            rspd['status'] = 200
            rspd['msg'] = '完成: 你已经成功添加了一篇文章 <a href="/t/%s" target="_blank">查看</a>' % str(postid)
            clear_cache_by_pathlist(['/', 'cat:%s' % quoted_string(post_dic['category'])])

            if not debug:
                add_task('default', '/task/pingrpctask')

            self.write(json.dumps(rspd))
            return
        else:
            rspd['status'] = 500
            rspd['msg'] = '错误: 未知错误,请尝试重新提交'
            self.write(json.dumps(rspd))
            return
Пример #55
0
 def get(self, id=""):
     """Show the admin post editor, pre-filled when ``id`` is given.

     With an empty ``id`` the template receives ``obj=None``.
     """
     obj = Article.get_article_by_id_edit(id) if id else None
     context = {
         "title": "编辑文章",
         "cats": Category.get_all_cat_name(),
         "tags": Tag.get_all_tag_name(),
         "obj": obj,
     }
     self.echo("admin_editpost.html", context, layout="_layout_admin.html")
Пример #56
0
    def post(self, id):
        """Save an edited article and replace its labels.

        Reads ``title``, ``content`` (Markdown) and ``labels`` from the
        request. Labels are the bracketed segments of the ``labels`` argument,
        e.g. ``[python] [web]``; the brackets are removed and the text
        stripped. The Markdown is rendered with the ``codehilite`` extension.

        Redirects permanently to ``/article/<id>`` on success and renders
        ``error.html`` when updating the article or its labels raises.
        """
        title = self.get_argument('title')
        content_md = self.get_argument('content')
        pattern = r'\[[^\[\]]+\]'
        labels = re.findall(pattern, self.get_argument('labels'))
        # Extensions are passed by keyword: Python-Markdown 3 no longer
        # accepts them positionally, and the keyword also works on 2.x.
        content_html = markdown.markdown(content_md, extensions=['codehilite'])

        try:
            Article.update(self.db, id, title, content_md, content_html)
            Label.deleteAll(self.db, id)
            for label in labels:
                detail = label[1:-1].strip()
                Label.create(self.db, id, detail)

            self.redirect('/article/' + id, permanent=True)
        except Exception:
            error = "The post data invalid"
            self.render('error.html', error=error, home_title=options.home_title)
Пример #57
0
    def get(self):
        """Render the first page of the blog index.

        Redirects to ``/install`` when the homepage posts cannot be loaded.
        Otherwise renders ``index.html`` with the posts, the ids of the first
        and last post on the page (empty strings when there are none), the
        page count, the total blog counter and the sidebar data, writes the
        output and returns it.
        """
        try:
            objs = Article.get_post_for_homepage()
        except Exception:
            self.redirect('/install')
            return

        if objs:
            # With MYSQL_TO_KVDB_SUPPORT posts are indexed by key; otherwise
            # their fields are read as attributes.
            if MYSQL_TO_KVDB_SUPPORT:
                fromid = objs[0]['id']
                endid = objs[-1]['id']
            else:
                fromid = objs[0].id
                endid = objs[-1].id
        else:
            fromid = endid = ''

        # Read the counter unconditionally so it is defined even when the
        # blog has no posts yet.
        totalblog = get_count('Totalblog',NUM_SHARDS,0)

        # Number of pages, rounded up; floor division keeps it an integer.
        allpost = Article.count_all_post()
        allpage = allpost // EACH_PAGE_POST_NUM
        if allpost % EACH_PAGE_POST_NUM:
            allpage += 1

        output = self.render('index.html', {
            'title': "%s - %s"%(getAttr('SITE_TITLE'),getAttr('SITE_SUB_TITLE')),
            'keywords':getAttr('KEYWORDS'),
            'description':getAttr('SITE_DECR'),
            'objs': objs,
            'cats': Category.get_all_cat_name(),
            'tags': Tag.get_hot_tag_name(),
            'archives': Archive.get_all_archive_name(),
            'page': 1,
            'allpage': allpage,
            'listtype': 'index',
            'fromid': fromid,
            'endid': endid,
            'comments': Comment.get_recent_comments(),
            'links':Link.get_all_links(),
            'isauthor':self.isAuthor(),
            'Totalblog':totalblog,
        },layout='_layout.html')
        self.write(output)
        return output
Пример #58
0
    def get(self, id = '', title = ''):
        """Render a single post page.

        Redirects to ``BASE_URL`` when the post does not exist, and with a 301
        to the post's own URL when the title in the path differs from its
        slug. Password-protected posts are rendered with the ``page_pw.html``
        template until the ``rp<id>`` cookie equals the post password.

        Each call increments the post's ``pv_<id>`` counter and sets a one-day
        cookie of the same name. Returns the rendered output, except that
        ``None`` is returned for a password-protected post when ``THEME`` is
        ``'default'`` and ``BLOG_PSW_SUPPORT`` is off.
        """
        obj = Article.get_article_by_id_detail(id)
        if not obj:
            self.redirect(BASE_URL)
            return

        # Redirect to the right title.
        try:
            title = unquote(title).decode('utf-8')
        except:
            pass
        if title != obj.slug:
            self.redirect(obj.absolute_url, 301)
            return

        # Pick the password template unless the reader's cookie matches.
        tmpl = ''
        if obj.password:
            if THEME == 'default':
                if self.get_cookie("rp%s" % id, '') != obj.password:
                    tmpl = '_pw'
            elif BLOG_PSW_SUPPORT:
                rp = self.get_cookie("rp%s" % id, '')
                # NOTE(review): this prints the reader's password cookie.
                print('rp===%s' % (str(rp)))
                if rp != obj.password:
                    tmpl = '_pw'

        # Count the page view and mark the visitor for one day.
        keyname = 'pv_%s' % (str(id))
        increment(keyname)
        self.set_cookie(keyname, '1', path = "/", expires_days =1)
        self.set_header("Last-Modified", obj.last_modified)

        context = {
            'title': "%s - %s"%(obj.title, getAttr('SITE_TITLE')),
            'keywords':obj.keywords,
            'description':obj.description,
            'obj': obj,
            'cobjs': obj.coms,
            'postdetail': 'postdetail',
            'cats': Category.get_all_cat_name(),
            'tags': Tag.get_hot_tag_name(),
            'archives': Archive.get_all_archive_name(),
            'page': 1,
            'allpage': 10,
            'comments': Comment.get_recent_comments(),
            'links':Link.get_all_links(),
            'isauthor':self.isAuthor(),
            'hits':get_count(keyname),
            'Totalblog':get_count('Totalblog',NUM_SHARDS,0),
            'listtype': '',
        }
        output = self.render('page%s.html'%tmpl, context, layout='_layout.html')
        self.write(output)

        # Only this combination returns nothing; every other case returns
        # the rendered page.
        if obj.password and not BLOG_PSW_SUPPORT and THEME == 'default':
            return
        return output
Пример #59
0
 def get(self, id = ''):
     """Redirect an article id to its topic URL.

     Issues a 301 redirect to ``<BASE_URL>/topic/<id>/<title>`` when the
     article exists, and a default redirect to ``BASE_URL`` otherwise.
     """
     obj = Article.get_article_by_id_simple(id)
     if obj:
         # With MYSQL_TO_KVDB_SUPPORT the record is indexed by key; otherwise
         # its fields are read as attributes.
         if MYSQL_TO_KVDB_SUPPORT:
             target = '%s/topic/%s/%s'% (BASE_URL, obj['id'], obj['title'])
         else:
             target = '%s/topic/%d/%s'% (BASE_URL, obj.id, obj.title)
         self.redirect(target, 301)
         return
     else:
         self.redirect(BASE_URL)