示例#1
0
    def setup_test(self):
        '''
        Create the sample articles and an empty articles storage.

        Sets self.article_0 .. self.article_4 (ids 0-4) and self.articles.
        Articles 0 and 1 are dated 2016-09-22, articles 2-4 are dated
        2016-09-23; article 1 is the only one tagged tag4/tag5/tag6.
        '''

        # One (date, tags) row per article; the row position is the article id.
        # Every row owns its own tag list, so no list is shared between articles.
        rows = (
            ("2016-09-22", ['tag0', 'tag1', 'tag2', 'tag3']),
            ("2016-09-22", ['tag0', 'tag4', 'tag5', 'tag6']),
            ("2016-09-23", ['tag0', 'tag1', 'tag2', 'tag3']),
            ("2016-09-23", ['tag0', 'tag1', 'tag2', 'tag3']),
            ("2016-09-23", ['tag0', 'tag1', 'tag2', 'tag3']),
        )

        for number, (date, tags) in enumerate(rows):
            sample = Article(number, "Some title %d" % number, date,
                             "Some body text %d" % number, tags)
            setattr(self, "article_%d" % number, sample)

        self.articles = Articles()
示例#2
0
文件: views.py 项目: retorick/cimensa
def article(request, article_id):
    """Render articles/article.html for the article looked up by ``article_id``."""
    found = Articles().get_by_id(article_id)
    context = RequestContext(request, {
        'page_title': 'Member Articles',
        'article': found,
    })
    return render_to_response('articles/article.html', context)
示例#3
0
    def run(self):
        """
        Crawl the article list page by page and record read / like counts.

        Each pass requests one page of the message list (offset
        ``self.begin_page_index * 10``), queries the read and like numbers of
        every headline article and of each of its secondary articles, passes
        the collected comma-separated lines to ``self.write_file`` and then
        calls ``self.is_exit_or_continue()``.

        The method never returns on its own: it keeps going until one of the
        calls it makes raises or terminates the process.
        """
        # Loop rather than letting run() call itself: the former tail
        # recursion added one stack frame per page and would end in
        # RecursionError on a long crawl.
        while True:
            # Paging URL.
            # NOTE(review): the tail "&x5=0f=json" looks like it is missing an
            # "&" before "f=json"; left unchanged - confirm against the API.
            page_url = "https://mp.weixin.qq.com/mp/profile_ext?action=getmsg&__biz={}&f=json&offset={}&count=10&is_ok=1&scene=&uin=777&key=777&pass_ticket={}&wxtoken=&appmsg_token=" + self.appmsg_token + "&x5=0f=json"
            # Turn the cookie string into a dict
            wx_dict = utils.str_to_dict(self.cookie,
                                        join_symbol='; ',
                                        split_symbol='=')
            # Request the page
            # NOTE(review): verify=False disables TLS certificate checks.
            response = requests.get(page_url.format(self.biz,
                                                    self.begin_page_index * 10,
                                                    wx_dict['pass_ticket']),
                                    headers=self.headers,
                                    verify=False)
            # Turn the article list into a dict
            articles = self.article_list(response.text)
            info = Articles(self.appmsg_token, self.cookie)

            result = []
            for a in articles['list']:
                # Entries without (or with an empty) 'app_msg_ext_info'
                # carry no article data; they only advance the counter.
                ext_info = a.get('app_msg_ext_info')
                if ext_info:
                    # Headline article of this entry
                    if ext_info.get('content_url', '') != '':
                        read_num, old_like_num, like_num = info.read_like_nums(
                            ext_info.get('content_url'))
                        result.append(
                            str(self.num) + '条,' +
                            ext_info.get('title') + ',' +
                            str(read_num) + ',' + str(old_like_num) + ',' +
                            str(like_num))
                        time.sleep(2)

                    # Secondary articles published together with the headline
                    for m in ext_info.get('multi_app_msg_item_list', []):
                        read_num, old_like_num, like_num = info.read_like_nums(
                            m.get('content_url'))
                        result.append(
                            str(self.num) + '条的副条,' + m.get('title') + ',' +
                            str(read_num) + ',' + str(old_like_num) + ',' +
                            str(like_num))

                        time.sleep(3)

                self.num = self.num + 1

            self.write_file(result)

            self.is_exit_or_continue()
示例#4
0
class MyResource(Resource):
    '''
    Common base class for the article resources. It provides:
     - ``storage``: a single Articles instance held as a class attribute.
     - ``parser``: a request parser declaring the id, title, date, body and
       tags arguments that are read from incoming requests.
     - Abort helpers that every child class can call.
    '''

    storage = Articles()

    parser = reqparse.RequestParser()
    parser.add_argument('id', type=int)
    parser.add_argument('title', type=str)
    parser.add_argument('date', type=str)
    parser.add_argument('body', type=str)
    # action='append' is used so that 'tags' can be given several times.
    parser.add_argument('tags', type=str, action='append')

    def abort_if_article_doesnt_exist(self, id):
        '''Abort with HTTP 404 when the storage has no article with the given id.
        '''
        found = self.storage.article_exists(id)
        if found:
            return
        abort(404, message=f'Article {id} does not exist')

    def abort_if_article_aleady_exist(self, id):
        '''Abort with HTTP 404 when the storage already has an article with the given id.
        '''
        # NOTE(review): 404 for "already exists" is unusual (409 is the
        # conventional code) - confirm that clients rely on 404.
        found = self.storage.article_exists(id)
        if not found:
            return
        abort(404, message=f'Article {id} already exists')
示例#5
0
    def __init__(self, config):
        """
        Prepare the entire system's objects

        config - the configuration object from the click library; its
                 ``obj`` mapping provides the open YAML configuration
                 stream under "config" and the cache database file under
                 "database". The YAML document must contain "api_key".
        """
        config_stream = config.obj["config"]
        try:
            # safe_load instead of load: yaml.load() without a Loader can
            # build arbitrary Python objects from the file and is rejected
            # with a TypeError by PyYAML >= 6.
            yaml_config = yaml.safe_load(config_stream)
        finally:
            # Close the stream even when the YAML cannot be parsed.
            config_stream.close()
        self.__cache = Cache(db_file=config.obj["database"])
        self.__articles = Articles(key=yaml_config["api_key"], cache=self.__cache)
        self.__analyzer = Analyzer()
示例#6
0
class Manager:
    """
    Actual object to manage everything: it owns the cache, the articles
    access object and the analyzer, and forwards the work to them.
    """

    # reference to the entire data storage
    __cache = None

    # reference to object managing articles access and storage
    __articles = None

    # reference to the object doing the analysis
    __analyzer = None

    def __init__(self, config):
        """
        Prepare the entire system's objects

        config - the configuration object from the click library; its
                 ``obj`` mapping provides the open YAML configuration
                 stream under "config" and the cache database file under
                 "database". The YAML document must contain "api_key".
        """
        config_stream = config.obj["config"]
        try:
            # safe_load instead of load: yaml.load() without a Loader can
            # build arbitrary Python objects from the file and is rejected
            # with a TypeError by PyYAML >= 6.
            yaml_config = yaml.safe_load(config_stream)
        finally:
            # Close the stream even when the YAML cannot be parsed.
            config_stream.close()
        self.__cache = Cache(db_file=config.obj["database"])
        self.__articles = Articles(key=yaml_config["api_key"], cache=self.__cache)
        self.__analyzer = Analyzer()

    def perform_search(self, phrase, training_size=1000):
        """
        Perform the actual search by delegating to the articles object

        phrase - the phrase to search by
        training_size - the amount of articles to use and fetch

        returns the list of found articles
        """
        return self.__articles.perform_search(phrase, training_size)

    def analyze_results(self, article_list):
        """
        Print how many articles are used and hand them to the analyzer

        article_list - the articles to analyze (its length is printed)
        """
        print("Using %i articles" % (len(article_list),))
        self.__analyzer.process_data(article_list)

    def predict_result(self, date):
        """
        Make a prediction on a date

        date - a datetime to make a prediction on

        returns whatever the analyzer's predict() gives back for that date
        """
        return self.__analyzer.predict(date)
示例#7
0
 def __init__(self, path):
     """Create the Articles object for ``path`` and the operator tables.

     OPERATORS maps each accepted operator spelling to one symbol:
     'and', '&', '*' -> '&'; 'or', '|', '+' -> '|'; 'not', '~', '!' -> '~';
     parentheses map to themselves. PRIORITY assigns an integer to each
     of those symbols.
     """
     self.articles = Articles(path)

     and_op, or_op, not_op = '&', '|', '~'
     left_paren, right_paren = '(', ')'

     self.OPERATORS = {
         'and': and_op, '&': and_op, '*': and_op,
         'or': or_op, '|': or_op, '+': or_op,
         'not': not_op, '~': not_op, '!': not_op,
         '(': left_paren,
         ')': right_paren,
     }
     self.PRIORITY = {
         left_paren: 0,
         right_paren: 0,
         or_op: 1,
         and_op: 2,
         not_op: 3,
     }
示例#8
0
# Using falcon as it is much faster and flexible solution

import falcon
from os.path import join, isfile, dirname
import os
from articles import Articles
import json
from urllib import parse

# Directory that contains this module.
pwd = os.path.dirname(__file__)
template_dir = os.path.join(pwd)

# Default JSON data file, addressed relative to this module's directory.
DEFAULT_FILE = join(pwd, '../json-data/reut2-000.json')

# Module-wide Articles instance built from the default data file.
articles = Articles(DEFAULT_FILE)


class Healthy(object):
    """Health-check resource that answers GET with a fixed JSON status."""

    def on_get(self, req, resp):
        """Set HTTP 200 and the JSON body ``{"status": "ok"}`` on ``resp``."""
        payload = {'status': 'ok'}
        resp.status = falcon.HTTP_200
        resp.body = json.dumps(payload)


class ListArticles(object):
    def on_get(self, req, resp):
        try:
            """Handles GET requests"""
            query = dict(parse.parse_qsl(req.query_string))
            resp.status = falcon.HTTP_200  # This is the default status
            resp.body = json.dumps(articles.get_filtered_view(query),
                                   ensure_ascii=False)
        except KeyError:
示例#9
0
        args = 'min-o=%s_max-op=%s__window=%s_' % (str(
            FLAGS.min_occurrence), str(
                FLAGS.max_occurrence_percentage), str(FLAGS.skip_window))

        f = open(dirname + '/vec/' + args + str(dimension) + '.vec', 'w+')
        f.write('%d %d\n' % (len(w2v), dimension))
        for key, values in w2v.items():
            f.write('%s %s\n' %
                    (key, ' '.join(format(x, '.5f') for x in values)))

        f.close()


# Initialisation
# Builds the module-level lookup tables (word2id / id2word) from one of two
# sources, selected by FLAGS.method.
if FLAGS.method == 'articles':
    # Article-based source: the dictionary is built by the Articles object.
    document = Articles(params=FLAGS)
    document.build_dictionary()

    vocabulary_size = document.get_vocab_size()
    word2id = document.get_word2id()
    id2word = document.get_id2word()
    articles = document.get_articles()

    # NOTE(review): next_word_idx looks like a cursor consumed by the batch
    # helpers defined elsewhere - confirm before reordering these lines.
    next_word_idx = 0
    input_word, context_word = build_training_data()
    next_batch_articles(FLAGS.batch_size, FLAGS.skip_window)
else:
    # Any other method: the dictionary is built by a TextWords object.
    tw = TextWords()
    tw.build_dictionary()
    word2id = tw.get_word2id()
    id2word = tw.get_id2word()
示例#10
0
import sys
import requests

from flask import Flask, Blueprint, request, jsonify
from flask_cors import CORS
from articles import Articles
# Flask application object for this module.
app = Flask(__name__)
# Blueprint named 'articles'; its routes are served under the /articles prefix.
bp = Blueprint('articles', __name__, url_prefix='/articles')

# Module-wide Articles instance used by the route handlers below.
articles = Articles()


@bp.route("/", methods=["GET"])
def list():
    """Return the values of ``articles.list()`` as JSON under the 'articles' key."""
    stored = articles.list()
    print('articles:', stored)
    # Unpack instead of calling list(): in this module that name is this view.
    return jsonify({'articles': [*stored.values()]})


@bp.route("/", methods=["POST"])
def add():
    """Pass the request's JSON body to ``articles.add`` and return the result as JSON."""
    created = articles.add(request.get_json())
    print('added_article:', created)
    return jsonify(created)


@bp.route("/<article_id>", methods=["GET"])
def get(article_id):
    article = articles.get(article_id)
示例#11
0
文件: server.py 项目: Efecca/diploma
def articles():
    """Print the result of ``Articles().getArticles()`` and return it as JSON."""
    payload = Articles().getArticles()
    print(payload)
    return jsonify(payload)
示例#12
0
class TestArticlesStorage(unittest.TestCase):
    '''
    Tests for Articles module.

    Fixtures are created by setUp() and released by tearDown(), so the
    teardown also runs when a test fails. setup_test() / teardown_test()
    are kept as the actual implementations for callers that use them
    directly.
    '''

    def setUp(self):
        '''
        unittest hook: build the fixtures before every test.
        '''
        self.setup_test()

    def tearDown(self):
        '''
        unittest hook: release the storage after every test, pass or fail.
        '''
        self.teardown_test()

    def setup_test(self):
        '''
        Make ourselves a way to quickly setup articles storage.
        '''

        self.article_0 = Article(0, "Some title 0", "2016-09-22",
                                 "Some body text 0",
                                 ['tag0', 'tag1', 'tag2', 'tag3'])

        self.article_1 = Article(1, "Some title 1", "2016-09-22",
                                 "Some body text 1",
                                 ['tag0', 'tag4', 'tag5', 'tag6'])

        self.article_2 = Article(2, "Some title 2", "2016-09-23",
                                 "Some body text 2",
                                 ['tag0', 'tag1', 'tag2', 'tag3'])

        self.article_3 = Article(3, "Some title 3", "2016-09-23",
                                 "Some body text 3",
                                 ['tag0', 'tag1', 'tag2', 'tag3'])

        self.article_4 = Article(4, "Some title 4", "2016-09-23",
                                 "Some body text 4",
                                 ['tag0', 'tag1', 'tag2', 'tag3'])

        self.articles = Articles()

    def teardown_test(self):
        '''
        Drop the reference to the articles storage.
        '''
        self.articles = None

    def _add_all_articles(self):
        '''
        Add the five sample articles to the storage, in id order.
        '''
        for article in (self.article_0, self.article_1, self.article_2,
                        self.article_3, self.article_4):
            self.articles.add(article)

    def test_article_add(self):
        '''
        Test function Articles.add() when it successfully adds a new article.
        '''
        result = self.articles.add(self.article_0)
        compare(self.article_0, result)

    def test_article_add_existing(self):
        '''
        Test function Articles.add() throws exception when it fails to add a new article when article with such id already exists.
        '''
        self.articles.add(self.article_0)

        with self.assertRaises(Exception):
            self.articles.add(self.article_0)

    def test_get_all(self):
        '''
        Test function Articles.get_all
        '''
        self.articles.add(self.article_0)
        self.articles.add(self.article_1)

        expected = OrderedDict()
        expected[0] = self.article_0
        expected[1] = self.article_1

        result = self.articles.get_all()

        compare(expected, result)

    def test_get(self):
        '''
        Test that function Articles.get() returns correct item
        '''
        self.articles.add(self.article_0)

        result = self.articles.get(0)
        compare(result, self.article_0)

    def test_get_non_existing(self):
        '''
        Test that function Articles.get() throws exception when requested item is not there
        '''
        self.articles.add(self.article_0)

        with self.assertRaises(Exception):
            self.articles.get(1)

    def test_get_article_ids(self):
        '''
        Test that function Articles.get_article_ids(date) returns correct list of article ids for a given date.
        '''
        self._add_all_articles()

        result = self.articles.get_article_ids('2016-09-23')
        expected = [2, 3, 4]

        self.assertEqual(result, expected)

    def test_get_count(self):
        '''
        Test that function Articles.get_count(tag_name, date) returns the number of occurrences of the given tag_name across all articles submitted on the given date.
        '''
        self._add_all_articles()

        result = self.articles.get_count('tag0', '2016-09-23')

        self.assertEqual(result, 3)

    def test_get_last_article_ids(self):
        '''
        Test that function Articles.get_last_article_ids(tag_name, date, n) returns the last n article ids that have tag_name on the given date.
        '''
        self._add_all_articles()

        result = self.articles.get_last_article_ids('tag0', '2016-09-23', 2)
        expected = [3, 4]

        self.assertEqual(result, expected)

    def test_get_related_tags(self):
        '''
        Test that function Articles.get_related_tags(tag_name, date) returns the list of tags that are on the articles that the current tag is on for the same day.
        '''
        self._add_all_articles()

        result = self.articles.get_related_tags('tag0', '2016-09-22')
        expected = ['tag1', 'tag2', 'tag3', 'tag4', 'tag5', 'tag6']

        # Order of the returned tags is not part of the contract under test.
        self.assertEqual(sorted(result), sorted(expected))
示例#13
0
文件: app.py 项目: mnezh/h2o-homework
#!/usr/bin/env python
'''
The smallest possible transport layer on top of the article query service.
Handles JSON serialization of datetime correctly.
'''
import os
import sys
from flask import Flask, request, jsonify, make_response
from serialize_datetime import CustomJSONEncoder
from articles import Articles

# Data file used when no path is given on the command line,
# relative to this module's directory.
DEFAULT_DATA = '../data/reut2-000.json'

# The first command-line argument, when present, overrides the default file.
if len(sys.argv) > 1:
    json_file = sys.argv[1]
else:
    json_file = os.path.join(os.path.dirname(__file__), DEFAULT_DATA)

articles = Articles(json_file)

app = Flask('Reuters API')
app.json_encoder = CustomJSONEncoder


@app.route("/article", methods=["GET"])
@app.route('/article/<new_id>')
def get_article(new_id=None):
    '''
    Serve one article or a filtered list of articles as JSON.

    Without ``new_id`` the request's query arguments are passed to
    ``articles.find_all``. With ``new_id`` the first article whose
    'newid' matches is returned, or a 404 JSON error when none is found.
    '''
    if not new_id:
        return jsonify(articles.find_all(request.args))

    found = articles.find_first({'newid': new_id})
    if not found:
        return make_response(jsonify({'error': 'Not found'}), 404)
    return jsonify(found)

示例#14
0
def loaded_articles():
    """Return an Articles object built from the FIXTURE path next to this module."""
    here = os.path.dirname(__file__)
    return Articles(os.path.join(here, FIXTURE))