def setup_test(self):
    '''
    Create the sample articles ``article_0`` .. ``article_4`` and an
    empty ``Articles`` storage on the test instance.
    '''
    common_tags = ('tag0', 'tag1', 'tag2', 'tag3')
    # (date, tags) per sample article; the position in this tuple is the id.
    samples = (
        ("2016-09-22", common_tags),
        ("2016-09-22", ('tag0', 'tag4', 'tag5', 'tag6')),
        ("2016-09-23", common_tags),
        ("2016-09-23", common_tags),
        ("2016-09-23", common_tags),
    )
    for index, (date, tags) in enumerate(samples):
        # Every article receives its own fresh list of tags.
        sample = Article(index,
                         "Some title %d" % index,
                         date,
                         "Some body text %d" % index,
                         list(tags))
        setattr(self, "article_%d" % index, sample)
    self.articles = Articles()
def article(request, article_id):
    """Render ``articles/article.html`` for the article with the given id.

    The article is looked up through a fresh ``Articles`` instance and
    handed to the template together with a fixed page title.
    """
    found = Articles().get_by_id(article_id)
    context = RequestContext(request, {
        'page_title': 'Member Articles',
        'article': found,
    })
    return render_to_response('articles/article.html', context)
def run(self):
    """Fetch article-list pages one after another and record their stats.

    For every page the method requests the message list, asks
    ``Articles.read_like_nums`` for the read/like counters of each main
    article and of each of its sub-articles, writes the collected rows
    with ``self.write_file`` and then calls ``self.is_exit_or_continue``.

    The original implementation called itself recursively after every
    page, which grows the call stack by one frame per page and ends in
    ``RecursionError`` on long crawls. The same steps now run in a loop.
    As before, the method never returns normally: it stops only when
    ``is_exit_or_continue`` (or any other call) exits or raises.
    """
    while True:
        # Paging URL.
        # NOTE(review): the trailing "&x5=0f=json" looks like a typo for
        # "&x5=0&f=json"; left untouched because it is sent to the server.
        page_url = (
            "https://mp.weixin.qq.com/mp/profile_ext?action=getmsg&__biz={}"
            "&f=json&offset={}&count=10&is_ok=1&scene=&uin=777&key=777"
            "&pass_ticket={}&wxtoken=&appmsg_token="
            + self.appmsg_token
            + "&x5=0f=json"
        )
        # Turn the cookie string into a dict.
        wx_dict = utils.str_to_dict(self.cookie, join_symbol='; ',
                                    split_symbol='=')
        # Request the page.
        # NOTE(review): verify=False disables TLS certificate checks.
        response = requests.get(
            page_url.format(self.biz, self.begin_page_index * 10,
                            wx_dict['pass_ticket']),
            headers=self.headers,
            verify=False)
        # Turn the article list into a dict.
        articles = self.article_list(response.text)
        info = Articles(self.appmsg_token, self.cookie)

        result = []
        for a in articles['list']:
            if 'app_msg_ext_info' in a:
                ext_info = a.get('app_msg_ext_info')

                # Main article of this push (skipped when it has no URL).
                content_url = ext_info.get('content_url', '')
                if content_url != '':
                    read_num, old_like_num, like_num = info.read_like_nums(
                        content_url)
                    result.append(','.join([
                        str(self.num) + '条',
                        ext_info.get('title'),
                        str(read_num),
                        str(old_like_num),
                        str(like_num),
                    ]))
                    time.sleep(2)

                # Secondary articles bundled with the same push.
                for m in ext_info.get('multi_app_msg_item_list', []):
                    read_num, old_like_num, like_num = info.read_like_nums(
                        m.get('content_url'))
                    result.append(','.join([
                        str(self.num) + '条的副条',
                        m.get('title'),
                        str(read_num),
                        str(old_like_num),
                        str(like_num),
                    ]))
                    time.sleep(3)

            self.num = self.num + 1

        self.write_file(result)
        # Presumably exits the process or advances to the next page;
        # either way the next iteration re-reads all state from self.
        self.is_exit_or_continue()
class MyResource(Resource):
    '''
    Common base class for the article resources. It provides:
    - ``storage``: one ``Articles`` instance shared by all subclasses.
    - ``parser``: a request parser declaring the fields read from the
      incoming request (id, title, date, body, tags).
    - Helper methods that abort the request depending on whether an
      article id is already present in the storage.
    '''
    storage = Articles()

    parser = reqparse.RequestParser()
    parser.add_argument('id', type=int)
    parser.add_argument('title', type=str)
    parser.add_argument('date', type=str)
    parser.add_argument('body', type=str)
    # 'append' collects repeated "tags" values into a list.
    parser.add_argument('tags', type=str, action='append')

    def abort_if_article_doesnt_exist(self, id):
        '''Abort with HTTP 404 unless the storage has an article with
        the given id.
        '''
        if self.storage.article_exists(id):
            return
        abort(404, message=f'Article {id} does not exist')

    def abort_if_article_aleady_exist(self, id):
        '''Abort with HTTP 404 when the storage already has an article
        with the given id.
        '''
        if not self.storage.article_exists(id):
            return
        # NOTE(review): 404 is kept as-is; 409 Conflict may be the more
        # conventional status for a duplicate - confirm with API clients.
        abort(404, message=f'Article {id} already exists')
def __init__(self, config):
    """
    Prepare the entire system's objects

    config - the configuration object from the click library; its
             ``obj`` mapping must hold an open YAML stream under
             "config" and the database file under "database"

    The YAML document must provide an "api_key" entry.
    """
    config_stream = config.obj["config"]
    try:
        # safe_load: plain yaml.load() without a Loader can construct
        # arbitrary Python objects and is rejected by PyYAML >= 6.
        yaml_config = yaml.safe_load(config_stream)
    finally:
        # Close the stream even when the YAML cannot be parsed.
        config_stream.close()

    self.__cache = Cache(db_file=config.obj["database"])
    self.__articles = Articles(key=yaml_config["api_key"],
                               cache=self.__cache)
    self.__analyzer = Analyzer()
class Manager:
    """
    Actual object to manage everything: it wires the cache, the article
    access object and the analyzer together and forwards calls to them.
    """

    # reference to the entire data storage
    __cache = None

    # reference to object managing articles access and storage
    __articles = None

    # reference to the object doing the analysis
    __analyzer = None

    def __init__(self, config):
        """
        Prepare the entire system's objects

        config - the configuration object from the click library; its
                 ``obj`` mapping must hold an open YAML stream under
                 "config" and the database file under "database"

        The YAML document must provide an "api_key" entry.
        """
        config_stream = config.obj["config"]
        try:
            # safe_load: plain yaml.load() without a Loader can construct
            # arbitrary Python objects and is rejected by PyYAML >= 6.
            yaml_config = yaml.safe_load(config_stream)
        finally:
            # Close the stream even when the YAML cannot be parsed.
            config_stream.close()

        self.__cache = Cache(db_file=config.obj["database"])
        self.__articles = Articles(key=yaml_config["api_key"],
                                   cache=self.__cache)
        self.__analyzer = Analyzer()

    def perform_search(self, phrase, training_size=1000):
        """
        Perform the actual search by delegating to the articles object

        phrase - the phrase to search by
        training_size - the amount of articles to use and fetch

        returns whatever the articles object's perform_search returns
        (the list of found articles)
        """
        return self.__articles.perform_search(phrase, training_size)

    def analyze_results(self, article_list):
        """
        Print how many articles are used and hand them to the analyzer

        article_list - the articles to run the analysis on
        """
        print("Using %i articles" % (len(article_list),))
        self.__analyzer.process_data(article_list)

    def predict_result(self, date):
        """
        Make a prediction on a date

        date - a datetime to make a prediction on

        returns the analyzer's prediction for that date
        """
        return self.__analyzer.predict(date)
def __init__(self, path):
    """Create the article collection from *path* and the operator tables.

    ``OPERATORS`` maps every accepted spelling of an operator or
    parenthesis to its canonical one-character symbol; ``PRIORITY`` maps
    each canonical symbol to an integer rank.
    """
    self.articles = Articles(path)

    # Canonical symbol followed by every spelling that maps to it.
    alias_groups = (
        ('&', ('and', '&', '*')),
        ('|', ('or', '|', '+')),
        ('~', ('not', '~', '!')),
        ('(', ('(',)),
        (')', (')',)),
    )
    self.OPERATORS = {
        alias: symbol
        for symbol, aliases in alias_groups
        for alias in aliases
    }

    # Presumably precedence levels used when parsing queries.
    self.PRIORITY = dict(zip(('(', ')', '|', '&', '~'), (0, 0, 1, 2, 3)))
# Using falcon as it is much faster and flexible solution import falcon from os.path import join, isfile, dirname import os from articles import Articles import json from urllib import parse DEFAULT_FILE = join(dirname(__file__), '../json-data/reut2-000.json') pwd = os.path.dirname(__file__) template_dir = os.path.join(pwd) articles = Articles(DEFAULT_FILE) class Healthy(object): def on_get(self, req, resp): resp.status = falcon.HTTP_200 resp.body = json.dumps({'status': 'ok'}) class ListArticles(object): def on_get(self, req, resp): try: """Handles GET requests""" query = dict(parse.parse_qsl(req.query_string)) resp.status = falcon.HTTP_200 # This is the default status resp.body = json.dumps(articles.get_filtered_view(query), ensure_ascii=False) except KeyError:
args = 'min-o=%s_max-op=%s__window=%s_' % (str( FLAGS.min_occurrence), str( FLAGS.max_occurrence_percentage), str(FLAGS.skip_window)) f = open(dirname + '/vec/' + args + str(dimension) + '.vec', 'w+') f.write('%d %d\n' % (len(w2v), dimension)) for key, values in w2v.items(): f.write('%s %s\n' % (key, ' '.join(format(x, '.5f') for x in values))) f.close() # Initialisation if FLAGS.method == 'articles': document = Articles(params=FLAGS) document.build_dictionary() vocabulary_size = document.get_vocab_size() word2id = document.get_word2id() id2word = document.get_id2word() articles = document.get_articles() next_word_idx = 0 input_word, context_word = build_training_data() next_batch_articles(FLAGS.batch_size, FLAGS.skip_window) else: tw = TextWords() tw.build_dictionary() word2id = tw.get_word2id() id2word = tw.get_id2word()
import sys import requests from flask import Flask, Blueprint, request, jsonify from flask_cors import CORS from articles import Articles app = Flask(__name__) bp = Blueprint('articles', __name__, url_prefix='/articles') articles = Articles() @bp.route("/", methods=["GET"]) def list(): arts = articles.list() print('articles:', arts) artsList = [a for a in arts.values()] return jsonify({'articles': artsList}) @bp.route("/", methods=["POST"]) def add(): json_data = request.get_json() article = articles.add(json_data) print('added_article:', article) return jsonify(article) @bp.route("/<article_id>", methods=["GET"]) def get(article_id): article = articles.get(article_id)
def articles():
    """Return the result of ``Articles().getArticles()`` as a JSON response.

    The fetched payload is also printed to stdout before it is returned.
    """
    payload = Articles().getArticles()
    print(payload)
    return jsonify(payload)
class TestArticlesStorage(unittest.TestCase):
    '''
    Tests for Articles module.

    The fixture is built by unittest's setUp/tearDown hooks, so every
    test starts with five sample articles and an empty storage, and the
    teardown runs even when an assertion fails.
    '''

    def setUp(self):
        ''' unittest hook: build the fixture before every test. '''
        self.setup_test()

    def tearDown(self):
        ''' unittest hook: drop the storage after every test. '''
        self.teardown_test()

    def setup_test(self):
        '''
        Make ourselves a way to quickly setup articles storage.
        '''
        self.article_0 = Article(0, "Some title 0", "2016-09-22",
                                 "Some body text 0",
                                 ['tag0', 'tag1', 'tag2', 'tag3'])
        self.article_1 = Article(1, "Some title 1", "2016-09-22",
                                 "Some body text 1",
                                 ['tag0', 'tag4', 'tag5', 'tag6'])
        self.article_2 = Article(2, "Some title 2", "2016-09-23",
                                 "Some body text 2",
                                 ['tag0', 'tag1', 'tag2', 'tag3'])
        self.article_3 = Article(3, "Some title 3", "2016-09-23",
                                 "Some body text 3",
                                 ['tag0', 'tag1', 'tag2', 'tag3'])
        self.article_4 = Article(4, "Some title 4", "2016-09-23",
                                 "Some body text 4",
                                 ['tag0', 'tag1', 'tag2', 'tag3'])
        self.articles = Articles()

    def teardown_test(self):
        ''' Release the storage created by setup_test. '''
        self.articles = None

    def _add_all_articles(self):
        ''' Store the five sample articles in id order. '''
        for article in (self.article_0, self.article_1, self.article_2,
                        self.article_3, self.article_4):
            self.articles.add(article)

    def test_article_add(self):
        '''
        Test function Articles.add() when it successfully adds
        a new article.
        '''
        result = self.articles.add(self.article_0)
        compare(self.article_0, result)

    def test_article_add_existing(self):
        '''
        Test function Articles.add() throws exception when it
        fails to add a new article when article with such id
        already exists.
        '''
        self.articles.add(self.article_0)
        with self.assertRaises(Exception):
            self.articles.add(self.article_0)

    def test_get_all(self):
        '''
        Test function Articles.get_all
        '''
        self.articles.add(self.article_0)
        self.articles.add(self.article_1)
        expected = OrderedDict()
        expected[0] = self.article_0
        expected[1] = self.article_1
        result = self.articles.get_all()
        compare(expected, result)

    def test_get(self):
        '''
        Test that function Articles.get() returns correct item
        '''
        self.articles.add(self.article_0)
        result = self.articles.get(0)
        compare(result, self.article_0)

    def test_get_non_existing(self):
        '''
        Test that function Articles.get() throws exception when
        requested item is not there
        '''
        self.articles.add(self.article_0)
        with self.assertRaises(Exception):
            self.articles.get(1)

    def test_get_article_ids(self):
        '''
        Test that function Articles.get_article_ids(date) returns
        correct list of article ids for a given date.
        '''
        self._add_all_articles()
        result = self.articles.get_article_ids('2016-09-23')
        expected = [2, 3, 4]
        self.assertEqual(result, expected)

    def test_get_count(self):
        '''
        Test that function Articles.get_count(tag_name, date) returns
        the number of occurrences of the given tag_name across all
        articles submitted on the given date.
        '''
        self._add_all_articles()
        result = self.articles.get_count('tag0', '2016-09-23')
        self.assertEqual(result, 3)

    def test_get_last_article_ids(self):
        '''
        Test that function Articles.get_last_article_ids(tag_name, date, n)
        returns the last n article ids that have tag_name on the
        given date.
        '''
        self._add_all_articles()
        result = self.articles.get_last_article_ids('tag0', '2016-09-23', 2)
        expected = [3, 4]
        self.assertEqual(result, expected)

    def test_get_related_tags(self):
        '''
        Test that function Articles.get_related_tags(tag_name, date)
        returns the list of tags that are on the articles that
        the current tag is on for the same day.
        '''
        self._add_all_articles()
        result = self.articles.get_related_tags('tag0', '2016-09-22')
        expected = ['tag1', 'tag2', 'tag3', 'tag4', 'tag5', 'tag6']
        # Order of the returned tags is not part of the contract.
        self.assertEqual(sorted(result), sorted(expected))
#!/usr/bin/env python
'''
A minimal HTTP transport layer on top of the article query service.
CustomJSONEncoder is installed as the app's JSON encoder so that
datetime values are serialized correctly.
'''
import os
import sys
from flask import Flask, request, jsonify, make_response
from serialize_datetime import CustomJSONEncoder
from articles import Articles

DEFAULT_DATA = '../data/reut2-000.json'

# The first command-line argument overrides the bundled data file.
if len(sys.argv) > 1:
    json_file = sys.argv[1]
else:
    json_file = os.path.join(os.path.dirname(__file__), DEFAULT_DATA)

articles = Articles(json_file)

app = Flask('Reuters API')
app.json_encoder = CustomJSONEncoder


@app.route("/article", methods=["GET"])
@app.route('/article/<new_id>')
def get_article(new_id=None):
    '''
    Serve one article or a filtered list of articles.

    With ``new_id`` the first article whose "newid" matches is returned,
    or a 404 JSON error when there is none. Without it, the request's
    query arguments are passed to ``articles.find_all``.
    '''
    if not new_id:
        return jsonify(articles.find_all(request.args))

    match = articles.find_first({'newid': new_id})
    if not match:
        return make_response(jsonify({'error': 'Not found'}), 404)
    return jsonify(match)
def loaded_articles():
    """Return an ``Articles`` object built from the FIXTURE file.

    FIXTURE is resolved relative to the directory of this module.
    """
    here = os.path.dirname(__file__)
    return Articles(os.path.join(here, FIXTURE))