def _mock_posts(request, context) -> str:
    """Mock crimson hexagon api call for requests_mock."""
    # Pull the requested date window out of the mocked URL's query string.
    query_params = parse_qs(urlparse(request.url).query)
    begin = dateutil.parser.parse(query_params['startDate'][0])
    finish = dateutil.parser.parse(query_params['endDate'][0])

    # One JSON fragment per mock post, pointing at a fake tweet URL.
    fragments = [
        '{ "url": "http://twitter.com/%s/status/%s"}' % (p['author'], p['post_id'])
        for p in get_mock_data(begin, finish)
    ]
    results = ','.join(fragments)

    context.status_code = 200
    context.headers = {'Content-Type': 'application/json; charset=UTF-8'}

    json = """
{
    "results": [%s],
    "resultsPage": 0,
    "resultsPageSize": 10,
    "resultsTotal": 6563,
    "startDate": "%s",
    "endDate": "%s"
}
""" % (results, begin, finish)

    return json
def fetch_posts_from_api(self, query: str, start_date: datetime, end_date: datetime) -> list:
    """Return posts from a csv that are within the given date range."""
    # In mock mode, replace the caller's query with a csv rendering of the
    # canned mock posts so the normal parsing path below is still exercised.
    if self.mock_enabled:
        query = self._get_csv_string_from_dicts(get_mock_data())

    parsed_posts = self._get_dicts_from_csv_string(query)

    for row in parsed_posts:
        # Every post must carry these columns; anything else is a data error.
        for required in ('content', 'author', 'publish_date'):
            if required not in row:
                raise McPostsGenericDataException(f"Missing required field: {required}")

        row['data'] = {}
        # Fall back to the author as the channel, and synthesize an id when
        # the csv did not provide one; ids are normalized to strings.
        row.setdefault('channel', row['author'])
        row.setdefault('post_id', uuid.uuid4().hex)
        row['post_id'] = str(row['post_id'])

    return filter_posts_for_date_range(parsed_posts, start_date, end_date)
def _mock_pushshift(request, context) -> str:
    """Mock response from pushshift based on posts.get_mock_data.

    Parses the date range out of the elasticsearch query in the mocked
    request body, renders the canned mock posts through the pushshift
    response jinja template, and returns the resulting JSON string.
    """
    # The date range lives in the 'range' clauses of the bool/must filters.
    must_filters = request.json()['query']['function_score']['query']['bool']['must']

    start_date = None
    end_date = None
    # Renamed from 'filter', which shadowed the builtin of the same name.
    for es_filter in must_filters:
        if 'range' not in es_filter:
            continue
        created_utc = es_filter['range']['created_utc']
        if 'gte' in created_utc:
            start_date = datetime.datetime.fromtimestamp(created_utc['gte'])
        elif 'lt' in created_utc:
            end_date = datetime.datetime.fromtimestamp(created_utc['lt'])

    # Both bounds must have been present in the query.
    assert (end_date is not None) and (start_date is not None)

    base_dir = os.path.dirname(os.path.realpath(__file__))
    response_template_file = os.path.join(base_dir, "pushshift_response.json.jinja")

    posts = get_mock_data(start_date, end_date)
    for post in posts:
        # The template expects an epoch timestamp, not the iso date string.
        post['publish_epoch'] = dateutil.parser.parse(post['publish_date']).timestamp()

    with open(response_template_file) as f:
        template = Template(f.read())

    response_json = template.render(posts=posts)

    context.status_code = 200
    context.headers = {'Content-Type': 'application/json; charset=UTF-8'}

    return response_json
def fetch_posts_from_api(
        self,
        query: str,
        start_date: datetime,
        end_date: datetime,
        sample: Optional[int] = None,
        page_size: Optional[int] = None,
) -> list:
    """Return posts from a postgres table that are within the given date range.

    `query` is interpreted as the name of the table to read from (or, in mock
    mode, is replaced by the table the mock data was inserted into).
    `sample` and `page_size` are part of the shared interface but unsupported.
    Raises McPostgresGenericDataException for an unsafe table name.
    """
    db = mediawords.db.connect_to_db()

    assert sample is None, "Sampling is not implemented."
    assert page_size is None, "Page size limiting is not supported."

    if self.mock_enabled:
        query = self._insert_mock_data(db, get_mock_data())

    table = query

    # The table name is interpolated directly into the SQL below, so reject
    # anything that is not strictly alphanumeric or underscore.  The previous
    # pattern r'[^[a-z][A-Z][0-9]_]' was a malformed character class that
    # matched a five-character sequence instead of one illegal character,
    # which let nearly any table name (including SQL injection payloads)
    # through the check.
    if re.search(r'[^a-zA-Z0-9_]', table):
        raise McPostgresGenericDataException(f'illegal table name: {table}')

    # Date bounds are passed as bound parameters; only the (validated) table
    # name is interpolated.
    posts = db.query(
        f"""
        select content, publish_date, author, post_id, channel
            from {table}
            where publish_date::timestamp between %(a)s and %(b)s
        """,
        {
            'a': start_date,
            'b': end_date
        }).hashes()

    return posts
def _get_mock_json(self, start_date: datetime, end_date: datetime):
    """return json in googler format derived from get_mock_data()."""
    # Map each mock post into the shape googler emits: the post content
    # doubles as abstract and title, and the date is rendered in googler's
    # metadata format.
    entries = [
        {
            'abstract': row['content'],
            'url': 'http://foo.bar/' + row['post_id'],
            'title': row['content'],
            'metadata': dateutil.parser.parse(row['publish_date']).strftime('%b %e, %Y,'),
        }
        for row in get_mock_data(start_date, end_date)
    ]

    return encode_json(entries)
def test_mock_data(self, query: str = '') -> None:
    """Run test of object using mock data.

    This should work on any class, as long as fetch_post() is implemented to
    return the data from get_mock_data when mock_enabled = True.
    """
    self.mock_enabled = True

    expected_posts = get_mock_data()

    # The mock data is ordered by publish date, so the first and last rows
    # bound the whole set.
    first_date = dateutil.parser.parse(expected_posts[0]['publish_date'])
    last_date = dateutil.parser.parse(expected_posts[-1]['publish_date'])

    fetched_posts = self.fetch_posts(query, first_date, last_date)

    assert len(fetched_posts) == len(expected_posts)
    for fetched, expected in zip(fetched_posts, expected_posts):
        self.validate_mock_post(fetched, expected)
def _mock_ch_posts(request, context) -> str:
    """Mock crimson hexagon api call for requests_mock."""
    # Date window comes from the 'start'/'end' query parameters of the URL.
    args = parse_qs(urlparse(request.url).query)
    begin_date = dateutil.parser.parse(args['start'][0])
    finish_date = dateutil.parser.parse(args['end'][0])

    # Fixed per-post payload; only the tweet URL varies between posts.
    post_json_template = """\
{
    "url": "%s",
    "title": "",
    "type": "Twitter",
    "language": "en",
    "assignedCategoryId": 25841371963,
    "assignedEmotionId": 25841371954,
    "categoryScores": [
        {"categoryId": 25841371962, "categoryName": "Basic Neutral", "score": 0},
        {"categoryId": 25841371963, "categoryName": "Basic Negative", "score": 1},
        {"categoryId": 25841371960, "categoryName": "Basic Positive", "score": 0}
    ],
    "emotionScores": [
        {"emotionId": 25841371954, "emotionName": "Disgust", "score": 0.4},
        {"emotionId": 25841371955, "emotionName": "Joy", "score": 0.01},
        {"emotionId": 25841371958, "emotionName": "Neutral", "score": 0.01},
        {"emotionId": 25841371959, "emotionName": "Fear", "score": 0.09},
        {"emotionId": 25841371956, "emotionName": "Sadness", "score": 0.22},
        {"emotionId": 25841371957, "emotionName": "Anger", "score": 0.16},
        {"emotionId": 25841371961, "emotionName": "Surprise", "score": 0.12}
    ]
}\
"""

    encoded_posts = [
        post_json_template % (
            'http://twitter.com/%s/status/%s' % (mock_post['author'], mock_post['post_id']))
        for mock_post in get_mock_data(begin_date, finish_date)
    ]

    context.status_code = 200
    context.headers = {'Content-Type': 'application/json; charset=UTF-8'}

    json = '{"status": "success", "posts":[%s]}' % ',\n'.join(encoded_posts)
    return json