def test_search_by_tag_or_tag(wa_video):
    """Search with two `tag` parameters and check the OR-style result set.

    Five copies of the fixture annotation are created. Copy N carries its
    own `testing_tagN` tag, and the even-numbered copies additionally carry
    a shared tag. The search asks for the shared tag and `testing_tag3`, and
    the test expects three rows: each row must carry `testing_tag3` or the
    shared tag.
    """
    template = wa_video
    shared_tag_name = 'testing_tag_even'
    shared_tag = make_wa_tag(shared_tag_name)

    for suffix in range(1, 6):
        anno = deepcopy(template)
        anno['id'] = '{}{}'.format(template['id'], suffix)
        body_items = anno['body']['items']
        body_items.append(
            make_wa_tag(tagname='testing_tag{}'.format(suffix)))
        if suffix % 2 == 0:
            body_items.append(shared_tag)
        CRUD.create_anno(anno)

    # the same `tag` parameter is sent twice, once per tag value
    query = 'tag={}&tag={}'.format(shared_tag_name, 'testing_tag3')
    jwt_payload = make_jwt_payload(user=template['creator']['id'])
    request = make_json_request(method='get', query_string=query)
    request.catchjwt = jwt_payload

    response = search_api(request)
    found = json.loads(response.content.decode('utf-8'))

    assert response.status_code == 200
    assert found['total'] == 3
    assert len(found['rows']) == 3
    for row in found['rows']:
        if Catcha.has_tag(row, 'testing_tag3'):
            continue
        # rows without the specific tag must have matched the shared one
        assert Catcha.has_tag(row, shared_tag_name) is True
def test_search_by_target_source_ok(wa_audio, wa_image):
    """Search by `target_source` and expect only the matching annotations.

    Four copies of each fixture are created (eight annotations in total).
    The search uses the source of the first target item of the audio
    fixture and expects four hits, each of which must report that target
    source both with and without the target type.
    """
    audio_anno = wa_audio
    image_anno = wa_image
    first_target = audio_anno['target']['items'][0]
    wanted_source = first_target['source']
    wanted_type = first_target['type']

    for suffix in range(1, 5):
        for template in (audio_anno, image_anno):
            anno = deepcopy(template)
            anno['id'] = '{}{}'.format(template['id'], suffix)
            CRUD.create_anno(anno)

    jwt_payload = make_jwt_payload()
    request = make_json_request(
        method='get',
        query_string='target_source={}'.format(wanted_source))
    request.catchjwt = jwt_payload

    response = search_api(request)
    found = json.loads(response.content.decode('utf-8'))

    assert response.status_code == 200
    assert found['total'] == 4
    for row in found['rows']:
        assert Catcha.has_target_source(row, wanted_source, wanted_type)
        assert Catcha.has_target_source(row, wanted_source)
def handle(self, *args, **kwargs):
    """Convert a JSON file of annotatorjs objects and print a JSON report.

    Reads the list stored in the file named by ``kwargs['filepath']`` and
    passes every entry through ``Catcha.normalize``. Entries without a
    ``media`` key, and entries whose normalization raises, are tagged with
    an ``error_msg`` and collected separately. The report printed to stdout
    holds both lists and their sizes; nothing is returned.
    """
    with open(kwargs['filepath'], 'r') as source:
        annojs_list = json.load(source)

    converted = []
    rejected = []
    for entry in annojs_list:
        if 'media' not in entry:
            # input format not acceptable
            entry['error_msg'] = 'missing _media_ property'
            rejected.append(entry)
            continue

        # workaround for input with really old catch-annojs: give the
        # first range empty `start`/`end` when `start` is absent
        if 'ranges' in entry and len(entry['ranges']) > 0:
            first_range = entry['ranges'][0]
            if 'start' not in first_range:
                first_range['start'] = ''
                first_range['end'] = ''

        try:
            normalized = Catcha.normalize(entry)
        except Exception as exc:
            entry['error_msg'] = 'normalize error: {}'.format(str(exc))
            rejected.append(entry)
            continue
        converted.append(normalized)

    report = {
        'total_success': len(converted),
        'total_failed': len(rejected),
        'catcha_list': converted,
        'annojs_failed_list': rejected,
    }
    print(json.dumps(report, indent=4))
def test_create_compat_annojs(js_text):
    """Create an annotation through the `compat_create` endpoint.

    Posts the annotatorjs fixture with an `X-Annotator-Auth-Token` header
    and checks that the response gets a new id, keeps the user, tags and
    contextId of the input, and matches the stored annotation.
    """
    annojs = js_text
    consumer = Consumer._default_manager.create()
    jwt_payload = make_jwt_payload(
        apikey=consumer.consumer, user=annojs['user']['id'])
    auth_token = make_encoded_token(consumer.secret_key, jwt_payload)

    response = Client().post(
        reverse('compat_create'),
        data=json.dumps(annojs),
        HTTP_X_ANNOTATOR_AUTH_TOKEN=auth_token,
        content_type='application/json')
    assert response.status_code == 200

    created = json.loads(response.content.decode('utf-8'))
    # should not preserve `id` in input annojs
    assert created['id'] != annojs['id']
    assert created['user']['id'] == jwt_payload['userId']
    assert set(created['tags']) == set(annojs['tags'])
    assert created['contextId'] == annojs['contextId']

    stored = Anno._default_manager.get(pk=created['id'])
    assert stored.creator_id == jwt_payload['userId']

    as_catcha = AnnoJS.convert_to_catcha(created)
    assert Catcha.are_similar(as_catcha, stored.serialized)
def test_body_sanitize():
    """Check that `Catcha.safe_body_text_value` rejects script-like bodies.

    Each unsafe sample is placed in the first body item and must raise
    `InvalidInputWebAnnotationError`. A plain-text body must then produce
    a truthy result.
    """
    unsafe_samples = [
        ' < script same_attr=blah other_attr="pooh"></scritp>',
        '<script>',
        'something <\tscript\t somethingelse="{}">'.format('blah'),
    ]
    anno = make_wa_object()
    first_body_item = anno['body']['items'][0]

    for sample in unsafe_samples:
        first_body_item['value'] = sample
        with pytest.raises(InvalidInputWebAnnotationError):
            Catcha.safe_body_text_value(anno)

    first_body_item['value'] = \
        'body of annotation that is safe and has no script tags.'
    cleaned = Catcha.safe_body_text_value(anno)
    assert cleaned
def convert_to_catcha(annojs_list):
    """Normalize every annotatorjs object with `Catcha.normalize`.

    Returns the tuple ``(catcha_list, error_list)``. When a normalization
    fails, the error is echoed through click, the offending input is added
    to the error list, and the exception is re-raised -- so the function
    stops at the first failure and only ever returns an empty error list.
    """
    converted = []
    rejected = []
    for source in annojs_list:
        try:
            normalized = Catcha.normalize(source)
        except Exception as exc:
            click.echo('CONVERSION ERROR: {} -- '.format(exc))
            rejected.append(source)
            raise exc
        # only reached when normalization succeeded
        converted.append(normalized)
    return (converted, rejected)
def test_search_replies_catcha_ok(wa_list):
    """Create four replies through the API and find them with a search.

    The fixture annotations are stored first, then four replies to the
    first one are posted to `crud_api`. A search by context, collection,
    media `Annotation` and `target_source_id` must return those four
    replies, each created by the token's user and targeting the parent.
    """
    stored = [CRUD.create_anno(wa) for wa in wa_list]

    consumer = Consumer._default_manager.create()
    jwt_payload = make_jwt_payload(apikey=consumer.consumer)
    auth_token = make_encoded_token(consumer.secret_key, jwt_payload)
    client = Client()

    # create some replies
    parent = stored[0]
    for _ in range(4):
        reply = make_wa_object(
            age_in_hours=1, media=ANNO, reply_to=parent.anno_id,
            user=jwt_payload['userId'])
        crud_url = '{}'.format(
            reverse('crud_api', kwargs={'anno_id': reply['id']}))
        response = client.post(
            crud_url,
            data=json.dumps(reply),
            HTTP_AUTHORIZATION='Token {}'.format(auth_token),
            content_type='application/json')
        assert response.status_code == 200

    # search for the replies
    platform = parent.raw['platform']
    url_template = ('{}?context_id={}&collection_id={}&media=Annotation&'
                    'limit=-1&target_source_id={}')
    search_url = url_template.format(
        reverse('create_or_search'),
        platform['context_id'],
        platform['collection_id'],
        parent.anno_id)

    response = client.get(
        search_url, HTTP_AUTHORIZATION='token {}'.format(auth_token))
    assert response.status_code == 200

    found = response.json()
    assert found['total'] == 4
    for row in found['rows']:
        assert row['creator']['id'] == jwt_payload['userId']
        # find target with media type Annotation
        reply_target = Catcha.fetch_target_item_by_media(row, ANNO)
        assert reply_target is not None
        assert reply_target['source'] == parent.anno_id
def test_search_replies_js_with_uri(js_list):
    """Search replies through the compat API while also passing a `uri`.

    The annotatorjs fixtures are normalized and stored, then four replies
    to the first one are posted to `compat_create`. The compat search is
    called with media `comment`, a fake `uri` and the parent id, and must
    return the four replies with the expected media, parent and user.
    """
    stored = [CRUD.create_anno(Catcha.normalize(js)) for js in js_list]

    consumer = Consumer._default_manager.create()
    jwt_payload = make_jwt_payload(apikey=consumer.consumer)
    auth_token = make_encoded_token(consumer.secret_key, jwt_payload)
    client = Client()

    # create some replies
    parent = stored[0]
    create_url = reverse('compat_create')
    for _ in range(4):
        reply = make_annotatorjs_object(
            age_in_hours=1, media=ANNO, reply_to=parent.anno_id,
            user=jwt_payload['userId'])
        response = client.post(
            create_url,
            data=json.dumps(reply),
            HTTP_X_ANNOTATOR_AUTH_TOKEN=auth_token,
            content_type='application/json')
        assert response.status_code == 200

    # search for the replies
    platform = parent.raw['platform']
    url_template = ('{}?context_id={}&collection_id={}&media=comment&'
                    'limit=-1&uri=someFakeId&parentid={}')
    search_url = url_template.format(
        reverse('compat_search'),
        platform['context_id'],
        platform['collection_id'],
        parent.anno_id)

    response = client.get(
        search_url, HTTP_X_ANNOTATOR_AUTH_TOKEN=auth_token)
    assert response.status_code == 200

    found = response.json()
    assert found['total'] == 4
    for row in found['rows']:
        assert row['media'] == 'comment'
        assert row['parent'] == parent.anno_id
        assert row['user']['id'] == jwt_payload['userId']
def test_long_annotatorjs():
    """Round-trip a sample file of annotatorjs objects through the compat API.

    Loads `annojs_sample_1.json` from this file's directory, posts every
    entry that has a `media` key to `compat_create`, then fetches each
    annotation by the input id and compares it with the input. The test
    fails when any pair is still not similar after its tags are
    de-duplicated or defaulted.
    """
    here = os.path.abspath(os.path.dirname(__file__))
    # alternative sample files used previously:
    # filename = os.path.join(here, 'annojs_3K_sorted.json')
    # filename = os.path.join(here, 'annojs_HxAT101.json')
    filename = os.path.join(here, 'annojs_sample_1.json')
    sample = readfile_into_jsonobj(filename)

    created_list = []
    failed_to_create = []
    client = Client()
    c = Consumer._default_manager.create()
    no_media = []
    for js in sample:
        # entries without `media` are not posted; their ids are remembered
        # so the comparison loop below can skip them
        if 'media' not in js:
            no_media.append(js['id'])
            failed_to_create.append(js['id'])
            continue

        # prepare the input and remove properties that would get in the
        # way of the comparison below
        # js['id'] = str(js['id'])
        js['uri'] = str(js['uri'])
        del(js['archived'])
        del(js['deleted'])
        if 'citation' in js:
            del(js['citation'])
        # an empty/falsy `quote` is dropped
        if 'quote' in js and not js['quote']:
            del(js['quote'])
        # fill in defaults for properties missing from the input
        if 'parent' not in js:
            js['parent'] = '0'
        if 'contextId' not in js:
            js['contextId'] = 'unknown'
        if 'collectionId' not in js:
            js['collectionId'] = 'unknown'

        # each annotation is posted with a token for its own user
        payload = make_jwt_payload(
            apikey=c.consumer, user=js['user']['id'])
        token = make_encoded_token(c.secret_key, payload)
        url = reverse('compat_create')
        response = client.post(
            url, data=json.dumps(js),
            HTTP_X_ANNOTATOR_AUTH_TOKEN=token,
            content_type='application/json')

        # NOTE(review): the bare string below is disabled debugging code
        # kept as a no-op expression statement
        ''' if response.status_code != 200: print('failed to create js({}): {}\n{}'.format( js['id'], response.content, json.dumps(js, sort_keys=True, indent=4))) failed_to_create.append(js['id']) assert response.status_code == 200 else: resp = json.loads(response.content) '''
        assert response.status_code == 200
        resp = json.loads(response.content)

    how_many = len(no_media)
    if how_many > 0:
        print('******* found ({}) annotations with no_media: {}'.format(
            len(no_media), no_media))

    # able to insert all annotatorjs! now comparing
    counter = 0  # inputs still not similar after the tags fix-up
    pulled_from_db = 0
    for js in sample:
        if js['id'] in failed_to_create:
            print('skipping not created anno({})'.format(js['id']))
            continue  # skip if could not create

        # the stored annotation is looked up by the id of the input
        created_anno = Anno._default_manager.get(pk=js['id'])
        pulled_from_db += 1
        created_js = AnnoJS.convert_from_anno(created_anno)
        if AnnoJS.are_similar(js, created_js):
            catcha = AnnoJS.convert_to_catcha(js)
            assert Catcha.are_similar(catcha, created_anno.serialized)
        else:
            # sometimes the tags are repeated or not present!
            # de-duplicate the input tags, default missing tags to an
            # empty list on both sides, then compare once more
            js['tags'] = list(set(js['tags'])) if 'tags' in js else []
            created_js['tags'] = created_js['tags'] if 'tags' in created_js else []
            if AnnoJS.are_similar(js, created_js):
                print('---------- AnnoJS similar({}), after tags sorted:'.format(js['id']))
            else:
                counter += 1
                # NOTE(review): disabled debugging output, kept as a
                # no-op string expression
                ''' print('---------- AnnoJS not similar({}):'.format(js['id'])) print('---------- (->) {}'.format(json.dumps(js, sort_keys=True, indent=4))) print('---------- (<-) {}'.format(json.dumps(created_js, sort_keys=True, indent=4))) '''
    assert counter == 0
def test_catcha_normalize_ok(wa_image):
    """Normalizing the image fixture must leave its JSON form unchanged.

    The fixture and the result of `Catcha.normalize` are both dumped with
    sorted keys and compared as strings.
    """
    original = wa_image
    normalized = Catcha.normalize(original)

    original_dump = json.dumps(original, sort_keys=True, indent=4)
    normalized_dump = json.dumps(normalized, sort_keys=True, indent=4)
    assert original_dump == normalized_dump