def test_tags_with_languages(self):
    """Check the tags of the batch ingested from ``qs_batch_with_terms.json``.

    The batch must carry exactly the ``wbsetdescription-add`` and ``lang-eu``
    tag ids, and the first tag when ordered by ``priority`` must display
    as ``eu``.
    """
    fixture = 'store/testdata/qs_batch_with_terms.json'
    Edit.ingest_jsonlines(fixture)
    batch = Batch.objects.get()

    expected_tag_ids = ['wbsetdescription-add', 'lang-eu']
    self.assertEqual(expected_tag_ids, list(batch.tag_ids))

    # The language tag is expected to come first in priority order.
    tags_by_priority = batch.tags.order_by('priority')
    lang_tag = tags_by_priority[0]
    self.assertEqual('eu', lang_tag.display_name)
def test_extract(self):
    """Check the tag ids of the batch ingested from ``one_qs_batch.json``.

    The batch is expected to carry the action tag followed by one tag per
    property, in the order listed below.
    """
    fixture = 'store/testdata/one_qs_batch.json'
    Edit.ingest_jsonlines(fixture)
    batch = Batch.objects.get()

    expected_tag_ids = [
        'wbcreateclaim-create',
        'prop-P18',
        'prop-P2534',
        'prop-P3896',
        'prop-P856',
    ]
    actual_tag_ids = list(batch.tag_ids)
    self.assertEqual(expected_tag_ids, actual_tag_ids)
def test_extract(self):
    """Check that re-extracting tags from the latest edit adds nothing new.

    After ingesting ``one_qs_batch.json``, calling ``Tag.extract`` on the
    most recent edit of the batch must yield no tags, and the batch must
    carry only the ``wbcreateclaim-create`` tag id.
    """
    Edit.ingest_jsonlines('store/testdata/one_qs_batch.json')
    batch = Batch.objects.get()
    # Most recent edit of the batch (descending timestamp order).
    last_edit = batch.edits.order_by('-timestamp')[0]

    # tag extraction on the latest edit does not return any *new* tag
    # (assertEqual, not the deprecated assertEquals alias that was removed
    # in Python 3.12).
    self.assertEqual([], [tag.id for tag in Tag.extract(last_edit)])
    self.assertEqual(['wbcreateclaim-create'], list(batch.tag_ids))
def test_tag_former_batches(self):
    """Check that ``Tag.retag_all_batches`` restores tags after deletion.

    All tags are deleted after ingesting ``one_qs_batch.json``; once
    ``Tag.retag_all_batches()`` has run, the batch must again carry the
    full list of expected tag ids.
    """
    Edit.ingest_jsonlines('store/testdata/one_qs_batch.json')

    # Wipe every tag, then rebuild them from the stored batches.
    Tag.objects.all().delete()
    Tag.retag_all_batches()

    expected_tag_ids = [
        'wbcreateclaim-create',
        'prop-P18',
        'prop-P2534',
        'prop-P3896',
        'prop-P856',
    ]
    batch = Batch.objects.get()
    self.assertEqual(expected_tag_ids, list(batch.tag_ids))
def setUpClass(cls):
    """Create the class-level fixtures: one batch, a client and two users.

    Ingests ``one_or_batch.json`` and stores the resulting batch on
    ``cls.batch``, creates a ``Client`` on ``cls.client``, and saves two
    users, ``cls.mary`` and ``cls.john``. Only mary gets a
    ``UserSocialAuth`` record (provider ``wikidata``); john has none.

    NOTE(review): no ``@classmethod`` decorator or ``super().setUpClass()``
    call is visible here -- confirm both against the enclosing class.
    """
    Edit.ingest_jsonlines('store/testdata/one_or_batch.json')
    cls.batch = Batch.objects.get()
    cls.client = Client()

    # Mary: a user with an associated OAuth record.
    cls.mary = User(username='******', password='******')
    cls.mary.save()
    oauth_extra_data = {
        "access_token": {
            "oauth_token": "12345",
            "oauth_token_secret": "67890",
        },
        "auth_time": 1520695332,
    }
    UserSocialAuth(
        user=cls.mary,
        provider='wikidata',
        uid='39834872',
        extra_data=oauth_extra_data,
    ).save()

    # John: a user without any OAuth record.
    cls.john = User(username='******', password='******')
    cls.john.save()
def test_deletion_batch(self):
    """Check that ``deletion_edit.json`` produces one batch tagged ``delete``."""
    fixture = 'store/testdata/deletion_edit.json'
    Edit.ingest_jsonlines(fixture)

    batch_count = Batch.objects.count()
    self.assertEqual(1, batch_count)

    batch = Batch.objects.get()
    tag_ids = list(batch.tag_ids)
    self.assertEqual(['delete'], tag_ids)
def test_extract_editentity(self):
    """Check that the ``one_or_batch.json`` batch is tagged ``wbeditentity-update``."""
    fixture = 'store/testdata/one_or_batch.json'
    Edit.ingest_jsonlines(fixture)

    batch = Batch.objects.get()
    expected_tag_ids = ['wbeditentity-update']
    self.assertEqual(expected_tag_ids, list(batch.tag_ids))
def setUp(self):
    """Ingest the ``data/batches_to_inspect.json`` fixture before each test.

    The fixture path is resolved relative to the directory containing this
    module, which is also stored on ``self.testdir``.
    """
    module_path = os.path.abspath(__file__)
    self.testdir = os.path.dirname(module_path)
    fixture_path = os.path.join(
        self.testdir, 'data', 'batches_to_inspect.json')
    Edit.ingest_jsonlines(fixture_path)
# When resuming, the stream is requested from this far *before* the timestamp
# of the latest stored edit (presumably so that edits around the cut-off are
# not missed -- confirm against WikiEditStream semantics).
LOOKBEHIND_OFFSET = timedelta(minutes=5)


def format_start_message(fetch_from):
    """Return the start-up message describing where streaming begins.

    :param fetch_from: object with an ``isoformat()`` method (a datetime)
        to resume from, or ``None`` to start from the current position.
    :returns: ``'Starting from offset <iso timestamp>'``, or
        ``'Starting from offset now'`` when ``fetch_from`` is ``None``.
    """
    # Build the offset first: writing `'...%s' % x if cond else 'now'` inline
    # binds as `('...%s' % x) if cond else 'now'` and drops the prefix.
    if fetch_from is not None:
        offset = fetch_from.isoformat()
    else:
        offset = 'now'
    return 'Starting from offset %s' % offset


if __name__ == '__main__':
    # Django must be configured before any model can be imported.
    os.environ.setdefault("DJANGO_SETTINGS_MODULE", "editgroups.settings")
    import django
    django.setup()

    from store.stream import WikiEditStream
    from store.utils import grouper
    from store.models import Edit

    print('Listening to edits...')
    s = WikiEditStream()

    try:
        latest_edit_seen = Edit.objects.order_by('-timestamp')[0].timestamp
    except IndexError:
        # No edit stored yet: no offset to resume from.
        fetch_from = None
    else:
        fetch_from = latest_edit_seen - LOOKBEHIND_OFFSET
    print(format_start_message(fetch_from))

    # Ingest the stream in groups of 50 edits, logging progress every
    # 50 groups.
    for i, batch in enumerate(grouper(s.stream(fetch_from), 50)):
        if i % 50 == 0:
            print('batch %d' % i)
            print(datetime.fromtimestamp(batch[0].get('timestamp')))
            sys.stdout.flush()
        Edit.ingest_edits(batch)

    print('End of stream')