def test_mix_sources_2(self):
    """Mixes two named sources and checks that two sources come back.

    ArtifactSource.get_multiple_by_name and ArtifactContent.all are stubbed
    with mox; the content stub alternates between the two sources.
    """
    source_names = ("source/1", "source/2")
    sources = [MockEntity(key_name=name, name=name) for name in source_names]

    def _content(id):
        # toggles between sources: even ids use the first, odd ids the second
        source = sources[id % 2]
        id_str = str(id)
        return MockEntity(key_name=id_str, guid=id_str, body=generate_phrase(5),
                          source=source, source_name=source.name)

    moxer = mox.Mox()
    try:
        moxer.StubOutWithMock(ArtifactSource, "get_multiple_by_name")
        moxer.StubOutWithMock(ArtifactContent, "all")

        ArtifactSource.get_multiple_by_name(source_names).AndReturn(sources)

        # performs ArtifactContent query for each source
        for _ in source_names:
            ArtifactContent.all().AndReturn(
                MockQuery(xrange(12), create_call=_content))

        moxer.ReplayAll()
        mixed_sources, text = new_default_mixer().mix_sources(source_names)
        self.assertEqual(len(mixed_sources), len(source_names))
        moxer.VerifyAll()
    finally:
        # this Mox instance is local to the test, so nothing else can restore
        # the stubbed class attributes; without this they leak into later tests
        moxer.UnsetStubs()
def test_mix_sources_1(self):
    """Mixes a single named source and checks that one source comes back.

    ArtifactSource.get_multiple_by_name and ArtifactContent.all are stubbed
    with mox; every generated content entity belongs to the one source.
    """
    source_name = "source/1"
    source = MockEntity(key_name=source_name, name=source_name)

    def _content(id):
        id_str = str(id)
        return MockEntity(key_name=id_str, guid=id_str, body=generate_phrase(5),
                          source=source, source_name=source.name)

    moxer = mox.Mox()
    try:
        moxer.StubOutWithMock(ArtifactSource, "get_multiple_by_name")
        moxer.StubOutWithMock(ArtifactContent, "all")

        ArtifactSource.get_multiple_by_name(source_name).AndReturn((source, ))
        ArtifactContent.all().AndReturn(
            MockQuery(xrange(12), create_call=_content))

        moxer.ReplayAll()
        sources, text = new_default_mixer().mix_sources(source_name)
        self.assertEqual(len(sources), 1)
        moxer.VerifyAll()
    finally:
        # this Mox instance is local to the test, so nothing else can restore
        # the stubbed class attributes; without this they leak into later tests
        moxer.UnsetStubs()
def test_create(self):
    """Records the collaborator calls expected from ArtifactAccessor.create,
    replays them, and verifies that every recorded call happened.
    """
    create_kw = self.__keywords()
    source_name = create_kw['source']
    content_type = create_kw['content_type']
    body = create_kw['body']

    # every collaborator is replaced by a MockAnything stub
    stub_targets = (
        (ArtifactInfo, "all"),
        (ArtifactSource, "get_or_create"),
        (Counters, "source_counter"),
        (ArtifactInfo, "create"),
        (ArtifactContent, "create"),
    )
    for owner, attr in stub_targets:
        self.moxer.StubOutWithMock(owner, attr, use_mock_anything=True)

    source = MockEntity(key_name=source_name)
    ArtifactInfo.all(keys_only=True).AndReturn(MockQuery(None, keys_only=True))
    ArtifactSource.get_or_create(source_name).AndReturn(source)

    counter = self.moxer.CreateMockAnything()
    Counters.source_counter(source_name).AndReturn(counter)
    counter.increment()

    # TODO: I wish I could ignore keywords
    md5 = ArtifactAccessor._content_md5(source_name, content_type, body)
    info_key = MockKey(name=self.test_id)
    ArtifactInfo.create(
        source=source,
        source_name=source_name,
        content_type=content_type,
        content_md5=md5).AndReturn(info_key)
    ArtifactContent.create(
        info_key.name(),
        source=source,
        source_name=source_name,
        info=info_key,
        body=body).AndReturn(MockKey(name=self.test_id))

    self.moxer.ReplayAll()
    created_info, created_content, created_source = ArtifactAccessor.create(**create_kw)
    print('info:%s, content:%s, source:%s' % (created_info, created_content, created_source))
    self.moxer.VerifyAll()
def test_mix_sources_2(self):
    """Mixes two named sources and checks that two sources come back.

    ArtifactSource.get_multiple_by_name and ArtifactContent.all are stubbed
    with mox; the content stub alternates between the two sources.
    """
    source_names = ("source/1", "source/2")
    sources = [MockEntity(key_name=name, name=name) for name in source_names]

    def _content(id):
        # toggles between sources: even ids use the first, odd ids the second
        source = sources[id % 2]
        id_str = str(id)
        return MockEntity(key_name=id_str, guid=id_str, body=generate_phrase(5),
                          source=source, source_name=source.name)

    moxer = mox.Mox()
    try:
        moxer.StubOutWithMock(ArtifactSource, "get_multiple_by_name")
        moxer.StubOutWithMock(ArtifactContent, "all")

        ArtifactSource.get_multiple_by_name(source_names).AndReturn(sources)

        # performs ArtifactContent query for each source
        for _ in source_names:
            ArtifactContent.all().AndReturn(
                MockQuery(xrange(12), create_call=_content))

        moxer.ReplayAll()
        mixed_sources, text = new_default_mixer().mix_sources(source_names)
        self.assertEqual(len(mixed_sources), len(source_names))
        moxer.VerifyAll()
    finally:
        # this Mox instance is local to the test, so nothing else can restore
        # the stubbed class attributes; without this they leak into later tests
        moxer.UnsetStubs()
def test_delete_by_name_missing_source(self):
    """delete_by_name raises NotFoundException when the lookup yields None."""
    missing_name = "mhawthorne"
    self.m.StubOutWithMock(ArtifactSource, "get_by_name")

    # the lookup comes back empty
    ArtifactSource.get_by_name(missing_name).AndReturn(None)

    self.m.ReplayAll()
    self.assertRaises(
        NotFoundException,
        ArtifactSourceAccessor.delete_by_name,
        missing_name)
    self.m.VerifyAll()
def test_mix_random_limit_sources_1(self):
    """Asks the mixer for one random source and checks that one comes back.

    ArtifactSource.all and ArtifactContent.all are stubbed with mox and
    return MockQuery objects built from the module-level _source and
    _content factories.
    """
    moxer = mox.Mox()
    try:
        moxer.StubOutWithMock(ArtifactSource, "all")
        moxer.StubOutWithMock(ArtifactContent, "all")

        ArtifactSource.all().AndReturn(
            MockQuery(xrange(4), create_call=_source))
        ArtifactContent.all().AndReturn(
            MockQuery(xrange(12), create_call=_content))

        moxer.ReplayAll()
        sources, text = new_default_mixer().mix_random_limit_sources(1)
        self.assertEqual(len(sources), 1)
        moxer.VerifyAll()
    finally:
        # this Mox instance is local to the test, so nothing else can restore
        # the stubbed class attributes; without this they leak into later tests
        moxer.UnsetStubs()
def test_delete_by_name_deletes_source_with_referencing_feed(self):
    """delete_by_name raises ConflictingDataException when the feed lookup
    returns a feed for the source.
    """
    for owner, attr in ((ArtifactSource, "get_by_name"),
                        (FeedAccessor, "get_by_source_name")):
        self.m.StubOutWithMock(owner, attr)

    source_name = "mhawthorne"
    ArtifactSource.get_by_name(source_name).AndReturn(
        MockEntity(key_name=source_name))

    # the feed lookup finds a feed that points at the source
    referencing_feed = MockEntity(key=source_name, url="http://real.ly")
    FeedAccessor.get_by_source_name(
        source_name, return_none=True).AndReturn(referencing_feed)

    self.m.ReplayAll()
    self.assertRaises(
        ConflictingDataException,
        ArtifactSourceAccessor.delete_by_name,
        source_name)
    self.m.VerifyAll()
def test_create(self):
    """Records the collaborator calls expected from ArtifactAccessor.create,
    replays them, and verifies that every recorded call happened.
    """
    create_kw = self.__keywords()
    source_name = create_kw['source']
    content_type = create_kw['content_type']
    body = create_kw['body']

    # every collaborator is replaced by a MockAnything stub
    stub_targets = (
        (ArtifactInfo, "all"),
        (ArtifactSource, "get_or_create"),
        (Counters, "source_counter"),
        (ArtifactInfo, "create"),
        (ArtifactContent, "create"),
    )
    for owner, attr in stub_targets:
        self.moxer.StubOutWithMock(owner, attr, use_mock_anything=True)

    source = MockEntity(key_name=source_name)
    ArtifactInfo.all(keys_only=True).AndReturn(MockQuery(None, keys_only=True))
    ArtifactSource.get_or_create(source_name).AndReturn(source)

    counter = self.moxer.CreateMockAnything()
    Counters.source_counter(source_name).AndReturn(counter)
    counter.increment()

    # TODO: I wish I could ignore keywords
    md5 = ArtifactAccessor._content_md5(source_name, content_type, body)
    info_key = MockKey(name=self.test_id)
    ArtifactInfo.create(
        source=source,
        source_name=source_name,
        content_type=content_type,
        content_md5=md5).AndReturn(info_key)
    ArtifactContent.create(
        info_key.name(),
        source=source,
        source_name=source_name,
        info=info_key,
        body=body).AndReturn(MockKey(name=self.test_id))

    self.moxer.ReplayAll()
    created_info, created_content, created_source = ArtifactAccessor.create(**create_kw)
    print('info:%s, content:%s, source:%s' % (created_info, created_content, created_source))
    self.moxer.VerifyAll()
def test_delete_by_name_deletes_source_with_referencing_feed(self):
    """delete_by_name raises ConflictingDataException when the feed lookup
    returns a feed for the source.
    """
    for owner, attr in ((ArtifactSource, "get_by_name"),
                        (FeedAccessor, "get_by_source_name")):
        self.m.StubOutWithMock(owner, attr)

    source_name = "mhawthorne"
    ArtifactSource.get_by_name(source_name).AndReturn(
        MockEntity(key_name=source_name))

    # the feed lookup finds a feed that points at the source
    referencing_feed = MockEntity(key=source_name, url="http://real.ly")
    FeedAccessor.get_by_source_name(
        source_name, return_none=True).AndReturn(referencing_feed)

    self.m.ReplayAll()
    self.assertRaises(
        ConflictingDataException,
        ArtifactSourceAccessor.delete_by_name,
        source_name)
    self.m.VerifyAll()
def test_mix_random_limit_sources_1(self):
    """Asks the mixer for one random source and checks that one comes back.

    ArtifactSource.all and ArtifactContent.all are stubbed with mox and
    return MockQuery objects built from the module-level _source and
    _content factories.
    """
    moxer = mox.Mox()
    try:
        moxer.StubOutWithMock(ArtifactSource, "all")
        moxer.StubOutWithMock(ArtifactContent, "all")

        ArtifactSource.all().AndReturn(
            MockQuery(xrange(4), create_call=_source))
        ArtifactContent.all().AndReturn(
            MockQuery(xrange(12), create_call=_content))

        moxer.ReplayAll()
        sources, text = new_default_mixer().mix_random_limit_sources(1)
        self.assertEqual(len(sources), 1)
        moxer.VerifyAll()
    finally:
        # this Mox instance is local to the test, so nothing else can restore
        # the stubbed class attributes; without this they leak into later tests
        moxer.UnsetStubs()
def mix_sources(self, *source_names):
    """
    Mixes content from the named sources.

    params:
        source_names - names passed straight through to
            ArtifactSource.get_multiple_by_name
    returns:
        the result of __random_content_for_sources for the looked-up sources
    raises:
        NotFoundException - if any looked-up source is None
    """
    sources = ArtifactSource.get_multiple_by_name(*source_names)

    # any() does not depend on filter() returning a list, and stops at the
    # first missing source
    if any(source is None for source in sources):
        # source_names is the varargs tuple; formatting it directly with a
        # single %s raises TypeError unless exactly one name was given, which
        # would hide the NotFoundException.  Each name is formatted on its own
        # so the single-name message is unchanged.
        requested = ", ".join("%s" % (name,) for name in source_names)
        raise NotFoundException("1 or more sources not found: %s" % requested)

    return self.__random_content_for_sources(sources)
def delete_by_name(cls, source_name):
    """
    Deletes the named ArtifactSource along with its ArtifactInfo and
    ArtifactContent records.

    params:
        source_name - name of the ArtifactSource to delete
    raises:
        NotFoundException - if no source is found for source_name
        ConflictingDataException - if a Feed references the source
    """
    source = ArtifactSource.get_by_name(source_name)
    logging.debug("delete_by_name source: %s" % source)
    if not source:
        raise NotFoundException('ArtifactSource %s' % source_name)

    # checks for feeds linked to source
    feed = FeedAccessor.get_by_source_name(source_name, return_none=True)
    if feed:
        raise ConflictingDataException(
            "ArtifactSource '%s' is referenced by Feed '%s'" % (source_name, feed.url))

    # finds and deletes artifacts for source
    info_keys = ArtifactInfo.find_by_source(source, keys_only=True)
    content_keys = ArtifactContent.find_by_source(source)

    # zips keys to delete info/content pairs back-to-back
    # (zip stops at the shorter of the two, so surplus entries survive this loop)
    for artifact_keys in zip(info_keys, content_keys):
        db.delete(artifact_keys)

    # deletes extras if info/content sizes don't match
    # (this would be a data bug somewhere)
    # NOTE(review): assumes count() and slicing behave consistently after the
    # deletes above -- confirm against the datastore query semantics
    content_len = content_keys.count()
    info_len = info_keys.count()
    if content_len > info_len:
        # more content than info: the surplus content starts at index info_len.
        # This branch used to test content_len < info_len, which duplicated the
        # elif below (making it unreachable) and deleted an empty slice.
        db.delete(content_keys[info_len:])
    elif info_len > content_len:
        db.delete(info_keys[content_len:])

    # deletes source
    db.delete(source)
def post(self, **kw):
    """
    Resets every source's counter to the number of ArtifactInfo records found
    for it, deletes sources that have no artifacts and no feed, and writes a
    per-source report as JSON.

    Report entry keys: 'old' (counter value before the reset), 'feed' (url of
    the linked feed, if any), 'deleted' (True when the source was removed) and
    'new' (the artifact count, only when non-zero).
    """
    helper = RequestHelper(self)
    report = {}

    for source in ArtifactSource.all():
        # walk the query to count the artifacts
        actual = sum(1 for _ in ArtifactInfo.find_by_source(source))

        counter = Counters.source_counter(source.name)
        entry = {'old': counter.count()}
        counter.set(actual)

        # if source is linked to a feed, I can't delete it
        feed = Feed.get_by_source(source, return_none=True)
        if feed:
            entry['feed'] = feed.url
        elif not actual:
            source.delete()
            entry['deleted'] = True

        if actual:
            entry['new'] = actual

        report[source.name] = entry

    helper.write_json(report)
def post(self, **kw):
    """
    Resets every source's counter to the number of ArtifactInfo records found
    for it, deletes sources that have no artifacts and no feed, and writes a
    per-source report as JSON.

    Report entry keys: 'old' (counter value before the reset), 'feed' (url of
    the linked feed, if any), 'deleted' (True when the source was removed) and
    'new' (the artifact count, only when non-zero).
    """
    helper = RequestHelper(self)
    report = {}

    for source in ArtifactSource.all():
        # walk the query to count the artifacts
        actual = sum(1 for _ in ArtifactInfo.find_by_source(source))

        counter = Counters.source_counter(source.name)
        entry = {'old': counter.count()}
        counter.set(actual)

        # if source is linked to a feed, I can't delete it
        feed = Feed.get_by_source(source, return_none=True)
        if feed:
            entry['feed'] = feed.url
        elif not actual:
            source.delete()
            entry['deleted'] = True

        if actual:
            entry['new'] = actual

        report[source.name] = entry

    helper.write_json(report)
def delete_by_name(cls, source_name):
    """
    Deletes the named ArtifactSource along with its ArtifactInfo and
    ArtifactContent records.

    params:
        source_name - name of the ArtifactSource to delete
    raises:
        NotFoundException - if no source is found for source_name
        ConflictingDataException - if a Feed references the source
    """
    source = ArtifactSource.get_by_name(source_name)
    logging.debug("delete_by_name source: %s" % source)
    if not source:
        raise NotFoundException("ArtifactSource %s" % source_name)

    # checks for feeds linked to source
    feed = FeedAccessor.get_by_source_name(source_name, return_none=True)
    if feed:
        raise ConflictingDataException(
            "ArtifactSource '%s' is referenced by Feed '%s'" % (source_name, feed.url))

    # finds and deletes artifacts for source
    info_keys = ArtifactInfo.find_by_source(source, keys_only=True)
    content_keys = ArtifactContent.find_by_source(source)

    # zips keys to delete info/content pairs back-to-back
    # (zip stops at the shorter of the two, so surplus entries survive this loop)
    for artifact_keys in zip(info_keys, content_keys):
        db.delete(artifact_keys)

    # deletes extras if info/content sizes don't match
    # (this would be a data bug somewhere)
    # NOTE(review): assumes count() and slicing behave consistently after the
    # deletes above -- confirm against the datastore query semantics
    content_len = content_keys.count()
    info_len = info_keys.count()
    if content_len > info_len:
        # more content than info: the surplus content starts at index info_len.
        # This branch used to test content_len < info_len, which duplicated the
        # elif below (making it unreachable) and deleted an empty slice.
        db.delete(content_keys[info_len:])
    elif info_len > content_len:
        db.delete(info_keys[content_len:])

    # deletes source
    db.delete(source)
def find_artifact_counts(cls, **kw):
    """
    Collects three counts for every ArtifactSource.

    returns:
        dict keyed by source name; each value is a dict with 'counter' (the
        source counter's count), 'info' (cls.count_infos) and 'content'
        (cls.count_content)
    """
    def _counts_for(source):
        # gathers the three figures for a single source
        source_counter = Counters.source_counter(source.name)
        return {
            'counter': source_counter.count(),
            'info': cls.count_infos(source),
            'content': cls.count_content(source),
        }

    return dict(
        (source.name, _counts_for(source)) for source in ArtifactSource.all())
def test_mix_sources_1(self):
    """Mixes a single named source and checks that one source comes back.

    ArtifactSource.get_multiple_by_name and ArtifactContent.all are stubbed
    with mox; every generated content entity belongs to the one source.
    """
    source_name = "source/1"
    source = MockEntity(key_name=source_name, name=source_name)

    def _content(id):
        id_str = str(id)
        return MockEntity(key_name=id_str, guid=id_str, body=generate_phrase(5),
                          source=source, source_name=source.name)

    moxer = mox.Mox()
    try:
        moxer.StubOutWithMock(ArtifactSource, "get_multiple_by_name")
        moxer.StubOutWithMock(ArtifactContent, "all")

        ArtifactSource.get_multiple_by_name(source_name).AndReturn((source, ))
        ArtifactContent.all().AndReturn(
            MockQuery(xrange(12), create_call=_content))

        moxer.ReplayAll()
        sources, text = new_default_mixer().mix_sources(source_name)
        self.assertEqual(len(sources), 1)
        moxer.VerifyAll()
    finally:
        # this Mox instance is local to the test, so nothing else can restore
        # the stubbed class attributes; without this they leak into later tests
        moxer.UnsetStubs()
def test_words(words, text_call):
    """
    Runs the default mixer once per word against a single stubbed content item
    and hands the result to a caller-supplied check.

    params:
        words - iterable of words; each one is passed to the module-level
            _content factory to build the only content item for that run
        text_call - callable invoked as text_call(word, source_text, mixed_text)
    """
    # iterate the words directly; the index was only used to look the word up
    for word in words:
        moxer = mox.Mox()
        try:
            moxer.StubOutWithMock(ArtifactSource, "all")
            moxer.StubOutWithMock(ArtifactContent, "all")

            ArtifactSource.all().AndReturn(
                MockQuery(xrange(4), create_call=_source))

            source_content = _content(word)
            source_text = source_content.body
            ArtifactContent.all().AndReturn(
                MockQuery(xrange(1), create_call=lambda id: source_content))

            moxer.ReplayAll()
            mixer = new_default_mixer()
            sources, mixed_text = mixer.mix_random_limit_sources(1)
            print("('%s') '%s' -> '%s'" % (word, source_text, mixed_text))
            text_call(word, source_text, mixed_text)
            moxer.VerifyAll()
        finally:
            # a fresh Mox is created per word; restore the real attributes
            # so the next iteration (and later tests) do not stub a stub
            moxer.UnsetStubs()
def test_delete_by_name_deletes_source_with_no_referencing_feed(self):
    """delete_by_name deletes the source when the feed lookup returns None and
    both artifact queries are empty.
    """
    stub_targets = (
        (ArtifactSource, "get_by_name"),
        (FeedAccessor, "get_by_source_name"),
        (ArtifactInfo, "find_by_source"),
        (ArtifactContent, "find_by_source"),
        (db, "delete"),
        (memcache, "delete"),
    )
    for owner, attr in stub_targets:
        self.m.StubOutWithMock(owner, attr)

    source_name = "mhawthorne"
    source = MockEntity(key_name=source_name)

    ArtifactSource.get_by_name(source_name).AndReturn(source)
    # no feed references the source
    FeedAccessor.get_by_source_name(source_name, return_none=True).AndReturn(None)
    # both artifact queries come back empty
    ArtifactInfo.find_by_source(source, keys_only=True).AndReturn(
        MockQuery(range(0, 0)))
    ArtifactContent.find_by_source(source).AndReturn(
        MockQuery(range(0, 0)))
    db.delete(source)
    memcache.delete(IsA(str)).AndReturn(1)

    self.m.ReplayAll()
    ArtifactSourceAccessor.delete_by_name(source_name)
    self.m.VerifyAll()
def test_delete_by_name_deletes_source_with_no_referencing_feed(self):
    """delete_by_name deletes the source when the feed lookup returns None and
    both artifact queries are empty.
    """
    stub_targets = (
        (ArtifactSource, "get_by_name"),
        (FeedAccessor, "get_by_source_name"),
        (ArtifactInfo, "find_by_source"),
        (ArtifactContent, "find_by_source"),
        (db, "delete"),
        (memcache, "delete"),
    )
    for owner, attr in stub_targets:
        self.m.StubOutWithMock(owner, attr)

    source_name = "mhawthorne"
    source = MockEntity(key_name=source_name)

    ArtifactSource.get_by_name(source_name).AndReturn(source)
    # no feed references the source
    FeedAccessor.get_by_source_name(source_name, return_none=True).AndReturn(None)
    # both artifact queries come back empty
    ArtifactInfo.find_by_source(source, keys_only=True).AndReturn(
        MockQuery(range(0, 0)))
    ArtifactContent.find_by_source(source).AndReturn(
        MockQuery(range(0, 0)))
    db.delete(source)
    memcache.delete(IsA(str)).AndReturn(1)

    self.m.ReplayAll()
    ArtifactSourceAccessor.delete_by_name(source_name)
    self.m.VerifyAll()
def test_words(words, text_call):
    """
    Runs the default mixer once per word against a single stubbed content item
    and hands the result to a caller-supplied check.

    params:
        words - iterable of words; each one is passed to the module-level
            _content factory to build the only content item for that run
        text_call - callable invoked as text_call(word, source_text, mixed_text)
    """
    # iterate the words directly; the index was only used to look the word up
    for word in words:
        moxer = mox.Mox()
        try:
            moxer.StubOutWithMock(ArtifactSource, "all")
            moxer.StubOutWithMock(ArtifactContent, "all")

            ArtifactSource.all().AndReturn(
                MockQuery(xrange(4), create_call=_source))

            source_content = _content(word)
            source_text = source_content.body
            ArtifactContent.all().AndReturn(
                MockQuery(xrange(1), create_call=lambda id: source_content))

            moxer.ReplayAll()
            mixer = new_default_mixer()
            sources, mixed_text = mixer.mix_random_limit_sources(1)
            print("('%s') '%s' -> '%s'" % (word, source_text, mixed_text))
            text_call(word, source_text, mixed_text)
            moxer.VerifyAll()
        finally:
            # a fresh Mox is created per word; restore the real attributes
            # so the next iteration (and later tests) do not stub a stub
            moxer.UnsetStubs()
def _create(cls, source_name, body, content_md5, **kw):
    """
    Saves the source (if new), an ArtifactInfo and an ArtifactContent, then
    increments the source counter.

    params:
        source_name - name of the owning source
        body - content body stored on the ArtifactContent
        content_md5 - digest stored on the ArtifactInfo
        kw - extra keywords forwarded to ArtifactInfo.create
    returns:
        (info key, content key, source key)
    """
    # saves source, if unique
    source = ArtifactSource.get_or_create(source_name)

    # saves ArtifactInfo
    info_key = ArtifactInfo.create(
        content_md5=content_md5,
        source=source,
        source_name=source_name,
        **kw)

    # saves ArtifactContent, keyed by the info key's name
    content_fields = {
        'body': body,
        'source': source,
        'source_name': source_name,
        'info': info_key,
    }
    content_key = ArtifactContent.create(info_key.name(), **content_fields)

    # bump source counter
    # it's important to do this AFTER the artifacts are saved
    source_counter = Counters.source_counter(source_name)
    source_counter.increment()

    return info_key, content_key, source
def mix_random_limit_sources(self, source_count, degrade=False):
    """
    Mixes content from randomly chosen sources.

    params:
        source_count - number of sources to mix
        degrade - if True, mix even if source_count sources cannot be found
    returns:
        ((sources), mixed_content)
    raises:
        MissingDataException - fewer than source_count sources exist and
            degrade is not set
    """
    all_sources = ArtifactSource.all()
    available = all_sources.count()

    if source_count > available:
        if not degrade:
            raise MissingDataException(
                "insufficient ArtifactSources found (%d < %d)" % (available, source_count))
        logging.debug(
            "mix_random_limit_sources requested %d sources; degrading to %d"
            % (source_count, available))
        source_count = available

    # choose random sources
    chosen = random_query_results(all_sources, source_count)
    return self.__random_content_for_sources(chosen)
def mix_random_limit_sources(self, source_count, degrade=False):
    """
    Mixes content from randomly chosen sources.

    params:
        source_count - number of sources to mix
        degrade - if True, mix even if source_count sources cannot be found
    returns:
        ((sources), mixed_content)
    raises:
        MissingDataException - fewer than source_count sources exist and
            degrade is not set
    """
    all_sources = ArtifactSource.all()
    available = all_sources.count()

    if source_count > available:
        if not degrade:
            raise MissingDataException(
                "insufficient ArtifactSources found (%d < %d)" % (available, source_count))
        logging.debug(
            "mix_random_limit_sources requested %d sources; degrading to %d"
            % (source_count, available))
        source_count = available

    # choose random sources
    chosen = random_query_results(all_sources, source_count)
    return self.__random_content_for_sources(chosen)
def _create(cls, source_name, body, content_md5, **kw):
    """
    Saves the source (if new), an ArtifactInfo and an ArtifactContent, then
    increments the source counter.

    params:
        source_name - name of the owning source
        body - content body stored on the ArtifactContent
        content_md5 - digest stored on the ArtifactInfo
        kw - extra keywords forwarded to ArtifactInfo.create
    returns:
        (info key, content key, source key)
    """
    # saves source, if unique
    source = ArtifactSource.get_or_create(source_name)

    # saves ArtifactInfo
    info_key = ArtifactInfo.create(
        content_md5=content_md5,
        source=source,
        source_name=source_name,
        **kw)

    # saves ArtifactContent, keyed by the info key's name
    content_fields = {
        'body': body,
        'source': source,
        'source_name': source_name,
        'info': info_key,
    }
    content_key = ArtifactContent.create(info_key.name(), **content_fields)

    # bump source counter
    # it's important to do this AFTER the artifacts are saved
    source_counter = Counters.source_counter(source_name)
    source_counter.increment()

    return info_key, content_key, source
def create(cls, source_name, **kw):
    """
    Delegates to ArtifactSource.create.

    params:
        source_name - passed through as the first argument
        kw - passed through as keyword arguments
    returns:
        whatever ArtifactSource.create returns
    """
    created = ArtifactSource.create(source_name, **kw)
    return created
def get_by_name(cls, source_name, **kw):
    """
    Delegates to ArtifactSource.get_by_name.

    params:
        source_name - passed through as the first argument
        kw - passed through as keyword arguments
    returns:
        whatever ArtifactSource.get_by_name returns
    """
    found = ArtifactSource.get_by_name(source_name, **kw)
    return found
def get(self, **kw):
    """
    Writes a JSON list holding source_hash(s) for every source returned by
    ArtifactSource.all().fetch(100, 0).
    """
    helper = RequestHelper(self)
    # NOTE(review): presumably fetch(100, 0) means limit 100, offset 0 -- confirm
    fetched = ArtifactSource.all().fetch(100, 0)
    helper.write_json([source_hash(source) for source in fetched])