def test_too_much_write(self):
    # writing too much data should result in failure
    expected_length = 16
    content = b'0' * 32
    blob_hash = random_lbry_hash()
    blob_file = BlobFile(self.blob_dir, blob_hash, expected_length)
    writer, finished_d = blob_file.open_for_writing(peer=1)
    writer.write(content)
    yield self.assertFailure(finished_d, InvalidDataError)
async def make_sd_blob(self):
    sd_hash = self.calculate_sd_hash()
    sd_data = self.as_json()
    sd_blob = BlobFile(self.loop, self.blob_dir, sd_hash, len(sd_data))
    if not sd_blob.get_is_verified():
        writer = sd_blob.open_for_writing()
        writer.write(sd_data)
    await sd_blob.verified.wait()
    await sd_blob.close()
    return sd_blob
def test_bad_hash(self):
    # test a write that should fail because the content's hash
    # does not equal the blob_hash
    length = 64
    content = b'0' * length
    blob_hash = random_lbry_hash()
    blob_file = BlobFile(self.blob_dir, blob_hash, length)
    writer, finished_d = blob_file.open_for_writing(peer=1)
    writer.write(content)
    yield self.assertFailure(finished_d, InvalidDataError)
def _get_blob(self, blob_hash: str, length: typing.Optional[int] = None):
    if self.config.save_blobs:
        return BlobFile(self.loop, blob_hash, length, self.blob_completed, self.blob_dir)
    else:
        if is_valid_blobhash(blob_hash) and os.path.isfile(os.path.join(self.blob_dir, blob_hash)):
            return BlobFile(self.loop, blob_hash, length, self.blob_completed, self.blob_dir)
        return BlobBuffer(self.loop, blob_hash, length, self.blob_completed, self.blob_dir)
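# A minimal sketch (not from the source) of what the branching above means for
# callers; `manager` stands in for an instance of the owning class and is
# hypothetical. With save_blobs enabled, or when a valid blob already exists on
# disk, the returned blob is file-backed; otherwise it lives only in memory.
#
#   blob = manager._get_blob(blob_hash)
#   assert isinstance(blob, (BlobFile, BlobBuffer))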
def test_delete_fail(self):
    # deletes should fail if being written to
    blob_file = BlobFile(self.blob_dir, self.fake_content_hash, self.fake_content_len)
    writer, finished_d = blob_file.open_for_writing(peer=1)
    with self.assertRaises(ValueError):
        blob_file.delete()
    writer.write(self.fake_content)
    writer.close()

    # deletes should fail if being read and not closed
    blob_file = BlobFile(self.blob_dir, self.fake_content_hash, self.fake_content_len)
    self.assertTrue(blob_file.verified)
    r = blob_file.open_for_reading()  # must be assigned to a variable, otherwise it gets garbage collected
    with self.assertRaises(ValueError):
        blob_file.delete()
def setUp(self):
    self.db_dir, self.blob_dir = mk_db_and_blob_dir()
    self.blob_manager = MagicMock()
    self.client = MagicMock()
    self.blob_hash = ('d17272b17a1ad61c4316ac13a651c2b0952063214a81333e'
                      '838364b01b2f07edbd165bb7ec60d2fb2f337a2c02923852')
    self.blob = BlobFile(self.blob_dir, self.blob_hash)
    self.blob_manager.get_blob.side_effect = lambda _: self.blob
    self.response = MagicMock(code=200, length=400)
    self.client.get.side_effect = lambda uri: defer.succeed(self.response)
    self.downloader = HTTPBlobDownloader(
        self.blob_manager, [self.blob_hash], ['server1'], self.client, retry=False)
    self.downloader.interval = 0
def get_blob(self, blob_hash, length: typing.Optional[int] = None):
    if blob_hash in self.blobs:
        if length and self.blobs[blob_hash].length is None:
            self.blobs[blob_hash].set_length(length)
    else:
        self.blobs[blob_hash] = BlobFile(self.loop, self.blob_dir, blob_hash, length, self.blob_completed)
    return self.blobs[blob_hash]
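# Hedged usage sketch for get_blob() above; `manager` and the names below are
# illustrative, not from the source. Repeated calls for the same hash return
# the cached instance, and a later call that supplies a length backfills it
# onto that cached blob.
#
#   b1 = manager.get_blob(blob_hash)       # cached, length still None
#   b2 = manager.get_blob(blob_hash, 16)   # same object; set_length(16) applied
#   assert b1 is b2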
def test_good_write_and_read(self):
    # test a write that should succeed
    blob_file = BlobFile(self.blob_dir, self.fake_content_hash, self.fake_content_len)
    self.assertFalse(blob_file.verified)

    writer, finished_d = blob_file.open_for_writing(peer=1)
    writer.write(self.fake_content)
    writer.close()
    out = yield finished_d
    self.assertIsInstance(out, BlobFile)
    self.assertTrue(out.verified)
    self.assertEqual(self.fake_content_len, out.get_length())

    # read from the instance that was written to, and verify the content
    f = blob_file.open_for_reading()
    c = f.read()
    self.assertEqual(c, self.fake_content)
    self.assertFalse(out.is_downloading())

    # read from a newly declared instance, and verify the content
    del blob_file
    blob_file = BlobFile(self.blob_dir, self.fake_content_hash, self.fake_content_len)
    self.assertTrue(blob_file.verified)
    f = blob_file.open_for_reading()
    self.assertEqual(1, blob_file.readers)
    c = f.read()
    self.assertEqual(c, self.fake_content)

    # close the reader
    f.close()
    self.assertEqual(0, blob_file.readers)
def creator_finished(self, blob_creator, should_announce):
    log.debug("blob_creator.blob_hash: %s", blob_creator.blob_hash)
    if blob_creator.blob_hash is None:
        raise Exception("Blob hash is None")
    if blob_creator.blob_hash in self.blobs:
        raise Exception("Creator finished for blob that is already marked as completed")
    if blob_creator.length is None:
        raise Exception("Blob length is not set")
    new_blob = BlobFile(self.blob_dir, blob_creator.blob_hash, blob_creator.length)
    self.blobs[blob_creator.blob_hash] = new_blob
    return self.blob_completed(new_blob, should_announce)
def test_multiple_writers(self):
    # start the first writer and write halfway, then start a second writer and write everything
    blob_hash = self.fake_content_hash
    blob_file = BlobFile(self.blob_dir, blob_hash, self.fake_content_len)
    writer_1, finished_d_1 = blob_file.open_for_writing(peer=1)
    writer_1.write(self.fake_content[:self.fake_content_len // 2])

    writer_2, finished_d_2 = blob_file.open_for_writing(peer=2)
    writer_2.write(self.fake_content)
    out_2 = yield finished_d_2
    yield self.assertFailure(finished_d_1, DownloadCanceledError)

    self.assertIsInstance(out_2, BlobFile)
    self.assertTrue(out_2.verified)
    self.assertEqual(self.fake_content_len, out_2.get_length())

    f = blob_file.open_for_reading()
    c = f.read()
    self.assertEqual(self.fake_content_len, len(c))
    self.assertEqual(bytearray(c), self.fake_content)
def _from_stream_descriptor_blob(cls, loop: asyncio.BaseEventLoop, blob_dir: str,
                                 blob: BlobFile) -> 'StreamDescriptor':
    assert os.path.isfile(blob.file_path)
    with open(blob.file_path, 'rb') as f:
        json_bytes = f.read()
    try:
        decoded = json.loads(json_bytes.decode())
    except json.JSONDecodeError:
        blob.delete()
        raise InvalidStreamDescriptorError("Does not decode as valid JSON")
    if decoded['blobs'][-1]['length'] != 0:
        raise InvalidStreamDescriptorError("Does not end with a zero-length blob.")
    if any(blob_info['length'] == 0 for blob_info in decoded['blobs'][:-1]):
        raise InvalidStreamDescriptorError("Contains zero-length data blob")
    if 'blob_hash' in decoded['blobs'][-1]:
        raise InvalidStreamDescriptorError("Stream terminator blob should not have a hash")
    if any(i != blob_info['blob_num'] for i, blob_info in enumerate(decoded['blobs'])):
        raise InvalidStreamDescriptorError("Stream contains out of order or skipped blobs")
    descriptor = cls(
        loop, blob_dir,
        binascii.unhexlify(decoded['stream_name']).decode(),
        decoded['key'],
        binascii.unhexlify(decoded['suggested_file_name']).decode(),
        [BlobInfo(info['blob_num'], info['length'], info['iv'], info.get('blob_hash'))
         for info in decoded['blobs']],
        decoded['stream_hash'],
        blob.blob_hash
    )
    if descriptor.get_stream_hash() != decoded['stream_hash']:
        raise InvalidStreamDescriptorError("Stream hash does not match stream metadata")
    return descriptor
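# Illustrative example (not from the source) of a decoded descriptor that
# satisfies the structural checks above: blob_num values are sequential from
# zero, every data blob has a nonzero length, and the stream ends with a
# hashless zero-length terminator. The hex values are dummies, so the final
# stream-hash comparison would still need real data to pass.
example_decoded = {
    'stream_name': '74657374',              # hex-encoded "test"
    'key': '30' * 32,
    'suggested_file_name': '74657374',
    'stream_hash': 'ab' * 48,
    'blobs': [
        {'blob_num': 0, 'length': 2097152, 'iv': '30' * 32, 'blob_hash': 'cd' * 48},
        {'blob_num': 1, 'length': 0, 'iv': '30' * 32},  # terminator: no blob_hash
    ],
}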
def test_delete_fail(self):
    # deletes should fail if being written to
    blob_file = BlobFile(self.blob_dir, self.fake_content_hash, self.fake_content_len)
    writer, finished_d = blob_file.open_for_writing(peer=1)
    yield self.assertFailure(blob_file.delete(), ValueError)
    writer.write(self.fake_content)
    writer.close()

    # deletes should fail if being read and not closed
    blob_file = BlobFile(self.blob_dir, self.fake_content_hash, self.fake_content_len)
    self.assertTrue(blob_file.verified)
    f = blob_file.open_for_reading()
    yield self.assertFailure(blob_file.delete(), ValueError)
def test_delete(self):
    blob_file = BlobFile(self.blob_dir, self.fake_content_hash, self.fake_content_len)
    writer, finished_d = blob_file.open_for_writing(peer=1)
    writer.write(self.fake_content)
    yield finished_d
    yield blob_file.delete()

    blob_file = BlobFile(self.blob_dir, self.fake_content_hash)
    self.assertFalse(blob_file.verified)
async def test_decode_corrupt_blob_raises_proper_exception_and_deletes_corrupt_file(self):
    loop = asyncio.get_event_loop()
    tmp_dir = tempfile.mkdtemp()
    self.addCleanup(lambda: shutil.rmtree(tmp_dir))
    sd_hash = '9313d1807551186126acc3662e74d9de29cede78d4f133349ace846273ef116b9bb86be86c54509eb84840e4b032f6b2'
    with open(os.path.join(tmp_dir, sd_hash), 'wb') as handle:
        handle.write(b'doesnt work')
    blob = BlobFile(loop, tmp_dir, sd_hash)
    self.assertTrue(blob.file_exists)
    self.assertIsNotNone(blob.length)
    with self.assertRaises(InvalidStreamDescriptorError):
        await StreamDescriptor.from_stream_descriptor_blob(loop, tmp_dir, blob)
    self.assertFalse(blob.file_exists)
    # fixme: this is an emergency PR, please move this to the blob_file tests later
    self.assertIsNone(blob.length)
async def make_sd_blob(self, blob_file_obj: typing.Optional[AbstractBlob] = None,
                       old_sort: typing.Optional[bool] = False,
                       blob_completed_callback: typing.Optional[typing.Callable[['AbstractBlob'], None]] = None):
    sd_hash = self.calculate_sd_hash() if not old_sort else self.calculate_old_sort_sd_hash()
    if not old_sort:
        sd_data = self.as_json()
    else:
        sd_data = self.old_sort_json()
    sd_blob = blob_file_obj or BlobFile(self.loop, sd_hash, len(sd_data), blob_completed_callback, self.blob_dir)
    if blob_file_obj:
        blob_file_obj.set_length(len(sd_data))
    if not sd_blob.get_is_verified():
        writer = sd_blob.get_blob_writer()
        writer.write(sd_data)
    await sd_blob.verified.wait()
    sd_blob.close()
    return sd_blob
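# Hedged usage sketch for the coroutine above; `descriptor` is assumed to be a
# StreamDescriptor instance already wired to a running event loop, which this
# snippet does not construct.
#
#   sd_blob = await descriptor.make_sd_blob()
#   assert sd_blob.get_is_verified()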
async def make_sd_blob(self, blob_file_obj: typing.Optional[BlobFile] = None,
                       old_sort: typing.Optional[bool] = False):
    sd_hash = self.calculate_sd_hash() if not old_sort else self.calculate_old_sort_sd_hash()
    if not old_sort:
        sd_data = self.as_json()
    else:
        sd_data = self.old_sort_json()
    sd_blob = blob_file_obj or BlobFile(self.loop, self.blob_dir, sd_hash, len(sd_data))
    if blob_file_obj:
        blob_file_obj.set_length(len(sd_data))
    if not sd_blob.get_is_verified():
        writer = sd_blob.open_for_writing()
        writer.write(sd_data)
    await sd_blob.verified.wait()
    sd_blob.close()
    return sd_blob
def test_multiple_writers_save_at_same_time(self):
    blob_hash = self.fake_content_hash
    blob_file = BlobFile(self.blob_dir, blob_hash, self.fake_content_len)
    writer_1, finished_d_1 = blob_file.open_for_writing(peer=1)
    writer_2, finished_d_2 = blob_file.open_for_writing(peer=2)

    blob_file.save_verified_blob(writer_1)
    # the second write should fail to save
    yield self.assertFailure(blob_file.save_verified_blob(writer_2), DownloadCanceledError)

    # schedule a close, just to leave the reactor clean
    finished_d_1.addBoth(lambda x: None)
    finished_d_2.addBoth(lambda x: None)
    self.addCleanup(writer_1.close)
    self.addCleanup(writer_2.close)
def test_close_on_incomplete_write(self):
    # write all but 1 byte of data
    blob_file = BlobFile(self.blob_dir, self.fake_content_hash, self.fake_content_len)
    writer, finished_d = blob_file.open_for_writing(peer=1)
    writer.write(self.fake_content[:self.fake_content_len - 1])
    writer.close()
    yield self.assertFailure(finished_d, DownloadCanceledError)

    # writes after close will raise an IOError
    with self.assertRaises(IOError):
        writer.write(self.fake_content)

    # another call to close will do nothing
    writer.close()

    # the file should not exist, since we did not finish the write
    blob_file_2 = BlobFile(self.blob_dir, self.fake_content_hash, self.fake_content_len)
    out = blob_file_2.open_for_reading()
    self.assertIsNone(out)
def _make_new_blob(self, blob_hash, length=None):
    log.debug('Making a new blob for %s', blob_hash)
    blob = BlobFile(self.blob_dir, blob_hash, length)
    self.blobs[blob_hash] = blob
    return blob
class HTTPBlobDownloaderTest(unittest.TestCase):
    def setUp(self):
        self.db_dir, self.blob_dir = mk_db_and_blob_dir()
        self.blob_manager = MagicMock()
        self.client = MagicMock()
        self.blob_hash = ('d17272b17a1ad61c4316ac13a651c2b0952063214a81333e'
                          '838364b01b2f07edbd165bb7ec60d2fb2f337a2c02923852')
        self.blob = BlobFile(self.blob_dir, self.blob_hash)
        self.blob_manager.get_blob.side_effect = lambda _: defer.succeed(self.blob)
        self.response = MagicMock(code=200, length=400)
        self.client.get.side_effect = lambda uri: defer.succeed(self.response)
        self.downloader = HTTPBlobDownloader(
            self.blob_manager, [self.blob_hash], ['server1'], self.client, retry=False)
        self.downloader.interval = 0

    def tearDown(self):
        self.downloader.stop()
        rm_db_and_blob_dir(self.db_dir, self.blob_dir)

    @defer.inlineCallbacks
    def test_download_successful(self):
        self.client.collect.side_effect = collect
        yield self.downloader.start()
        self.blob_manager.get_blob.assert_called_with(self.blob_hash)
        self.client.get.assert_called_with('http://{}/{}'.format('server1', self.blob_hash))
        self.client.collect.assert_called()
        self.assertEqual(self.blob.get_length(), self.response.length)
        self.assertTrue(self.blob.get_is_verified())
        self.assertEqual(self.blob.writers, {})

    @defer.inlineCallbacks
    def test_download_invalid_content(self):
        self.client.collect.side_effect = bad_collect
        yield self.downloader.start()
        self.assertEqual(self.blob.get_length(), self.response.length)
        self.assertFalse(self.blob.get_is_verified())
        self.assertEqual(self.blob.writers, {})

    @defer.inlineCallbacks
    def test_peer_finished_first_causing_a_write_on_closed_handle(self):
        self.client.collect.side_effect = lambda response, write: defer.fail(
            IOError('I/O operation on closed file'))
        yield self.downloader.start()
        self.blob_manager.get_blob.assert_called_with(self.blob_hash)
        self.client.get.assert_called_with('http://{}/{}'.format('server1', self.blob_hash))
        self.client.collect.assert_called()
        self.assertEqual(self.blob.get_length(), self.response.length)
        self.assertEqual(self.blob.writers, {})

    @defer.inlineCallbacks
    def test_download_transfer_failed(self):
        self.client.collect.side_effect = lambda response, write: defer.fail(Exception())
        yield self.downloader.start()
        self.assertEqual(len(self.client.collect.mock_calls), self.downloader.max_failures)
        self.blob_manager.get_blob.assert_called_with(self.blob_hash)
        self.assertEqual(self.blob.get_length(), self.response.length)
        self.assertFalse(self.blob.get_is_verified())
        self.assertEqual(self.blob.writers, {})

    @defer.inlineCallbacks
    def test_blob_not_found(self):
        self.response.code = 404
        yield self.downloader.start()
        self.blob_manager.get_blob.assert_called_with(self.blob_hash)
        self.client.get.assert_called_with('http://{}/{}'.format('server1', self.blob_hash))
        self.client.collect.assert_not_called()
        self.assertFalse(self.blob.get_is_verified())
        self.assertEqual(self.blob.writers, {})

    def test_stop(self):
        self.client.collect.side_effect = lambda response, write: defer.Deferred()
        # hangs if yielded, as intended, to simulate a long ongoing write while we call stop
        self.downloader.start()
        self.downloader.stop()
        self.blob_manager.get_blob.assert_called_with(self.blob_hash)
        self.client.get.assert_called_with('http://{}/{}'.format('server1', self.blob_hash))
        self.client.collect.assert_called()
        self.assertEqual(self.blob.get_length(), self.response.length)
        self.assertFalse(self.blob.get_is_verified())
        self.assertEqual(self.blob.writers, {})
async def test_delete_corrupt(self):
    tmp_dir = tempfile.mkdtemp()
    self.addCleanup(lambda: shutil.rmtree(tmp_dir))
    blob = BlobFile(
        self.loop, self.blob_hash, len(self.blob_bytes),
        blob_completed_callback=self.blob_manager.blob_completed, blob_directory=tmp_dir
    )
    writer = blob.get_blob_writer()
    writer.write(self.blob_bytes)
    await blob.verified.wait()
    blob.close()

    blob = BlobFile(
        self.loop, self.blob_hash, len(self.blob_bytes),
        blob_completed_callback=self.blob_manager.blob_completed, blob_directory=tmp_dir
    )
    self.assertTrue(blob.get_is_verified())

    with open(blob.file_path, 'wb+') as f:
        f.write(b'\x00')

    blob = BlobFile(
        self.loop, self.blob_hash, len(self.blob_bytes),
        blob_completed_callback=self.blob_manager.blob_completed, blob_directory=tmp_dir
    )
    self.assertFalse(blob.get_is_verified())
    self.assertFalse(os.path.isfile(blob.file_path))
def save_verified_blob(self, writer):
    result = BlobFile.save_verified_blob(self, writer)
    if not self.callback.called:
        self.callback.callback(True)
    return result