def test_dropWroteFromOutside(self):
    """
    Checks that a FileDROP whose backing file is written externally
    (i.e., not *through* the DROP's own write interface) can still be
    moved to COMPLETED and read back correctly afterwards.
    """
    # Write the data directly to the file behind the DROP's back
    drop = FileDROP('A', 'A')
    payload = 'a message'
    with open(drop.path, 'w') as outfile:
        outfile.write(payload)
    drop.setCompleted()

    # The contents are still readable through the DROP...
    self.assertEqual(six.b(payload), droputils.allDropContents(drop))
    # ...but checksum and size were never computed, since the data
    # bypassed the DROP entirely
    self.assertIsNone(drop.checksum)
    self.assertIsNone(drop.size)

    # The size can be assigned manually in this situation; if it
    # couldn't, this statement would raise
    drop.size = len(payload)
def assertMsgIsCorrect(msg, command):
    # Run `command` inside a docker container wired to a file output,
    # then check the output drop ends up holding exactly `msg`
    docker_app = DockerApp('a', 'a', image='ubuntu:14.04', command=command)
    out_drop = FileDROP('b', 'b')
    docker_app.addOutput(out_drop)
    with DROPWaiterCtx(self, out_drop, 100):
        docker_app.execute()
    self.assertEqual(six.b(msg), droputils.allDropContents(out_drop))
def test_to_first_split(self):
    """
    Builds and runs (with mock docker apps and mock S3 drops) the first
    stage of the Chiles pipeline: copy a measurement set down from S3,
    process it with casa_py once per frequency group, and copy each
    result back to S3; a barrier drop waits for everything to finish.
    """
    # Source data: a (mocked) S3 object copied into a local directory
    s3_drop = MockS3DROP(self.get_oid('s3'), uuid.uuid4(), bucket='mock', key='key123', profile_name='aws-profile')
    copy_from_s3 = DockerApp(self.get_oid('app'), uuid.uuid4(), image='mock:latest', command='copy_from_s3.sh %iDataURL0 /dfms_root/%o0', user='******')
    measurement_set = DirectoryContainer(self.get_oid('dir'), uuid.uuid4(), dirname=TestChiles02._temp)
    copy_from_s3.addInput(s3_drop)
    copy_from_s3.addOutput(measurement_set)

    outputs = []
    # Only the first group-of-5 frequencies is exercised by this test
    frequencies = make_groups_of_frequencies(FREQUENCY_GROUPS, 5)
    frequencies = frequencies[0]
    for group in frequencies:
        # One casa_py run per frequency pair, each followed by an upload
        # of its result file back to a (mocked) S3 key
        casa_py_drop = DockerApp(self.get_oid('app'), uuid.uuid4(), image='mock:latest', command='casa_py.sh /dfms_root/%i0 /dfms_root/%o0 {0} {1}'.format(group[0], group[1]), user='******')
        result = FileDROP(self.get_oid('file'), uuid.uuid4(), dirname=TestChiles02._temp)
        copy_to_s3 = DockerApp(self.get_oid('app'), uuid.uuid4(), image='mock:latest', command='copy_to_s3.sh /dfms_root/%i0 %oDataURL0', user='******')
        s3_drop_out = MockS3DROP(self.get_oid('s3'), uuid.uuid4(), bucket='mock', key='{0}_{1}/key123'.format(group[0], group[1]), profile_name='aws-profile')
        casa_py_drop.addInput(measurement_set)
        casa_py_drop.addOutput(result)
        copy_to_s3.addInput(result)
        copy_to_s3.addOutput(s3_drop_out)
        outputs.append(s3_drop_out)

    # The barrier joins on the measurement set plus every uploaded output
    barrier_drop = BarrierAppDROP(self.get_oid('barrier'), uuid.uuid4())
    barrier_drop.addInput(measurement_set)
    for output in outputs:
        barrier_drop.addInput(output)

    # Completing the S3 source triggers the whole chain; wait on the barrier
    with droputils.DROPWaiterCtx(self, barrier_drop, 50000):
        s3_drop.setCompleted()
def assert_message_is_correct(message, command):
    # Fire `command` asynchronously through a bash shell app and verify
    # that its file output ends up containing exactly `message`
    shell_app = BashShellApp('a', 'a', command=command)
    out_file = FileDROP('b', 'b')
    shell_app.addOutput(out_file)
    with DROPWaiterCtx(self, out_file, 100):
        shell_app.async_execute()
    self.assertEqual(six.b(message), droputils.allDropContents(out_file))
def test_clientServer(self):
    """
    A client-server duo. The server outputs the data it receives to its
    output DROP, which in turn is the data held in its input DROP. The
    graph looks like this:

    A --|--> B(client) --|--> D
        |--> C(server) --|

    C is a server application which B connects to. Therefore C must be
    started before B, so B knows C's IP address and connects successfully.
    Although the real writing is done by C, B in this example is also
    treated as a publisher of D. This way D waits for both applications to
    finish before proceeding.
    """
    # Skip (by returning early) when no docker daemon can be contacted
    try:
        AutoVersionClient().close()
    except DockerException:
        warnings.warn("Cannot contact the Docker daemon, skipping docker tests")
        return

    a = FileDROP('a', 'a')
    # B pipes A's contents to C's listening socket; %containerIp[c]% is
    # substituted with C's container IP before B runs
    b = DockerApp('b', 'b', image='ubuntu:14.04', command='cat %i0 > /dev/tcp/%containerIp[c]%/8000')
    # C listens on port 8000 and writes whatever it receives into D
    c = DockerApp('c', 'c', image='ubuntu:14.04', command='nc -l 8000 > %o0')
    d = FileDROP('d', 'd')
    b.addInput(a)
    b.addOutput(d)
    c.addInput(a)
    c.addOutput(d)

    # Let 'b' handle its interest in c
    b.handleInterest(c)

    # Random payload; writing A triggers both apps, D holds the result
    data = os.urandom(10)
    with DROPWaiterCtx(self, d, 100):
        a.write(data)
        a.setCompleted()
    self.assertEqual(data, droputils.allDropContents(d))
def test_dropCompleteTriggersReplication(self):
    with dlm.DataLifecycleManager() as manager:
        # A precious drop is replicated by the DLM on completion,
        # turning it SOLID and yielding a second UID
        precious = FileDROP('oid:A', 'uid:A1', expectedSize=1)
        manager.addDrop(precious)
        self._writeAndClose(precious)
        self.assertEqual(DROPPhases.SOLID, precious.phase)
        self.assertEqual(2, len(manager.getDropUids(precious)))

        # A non-precious drop must NOT be replicated: it stays in GAS
        # and keeps its single UID
        throwaway = FileDROP('oid:B', 'uid:B1', expectedSize=1, precious=False)
        manager.addDrop(throwaway)
        self._writeAndClose(throwaway)
        self.assertEqual(DROPPhases.GAS, throwaway.phase)
        self.assertEqual(1, len(manager.getDropUids(throwaway)))
def test_directoryContainer(self):
    """
    A small, simple test for the DirectoryContainer DROP that checks it
    allows only valid children to be added.

    The filesystem playground is torn down in a ``finally`` block so a
    failing assertion doesn't leave the process chdir'ed into /tmp or
    leak the temporary directories into later tests.
    """
    # Prepare our playground
    cwd = os.getcwd()
    os.chdir('/tmp')
    dirname = ".hidden"
    dirname2 = ".hidden/inside"
    if not os.path.exists(dirname2):
        os.makedirs(dirname2)

    try:
        # DROPs involved
        a = FileDROP('a', 'a', dirname=dirname)
        b = FileDROP('b', 'b', dirname=dirname)
        c = FileDROP('c', 'c', dirname=dirname2)
        d = FileDROP('d', 'd', dirname=dirname2)
        cont1 = DirectoryContainer('e', 'e', dirname=dirname)
        cont2 = DirectoryContainer('f', 'f', dirname=dirname2)

        # Paths are absolutely reported
        self.assertEqual(os.path.realpath('/tmp/.hidden'), os.path.realpath(cont1.path))
        self.assertEqual(os.path.realpath('/tmp/.hidden/inside'), os.path.realpath(cont2.path))

        # Certain children-to-be are rejected: wrong DROP types, or file
        # drops living outside the container's directory
        self.assertRaises(TypeError, cont1.addChild, NullDROP('g', 'g'))
        self.assertRaises(TypeError, cont1.addChild, InMemoryDROP('h', 'h'))
        self.assertRaises(TypeError, cont1.addChild, ContainerDROP('i', 'i'))
        self.assertRaises(Exception, cont1.addChild, c)
        self.assertRaises(Exception, cont1.addChild, d)
        self.assertRaises(Exception, cont2.addChild, a)
        self.assertRaises(Exception, cont2.addChild, b)

        # These children are correct
        cont1.addChild(a)
        cont1.addChild(b)
        cont2.addChild(c)
        cont2.addChild(d)
    finally:
        # Revert to the previous state even if the test failed mid-way
        shutil.rmtree(dirname, True)
        os.chdir(cwd)
def test_echo(self):
    # A bash 'cp' shuttles one file drop's contents into another
    in_drop = FileDROP('a', 'a')
    app = BashShellApp('b', 'b', command='cp %i0 %o0')
    out_drop = FileDROP('c', 'c')
    app.addInput(in_drop)
    app.addOutput(out_drop)

    # Random payload so every run verifies fresh contents
    payload = os.urandom(10)
    with DROPWaiterCtx(self, out_drop, 100):
        in_drop.write(payload)
        in_drop.setCompleted()
    self.assertEqual(payload, droputils.allDropContents(out_drop))

    # The output file belongs to us, not to root
    self.assertEqual(os.getuid(), os.stat(out_drop.path).st_uid)
def _ngas_and_fs_io(self, command):
    # An NGAS drop is not filesystem-related, so the docker command line
    # can only reference it via its data URL
    ngas = NgasDROP('a', 'a')
    app = DockerApp('b', 'b', image="ubuntu:14.04", command=command)
    out_file = FileDROP('c', 'c')
    app.addInput(ngas)
    app.addOutput(out_file)
    # Completing the NGAS input fires the app; wait on the app itself
    with DROPWaiterCtx(self, app, 100):
        ngas.setCompleted()
    self.assertEqual(six.b(ngas.dataURL), droputils.allDropContents(out_file))
def assertFiles(delete_parent_directory, parentDirExists, tempDir=None):
    # Create a throwaway directory unless the caller supplied one
    if not tempDir:
        tempDir = tempfile.mkdtemp()
    drop = FileDROP('a', 'a', dirname=tempDir,
                    delete_parent_directory=delete_parent_directory)
    drop.write(' ')
    drop.setCompleted()
    self.assertTrue(drop.exists())
    self.assertTrue(os.path.isdir(tempDir))

    # Deleting the drop removes its file; whether the parent directory
    # survives depends on the delete_parent_directory flag
    drop.delete()
    self.assertFalse(drop.exists())
    self.assertEqual(parentDirExists, os.path.isdir(tempDir))
    if parentDirExists:
        shutil.rmtree(tempDir)
def _scan_and_import_files(self):
    """
    Walk ``self._dirname`` recursively and add a FileDROP child for every
    file whose (lower-cased) extension appears in ``self._ext``.
    """
    for root, _dirs, files in os.walk(self._dirname):
        for name in files:
            _, ext = os.path.splitext(name)
            if ext.lower() not in self._ext:
                continue
            # os.path.join instead of manual '/' concatenation, so paths
            # are built portably regardless of platform separator
            path = os.path.join(root, name)
            fd = FileDROP(str(uuid.uuid1()), str(uuid.uuid1()),
                          filepath=path, check_filepath_exists=True)
            self._children.append(fd)
            fd._parent = self
def test_expiringNormalDrop(self):
    with dlm.DataLifecycleManager(checkPeriod=0.5) as manager:
        drop = FileDROP('oid:A', 'uid:A1', expectedSize=1, lifespan=0.5)
        manager.addDrop(drop)
        # Writing the full expected size moves the drop to COMPLETED and
        # starts its lifespan clock
        self._writeAndClose(drop)
        # After the lifespan elapses the DLM must flag it as EXPIRED
        time.sleep(1)
        self.assertEqual(DROPStates.EXPIRED, drop.status)
def test_DROPFile(self):
    """
    Exercises the DROPFile wrapper used to read a DROP's data. DROPFile
    itself decides whether the data is read directly or through the DROP.
    """
    drop = FileDROP('a', 'a', expectedSize=5)
    drop.write('abcde')

    with DROPFile(drop) as f:
        self.assertEqual(six.b('abcde'), f.read())
        # While the file is open the DROP reports an ongoing read and
        # the wrapper holds a live I/O object
        self.assertTrue(drop.isBeingRead())
        self.assertIsNotNone(f._io)
    # Leaving the context clears the being-read flag
    self.assertFalse(drop.isBeingRead())
def test_two_simultaneous_pipes(self):
    """
    A more complicated test where three bash applications run at the same
    time. The first streams its output to the second one, while the
    second one streams *its* output to the third one.

    -------------     --------------     -------------     --------------     -------------     ----------
    | BashApp A | --> | InMemory B | --> | BashApp C | --> | InMemory D | --> | BashApp E | --> | File F |
    |   echo    |     |  "/pipe1"  |     |    cat    |     |  "/pipe2"  |     |   sort    |     |        |
    -----*-------     --------------     ----*--*-----     --------------     -----*-------     ----------
         |                                   |  |                                  |
         \-------------|named-pipe|----------/  \-----------|named-pipe|-----------/

    BashApp A writes "5 4 3 2 1" (each on a new line), which is read by
    "cat" (BashApp C). The printed results (a copy of the original) are
    streamed through D and read by "sort" (BashApp E), which writes the
    sorted output to F.
    """
    output_fname = tempfile.mktemp()

    # The three apps and the two in-memory drops that act only as
    # intermediators establishing the named pipes between them
    a = StreamingOutputBashApp('a', 'a', command=r"echo -en '5\n4\n3\n2\n1'")
    b = InMemoryDROP('b', 'b')
    c = StreamingInputOutputBashApp('c', 'c', command="cat")
    d = InMemoryDROP('d', 'd')
    e = StreamingInputBashApp('e', 'e', command="sort -n > %o0")
    f = FileDROP('f', 'f', filepath=output_fname)

    # Wire the streaming chain: A -> B ~> C -> D ~> E -> F
    a.addOutput(b)
    b.addStreamingConsumer(c)
    c.addOutput(d)
    d.addStreamingConsumer(e)
    e.addOutput(f)

    # Let's fire the app
    with DROPWaiterCtx(self, f, 2):
        a.async_execute()

    # The application executed, finished, and its output was recorded
    for drop in (a, b, c, d, e, f):
        self.assertEqual(DROPStates.COMPLETED, drop.status)
    # sort -n turned "5 4 3 2 1" into ascending order
    self.assertEqual([1, 2, 3, 4, 5], [int(x) for x in droputils.allDropContents(f).strip().split(six.b('\n'))])

    # Clean up and go
    os.remove(output_fname)
def test_lostDrop(self):
    with dlm.DataLifecycleManager(checkPeriod=0.5) as manager:
        drop = FileDROP('oid:A', 'uid:A1', expectedSize=1, lifespan=10, precious=False)
        manager.addDrop(drop)
        self._writeAndClose(drop)

        # Remove the backing file behind the DLM's back...
        os.unlink(drop._fnm)

        # ...then give the DLM one check cycle to notice the data is
        # gone and flag the drop as LOST
        time.sleep(1)
        self.assertEqual(DROPPhases.LOST, drop.phase)
def test_simpleCopy(self):
    """
    Runs a dockerized 'cp' copying one file DROP into another, then
    verifies the copied contents and that the resulting file is owned by
    the current user rather than root. Returns early (with a warning)
    when no docker daemon can be contacted, so machines without a docker
    service don't fail.
    """
    try:
        AutoVersionClient().close()
    except DockerException:
        warnings.warn("Cannot contact the Docker daemon, skipping docker tests")
        return

    src = FileDROP('a', 'a')
    app = DockerApp('b', 'b', image='ubuntu:14.04', command='cp %i0 %o0')
    dst = FileDROP('c', 'c')
    app.addInput(src)
    app.addOutput(dst)

    # Random payload so each run checks fresh data
    payload = os.urandom(10)
    with DROPWaiterCtx(self, dst, 100):
        src.write(payload)
        src.setCompleted()
    self.assertEqual(payload, droputils.allDropContents(dst))

    # The output file belongs to us, not to root
    self.assertEqual(os.getuid(), os.stat(dst.path).st_uid)
def test_single_pipe(self):
    """
    A simple test where two bash apps are connected to each other in a
    streaming fashion. The data flows through a pipe which is created by
    the framework. The data drop in between acts only as an intermediator
    to establish the underlying communication channel.

    -------------     --------------     -------------     ----------
    | BashApp A | --> | InMemory B | --> | BashApp C | --> | File D |
    |   echo    |     | "/a/pipe"  |     |    cat    |     |        |
    -----*-------     --------------     ------*------     ----------
         |                                     |
         \-------------|named-pipe|------------/

    BashApp A writes "5 4 3 2 1" (each on a new line), which is read by
    cat and redirected to D.
    """
    output_fname = tempfile.mktemp()

    # Producer, intermediating memory drop, streaming consumer, and the
    # final file output
    a = StreamingOutputBashApp('a', 'a', command=r"echo -en '5\n4\n3\n2\n1'")
    b = InMemoryDROP('b', 'b')
    c = StreamingInputBashApp('c', 'c', command="cat > %o0")
    d = FileDROP('d', 'd', filepath=output_fname)

    # Wire the streaming chain: A -> B ~> C -> D
    a.addOutput(b)
    c.addStreamingInput(b)
    c.addOutput(d)

    # Let's fire the app
    with DROPWaiterCtx(self, d, 2):
        a.async_execute()

    # The application executed, finished, and its output was recorded
    for drop in (a, b, c, d):
        self.assertEqual(DROPStates.COMPLETED, drop.status, "Drop %r not COMPLETED: %d" % (drop, drop.status))
    # cat preserves order, so D holds the numbers exactly as written
    self.assertEqual([5, 4, 3, 2, 1], [int(x) for x in droputils.allDropContents(d).split(six.b('\n'))])

    # Clean up and go
    os.remove(output_fname)
def test_cleanupExpiredDrops(self):
    # lifespan=1s, expiration checked every 0.5s, cleanup every 2s
    with dlm.DataLifecycleManager(checkPeriod=0.5, cleanupPeriod=2) as manager:
        drop = FileDROP('oid:A', 'uid:A1', expectedSize=1, lifespan=1, precious=False)
        manager.addDrop(drop)
        self._writeAndClose(drop)

        # Wait 0.5 seconds: the lifespan hasn't elapsed yet, so the DROP
        # is still COMPLETED and its file exists
        time.sleep(0.5)
        self.assertEqual(DROPStates.COMPLETED, drop.status)
        self.assertTrue(drop.exists())

        # Wait 1 more second (1.5s total): past the 1s lifespan, so the
        # DROP should be EXPIRED, but cleanup hasn't run yet and the
        # file is still there
        time.sleep(1)
        self.assertEqual(DROPStates.EXPIRED, drop.status)
        self.assertTrue(drop.exists())

        # Wait 1 more second (2.5s total): past the 2s cleanup period,
        # so the expired DROP should now have been DELETED
        time.sleep(1)
        self.assertEqual(DROPStates.DELETED, drop.status)
        self.assertFalse(drop.exists())
def test_sql_create(self):
    # File drop that will hold the sqlite database file
    sqlite01 = get_oid('sqlite')
    db_file = FileDROP(sqlite01, get_uuid(), precious=False,
                       dirname=os.path.join(TestSqlite._temp, sqlite01),
                       check_exists=False)

    # App that initialises the database, and the in-memory drop that
    # receives its output
    init_app = InitializeSqliteApp(get_oid('app'), get_uuid(), user='******')
    memory_out = InMemoryDROP(get_oid('memory'), get_uuid(), precious=False)

    init_app.addInput(db_file)
    init_app.addOutput(memory_out)

    # Completing the input triggers the app; wait for the output drop
    with droputils.DROPWaiterCtx(self, memory_out, 50000):
        db_file.setCompleted()
def test_join(self):
    """
    Using the container data object to implement a join/barrier dataflow.

    A1, A2 and A3 are FileDROPs
    B1, B2 and B3 are SumupContainerChecksum
    C1, C2 and C3 are InMemoryDROPs
    D is a SumupContainerChecksum
    E is a InMemoryDROP

    --> A1 --> B1 --> C1 --|
    --> A2 --> B2 --> C2 --|--> D --> E
    --> A3 --> B3 --> C3 --|

    Upon writing all A* DROPs, the execution of B* DROPs should be
    triggered, after which "C" will transition to COMPLETE. Once all "C"s
    have moved to COMPLETED "D"'s execution will also be triggered, and
    finally E will hold the sum of B1, B2 and B3's checksums
    """
    filelen = self._test_drop_sz * ONE_MB

    # create file data objects
    a1 = FileDROP('oid:A1', 'uid:A1', expectedSize=filelen)
    a2 = FileDROP('oid:A2', 'uid:A2', expectedSize=filelen)
    a3 = FileDROP('oid:A3', 'uid:A3', expectedSize=filelen)

    # CRC Result DROPs, storing the result in memory
    b1 = SumupContainerChecksum('oid:B1', 'uid:B1')
    b2 = SumupContainerChecksum('oid:B2', 'uid:B2')
    b3 = SumupContainerChecksum('oid:B3', 'uid:B3')
    c1 = InMemoryDROP('oid:C1', 'uid:C1')
    c2 = InMemoryDROP('oid:C2', 'uid:C2')
    c3 = InMemoryDROP('oid:C3', 'uid:C3')

    # The final DROP that sums up the CRCs from the container DROP
    d = SumupContainerChecksum('oid:D', 'uid:D')
    e = InMemoryDROP('oid:E', 'uid:E')

    # Wire together; zip() pairs the lists directly, replacing the
    # original map(lambda a, b: (a, b), ...) which re-implemented it
    dropAList = [a1, a2, a3]
    dropBList = [b1, b2, b3]
    dropCList = [c1, c2, c3]
    for dropA, dropB in zip(dropAList, dropBList):
        dropA.addConsumer(dropB)
    for dropB, dropC in zip(dropBList, dropCList):
        dropB.addOutput(dropC)
    for dropC in dropCList:
        dropC.addConsumer(d)
    d.addOutput(e)

    # Write data into the initial "A" DROPs, which should trigger
    # the whole chain explained above
    with DROPWaiterCtx(self, e):
        for dropA in dropAList:  # this should be parallel for
            for _ in range(self._test_num_blocks):
                dropA.write(self._test_block)

    # All DROPs are completed now that the chain executed correctly
    for drop in dropAList + dropBList + dropCList:
        self.assertEqual(drop.status, DROPStates.COMPLETED)

    # The results we want to compare
    sum_crc = c1.checksum + c2.checksum + c3.checksum
    dropEData = int(droputils.allDropContents(e))
    self.assertNotEqual(sum_crc, 0)
    self.assertEqual(sum_crc, dropEData)
def createDrop(self, oid, uid, **kwargs):
    # Force every created drop into this instance's working directory
    return FileDROP(oid, uid, **dict(kwargs, dirname=self._dirName))
def createDrop(self, oid, uid, **kwargs):
    # All drops produced here live under the configured saving directory
    options = dict(kwargs)
    options['dirname'] = self._savingDir
    return FileDROP(oid, uid, **options)
def test_dropAddition(self):
    # Registering a drop with the lifecycle manager must simply succeed
    with dlm.DataLifecycleManager() as manager:
        manager.addDrop(FileDROP('oid:A', 'uid:A1', expectedSize=10))