def test_graphml(self):
    """Convert the distances graph to GraphML, inspect it, and convert back."""
    distances = self.test_input['distances']
    output = convert('graph', distances, {'format': 'graphml'})
    expected_edges = set(distances['data'].edges(data='distance'))
    actual_edges = set()

    self.assertIsInstance(output['data'], (str, unicode))

    # The document root is expected to hold a <key> followed by a <graph>.
    tree = etree.fromstring(output['data'])
    self.assertEqual(len(tree), 2)
    self.assertEqual(tree[0].tag, self.GRAPHML_NS + 'key')
    self.assertEqual(tree[1].tag, self.GRAPHML_NS + 'graph')

    edge_tag = self.GRAPHML_NS + 'edge'
    data_tag = self.GRAPHML_NS + 'data'
    for element in tree[1].findall(edge_tag):
        source = element.attrib['source']
        target = element.attrib['target']
        distance = int(element.find(data_tag).text)
        triple = (source, target, distance)
        # Each edge may appear only once in the serialized graph.
        self.assertNotIn(triple, actual_edges)
        actual_edges.add(triple)

    self.assertEqual(expected_edges, actual_edges)

    # Round trip back to NetworkX and compare, including edge distances.
    output = convert('graph', output, {'format': 'networkx'})
    same_graph = is_isomorphic(
        output['data'],
        distances['data'],
        edge_match=numerical_edge_match('distance', 1))
    self.assertTrue(same_graph)
def test_treestore(self):
    """Convert a newick tree to treestore via r.apetree and check its clades.

    The treestore payload is decoded with ``bson.decode_all`` into a list of
    rows; the row carrying a ``rooted`` key is treated as the root, and the
    clades below it are looked up by ``_id``.
    """
    output = convert(
        'tree',
        {'format': 'newick', 'data': self.newick},
        {'format': 'r.apetree'})
    output = convert('tree', output, {'format': 'treestore'})
    self.assertEqual(output['format'], 'treestore')
    rows = bson.decode_all(output['data'])

    # Start from None so that a payload without a 'rooted' row fails the
    # assertion below instead of raising NameError on an unbound local.
    root = None
    for row in rows:
        if 'rooted' in row:
            root = row
    self.assertIsNotNone(root)
    self.assertEqual(len(root['clades']), 1)

    def find_by_id(row_id):
        """Return the first row whose '_id' equals row_id, else None."""
        for row in rows:
            if row['_id'] == row_id:
                return row
        return None

    top = find_by_id(root['clades'][0])
    self.assertEqual(len(top['clades']), 2)

    internal = find_by_id(top['clades'][0])
    rubribarbus = find_by_id(top['clades'][1])
    ahli = find_by_id(internal['clades'][0])
    allogus = find_by_id(internal['clades'][1])

    self.assertEqual(internal['branch_length'], 2)
    self.assertEqual(ahli['name'], 'ahli')
    self.assertEqual(ahli['branch_length'], 0)
    self.assertEqual(allogus['name'], 'allogus')
    self.assertEqual(allogus['branch_length'], 1)
    self.assertEqual(rubribarbus['name'], 'rubribarbus')
    self.assertEqual(rubribarbus['branch_length'], 3)
def test_non_binary_tree(self):
    """Convert the geospiza newick file to nested format without error."""
    tree_path = os.path.join('data', 'geospiza_from_otl.phy')
    source_spec = {'format': 'newick', 'url': 'file://' + tree_path}
    # No assertions: the test passes as long as the conversion does not raise.
    convert('tree', source_spec, {'format': 'nested'})
def test_adjacencylist(self):
    """Convert the distances graph to an adjacency list and back again."""
    distances = self.test_input['distances']
    output = convert('graph', distances, {'format': 'adjacencylist'})
    expected_edges = set(distances['data'].edges())
    actual_edges = set()

    for line in output['data'].splitlines():
        parts = line.split(' ', 1)
        # Lines holding only a source node contribute no edges.
        if len(parts) <= 1:
            continue
        source, targets = parts
        for target in targets.split(' '):
            pair = (source, target)
            self.assertNotIn(pair, actual_edges)
            actual_edges.add(pair)

    self.assertEqual(expected_edges, actual_edges)

    output = convert('graph', output, {'format': 'networkx'})

    # Edge matching is disabled here because edge information was lost in
    # the original conversion; only the structure is compared.
    same_structure = is_isomorphic(
        output['data'], distances['data'], edge_match=None)
    self.assertTrue(same_structure)
def round_trip(self, obj):
    """Encode ``obj`` as pickle.base64 and decode it again.

    :param obj: The Python object to send through the conversion.
    :returns: The ``data`` entry of the object-format result.
    """
    encoded = convert(
        'python',
        {'format': 'object', 'data': obj},
        {'format': 'pickle.base64'})
    b64 = encoded['data']

    decoded = convert(
        'python',
        {'format': 'pickle.base64', 'data': b64},
        {'format': 'object'})
    return decoded['data']
def test_header_detection(self):
    """Check that the first CSV line is reported as the field names."""
    # (csv text, expected field names); both inputs hold three data rows.
    cases = [
        ('a,b,c\n7,1,c\n8,2,f\n9,3,i', ['a', 'b', 'c']),
        ('1,2,3\n7,10,\n,11,\n,12,', ['1', '2', '3']),
    ]
    for csv_text, expected_fields in cases:
        output = convert(
            'table',
            {'format': 'csv', 'data': csv_text},
            {'format': 'rows'})
        table = output['data']
        self.assertEqual(table['fields'], expected_fields)
        self.assertEqual(len(table['rows']), 3)
def test_jsonlines(self):
    """Convert two JSON lines into an objectlist of two dicts."""
    source_spec = {
        'format': 'jsonlines',
        'data': '{"a": 1, "b": 2}\n{"a": 3, "b": 4}',
    }
    expected = [{'a': 1, 'b': 2}, {'a': 3, 'b': 4}]

    output = convert('table', source_spec, {'format': 'objectlist'})

    self.assertEqual(output['format'], 'objectlist')
    self.assertEqual(output['data'], expected)
def test_column_names_csv(self):
    """Extract column names from CSV text whose first header cell is empty."""
    csv_text = ',a,b,longer name\n1,1,1,1\n2,2,2,2\n3,3,3,3\n'
    source_spec = {'format': 'csv', 'data': csv_text}

    output = convert('table', source_spec, {'format': 'column.names'})

    self.assertEqual(output['format'], 'column.names')
    self.assertEqual(output['data'], ['', 'a', 'b', 'longer name'])
def test_flu(self):
    """Read column names from the flu.csv data file."""
    flu_url = 'file://' + os.path.join('data', 'flu.csv')

    output = convert(
        'table',
        {'format': 'csv', 'url': flu_url},
        {'format': 'column.names'})

    self.assertEqual(output['format'], 'column.names')
    names = output['data']
    self.assertEqual(len(names), 162)
    self.assertEqual(names[:3], ['Date', 'United States', 'Alabama'])
def test_big_header(self):
    """Convert RadiomicsData.csv to rows and check its field and row counts."""
    csv_url = 'file://' + os.path.join('data', 'RadiomicsData.csv')

    output = convert(
        'table',
        {'format': 'csv', 'url': csv_url},
        {'format': 'rows'})

    fields = output['data']['fields']
    self.assertEqual(len(fields), 454)
    self.assertEqual(
        fields[:3],
        ['GLCM_autocorr', 'GLCM_clusProm', 'GLCM_clusShade'])
    self.assertEqual(len(output['data']['rows']), 99)
def test_objectlist_to_rows(self):
    """Round-trip nested objects through the rows format.

    Nested keys are expected to appear as dotted field names in the rows
    output, and converting back must reproduce the original objects.
    """
    objlist = [{'a': {'b': 5}}, {'a': {'b': {'c': 3}}}]
    expected_rows = {
        'fields': ['a.b', 'a.b.c'],
        'rows': [{'a.b': 5}, {'a.b.c': 3}],
    }

    output = convert(
        'table',
        {'format': 'objectlist', 'data': objlist},
        {'format': 'rows'})
    self.assertEqual(output['format'], 'rows')
    self.assertEqual(output['data'], expected_rows)

    # Convert back to an objectlist.
    output = convert(
        'table',
        {'format': 'rows', 'data': output['data']},
        {'format': 'objectlist'})
    self.assertEqual(output['data'], objlist)
def test_convert(self):
    """Exercise image conversions between png.base64, png, pil and jpeg.

    Three scenarios are covered:

    * png.base64 written to a ``file://`` URL as png, then read back and
      compared against the original base64 payload;
    * png.base64 -> pil -> png, compared against the original image with
      ``compareImages``;
    * png.base64 -> jpeg, checked to open as a ``JpegImageFile``.
    """
    source_spec = {'format': 'png.base64', 'data': self.image}

    # mkstemp creates the file atomically, avoiding the race inherent in
    # the deprecated mktemp. Only the path is needed, so the descriptor is
    # closed straight away.
    fd, tmp = tempfile.mkstemp()
    os.close(fd)
    try:
        output = convert('image', source_spec, {
            'format': 'png',
            'url': 'file://' + tmp,
            'mode': 'auto'
        })
        # PNG data is binary: read it in binary mode and close the handle.
        with open(tmp, 'rb') as png_file:
            value = png_file.read()
    finally:
        # Remove the temporary file even when conversion or reading fails.
        os.remove(tmp)
    self.assertEqual(output['format'], 'png')
    self.assertEqual(base64.b64encode(value), self.image)

    # png.base64 -> pil -> png should reproduce the same image.
    output = convert('image', source_spec, {'format': 'pil'})
    output = convert('image', output, {'format': 'png'})

    io1 = StringIO(base64.b64decode(self.image))
    im1 = Image.open(io1)
    io2 = StringIO(output['data'])
    im2 = Image.open(io2)
    self.assertEqual(compareImages(im1, im2), 0)

    # png.base64 -> jpeg should produce data that opens as a JPEG.
    output = convert('image', source_spec, {'format': 'jpeg'})
    data = StringIO(output['data'])
    jpeg = Image.open(data)
    self.assertIsInstance(jpeg, JpegImageFile)
def test_clique(self):
    """Convert the alphabet graph from clique.json to NetworkX and back."""
    # clique.json -> NetworkX
    output = convert(
        'graph', self.test_input['alphabetGraph'], {'format': 'networkx'})
    graph = output['data']

    node_names = {attrs['name'] for _, attrs in graph.nodes(data=True)}
    self.assertEqual(node_names, {'a', 'b', 'c', 'd'})
    self.assertEqual(len(graph.edges()), 3)
    self.assertEqual(graph.degree('55ba5019f8883b5bf35f3e30'), 0)

    # NetworkX -> clique.json
    output = convert('graph', output, {'format': 'clique.json'})

    # Node ids do not survive the round trip, so only the structure is
    # verified: nodes named a, b, c and d, with edges
    #   a -> b
    #   a -> c
    #   b -> c
    output['data'] = json.loads(output['data'])
    items = output['data']

    nodes = []
    edges = []
    for item in items:
        if item['type'] == 'node':
            nodes.append(item)
    for item in items:
        if item['type'] == 'link':
            edges.append((item['source']['$oid'], item['target']['$oid']))

    oid_by_name = {
        node['data']['name']: node['_id']['$oid'] for node in nodes
    }

    # Check nodes
    self.assertEqual(sorted(oid_by_name), ['a', 'b', 'c', 'd'])

    # Check edges
    self.assertEqual(len(edges), 3)
    for source_name, target_name in (('a', 'b'), ('a', 'c'), ('b', 'c')):
        self.assertIn(
            (oid_by_name[source_name], oid_by_name[target_name]), edges)
def test_column_names(self):
    """Extract column names from a rows-format table."""
    table = {
        'fields': ['a', 'b'],
        'rows': [{'a': 6, 'b': 5}],
    }

    output = convert(
        'table',
        {'format': 'rows', 'data': table},
        {'format': 'column.names'})

    self.assertEqual(output['format'], 'column.names')
    self.assertEqual(output['data'], ['a', 'b'])
def test_objectlist(self):
    """Round-trip a table between rows, objectlist and objectlist.json."""
    rows = {
        'fields': ['a', 'b'],
        'rows': [{'a': 1, 'b': 'x'}, {'a': 4, 'b': 'y'}],
    }

    # rows -> objectlist keeps the row data unchanged.
    to_objectlist = convert(
        'table',
        {'format': 'rows', 'data': rows},
        {'format': 'objectlist'})
    objectlist = to_objectlist['data']
    self.assertEqual(objectlist, rows['rows'])

    # objectlist -> rows
    back_to_rows = convert(
        'table',
        {'format': 'objectlist', 'data': objectlist},
        {'format': 'rows'})
    rows2 = back_to_rows['data']

    # Field order is not guaranteed, so compare the fields as sets.
    self.assertEqual(set(rows['fields']), set(rows2['fields']))

    # The row data itself must match exactly.
    self.assertEqual(rows['rows'], rows2['rows'])

    # objectlist -> objectlist.json -> objectlist
    as_json = convert(
        'table',
        {'format': 'objectlist', 'data': rows['rows']},
        {'format': 'objectlist.json'})
    objectlist = convert('table', as_json, {'format': 'objectlist'})['data']
    self.assertEqual(rows['rows'], objectlist)
def test_inputs_from_file(self):
    """Run a task with base64 inputs in a file.

    Two objects are written as pickle.base64 to temporary files, and the
    basic task is run with inputs that point at those files by path.
    """
    # The context managers close (and thereby delete) both temporary files
    # even when a conversion or the task itself raises.
    with tempfile.NamedTemporaryFile() as a, \
            tempfile.NamedTemporaryFile() as b:
        spec_a = {'format': 'pickle.base64', 'mode': 'local', 'path': a.name}
        spec_b = {'format': 'pickle.base64', 'mode': 'local', 'path': b.name}

        convert('python', {'format': 'object', 'data': (0, 1)}, spec_a)
        convert('python', {'format': 'object', 'data': 2}, spec_b)

        outputs = self.run_basic_task({'a': spec_a, 'b': spec_b})

    self.assertEqual(outputs.get('c'), (0, 1, 0, 1))
    self.assertEqual(outputs.get('d'), 4)
def test_nan(self):
    """Check that the rows.json output holds no NaN or infinite floats."""
    csv_url = 'file://' + os.path.join('data', 'RadiomicsData.csv')
    output = convert(
        'table',
        {'format': 'csv', 'url': csv_url},
        {'format': 'rows.json'})
    data = json.loads(output['data'])

    self.assertEqual(len(data['fields']), 454)
    self.assertEqual(
        data['fields'][:3],
        ['GLCM_autocorr', 'GLCM_clusProm', 'GLCM_clusShade'])
    self.assertEqual(len(data['rows']), 99)

    for row in data['rows']:
        for value in row.values():
            # Only float values can be NaN or infinite.
            if not isinstance(value, float):
                continue
            self.assertFalse(math.isnan(value))
            self.assertFalse(math.isinf(value))
def convert(self, data_spec, format):
    """Convert a data specification to another format of this port's type.

    Delegates to the module-level ``convert`` using ``self.type``.

    :param dict data_spec: Data specification
    :param str format: The target data format
    :returns: dict

    >>> spec = {'name': 'a', 'type': 'number', 'format': 'number'}
    >>> port = Port(spec)
    >>> new_spec = port.convert({'format': 'number', 'data': 1}, 'json')
    >>> new_spec['format']
    'json'
    >>> port.fetch(new_spec)
    1
    """
    target_spec = {'format': format}
    converted = convert(self.type, data_spec, target_spec)
    return converted
def test_sniffer(self):
    """Check parsing of test.csv and of the table from the flu analysis."""
    csv_url = 'file://' + os.path.join('data', 'test.csv')
    output = convert(
        'table',
        {'format': 'csv', 'url': csv_url},
        {'format': 'rows'})

    table = output['data']
    self.assertEqual(len(table['fields']), 32)
    self.assertEqual(
        table['fields'][:3],
        ['FACILITY', 'ADDRESS', 'DATE OF INSPECTION'])
    self.assertEqual(len(table['rows']), 14)

    # Run the flu analysis and inspect the header of its table output.
    flu_path = os.path.join(self.analysis_path, 'xdata', 'flu.json')
    flu = load(flu_path)
    requested_outputs = {'data': {'type': 'table', 'format': 'rows'}}
    output = run(flu, inputs={}, outputs=requested_outputs)
    self.assertEqual(
        output['data']['data']['fields'][:3],
        ['Date', 'United States', 'Alabama'])