def gen_csv_webhdfs(col_defs, num_rows, host, user, hdfs_path, has_id_col=False, use_kerberos=False):
    """Generate a CSV file locally and upload it through WebHDFS.

    The column definitions are parsed with ``parse_cols`` and ``num_rows``
    rows are written to a temporary file named after the basename of
    ``hdfs_path``. That file is then handed to ``webhdfs.create_file``
    together with ``host``, ``user``, ``hdfs_path`` and ``use_kerberos``.

    When ``has_id_col`` is true, the first column of every row holds the
    zero-based row index instead of a value produced by ``write_col``.
    """
    columns = parse_cols(col_defs)
    with tempdir() as workdir:
        local_path = os.path.join(workdir, os.path.basename(hdfs_path))
        with open(local_path, 'w') as out:
            for row_no in range(int(num_rows)):
                for col_no, column in enumerate(columns):
                    if col_no > 0:
                        out.write(',')
                    if col_no == 0 and has_id_col:
                        out.write(str(row_no))
                    else:
                        write_col(out, column, 1)
                out.write('\n')
        # Upload only after the file is closed, while the temp dir still exists.
        webhdfs.create_file(host, user, hdfs_path, local_path, use_kerberos=use_kerberos)
def test_xml_processing(self):
    # A Python UDF fetches an XML document over HTTP and emits the first and
    # family name of every <user> element whose "active" attribute is "1".
    create_script = udf.fixindent('''
        CREATE python SCALAR SCRIPT
        process_users(url VARCHAR(200))
        EMITS (firstname VARCHAR(100), lastname VARCHAR(100)) AS
        import urllib
        import lxml.etree as etree
        # import xml.etree.cElementTree as etree
        def run(ctx):
            data = ''.join(urllib.urlopen(ctx.url).readlines())
            tree = etree.XML(data)
            for user in tree.findall('user/[@active="1"]'):
                fn = user.findtext('first_name')
                ln = user.findtext('family_name')
                ctx.emit(fn, ln)
        ''')
    self.query(create_script)

    with tempdir() as docroot:
        xml_path = os.path.join(docroot, 'keepers.xml')
        with open(xml_path, 'w') as xml_file:
            xml_file.write(self.xml())

        with HTTPServer(docroot) as server:
            # server.address is interpolated into the '%s:%d' part of the URL.
            document_url = 'http://%s:%d/keepers.xml' % server.address
            rows = self.query(''' SELECT process_users('%s') FROM DUAL ORDER BY lastname ''' % document_url)

    expected = [('Joe', 'Hart'), ('Manuel', 'Neuer')]
    self.assertRowsEqual(expected, rows)
def test_selftest(self):
    # Sanity check of the HTTPServer helper: a file written into the
    # document root must be retrievable through the served URL.
    with tempdir() as docroot:
        with open(os.path.join(docroot, 'foo.xml'), 'w') as xml_file:
            xml_file.write('''<foo/>\n''')

        with HTTPServer(docroot) as server:
            response = urllib.urlopen('http://%s:%d/foo.xml' % server.address)
            try:
                body = response.read()
            finally:
                # Close the response explicitly so the connection is not
                # left open while the server is being shut down.
                response.close()
            self.assertIn('<foo/>', body)
def test_server_is_chdir_safe(self):
    # The working directory must stay the same before, during and after
    # the lifetime of an FTPServer and of its temporary root directory.
    original_dir = os.getcwd()
    with tempdir() as root:
        self.assertEqual(original_dir, os.getcwd())
        with FTPServer(root):
            self.assertEqual(original_dir, os.getcwd())
        self.assertEqual(original_dir, os.getcwd())
    self.assertEqual(original_dir, os.getcwd())
def test_1(self):
    # Serve a single file through HTTPServer and check that its content
    # can be read back over HTTP.
    with tempdir() as docroot:
        with open(os.path.join(docroot, 'dummy'), 'w') as payload:
            payload.write('babelfish')

        with HTTPServer(docroot) as server:
            response = urllib.urlopen('http://%s:%d/dummy' % server.address)
            try:
                lines = response.readlines()
            finally:
                # Close the response explicitly so the connection is not
                # left open while the server is being shut down.
                response.close()

    self.assertIn('babelfish', '\n'.join(lines))
def test_server_is_chdir_safe(self):
    # The working directory must be unchanged before an HTTPServer starts
    # and after it has stopped.
    original_dir = os.getcwd()
    with tempdir() as docroot:
        self.assertEqual(original_dir, os.getcwd())
        with HTTPServer(docroot):
            # Deliberately no check while the server is running: the current
            # implementation chdirs to the document root, and avoiding that
            # would need subprocesses.
            pass
        self.assertEqual(original_dir, os.getcwd())
    self.assertEqual(original_dir, os.getcwd())
def test_1(self):
    # Log in to FTPServer without credentials and check that a file in the
    # served directory appears in the LIST output.
    with tempdir() as root:
        # Create an empty marker file.
        with open(os.path.join(root, 'dummy'), 'w'):
            pass

        with FTPServer(root) as server:
            listing = []
            client = ftplib.FTP()
            client.connect(*server.address)
            client.login()
            client.retrlines('LIST', listing.append)
            client.quit()

    self.assertIn('dummy', '\n'.join(listing))
def test_1(self):
    # Log in to FTPServer with a user registered on a custom authorizer,
    # create a directory, and check that both the existing file and the new
    # directory appear in the LIST output.
    with tempdir() as root:
        # Create an empty marker file.
        with open(os.path.join(root, 'dummy'), 'w'):
            pass

        authorizer = DummyAuthorizer()
        authorizer.add_user('user', 'passwd', root, perm='elradfmw')

        with FTPServer(root, authorizer=authorizer) as server:
            listing = []
            client = ftplib.FTP()
            client.connect(*server.address)
            client.login('user', 'passwd')
            client.mkd('some_dir')
            client.retrlines('LIST', listing.append)
            client.quit()

    listing_text = '\n'.join(listing)
    self.assertIn('dummy', listing_text)
    self.assertIn('some_dir', listing_text)
def importHelper(self, csv, tableDefinition, exception):
    """Check that importing ``csv`` over FTP fails and leaves the table empty.

    ``csv`` is written to ``data.csv`` in a temporary directory served by
    an FTPServer. ``tableDefinition`` is executed as a query before the
    import is attempted. The import must raise an exception whose message
    matches the regular expression ``exception``; afterwards a
    ``SELECT * FROM T`` is issued and ``self.rowcount()`` must be 0.
    """
    with tempdir() as root:
        csv_path = os.path.join(root, 'data.csv')
        with open(csv_path, 'w') as csv_file:
            csv_file.write(csv)

        with FTPServer(root) as server:
            connection_url = 'ftp://*****:*****@%s:%d' % server.address
            self.query(''' create connection ftpconnection to '%s' ''' % connection_url)
            self.query(tableDefinition)

            with self.assertRaisesRegexp(Exception, exception):
                self.query(''' import into t from csv at ftpConnection file 'data.csv'; ''')

            # The result itself is not needed; the query is run so that
            # rowcount() can be checked afterwards.
            self.query('''SELECT * FROM T''')
            self.assertEqual(0, self.rowcount())
def test_xml_processing(self):
    '''DWA-13842'''
    # An R UDF fetches an XML document over FTP and emits the first and
    # family name of every user whose "active" attribute equals 1.
    create_script = udf.fixindent('''
        CREATE OR REPLACE R SCALAR SCRIPT
        process_users(url VARCHAR(200))
        EMITS (firstname VARCHAR(100), lastname VARCHAR(100)) AS
        require('RCurl')
        require('XML')
        run <- function(ctx) {
            cont <- getURL(ctx$url)
            tree <- xmlTreeParse(cont)
            for (i in 1:length(tree$doc$children$users)) {
                if (tree$doc$children$users[i]$user$attributes['active']==1) {
                    firstname <- tree$doc$children$users[i]$user$children$first_name$children$text$value;
                    familyname <- tree$doc$children$users[i]$user$children$family_name$children$text$value;
                    ctx$emit(firstname, familyname)
                }
            }
        }
        ''')
    self.query(create_script)

    with tempdir() as root:
        xml_path = os.path.join(root, 'keepers.xml')
        with open(xml_path, 'w') as xml_file:
            xml_file.write(self.xml())

        with FTPServer(root) as server:
            # server.address is interpolated into the '%s:%d' part of the URL.
            document_url = 'ftp://*****:*****@%s:%d/keepers.xml' % server.address
            rows = self.query(''' SELECT process_users('%s') FROM DUAL ORDER BY lastname ''' % document_url)

    expected = [('Joe', 'Hart'), ('Manuel', 'Neuer')]
    self.assertRowsEqual(expected, rows)