def test_filename_ok(self):
    """Declare ``CSVReader`` sources with two different kinds of ``file`` value.

    One reader receives an SPL expression object, the other a plain
    Python string. The topology is only declared here; nothing is
    submitted and no assertions are made.
    """
    topology = Topology()

    # File name supplied as an expression built from SPL source text.
    app_dir_file = streamsx.spl.op.Expression.expression(
        'getApplicationDir()+"' + '/a/b"')
    expr_reader = files.CSVReader(
        schema='tuple<rstring a, int32 b>', file=app_dir_file)
    topology.source(expr_reader)

    # File name supplied as an ordinary string.
    literal_reader = files.CSVReader(schema=CommonSchema.String, file="/tmp/a")
    topology.source(literal_reader)
def test_composite_kwargs(self):
    """Round-trip 13 tuples through a ``FileSink`` configured via keyword arguments.

    The first topology writes the tuples to ``data.csv`` under
    ``self.dir``; the second reads the file back with ``CSVReader`` and
    compares the contents with the expected dictionaries.
    """
    tuple_schema = 'tuple<rstring a, int32 b>'

    # --- write phase -------------------------------------------------
    write_topo = Topology()
    numbers = write_topo.source(range(13))
    rows = numbers.map(lambda n: ('A' + str(n), n + 7), schema=tuple_schema)
    csv_path = os.path.join(self.dir, 'data.csv')

    # All sink options are handed to the constructor in one go.
    sink_options = {
        'append': True,
        'flush': 1,
        'close_mode': CloseMode.punct.name,
        'flush_on_punctuation': True,
        'format': Format.csv.name,
        'has_delay_field': False,
        'quote_strings': False,
        'write_failure_action': WriteFailureAction.log.name,
        'write_punctuations': False,
    }
    sink = files.FileSink(csv_path, **sink_options)
    rows.for_each(sink)

    write_tester = Tester(write_topo)
    write_tester.tuple_count(rows, 13)
    write_tester.test(self.test_ctxtype, self.test_config)

    self.assertTrue(os.path.isfile(csv_path))

    # --- read-back phase ---------------------------------------------
    read_topo = Topology()
    reader = files.CSVReader(schema=tuple_schema, file=csv_path)
    read_back = read_topo.source(reader)
    expected = [{'a': 'A' + str(n), 'b': n + 7} for n in range(13)]

    read_tester = Tester(read_topo)
    read_tester.contents(read_back, expected)
    read_tester.test(self.test_ctxtype, self.test_config)
def test_composite(self):
    """Round-trip 13 tuples through a ``FileSink`` configured via attributes.

    The sink is created with only the file name and every option is then
    assigned as an attribute before the sink is attached to the stream.
    A second topology reads the file back and checks its contents.
    """
    tuple_schema = 'tuple<rstring a, int32 b>'

    # --- write phase -------------------------------------------------
    write_topo = Topology()
    numbers = write_topo.source(range(13))
    rows = numbers.map(lambda n: ('A' + str(n), n + 7), schema=tuple_schema)
    csv_path = os.path.join(self.dir, 'data.csv')

    sink = files.FileSink(csv_path)
    # Options are set one by one, after construction.
    sink.append = True
    sink.flush = 1
    sink.close_mode = CloseMode.punct.name
    sink.flush_on_punctuation = True
    sink.format = Format.csv.name
    sink.has_delay_field = False
    sink.quote_strings = False
    sink.write_failure_action = WriteFailureAction.log.name
    sink.write_punctuations = False
    rows.for_each(sink)

    write_tester = Tester(write_topo)
    write_tester.tuple_count(rows, 13)
    write_tester.test(self.test_ctxtype, self.test_config)

    self.assertTrue(os.path.isfile(csv_path))

    # --- read-back phase ---------------------------------------------
    read_topo = Topology()
    reader = files.CSVReader(schema=tuple_schema, file=csv_path)
    read_back = read_topo.source(reader)
    expected = [{'a': 'A' + str(n), 'b': n + 7} for n in range(13)]

    read_tester = Tester(read_topo)
    read_tester.contents(read_back, expected)
    read_tester.test(self.test_ctxtype, self.test_config)
def data_source(topo, schema):
    """Create a stream from the bundled sample CSV and give it an event time.

    The gzip-compressed ``TradesAndQuotes.csv.gz`` file located next to
    this script (``script_dir`` comes from the enclosing scope) is added
    to the bundle's ``etc`` directory and read with ``CSVReader``. The
    tuple schema is then extended with a ``timestamp evTime`` attribute,
    which is filled from the SPL expression
    ``timeStringToTimestamp(date, time, false)`` and registered as the
    stream's event time.

    Args:
        topo: Topology the source is added to.
        schema: Schema of the tuples read from the CSV file.

    Returns:
        The stream returned by ``set_event_time('evTime')``.
    """
    sample_name = 'TradesAndQuotes.csv.gz'

    # Ship the sample file in the bundle's etc directory.
    topo.add_file_dependency(os.path.join(script_dir, sample_name), 'etc')

    # File name relative to the application dir.
    bundled_path = os.path.join('etc', sample_name)
    reader = files.CSVReader(
        schema=schema,
        file=bundled_path,
        compression=Compression.gzip.name,
    )
    raw_stream = topo.source(reader)

    # Extend the input schema with the event-time attribute.
    base_schema = StreamSchema(schema)
    ev_time_schema = StreamSchema('tuple<timestamp evTime>')
    schema_with_ev_time = base_schema.extend(ev_time_schema)

    functor = R.Functor.map(raw_stream, schema_with_ev_time)
    ev_time_expr = op.Expression.expression(
        'timeStringToTimestamp(date, time, false)')
    functor.evTime = functor.output(functor.outputs[0], ev_time_expr)

    return functor.outputs[0].set_event_time('evTime')
def test_read_file_from_application_dir(self):
    """Read a CSV file that is shipped inside the application bundle.

    ``data.csv`` from this test's directory is added to the bundle's
    ``etc`` directory and read through a relative file name. The test
    expects three tuples on the resulting stream.
    """
    topology = Topology()

    here = os.path.realpath(__file__)
    local_csv = os.path.join(os.path.dirname(here), 'data.csv')
    # Add the sample file to the etc dir in the bundle.
    topology.add_file_dependency(local_csv, 'etc')

    # File name relative to the application dir; it is passed as a plain
    # string rather than wrapped in a getApplicationDir() SPL expression.
    bundled_path = os.path.join('etc', 'data.csv')
    tuple_schema = 'tuple<rstring a, int32 b>'

    reader = files.CSVReader(schema=tuple_schema, file=bundled_path)
    rows = topology.source(reader)
    rows.print()

    checker = Tester(topology)
    checker.tuple_count(rows, 3)
    checker.test(self.test_ctxtype, self.test_config)
def test_read_write(self):
    """Round-trip 13 tuples through ``CSVWriter`` and ``CSVReader``.

    The first topology writes the tuples to ``data.csv`` under
    ``self.dir``; the second reads the file back and compares the
    contents with the expected dictionaries.
    """
    tuple_schema = 'tuple<rstring a, int32 b>'

    # --- write phase -------------------------------------------------
    write_topo = Topology()
    numbers = write_topo.source(range(13))
    rows = numbers.map(lambda n: ('A' + str(n), n + 7), schema=tuple_schema)
    csv_path = os.path.join(self.dir, 'data.csv')
    writer = files.CSVWriter(csv_path)
    rows.for_each(writer)

    write_tester = Tester(write_topo)
    write_tester.tuple_count(rows, 13)
    write_tester.test(self.test_ctxtype, self.test_config)

    self.assertTrue(os.path.isfile(csv_path))

    # --- read-back phase ---------------------------------------------
    read_topo = Topology()
    reader = files.CSVReader(schema=tuple_schema, file=csv_path)
    read_back = read_topo.source(reader)
    expected = [{'a': 'A' + str(n), 'b': n + 7} for n in range(13)]

    read_tester = Tester(read_topo)
    read_tester.contents(read_back, expected)
    read_tester.test(self.test_ctxtype, self.test_config)