def test_skip_errors_basic(self):
    ''' test the basic usage of skip_errors '''
    # without skip_errors the ValueError raised on odd inputs propagates
    self.assertRaises(
        ValueError,
        lambda: G(range(5)).map(_double_only_evens).to(list))
    # with skip_errors the failing items are dropped and the rest survive
    survivors = G(range(5)).map(_double_only_evens).skip_errors().to(list)
    self.assertEqual(survivors, [0, 4, 8])
def test_switch_with_default_1(self):
    ''' switch with a default callable that maps unmatched items to None '''
    result = G(range(10)).switch(
        lambda i: i % 5,
        {
            0: lambda i: i + 1,
            4: lambda i: float(i),
        },
        lambda i: None,
    ).to(list)
    # keys 0 and 4 are transformed; everything else hits the default
    self.assertEqual(
        result,
        [1, None, None, None, 4.0, 6, None, None, None, 9.0])
def test_switch_with_default_2(self):
    ''' switch with `range` itself as the default transform '''
    result = G(range(10)).switch(
        lambda i: i % 5,
        {
            0: lambda i: i + 1,
            4: lambda i: float(i),
        },
        range,
    ).to(list)
    # unmatched items are replaced by range(i); range objects compare by value
    expected = [
        1, range(0, 1), range(0, 2), range(0, 3), 4.0,
        6, range(0, 6), range(0, 7), range(0, 8), 9.0,
    ]
    self.assertEqual(result, expected)
def tail(file_path: str) -> Iterable[str]:
    '''Yield whole lines appended to *file_path* as they are written, like `tail -f`.

    Blocks on inotify IN_MODIFY events, reads only the newly appended bytes,
    and reassembles them into complete lines.

    Args:
        file_path: path to an existing regular file to follow.

    Raises:
        FileNotFoundError: if *file_path* is not a regular file.
    '''
    # raise instead of assert: asserts are stripped when run with `python -O`
    if not os.path.isfile(file_path):
        raise FileNotFoundError(file_path)
    notifier = inotify.adapters.Inotify()
    # bug fix: add_watch was called twice (before and inside the `with`);
    # a single watch registration is all that is needed
    notifier.add_watch(file_path)
    with open(file_path, 'r') as f:
        yield from G(  # create a Generator fed by the notifier
            notifier.event_gen(yield_nones=False)
        ).filter(  # filter for IN_MODIFY events (mask equals 2)
            lambda i: i[0].mask == 2
        ).map(  # when the file is modified, get the new size
            lambda i: os.path.getsize(i[2])
        ).uniq(  # filter duplicates, just in case nothing was added to the file
        ).window(  # window the (previous_size, current_size)
            2
        ).side_task(  # seek the file descriptor and pass the input since f.seek returns None
            lambda i: f.seek(i[0])
        ).map(  # read in the newly added data
            lambda i: f.read(i[1] - i[0])
        ).chain(  # chain the incoming chunks since they might not be single lines
        ).groupby(  # separate groups by lines
            lambda i: i == '\n'
        ).filter(  # exclude groups that are just '\n', since they are the delimiters
            lambda i: i[0] == False
        ).map(  # join the characters to construct each line as a string
            lambda i: ''.join(i[1])
        # ).print('-', use_repr=True  # uncomment this line to see the constructed lines
        )
def test_insert_relation(self):
    ''' benchmark how many relations per second the db can store '''
    db = self.db
    inserts = G(count()).map(
        lambda i: db.store_relation(i, 'less_than', i + 1))
    report('relation insertion', rps(inserts))
def show_generator_output(sess, n_img, inp_z, out_channel, img_mode='RGB'):
    '''Sample the generator and return its outputs arranged in a square grid.

    Args:
        sess: active TensorFlow session.
        n_img: number of images to sample.
        inp_z: placeholder tensor for the latent vector z.
        out_channel: number of output channels for the generator G.
        img_mode: image mode string, e.g. 'RGB' — forwarded to the grid helper.

    Returns:
        The grid image produced by helper.images_square_grid.
    '''
    # infer the latent dimension from the placeholder's static shape
    z_dim = inp_z.get_shape().as_list()[-1]
    example_z = np.random.uniform(-1, 1, size=[n_img, z_dim])
    samples = sess.run(
        G(inp_z, out_channel, False),
        feed_dict={inp_z: example_z})
    # bug fix: was `image_mode` (an undefined name, NameError at runtime) —
    # the parameter is spelled `img_mode`. The unused `cmap` local
    # (None for RGB, 'gray' otherwise) was also removed.
    return helper.images_square_grid(samples, img_mode)
def test_switch_basic_usage_3(self):
    ''' switch on the first two characters of each word '''
    words = 'hello world or something'.split(' ')
    result = G(words).switch(
        lambda w: w[:2],
        {
            'wo': lambda w: w + w,
            'or': lambda w: w[0],
            'he': lambda w: w.upper(),
        },
    ).to(list)
    # 'something' matches no key and has no default, so it passes through
    self.assertEqual(result, ['HELLO', 'worldworld', 'o', 'something'])
def test_switch_benchmark(self):
    '''ensure switch can process at least 100,000 items a second'''
    # docstring fix: the old docstring claimed 1,000,000/s but the
    # assertion threshold is 100,000. Also dropped the pointless
    # `speed = ` assignment — assertGreater returns None.
    from itertools import count
    self.assertGreater(
        G(count()).switch(
            lambda i: i % 3,
            {
                0: lambda i: [i],
                2: lambda i: float(i),
            },
        ).benchmark(),
        100000)
def scrape_urls(target):
    '''Scrape *target*'s response body for quoted URLs and return them as a set.

    Collects both absolute URLs (containing '://') and root-relative paths
    (starting with '/'); relative paths are prefixed with the target's
    scheme://host. NOTE(review): items from iter_content are compared
    against byte quotes (b'"', b"'") — assumes a bytes stream; confirm
    against the `get` helper in use.
    '''
    return G(  # create a generator that iterates over the target url's content
        get(target).iter_content()
    ).filter(  # clean the incoming data to just printables
        lambda char: char in printable_bytes
    ).groupby(  # create groups by seperating the content by its quotes
        lambda char: char in {b'"', b"'"}
    ).map(  # join the bytes between quotes and encode them to strings
        lambda flag_group: b''.join(flag_group[1]).decode()
    ).filter(  # filter for relative and non-relative urls
        lambda s: (s.startswith('/') or '://' in s) and ' ' not in s
    ).map(  # add target to the beginning of relative urls
        lambda url: '/'.join(target.split('/')[:3]) + url if url.startswith('/') else url
    # ).print('found:'  # uncomment this line to see all urls the script is finding before deduplication
    ).to(set)
def model_loss(inp_real, inp_z, out_channel, alpha=0.2, smooth_factor=0.1):
    '''Build the GAN discriminator and generator losses.

    Args:
        inp_real: tensor of real input images fed to the discriminator D.
        inp_z: latent vector tensor fed to the generator G.
        out_channel: number of channels the generator should output.
        alpha: leaky-ReLU slope passed to D and G.
        smooth_factor: one-sided label smoothing — real labels become
            (1 - smooth_factor) instead of 1.

    Returns:
        A (d_loss, g_loss) tuple: the discriminator loss (real + fake
        terms summed) and the generator loss.
    '''
    # Loss from the real image for G and D
    d_model_real, d_logits_real = D(inp_real, alpha=alpha)
    d_loss_real = tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(
            logits=d_logits_real,
            # smoothed "real" labels regularize the discriminator
            labels=tf.ones_like(d_model_real) * (1 - smooth_factor)))
    # Loss from the fake image for G and D
    inp_fake = G(inp_z, out_channel, alpha=alpha)
    # reuse=True: share discriminator variables between the real/fake passes
    d_model_fake, d_logits_fake = D(inp_fake, reuse=True, alpha=alpha)
    d_loss_fake = tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(
            logits=d_logits_fake,
            labels=tf.zeros_like(d_model_fake)))
    # generator tries to make D label its output as real (all ones)
    g_loss = tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(
            logits=d_logits_fake,
            labels=tf.ones_like(d_model_fake)))
    return d_loss_real + d_loss_fake, g_loss
def test_switch_basic_usage_2(self):
    ''' switch without a default passes unmatched items through unchanged '''
    result = G(range(10)).switch(
        lambda i: i % 5,
        {
            0: lambda i: i + 1,
            4: lambda i: float(i),
        },
    ).to(list)
    self.assertEqual(result, [1, 1, 2, 3, 4.0, 6, 6, 7, 8, 9.0])
def test_switch_basic_usage_1(self):
    ''' switch on i % 3 with two branches and no default '''
    result = G(range(10)).switch(
        lambda i: i % 3,
        {
            0: lambda i: [i],
            2: lambda i: float(i),
        },
    ).to(list)
    # remainder 0 -> wrapped in a list, 2 -> float, 1 -> unchanged
    self.assertEqual(
        result,
        [[0], 1, 2.0, [3], 4, 5.0, [6], 7, 8.0, [9]])
def test_map_parallel_basic(self):
    ''' test the basic usage of map_parallel '''
    # the result must be identical no matter how many workers are used;
    # the seven copy-pasted assertions are replaced by one subTest loop
    expected = [0.0, 1.0, 2.0, 3.0, 4.0]
    for workers in range(2, 9):
        with self.subTest(workers=workers):
            self.assertEqual(
                G(range(5)).map_parallel(float, workers).to(list),
                expected)
def test_chunks_3_dimensional_g_usage(self):
    ''' chunk(2, 2, 2) nests 16 items into two 2x2x2 tuple cubes '''
    expected = [
        (((0, 1), (2, 3)), ((4, 5), (6, 7))),
        (((8, 9), (10, 11)), ((12, 13), (14, 15))),
    ]
    self.assertEqual(G(range(16)).chunk(2, 2, 2).to(list), expected)
def test_chunks_2_dimensional_g_usage(self):
    ''' chunk(3, 3) nests 18 items into two 3x3 tuple grids '''
    expected = [
        ((0, 1, 2), (3, 4, 5), (6, 7, 8)),
        ((9, 10, 11), (12, 13, 14), (15, 16, 17)),
    ]
    self.assertEqual(G(range(18)).chunk(3, 3).to(list), expected)
def test_chunk_on_speed(self):
    ''' chunk_on should process at least 100,000 items a second '''
    throughput = G(count()).chunk_on(lambda i: i % 10 == 0).benchmark()
    self.assertGreater(throughput, 100_000)
def test_chunk_on_g_usage(self):
    ''' chunk_on starts a new chunk at every number ending in 0 '''
    expected = [
        tuple(range(0, 10)),
        tuple(range(10, 20)),
        tuple(range(20, 30)),
        (30, 31),
    ]
    self.assertEqual(
        G(range(32)).chunk_on(lambda i: str(i).endswith('0')).to(list),
        expected)
def test_ifmap_basic_g_usage_1(self):
    ''' test the basic usage of ifmap with G invocation '''
    # only truthy items (1..4) are mapped through str; 0 passes through
    result = G(range(5)).ifmap(bool, str).to(list)
    self.assertEqual(result, [0, "1", "2", "3", "4"])
def test_ifmap_basic_g_usage_2(self):
    ''' test ifmap with an odd-number predicate and the double transform '''
    # bug fix: the expected list was copy-pasted from the bool/str test
    # ([0, "1", "2", "3", "4"]) and could never match — with `i % 2` as the
    # predicate, even items (0, 2, 4) pass through unchanged and only odd
    # items are mapped through `double`.
    self.assertEqual(
        G(range(5)).ifmap(lambda i: i % 2, double).to(list),
        [0, double(1), 2, double(3), 4])
def test_ifmap_benchmark_1(self):
    ''' ensure ifmap with a lambda predicate processes 1,000,000 items/sec '''
    # bug fix: `count` was passed uncalled and `.benchmark()` was missing,
    # so the assertion compared a pipeline object against an int instead of
    # an items-per-second measurement (matching test_switch_benchmark).
    self.assertGreater(
        G(count()).ifmap(lambda i: i % 2, double).benchmark(),
        1_000_000)
def test_serialization_deserialization(self):
    ''' benchmark round trips through db.serialize and db.deserialize '''
    db = self.db
    round_trip = G(count()).map(lambda i: db.deserialize(db.serialize(i)))
    report('serialization/deserialization', rps(round_trip))
def test_skip_errors_with_log_false(self):
    ''' test skip_errors with logging explicitly disabled '''
    # docstring fix: the old docstring said "log specifically enabled",
    # but the test passes log=False, i.e. logging disabled
    self.assertEqual(
        G(range(5)).map(_double_only_evens).skip_errors(log=False).to(list),
        [0, 4, 8])
def valid_ip(s: str) -> bool:
    '''Return True if *s* parses as a valid IPv4 or IPv6 address.'''
    try:
        ip_address(s)
        return True
    # bug fix: a bare `except:` also swallowed SystemExit/KeyboardInterrupt;
    # ip_address raises ValueError for invalid address strings
    except ValueError:
        return False

host_dns = G(  # iterate over each line on /etc/hosts
    open('/etc/hosts', 'r')
).map(  # strip trailing whitespace
    str.strip
).filter(  # remove empty lines
    bool
).map(  # replace tabs with spaces, split remaining lines by spaces and filter empty strings
    lambda line: [field for field in line.replace('\t', ' ').split(' ') if field]
).filter(  # only lines that start with a valid ip address and have at least one following hostname
    lambda fields: valid_ip(fields[0]) and len(fields) > 1
).map(  # map every hostname on the line to the ip address
    lambda fields: [[hostname, fields[0]] for hostname in fields[1:]]
).chain(  # chain the lists so you have one long k,v stream
# ).print(  # uncomment this line to see the k,v pairs be materialized
).to(dict)  # feed the k,v stream to dict to materialize the dictionary

if __name__ == '__main__':
    print(host_dns)
def test_ifmap_benchmark_0(self):
    ''' ensure ifmap with bool/str processes 1,000,000 items/sec '''
    # bug fix: `count` was passed uncalled and `.benchmark()` was missing,
    # so the assertion compared a pipeline object against an int instead of
    # an items-per-second measurement (matching test_switch_benchmark).
    self.assertGreater(
        G(count()).ifmap(bool, str).benchmark(),
        1_000_000)
def test_insert_item(self):
    ''' benchmark how many items per second the db can store '''
    db = self.db
    inserts = G(count()).map(lambda i: db.store_item(i))
    report('object insertion', rps(inserts))