def _test_dependency_failure(self, dset_size, pcount, key_count, kill_after):
    """Run a two-stage job while workers are killed part-way and verify the result.

    The job is ``range -> map -> shuffle -> map -> aggregate_by_key(sum)``.
    Both map stages tick every killer accumulator once per element; a killer
    that reaches zero sends SIGKILL to the process of its worker.

    Args:
        dset_size: Number of elements in the source range.
        pcount: Partition count handed to ``ctx.range``.
        key_count: Number of distinct keys (elements are keyed by
            ``i % key_count``).
        kill_after: Per-worker element counts, paired positionally with
            ``self.ctx.workers``.
    """

    class WorkerKiller(object):
        """Countdown which kills ``worker`` at the moment it reaches zero."""

        def __init__(self, worker, count):
            # The countdown starts one above the requested count.
            self.count = count + 1
            self.worker = worker
            self.lock = threading.Lock()

        def countdown(self, i):
            """Lower the counter by ``i``; kill the worker when it hits exactly 0."""
            with self.lock:
                self.count -= i
                if self.count != 0:
                    return
                logger.info('Killing %r', self.worker.name)
                # Ask the worker for its own pid, then kill that process.
                pid_future = self.worker.service('tasks').execute(
                    lambda: os.getpid())
                os.kill(pid_future.result(), signal.SIGKILL)

        def __str__(self):
            return 'kill %s after %s elements' % (self.worker.name, self.count)

    def identity_mapper(killers, key_count, i):
        """Tick every killer once and pass the element through unchanged."""
        for killer in killers:
            killer.update('countdown', 1)
        return i

    def keyby_mapper(killers, key_count, i):
        """Tick every killer once and key the element by ``i % key_count``."""
        identity_mapper(killers, key_count, i)
        return (i % key_count, i)

    try:
        self.ctx.conf['bndl.execute.attempts'] = 2

        killers = []
        for worker, count in zip(self.ctx.workers, kill_after):
            killers.append(self.ctx.accumulator(WorkerKiller(worker, count)))

        dset = (
            self.ctx
            .range(dset_size, pcount=pcount)
            .map(identity_mapper, killers, key_count)
            .shuffle()
            .map(keyby_mapper, killers, key_count)
            .aggregate_by_key(sum)
        )

        result = dset.collect()

        # One entry per key, every key present, and no element lost or duplicated.
        self.assertEqual(len(result), key_count)
        seen_keys = sorted(pluck(0, result))
        self.assertEqual(seen_keys, list(range(key_count)))
        total = sum(pluck(1, result))
        self.assertEqual(total, sum(range(dset_size)))

        # NOTE(review): presumably gives the cluster time to settle after the
        # kills -- confirm.
        time.sleep(1)
    finally:
        self.ctx.conf['bndl.execute.attempts'] = 1
def test_pluck():
    """Check ``pluck`` on lists and dicts, with single and multiple indices."""
    # Integer indices into nested lists.
    pairs = [[0, 1], [2, 3], [4, 5]]
    firsts = list(pluck(0, pairs))
    assert firsts == [0, 2, 4]

    triples = [[0, 1, 2], [3, 4, 5]]
    assert list(pluck([0, 1], triples)) == [(0, 1), (3, 4)]

    # A default stands in for rows that are too short.
    ragged = [[0], [0, 1]]
    assert list(pluck(1, ragged, None)) == [None, 1]

    # String keys into dicts.
    data = [
        {"id": 1, "name": "cheese"},
        {"id": 2, "name": "pies", "price": 1},
    ]
    ids = list(pluck("id", data))
    assert ids == [1, 2]
    prices = list(pluck("price", data, None))
    assert prices == [None, 1]

    # A list of keys yields tuples, even for a single key.
    assert list(pluck(["id", "name"], data)) == [(1, "cheese"), (2, "pies")]
    assert list(pluck(["name"], data)) == [("cheese",), ("pies",)]
    assert list(pluck(["price", "other"], data, None)) == [
        (None, None),
        (1, None),
    ]

    # Without a default, a missing index or key is expected to raise.
    assert raises(IndexError, lambda: list(pluck(1, [[0]])))
    assert raises(KeyError, lambda: list(pluck("name", [{"id": 1}])))
def test_pluck():
    """Check ``pluck`` on lists and dicts, with single and multiple indices."""
    # Integer indices into nested lists.
    pairs = [[0, 1], [2, 3], [4, 5]]
    firsts = list(pluck(0, pairs))
    assert firsts == [0, 2, 4]

    triples = [[0, 1, 2], [3, 4, 5]]
    assert list(pluck([0, 1], triples)) == [(0, 1), (3, 4)]

    # A default stands in for rows that are too short.
    ragged = [[0], [0, 1]]
    assert list(pluck(1, ragged, None)) == [None, 1]

    # String keys into dicts.
    data = [
        {'id': 1, 'name': 'cheese'},
        {'id': 2, 'name': 'pies', 'price': 1},
    ]
    ids = list(pluck('id', data))
    assert ids == [1, 2]
    prices = list(pluck('price', data, None))
    assert prices == [None, 1]

    # A list of keys yields tuples, even for a single key.
    assert list(pluck(['id', 'name'], data)) == [(1, 'cheese'), (2, 'pies')]
    assert list(pluck(['name'], data)) == [('cheese',), ('pies',)]
    assert list(pluck(['price', 'other'], data, None)) == [
        (None, None),
        (1, None),
    ]

    # Without a default, a missing index or key is expected to raise.
    assert raises(IndexError, lambda: list(pluck(1, [[0]])))
    assert raises(KeyError, lambda: list(pluck('name', [{'id': 1}])))
def test_average(self):
    """Compute per-key averages via ``combine_by_key`` and compare with a local result."""
    values = range(100)
    keys = [value // 20 for value in values]

    # Reference averages, computed locally from the same (key, value) pairs.
    expected = {}
    for key, members in groupby(zip(keys, values), itemgetter(0)):
        member_values = list(pluck(1, members))
        expected[key] = sum(member_values) / len(member_values)

    def start(value):
        # First value for a key becomes a (sum, count) pair.
        return (value, 1)

    def add_value(acc, value):
        # Fold one more value into a (sum, count) pair.
        return (acc[0] + value, acc[1] + 1)

    def merge(left, right):
        # Combine two (sum, count) pairs.
        return (left[0] + right[0], left[1] + right[1])

    pairs = self.ctx.collection(zip(keys, values))
    sum_count = pairs.combine_by_key(start, add_value, merge)
    avg_by_key = sum_count.starmap(
        lambda key, value: (key, value[0] / value[1])
    )

    self.assertDictEqual(avg_by_key.collect_as_map(), expected)
def test_pluck():
    """Check ``pluck`` on lists and dicts, including the ``no_default2`` sentinel."""
    # Integer indices into nested lists.
    pairs = [[0, 1], [2, 3], [4, 5]]
    firsts = list(pluck(0, pairs))
    assert firsts == [0, 2, 4]

    triples = [[0, 1, 2], [3, 4, 5]]
    assert list(pluck([0, 1], triples)) == [(0, 1), (3, 4)]

    # A default stands in for rows that are too short.
    ragged = [[0], [0, 1]]
    assert list(pluck(1, ragged, None)) == [None, 1]

    # String keys into dicts, here with 0 as the default.
    data = [
        {"id": 1, "name": "cheese"},
        {"id": 2, "name": "pies", "price": 1},
    ]
    ids = list(pluck("id", data))
    assert ids == [1, 2]
    prices = list(pluck("price", data, 0))
    assert prices == [0, 1]

    # A list of keys yields tuples, even for a single key.
    assert list(pluck(["id", "name"], data)) == [(1, "cheese"), (2, "pies")]
    assert list(pluck(["name"], data)) == [("cheese", ), ("pies", )]
    assert list(pluck(["price", "other"], data, 0)) == [(0, 0), (1, 0)]

    # Without a default, a missing index or key is expected to raise.
    assert raises(IndexError, lambda: list(pluck(1, [[0]])))
    assert raises(KeyError, lambda: list(pluck("name", [{"id": 1}])))

    # Passing ``no_default2`` explicitly is expected to act like passing no default.
    assert list(pluck(0, pairs, no_default2)) == [0, 2, 4]
    assert raises(IndexError, lambda: list(pluck(1, [[0]], no_default2)))
root = tree.getroot() reviews = root.findall("Review") sentences = root.findall("**/sentence") # print("# Reviews : ", len(reviews)) print("# Sentences : ", len(sentences)) opinions = root.findall("**/**/Opinion") categories = [opinion.attrib["category"] for opinion in opinions] targets = [opinion.attrib["target"] for opinion in opinions] entities_and_aspects = [cat.split('#') for cat in categories] polarities = [opinion.attrib["polarity"] for opinion in opinions] print("# Opinions : ", len(opinions)) df = pd.DataFrame({ "category": categories, "entity": list(pluck(0, entities_and_aspects)), 'aspect': list(pluck(1, entities_and_aspects)), 'target': targets, "polarity": polarities }).sort_values("entity") dfs.append(df) df_all = pd.concat(dfs) df_all.polarity = df_all.polarity.apply(lambda t: t if t else 'unknown') df_all.to_csv(SEMEVAL_DATASETS_2016 / 'all-entities-and-aspects.csv') g = sns.countplot(y="category", hue="polarity", data=df_all, palette={ "neutral": "yellow",