def setup_parallel():
    """Build the 'MainWorkflow' fixture whose root tracker has parallel children.

    Tracker ``a`` is created with ``HasParallelChildren=True`` and receives
    three 'CreateFolder' trackers, each of which carries a 'CopyFiles'
    tracker with an estimate of 10, 11 and 12 seconds respectively.  The
    last 'CopyFiles' tracker additionally carries one more 12-second
    'CopyFiles' tracker.

    Returns:
        The ProgressMonitor (backed by a MockProgressManager) holding ``a``.
    """
    monitor = ProgressMonitor(Name='MainWorkflow',
                              DbConnection=MockProgressManager())

    root = ProgressTracker(Name='CopyFiles', FriendlyId='a',
                           HasParallelChildren=True)
    folder_0 = ProgressTracker(Name='CreateFolder', FriendlyId='b')
    copy_0 = ProgressTracker(Name='CopyFiles', FriendlyId='c',
                             EstimatedSeconds=10)
    folder_1 = ProgressTracker(Name='CreateFolder', FriendlyId='br1')
    copy_1 = ProgressTracker(Name='CopyFiles', FriendlyId='c1',
                             EstimatedSeconds=11)
    folder_2 = ProgressTracker(Name='CreateFolder', FriendlyId='b2')
    copy_2 = ProgressTracker(Name='CopyFiles', FriendlyId='c2',
                             EstimatedSeconds=12)
    # NOTE(review): this tracker reuses FriendlyId 'c2' (same as copy_2);
    # kept as-is because callers of this fixture may depend on it.
    copy_2_child = ProgressTracker(Name='CopyFiles', FriendlyId='c2',
                                   EstimatedSeconds=12)

    assert root.friendly_id == 'a'

    # Attach bottom-up, in the same order the fixture has always used.
    attachments = (
        (folder_0, copy_0),
        (folder_1, copy_1),
        (folder_2, copy_2),
        (copy_2, copy_2_child),
        (root, folder_0),
        (root, folder_1),
        (root, folder_2),
        (monitor, root),
    )
    for parent, child in attachments:
        parent.with_tracker(child)

    return monitor
def test_can_total_progress_with_child_events():
    """A monitor holding four attached trackers reports four children."""
    monitor = ProgressMonitor(DbConnection=MockProgressManager())

    copy_root = ProgressTracker(Name='CopyFiles', FriendlyId='abc')
    create_folder = ProgressTracker(Name='CreateFolder')
    copy_child = ProgressTracker(Name='CopyFiles')
    send_email = ProgressTracker(Name='SendEmail')

    copy_root.with_tracker(create_folder)
    # with_tracker is chained: the second call is made on whatever the
    # first call returns.
    chained = create_folder.with_tracker(copy_child)
    chained.with_tracker(send_email)
    monitor.with_tracker(copy_root)

    child_total = len(monitor.all_children)
    assert child_total == 4
def test_can_start_all_parents(c_mock, g_mock):
    """Starting a tracker with ``Parents=True`` also starts its ancestors.

    Args:
        c_mock: mock whose side effect is set to ``children_side_effect``.
        g_mock: mock whose side effect is set to ``get_by_id_side_effect``.

    The monitor is loaded by id, a tracker is looked up by id and started
    with ``Parents=True``; both its parent and grandparent must then report
    the status 'In Progress'.
    """
    g_mock.side_effect = get_by_id_side_effect
    c_mock.side_effect = children_side_effect
    pm = ProgressMonitor(DbConnection=RedisProgressManager())
    pm = pm.load('94a52a41-bf9e-43e3-9650-859f7c263dc8')
    t = pm.find_id('039fe353-2c01-49f4-a743-b09c02c9f683')
    assert t
    t.start(Parents=True)
    assert t.parent.status == 'In Progress'
    # Parenthesised single-argument print: identical output on Python 2,
    # and no longer a SyntaxError on Python 3.
    print(t.parent.id)
    assert t.parent.parent.status == 'In Progress'
def setup_basic():
    """Build the 'MainWorkflow' fixture with a single three-tracker chain.

    Trackers 'a' (CopyFiles), 'b' (CreateFolder) and 'c' (CopyFiles,
    estimated at 10 seconds) are attached monitor -> a -> b -> c.

    Returns:
        The ProgressMonitor (backed by a MockProgressManager).
    """
    monitor = ProgressMonitor(Name='MainWorkflow',
                              DbConnection=MockProgressManager())
    first = ProgressTracker(Name='CopyFiles', FriendlyId='a')
    second = ProgressTracker(Name='CreateFolder', FriendlyId='b')
    third = ProgressTracker(Name='CopyFiles', FriendlyId='c',
                            EstimatedSeconds=10)
    assert first.friendly_id == 'a'

    # Attach top-down: monitor -> a -> b -> c.
    for parent, child in ((monitor, first), (first, second), (second, third)):
        parent.with_tracker(child)
    return monitor
def search(self):
    """Run the configured queries against ``self.corpus``.

    The query list is ``self.queries``, extended (or replaced, when
    ``self.queries`` is not a list) by ``self.dictionary_text`` if the
    dictionary is switched on and is a list.

    Two modes, selected through ``QUERY_MODES[self.query_mode]``:

    * ``'filter'``: all queries are OR-ed into one query.  Matching
      documents form the sample; when ``self.context_window`` is set, the
      tokens of each match are replaced by the context returned by the
      index.  Non-matching documents are returned as ``remaining``.
    * any other mode: one score column per query (labelled with the query
      label) is appended to a copy of the corpus.  Unless
      ``self.include_unmatched`` is set, the sample is restricted to the
      documents hit by at least one query and the rest is returned as
      ``remaining``.

    Returns:
        tuple: ``(sample, remaining)``; ``remaining`` is ``None`` when
        unmatched documents are kept in the sample.
    """
    queries = self.queries
    if self.dictionary_on and isinstance(self.dictionary_text, list):
        # queries starts as str, but becomes a list once queries are given
        if isinstance(queries, list):
            queries = queries + self.dictionary_text
        else:
            queries = self.dictionary_text

    def rows_not_in(selected):
        # Sub-corpus of every row whose position is absent from `selected`.
        keep = np.ones(len(self.corpus), dtype=bool)
        keep[selected] = False
        return self.corpus[np.nonzero(keep)[0]]

    with ProgressMonitor().task(100, 'Starting search..') as monitor:
        monitor.add_listener(self.callback)
        index = get_index(self.corpus, monitor=monitor.submonitor(50))

        if QUERY_MODES[self.query_mode] == 'filter':
            # simple search: a document matches if any query matches
            query = " OR ".join(
                '({})'.format(parse_query(q)[1]) for q in queries)
            if not self.context_window:
                selected = list(index.search(query))
                sample = self.corpus[selected]
            else:
                sample = self.corpus.copy()
                # copy the token store so the original corpus keeps its
                # full token lists
                sample._tokens = sample._tokens.copy()
                selected = []
                for i, context in index.get_context(
                        query, int(self.context_window)):
                    sample._tokens[i] = context
                    selected.append(i)
                sample = sample[selected]
            remaining = rows_not_in(selected)
        else:
            sample = self.corpus.copy()
            remaining = None
            seen = set()
            for q in queries:
                label, q = parse_query(q)
                # todo: implement as sparse matrix!
                # builtin float: the np.float alias no longer exists in
                # current NumPy releases
                scores = np.zeros(len(sample), dtype=float)
                for i, j in index.search(q, frequencies=True):
                    seen.add(i)
                    scores[i] = j
                scores = scores.reshape((len(sample), 1))
                sample.extend_attributes(scores, [label])

            if not self.include_unmatched:
                # sorted() keeps the sample in corpus order instead of the
                # arbitrary iteration order of the set
                selected = sorted(seen)
                remaining = rows_not_in(selected)
                sample = sample[selected]

    return sample, remaining
def get_index(corpus: Corpus, monitor: ProgressMonitor,
              multiple_processors=False, **kargs) -> Index:
    """
    Return the index cached on `corpus`, (re)building it when it is missing
    or was built from a different token store than `corpus._tokens`.

    A per-corpus lock is created on first use (under the module-wide lock)
    and held while the cached index is checked and, if needed, rebuilt.

    :param corpus: corpus to index; the index and its lock are stored on it
    :param monitor: receives the "Getting tokens" / "Creating index" updates
    :param multiple_processors: if true, use all but one CPU (at least one)
    :param kargs: passed through to Index
    """
    with _GLOBAL_LOCK:
        lock_missing = not hasattr(corpus, "_orange3sma_index_lock")
        if lock_missing:
            corpus._orange3sma_index_lock = Lock()

    with corpus._orange3sma_index_lock:
        cached = getattr(corpus, "_orange3sma_index", None)
        if cached and cached.tokens is corpus._tokens:
            return cached

        monitor.update(0, "Getting tokens")
        corpus.tokens  # force tokens
        if multiple_processors:
            procs = max(1, multiprocessing.cpu_count() - 1)
        else:
            procs = 1
        monitor.update(50, "Creating index")
        fresh = Index(corpus, procs=procs, **kargs)
        corpus._orange3sma_index = fresh
        return fresh
def get_counts(
        corpus: Corpus, monitor: ProgressMonitor
) -> Tuple[Mapping[str, int], Mapping[str, int]]:
    """Count term and document frequencies over ``corpus.tokens``.

    :param corpus: object whose ``tokens`` yields one token sequence per
        document
    :param monitor: progress sink; gets two ``update`` calls and one
        ``subtask(50)`` that is advanced once per document
    :return: ``(tf, df)`` where ``tf`` maps each token to its total number
        of occurrences and ``df`` maps each token to the number of
        documents containing it
    """
    monitor.update(0, "Getting tokens")
    documents = corpus.tokens  # forces tokens to be created
    n = len(documents)

    term_freq = {}      # {word: occurrences}
    docs_per_term = {}  # {word: {doc_i, ...}}

    monitor.update(50, "Counting words")
    with monitor.subtask(50) as sm:
        sm.begin(n)
        for i, doc_tokens in enumerate(documents):
            sm.update(message="Counting words {i}/{n}".format(i=i, n=n))
            for token in doc_tokens:
                docs_per_term.setdefault(token, set()).add(i)
                term_freq[token] = term_freq.get(token, 0) + 1

    # Collapse each set of document indices into its size.
    doc_freq = {word: len(doc_ids) for word, doc_ids in docs_per_term.items()}
    return term_freq, doc_freq
def calculate(self):
    """Compute the statistics table for ``self.corpus``.

    Runs inside a 100-unit progress task that reports to ``self.callback``.
    When ``self.reference_corpus`` is set the corpus is compared against
    it; otherwise plain frequencies are computed.

    Returns:
        Whatever ``compare`` or ``frequencies`` returns.
    """
    task = ProgressMonitor().task(100, 'Calculating statistics..')
    with task as monitor:
        monitor.add_listener(self.callback)
        if not self.reference_corpus:
            return frequencies(self.corpus, monitor=monitor)
        return compare(self.corpus, self.reference_corpus, monitor=monitor)
def compare(corpus: Corpus, reference_corpus: Corpus, monitor: ProgressMonitor):
    """Build a per-word table comparing `corpus` with `reference_corpus`.

    Term and document frequencies are obtained with get_counts for each
    corpus (each on its own ``monitor.submonitor(40)``).  For every word
    occurring in either corpus the table holds the relative frequency, raw
    frequency and document frequency for both corpora, plus
    "overrepresentation": the corpus relative frequency divided by the
    reference relative frequency.

    :return: the result of _create_table for the word list and columns
    """
    tf1, df1 = get_counts(corpus, monitor.submonitor(40))
    tf2, df2 = get_counts(reference_corpus, monitor.submonitor(40))
    vocabulary = list(set(df1.keys()) | set(df2.keys()))

    def as_column(lookup):
        # One integer per vocabulary word; words missing from `lookup` get 0.
        return np.fromiter((lookup.get(word, 0) for word in vocabulary), int)

    counts = as_column(tf1)
    docfreqs = as_column(df1)
    refcounts = as_column(tf2)
    refdocfreqs = as_column(df2)

    relc = _relfreq(counts)
    relcr = _relfreq(refcounts)
    over = relc / relcr

    columns = OrderedDict()
    columns["percent"] = relc
    columns["frequency"] = counts
    columns["docfreq"] = docfreqs
    columns["overrepresentation"] = over
    columns["reference_percent"] = relcr
    columns["reference_frequency"] = refcounts
    columns["reference_docfreq"] = refdocfreqs
    return _create_table(vocabulary, columns)
# Copyright 2017 Amazon.com, Inc. or its affiliates. All Rights Reserved.
# Licensed under the Amazon Software License (the "License"). You may not use
# this file except in compliance with the License. A copy of the License is
# located at
#     http://aws.amazon.com/asl/
# or in the "license" file accompanying this file. This file is distributed on
# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or
# implied. See the License for the specific language governing permissions and
# limitations under the License.

# Example: build a nested workflow on a Redis-backed monitor, print the item
# counts and statuses, start the deepest task together with its parents, and
# print the in-progress figures.
import redis
from progressmonitor import RedisProgressManager, ProgressMonitor, \
    ProgressTracker

pool = redis.ConnectionPool(host='localhost', port=6379, db=0)
r = redis.Redis(connection_pool=pool)
rroot = RedisProgressManager(RedisConnection=r)
root = ProgressMonitor(DbConnection=rroot, Name="MasterWorkflow")
wf_a = ProgressTracker(Name='Workflow A', FriendlyId='WorkflowA')
wf_b = ProgressTracker(Name='Workflow B', FriendlyId='WorkflowB')
wf_b_1 = ProgressTracker(Name='SubWorkflow B1', FriendlyId='WorkflowB1')
wf_b_2 = ProgressTracker(Name='SubWorkflow B2', FriendlyId='WorkflowB2')
task_a1 = ProgressTracker(Name='Task A-1', FriendlyId='TaskA1')
task_a2 = ProgressTracker(Name='Task A-2', FriendlyId='TaskA2')
task_b2_1 = ProgressTracker(Name='Task B2-1', FriendlyId='TaskB21')

root.with_tracker(wf_a).with_tracker(wf_b)
wf_b.with_tracker(wf_b_1).with_tracker(wf_b_2)
wf_a.with_tracker(task_a1).with_tracker(task_a2)
wf_b_2.with_tracker(task_b2_1)

# print(...) with one pre-formatted string behaves the same on Python 2 and
# Python 3 (a bare `print a, b` statement is a SyntaxError on Python 3).
print("Total items in workflow: {}".format(root.all_children_count))
print("Total items not started: {}".format(root.not_started_count))
print("{} {} {} {}".format(task_b2_1.status, wf_b_2.status,
                           wf_b.status, root.status))

task_b2_1.start(Parents=True)
print("Total items started: {}".format(root.in_progress_count))
print("Percentage started: {}".format(root.in_progress_pct))
# Copyright 2017 Amazon.com, Inc. or its affiliates. All Rights Reserved.
# Licensed under the Amazon Software License (the "License"). You may not use
# this file except in compliance with the License. A copy of the License is
# located at
#     http://aws.amazon.com/asl/
# or in the "license" file accompanying this file. This file is distributed on
# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or
# implied. See the License for the specific language governing permissions and
# limitations under the License.

# Example: build a nested workflow on a Redis-backed monitor, start the
# deepest task together with its parents, print the in-progress figures, call
# update_all(), and create a second monitor on the same Redis manager.
import redis
from progressmonitor import RedisProgressManager, ProgressMonitor, \
    ProgressTracker

pool = redis.ConnectionPool(host='localhost', port=6379, db=0)
r = redis.Redis(connection_pool=pool)
rpm = RedisProgressManager(RedisConnection=r)
pm = ProgressMonitor(DbConnection=rpm, Name="MasterWorkflow")
wf_a = ProgressTracker(Name='Workflow A', FriendlyId='WorkflowA')
wf_b = ProgressTracker(Name='Workflow B', FriendlyId='WorkflowB')
wf_b_1 = ProgressTracker(Name='SubWorkflow B1', FriendlyId='WorkflowB1')
wf_b_2 = ProgressTracker(Name='SubWorkflow B2', FriendlyId='WorkflowB2')
task_a1 = ProgressTracker(Name='Task A-1', FriendlyId='TaskA1')
task_a2 = ProgressTracker(Name='Task A-2', FriendlyId='TaskA2')
task_b2_1 = ProgressTracker(Name='Task B2-1', FriendlyId='TaskB21')

pm.with_tracker(wf_a).with_tracker(wf_b)
wf_b.with_tracker(wf_b_1).with_tracker(wf_b_2)
wf_a.with_tracker(task_a1).with_tracker(task_a2)
wf_b_2.with_tracker(task_b2_1)

task_b2_1.start(Parents=True)
# print(...) with a single argument works on both Python 2 and Python 3.
print("Total items started: {}".format(pm.in_progress_count))
print("Percentage started: {}".format(pm.in_progress_pct))

pm.update_all()
id = wf_b.id
pm2 = ProgressMonitor(DbConnection=rpm)
# Copyright 2017 Amazon.com, Inc. or its affiliates. All Rights Reserved.
# Licensed under the Amazon Software License (the "License"). You may not use
# this file except in compliance with the License. A copy of the License is
# located at
#     http://aws.amazon.com/asl/
# or in the "license" file accompanying this file. This file is distributed on
# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or
# implied. See the License for the specific language governing permissions and
# limitations under the License.

# Example: one monitor with one task; print status and elapsed seconds of
# both before starting, after starting each, and after succeeding.
import redis
import time
from progressmonitor import RedisProgressManager, ProgressMonitor, \
    ProgressTracker

pool = redis.ConnectionPool(host='localhost', port=6379, db=0)
r = redis.Redis(connection_pool=pool)
rpm = RedisProgressManager(RedisConnection=r)
pm = ProgressMonitor(DbConnection=rpm, Name="MasterWorkflow")
task = ProgressTracker(Name='SingleTask', FriendlyId='MyTask')
pm.with_tracker(task)

# Each multi-value line is formatted into one string first so that
# print(...) produces "a b" on Python 2 as well as Python 3 (a bare
# `print a, b` statement is a SyntaxError on Python 3).
print("{} {}".format(pm.status, task.status))
print(pm.start())
print("{} {}".format(pm.status, task.status))

time.sleep(1)
print("{} {}".format(pm.elapsed_time_in_seconds,
                     task.elapsed_time_in_seconds))

task.start()
time.sleep(1)
print("{} {}".format(pm.status, task.status))
print("{} {}".format(pm.elapsed_time_in_seconds,
                     task.elapsed_time_in_seconds))

task.succeed()
pm.succeed()
print("{} {}".format(pm.status, task.status))
print("{} {}".format(pm.elapsed_time_in_seconds,
                     task.elapsed_time_in_seconds))
# Copyright 2017 Amazon.com, Inc. or its affiliates. All Rights Reserved.
# Licensed under the Amazon Software License (the "License"). You may not use
# this file except in compliance with the License. A copy of the License is
# located at
#     http://aws.amazon.com/asl/
# or in the "license" file accompanying this file. This file is distributed on
# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or
# implied. See the License for the specific language governing permissions and
# limitations under the License.

# Example: build a nested workflow on a DynamoDB-backed monitor, start the
# deepest task together with its parents, call update_all(), then load the
# workflow again by id and print its in-progress figures.
from progressmonitor import ProgressMonitor, ProgressTracker, DynamoDbDriver

# One driver instance, shared by both monitors below.
driver = DynamoDbDriver(TablePrefix='test')
root = ProgressMonitor(DbConnection=driver, Name="MasterWorkflow")
wf_a = ProgressTracker(Name='Workflow A', FriendlyId='WorkflowA')
wf_b = ProgressTracker(Name='Workflow B', FriendlyId='WorkflowB')
wf_b_1 = ProgressTracker(Name='SubWorkflow B1', FriendlyId='WorkflowB1')
wf_b_2 = ProgressTracker(Name='SubWorkflow B2', FriendlyId='WorkflowB2')
task_a1 = ProgressTracker(Name='Task A-1', FriendlyId='TaskA1')
task_a2 = ProgressTracker(Name='Task A-2', FriendlyId='TaskA2')
task_b2_1 = ProgressTracker(Name='Task B2-1', FriendlyId='TaskB21')

root.with_tracker(wf_a).with_tracker(wf_b)
wf_b.with_tracker(wf_b_1).with_tracker(wf_b_2)
wf_a.with_tracker(task_a1).with_tracker(task_a2)
wf_b_2.with_tracker(task_b2_1)

task_b2_1.start(Parents=True)
# print(...) with a single argument works on both Python 2 and Python 3.
print("Total items started: {}".format(root.in_progress_count))
print("Percentage started: {}".format(root.in_progress_pct))

root.update_all()
id = root.id
# DbConnection must be a driver instance; this line used to pass the
# DynamoDbDriver class itself.
root2 = ProgressMonitor(DbConnection=driver)
print("Total items: {}".format(root2.all_children_count))
root2 = root.load(id)
print("Total items started: {}".format(root2.in_progress_count))
print("Percentage started: {}".format(root2.in_progress_pct))
# Copyright 2017 Amazon.com, Inc. or its affiliates. All Rights Reserved.
# Licensed under the Amazon Software License (the "License"). You may not use
# this file except in compliance with the License. A copy of the License is
# located at
#     http://aws.amazon.com/asl/
# or in the "license" file accompanying this file. This file is distributed on
# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or
# implied. See the License for the specific language governing permissions and
# limitations under the License.

# Example: attach a metric with two dimensions to a tracker, run the tracker
# for two seconds, calling update_all() after each state change, and print
# its elapsed, start and finish times.
import redis
import time
from progressmonitor import RedisProgressManager, ProgressMonitor, ProgressTracker

pool = redis.ConnectionPool(host='localhost', port=6379, db=0)
r = redis.Redis(connection_pool=pool)
rpm = RedisProgressManager(RedisConnection=r)
pm = ProgressMonitor(DbConnection=rpm)

c = ProgressTracker(Name='TestWorkflow').with_metric(Namespace='dev_testing',
                                                     Metric='OS/Startup')
c.metric.with_dimension('linux_flavor', 'redhat') \
        .with_dimension('version', '6.8')
pm.with_tracker(c)
pm.update_all()

c.start(Parents=True)
pm.update_all()

# print(...) with a single argument works on both Python 2 and Python 3.
print('sleeping')
time.sleep(2)

c.succeed()
pm.update_all()
print(c.elapsed_time_in_seconds)
print(c.start_time)
print(c.finish_time)
def test_can_convert_from_db(c_mock, g_mock):
    """A monitor loaded by id from the (mocked) store exposes five children.

    Args:
        c_mock: mock whose side effect is set to ``children_side_effect``.
        g_mock: mock whose side effect is set to ``get_by_id_side_effect``.
    """
    g_mock.side_effect = get_by_id_side_effect
    c_mock.side_effect = children_side_effect

    empty_monitor = ProgressMonitor(DbConnection=RedisProgressManager())
    loaded = empty_monitor.load('94a52a41-bf9e-43e3-9650-859f7c263dc8')

    child_total = len(loaded.all_children)
    assert child_total == 5
# Copyright 2017 Amazon.com, Inc. or its affiliates. All Rights Reserved. # Licensed under the Amazon Software License (the "License"). You may not use this file except in compliance with the License. A copy of the License is located at # http://aws.amazon.com/asl/ # or in the "license" file accompanying this file. This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or implied. See the License for the specific language governing permissions and limitations under the License. import redis import time from progressmonitor import RedisProgressManager, ProgressMonitor, \ ProgressTracker pool = redis.ConnectionPool(host='localhost', port=6379, db=0) r = redis.Redis(connection_pool=pool) rpm = RedisProgressManager(RedisConnection=r) pm = ProgressMonitor(DbConnection=rpm, Name="MasterWorkflow") task_a = ProgressTracker(Name='Task A', FriendlyId='TaskA') task_b = ProgressTracker(Name='Task B', FriendlyId='TaskB') task_c = ProgressTracker(Name='Task C', FriendlyId='TaskC') pm.with_tracker(task_a).with_tracker(task_b).with_tracker(task_c) print pm.status, task_a.status print pm.start() print pm.status, task_a.status, task_b.status, task_c.status time.sleep(1) task_a.start() time.sleep(1) task_b.start() time.sleep(1) task_c.start() print pm.elapsed_time_in_seconds, \ task_a.elapsed_time_in_seconds, \ task_b.elapsed_time_in_seconds, \
# Copyright 2017 Amazon.com, Inc. or its affiliates. All Rights Reserved.
# Licensed under the Amazon Software License (the "License"). You may not use
# this file except in compliance with the License. A copy of the License is
# located at
#     http://aws.amazon.com/asl/
# or in the "license" file accompanying this file. This file is distributed on
# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or
# implied. See the License for the specific language governing permissions and
# limitations under the License.

# Example: build a workflow that mixes parallel sub-workflows
# (HasParallelChildren=True) with trackers carrying EstimatedSeconds, then
# print the monitor's total estimate.
import redis
from progressmonitor import RedisProgressManager, ProgressMonitor, \
    ProgressTracker

pool = redis.ConnectionPool(host='localhost', port=6379, db=0)
r = redis.Redis(connection_pool=pool)
rpm = RedisProgressManager(RedisConnection=r)
pm = ProgressMonitor(DbConnection=rpm, Name="MasterWorkflow")
wf_a = ProgressTracker(Name='Workflow A', FriendlyId='WorkflowA',
                       HasParallelChildren=True)
wf_b = ProgressTracker(Name='Workflow B', FriendlyId='WorkflowB')
wf_b_1 = ProgressTracker(Name='SubWorkflow B1', FriendlyId='WorkflowB1')
wf_b_2 = ProgressTracker(Name='SubWorkflow B2', FriendlyId='WorkflowB2')
task_a1 = ProgressTracker(Name='Task A-1', EstimatedSeconds=10)
wf_a_1 = ProgressTracker(Name='SubWorkflow A1', HasParallelChildren=True)
wf_a1_1 = ProgressTracker(Name='SubWorkflow A1, Task 1', EstimatedSeconds=20)
wf_a1_2 = ProgressTracker(Name='SubWorkflow A1, Task 2', EstimatedSeconds=30)

pm.with_tracker(wf_a).with_tracker(wf_b)
wf_b.with_tracker(wf_b_1).with_tracker(wf_b_2)
wf_a_1.with_tracker(wf_a1_1).with_tracker(wf_a1_2)
wf_a.with_tracker(task_a1).with_tracker(wf_a_1)

# print(...) with a single argument works on both Python 2 and Python 3.
print("Total estimated seconds: {}".format(pm.total_estimate))
# Copyright 2017 Amazon.com, Inc. or its affiliates. All Rights Reserved.
# Licensed under the Amazon Software License (the "License"). You may not use
# this file except in compliance with the License. A copy of the License is
# located at
#     http://aws.amazon.com/asl/
# or in the "license" file accompanying this file. This file is distributed on
# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or
# implied. See the License for the specific language governing permissions and
# limitations under the License.

# Example: build a nested workflow on a Redis-backed monitor, start the
# deepest task together with its parents, print the in-progress figures, call
# update_all(), and create a second monitor on the same Redis manager.
import redis
from progressmonitor import RedisProgressManager, ProgressMonitor, \
    ProgressTracker

pool = redis.ConnectionPool(host='localhost', port=6379, db=0)
r = redis.Redis(connection_pool=pool)
rroot = RedisProgressManager(RedisConnection=r)
root = ProgressMonitor(DbConnection=rroot, Name="MasterWorkflow")
wf_a = ProgressTracker(Name='Workflow A', FriendlyId='WorkflowA')
wf_b = ProgressTracker(Name='Workflow B', FriendlyId='WorkflowB')
wf_b_1 = ProgressTracker(Name='SubWorkflow B1', FriendlyId='WorkflowB1')
wf_b_2 = ProgressTracker(Name='SubWorkflow B2', FriendlyId='WorkflowB2')
task_a1 = ProgressTracker(Name='Task A-1', FriendlyId='TaskA1')
task_a2 = ProgressTracker(Name='Task A-2', FriendlyId='TaskA2')
task_b2_1 = ProgressTracker(Name='Task B2-1', FriendlyId='TaskB21')

root.with_tracker(wf_a).with_tracker(wf_b)
wf_b.with_tracker(wf_b_1).with_tracker(wf_b_2)
wf_a.with_tracker(task_a1).with_tracker(task_a2)
wf_b_2.with_tracker(task_b2_1)

task_b2_1.start(Parents=True)
# print(...) with a single argument works on both Python 2 and Python 3.
print("Total items started: {}".format(root.in_progress_count))
print("Percentage started: {}".format(root.in_progress_pct))

root.update_all()
id = root.id
root2 = ProgressMonitor(DbConnection=rroot)