def delete(ctx, cfg):
    '''Delete existing predictions'''
    stmt = db.delete_predictions(cfg, get('cx', ctx), get('cy', ctx))
    db.execute_statement(cfg, stmt)
    return ctx
def test_unload_segments():
    inputs = {'segments': 1, 'other': 2}
    outputs = segaux.unload_segments(inputs)
    assert get('segments', outputs, None) is None
    assert get('other', outputs) == 2
def test_segment_runs_as_expected(client):
    '''
    As a blackmagic user, when I send cx, cy, & acquired range
    via HTTP POST, change segments are detected and saved so
    that they can be retrieved later.
    '''
    response = client.post('/segment',
                           json={'cx': test.cx,
                                 'cy': test.cy,
                                 'acquired': test.acquired})

    chips = _ceph.select_chip(cx=test.cx, cy=test.cy)
    pixels = _ceph.select_pixels(cx=test.cx, cy=test.cy)

    print("PIXEL LENGTH:{}".format(len(pixels)))
    print("PIXEL TYPE:{}".format(type(pixels)))

    segments = _ceph.select_segments(cx=test.cx, cy=test.cy)

    assert response.status == '200 OK'
    assert get('cx', response.get_json()) == test.cx
    assert get('cy', response.get_json()) == test.cy
    assert get('acquired', response.get_json()) == test.acquired
    assert get('exception', response.get_json(), None) is None
    assert len(list(chips)) == 1
    assert len(list(pixels)) == 10000
    assert len(list(segments)) == 10000
def tagRawSentence(self, rawLine, DICT, word_dict, pos_dict):
    line = initializeSentence(DICT, rawLine)
    sen = []
    wordTags = line.split()
    for i in range(len(wordTags)):
        fwObject = FWObject.getFWObject(wordTags, i)
        word, tag = getWordTag(wordTags[i])
        node = self.findFiredNode(fwObject)
        if node.depth > 0:
            current_dict = ct.get(word.lower(), word_dict, default=0)
            if current_dict == 0:
                sen.append((0,
                            ct.get(node.conclusion.lower(), pos_dict, default=0),
                            0))
            else:
                sen.append((ct.get("index", current_dict),
                            ct.get(node.conclusion.lower(), pos_dict, default=0),
                            ct.get("domain", current_dict)))
        else:
            # Fired at root: fall back to the initialized tag.
            # default=0 added for consistency with the sibling branches,
            # so an out-of-lexicon tag cannot raise KeyError.
            current_dict = ct.get(word.lower(), word_dict, default=0)
            if current_dict == 0:
                sen.append((0, ct.get(tag.lower(), pos_dict, default=0), 0))
            else:
                sen.append((ct.get("index", current_dict),
                            ct.get(tag.lower(), pos_dict, default=0),
                            ct.get("domain", current_dict)))
    return sen
def group_data(ctx):
    # Records whose start and end days are the 0001-01-01 sentinel are
    # 'defaults'; everything else is real segment data.
    grouper = lambda x: ('defaults'
                         if x['sday'] == '0001-01-01' and x['eday'] == '0001-01-01'
                         else 'data')
    groups = groupby(grouper, ctx['data'])
    return merge(ctx, {'data': get('data', groups, []),
                       'defaults': get('defaults', groups, [])})
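# A minimal sketch of the grouping behavior above, assuming toolz is
# available; the record values are invented for illustration.
from toolz import get, groupby

records = [{'sday': '0001-01-01', 'eday': '0001-01-01'},
           {'sday': '1984-04-01', 'eday': '1990-06-01'}]

grouper = lambda x: ('defaults'
                     if x['sday'] == '0001-01-01' and x['eday'] == '0001-01-01'
                     else 'data')

groups = groupby(grouper, records)
assert get('defaults', groups) == [records[0]]
assert get('data', groups) == [records[1]]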
def delete(ctx, cfg):
    '''Delete existing chip, pixel & segment records'''
    cx = int(get('cx', ctx))
    cy = int(get('cy', ctx))
    _ceph.delete_chip(cx, cy)
    _ceph.delete_pixels(cx, cy)
    _ceph.delete_segments(cx, cy)
    return ctx
def delete(ctx, cfg):
    '''Delete existing chip, pixel & segment records'''
    cx = int(get('cx', ctx))
    cy = int(get('cy', ctx))
    db.execute_statements(cfg, [db.delete_chip(cfg, cx, cy),
                                db.delete_pixels(cfg, cx, cy),
                                db.delete_segments(cfg, cx, cy)])
    return ctx
def log_request(ctx):
    '''Create log message for HTTP request'''
    cx = get('cx', ctx, None)
    cy = get('cy', ctx, None)
    a = get('acquired', ctx, None)
    logger.info('POST /segment {cx}, {cy}, {a}'.format(cx=cx, cy=cy, a=a))
    return ctx
def exception_handler(ctx, http_status, name, fn):
    '''Run fn(ctx); on failure, log and return an error context that
    carries the exception message and HTTP status downstream.'''
    try:
        return fn(ctx)
    except Exception as e:
        # toolz.do calls logger.error with the dict, then returns the dict
        return do(logger.error,
                  {'cx': get('cx', ctx, None),
                   'cy': get('cy', ctx, None),
                   'acquired': get('acquired', ctx, None),
                   'exception': '{name} exception: {ex}'.format(name=name, ex=e),
                   'http_status': http_status})
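# A minimal usage sketch of the handler above; the failing step and the
# context values are invented, and the module's logger is assumed to be
# configured. On failure, the returned dict flows to respond(), which turns
# 'exception' and 'http_status' into an HTTP error.
def failing_step(ctx):
    raise ValueError('bad input')

out = exception_handler({'cx': 100, 'cy': 200}, 500, 'load', failing_step)
assert out['http_status'] == 500
assert out['exception'] == 'load exception: bad input'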
def load_data(ctx, cfg):
    return assoc(ctx,
                 'data',
                 thread_first(ctx,
                              partial(segments, cfg=cfg),
                              partial(segaux.aux, cfg=cfg),
                              segaux.combine,
                              segaux.unload_segments,
                              segaux.unload_aux,
                              extract_segments,
                              partial(segaux.prediction_dates,
                                      month=get("month", ctx),
                                      day=get("day", ctx)),
                              segaux.average_reflectance,
                              reformat))
def log_request(ctx):
    '''Create log message for HTTP request'''
    tx = get('tx', ctx, None)
    ty = get('ty', ctx, None)
    a = get('acquired', ctx, None)
    d = get('date', ctx, None)
    c = get('chips', ctx, None)
    logger.info("POST /tile {x},{y},{a},{d},{c}".format(x=tx, y=ty, a=a, d=d, c=c))
    return ctx
def wrapper(*args, **kwargs):
    start = datetime.now()
    ctx = fn(*args, **kwargs)
    d = {'cx': get('cx', ctx, None),
         'cy': get('cy', ctx, None),
         'acquired': get('acquired', ctx, None)}
    logger.info(assoc(d,
                      '{name}_elapsed_seconds'.format(name=fn.__name__),
                      (datetime.now() - start).total_seconds()))
    return ctx
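# The wrapper bodies above and below are the inner functions of timing
# decorators. A self-contained sketch of the enclosing decorator (the
# `timed` name is invented, and print stands in for the module logger):
from datetime import datetime
from functools import wraps

def timed(fn):
    @wraps(fn)
    def wrapper(*args, **kwargs):
        start = datetime.now()
        ctx = fn(*args, **kwargs)
        print('{name}_elapsed_seconds'.format(name=fn.__name__),
              (datetime.now() - start).total_seconds())
        return ctx
    return wrapper

@timed
def step(ctx):
    return ctx

step({'cx': 100})  # prints step_elapsed_seconds <float>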
def average_reflectance_fn(segment):
    '''Add average reflectance values into dataset'''
    avgrefl = lambda intercept, slope, ordinal: add(intercept, mul(slope, ordinal))

    date = arrow.get(get('date', segment)).datetime.toordinal()

    ar = {'blar': avgrefl(get('blint', segment), spectral_slope('blcoef', segment), date),
          'grar': avgrefl(get('grint', segment), spectral_slope('grcoef', segment), date),
          'niar': avgrefl(get('niint', segment), spectral_slope('nicoef', segment), date),
          'rear': avgrefl(get('reint', segment), spectral_slope('recoef', segment), date),
          's1ar': avgrefl(get('s1int', segment), spectral_slope('s1coef', segment), date),
          's2ar': avgrefl(get('s2int', segment), spectral_slope('s2coef', segment), date),
          'thar': avgrefl(get('thint', segment), spectral_slope('thcoef', segment), date)}

    return merge(segment, ar)
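# The average reflectance model above is linear in the ordinal date:
# intercept + slope * ordinal. Illustrative arithmetic only; the
# coefficient values here are invented.
from operator import add, mul

avgrefl = lambda intercept, slope, ordinal: add(intercept, mul(slope, ordinal))
# e.g. intercept 0.04, slope 1e-05, ordinal day 730000:
assert abs(avgrefl(0.04, 1e-05, 730000) - 7.34) < 1e-9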
def parameters(r):
    '''Check HTTP request parameters'''
    tx = get('tx', r, None)
    ty = get('ty', r, None)
    acquired = get('acquired', r, None)
    chips = get('chips', r, None)
    date = get('date', r, None)

    if (tx is None or ty is None or acquired is None
            or chips is None or date is None):
        raise Exception('tx, ty, acquired, chips and date are required parameters')
    else:
        return {'tx': int(tx),
                'ty': int(ty),
                'acquired': acquired,
                'date': date,
                'chips': list(map(lambda chip: (int(first(chip)), int(second(chip))),
                                  chips)),
                'test_data_exception': get('test_data_exception', r, None),
                'test_training_exception': get('test_training_exception', r, None),
                'test_save_exception': get('test_save_exception', r, None)}
def test_prediction_runs_as_expected(client):
    '''
    As a blackmagic user, when I send tx, ty, acquired, month, day and
    chip list via HTTP POST, predictions are generated and saved so
    that they can be retrieved later.
    '''
    create_prediction_test_data(client)

    # test prediction
    response = client.post('/prediction',
                           json={'tx': test.tx,
                                 'ty': test.ty,
                                 'cx': test.cx,
                                 'cy': test.cy,
                                 'month': test.prediction_month,
                                 'day': test.prediction_day,
                                 'acquired': test.acquired})

    predictions = _ceph.select_predictions(cx=test.cx, cy=test.cy)

    assert response.status == '200 OK'
    assert get('tx', response.get_json()) == test.tx
    assert get('ty', response.get_json()) == test.ty
    assert get('cx', response.get_json()) == test.cx
    assert get('cy', response.get_json()) == test.cy
    assert get('acquired', response.get_json()) == test.acquired
    assert get('month', response.get_json()) == test.prediction_month
    assert get('day', response.get_json()) == test.prediction_day
    assert get('exception', response.get_json(), None) is None

    # The number of predictions is dictated by the NLCDTRN dataset for the
    # chip, and the number of non-zero classifications available.
    assert len(list(predictions)) == 30000
def prediction_dates(segments, month, day):
    for s in segments:
        default_date = default_prediction_date(s)
        if default_date:
            yield assoc(s, 'date', default_date)
        else:
            dates = prediction_date_fn(sday=get('sday', s),
                                       eday=get('eday', s),
                                       month=month,
                                       day=day)
            for date in dates:
                yield assoc(s, 'date', date)
def wrapper(*args, **kwargs):
    start = datetime.now()
    ctx = fn(*args, **kwargs)
    d = {"tx": get("tx", ctx, None),
         "ty": get("ty", ctx, None),
         "date": get("date", ctx, None),
         "acquired": get("acquired", ctx, None),
         "chips": count(get("chips", ctx, []))}
    logger.info(json.dumps(assoc(d,
                                 "{name}_elapsed_seconds".format(name=fn.__name__),
                                 (datetime.now() - start).total_seconds())))
    return ctx
def detection(ctx, cfg):
    with workers(cfg) as w:
        if get('test_detection_exception', ctx, None) is not None:
            return merge(ctx, exception(msg='test_detection_exception',
                                        http_status=500))
        else:
            return merge(ctx,
                         {'detections': list(flatten(w.map(detect,
                                                           take(ctx['test_pixel_count'],
                                                                ctx['timeseries']))))})
def respond(ctx):
    '''Build the Flask response from the pipeline context'''
    body = {'cx': get('cx', ctx, None),
            'cy': get('cy', ctx, None),
            'acquired': get('acquired', ctx, None)}

    e = get('exception', ctx, None)

    if e:
        response = jsonify(assoc(body, 'exception', e))
    else:
        response = jsonify(body)

    response.status_code = get('http_status', ctx, 200)
    return response
def compute(t, lhs, rhs):
    """ Join Operation for Python Streaming Backend

    Note that a pure streaming join is challenging/impossible because any
    row in one sequence might connect to any row in the other, requiring
    simultaneous complete access to both. This approach compromises: it
    fully realizes the LEFT sequence while allowing the RIGHT sequence to
    stream. Always put your bigger table on the RIGHT side of the join.
    """
    lhs = compute(t.lhs, lhs)
    rhs = compute(t.rhs, rhs)

    on_left = rowfunc(t.lhs[t.on_left])
    on_right = rowfunc(t.rhs[t.on_right])

    right_columns = list(range(len(t.rhs.columns)))
    for col in listpack(t.on_right):
        right_columns.remove(t.rhs.columns.index(col))
    get_right = lambda x: type(x)(get(right_columns, x))

    # Hash join: index the realized left side by join key, stream the right
    lhs_dict = groupby(on_left, lhs)

    for row in rhs:
        try:
            key = on_right(row)
            matches = lhs_dict[key]
            for match in matches:
                yield match + get_right(row)
        except KeyError:
            pass
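# The core of the join above is a hash join built from toolz.groupby: the
# realized left side is indexed by key, then the right side streams past
# it. A minimal sketch with invented data:
from toolz import groupby

lhs = [('Alice', 100), ('Bob', 200)]                   # fully realized
rhs = iter([('Alice', 'NYC'), ('Alice', 'SF'),
            ('Carol', 'LA')])                          # may stream

key = lambda row: row[0]
lhs_dict = groupby(key, lhs)                           # key -> left rows

def joined():
    for row in rhs:
        for match in lhs_dict.get(key(row), ()):
            yield match + row[1:]                      # drop rhs join key

assert list(joined()) == [('Alice', 100, 'NYC'), ('Alice', 100, 'SF')]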
def tagRawSentenceHash(self, rawLine, DICT, word_dict):
    line = initializeSentence(DICT, rawLine)
    sen = []
    wordTags = line.split()
    for i in range(len(wordTags)):
        fwObject = FWObject.getFWObject(wordTags, i)
        word, tag = getWordTag(wordTags[i])
        node = self.findFiredNode(fwObject)

        # Only hash each word once and block out-of-lexicon words
        word_hash = murmurhash3_32(word, seed=0)
        try:
            word_cat = ct.get(word_hash, word_dict)
        except KeyError:
            word_cat = 0
            word_hash = 0

        # Format and return
        if node.depth > 0:
            sen.append((word_hash, murmurhash3_32(node.conclusion, seed=0), word_cat))
        else:
            # Fired at root: return the initialized tag
            sen.append((word_hash, murmurhash3_32(tag, seed=0), word_cat))
    return sen
def test_segment_cassandra_exception(client):
    '''
    As a blackmagic user, when an exception occurs saving chips,
    pixels & segments to Cassandra, an HTTP 500 is issued with a
    descriptive message so that the issue may be investigated,
    corrected & retried.
    '''
    cx = test.cx
    cy = test.cy
    a = test.acquired

    delete_detections(test.cx, test.cy)

    response = client.post('/segment',
                           json={'cx': cx,
                                 'cy': cy,
                                 'acquired': a,
                                 'test_cassandra_exception': True})

    chips = db.execute_statement(cfg=app.cfg,
                                 stmt=db.select_chip(cfg=app.cfg,
                                                     cx=test.cx,
                                                     cy=test.cy))
    pixels = db.execute_statement(cfg=app.cfg,
                                  stmt=db.select_pixels(cfg=app.cfg,
                                                        cx=test.cx,
                                                        cy=test.cy))
    segments = db.execute_statement(cfg=app.cfg,
                                    stmt=db.select_segments(cfg=app.cfg,
                                                            cx=test.cx,
                                                            cy=test.cy))

    assert response.status == '500 INTERNAL SERVER ERROR'
    assert get('cx', response.get_json()) == cx
    assert get('cy', response.get_json()) == cy
    assert get('acquired', response.get_json()) == a
    assert type(get('exception', response.get_json())) is str
    assert len(get('exception', response.get_json())) > 0
    assert len(list(chips)) == 0
    assert len(list(pixels)) == 0
    assert len(list(segments)) == 0
def aux_filter(ctx):
    # Keep only aux entries whose first NLCDTRN value is non-zero
    return assoc(ctx,
                 'aux',
                 dict(filter(lambda d: first(get('nlcdtrn', second(d))) != 0,
                             ctx['aux'].items())))
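# Sketch of the filtering idiom above, assuming toolz; the aux data is
# invented. Entries whose first 'nlcdtrn' value is zero are dropped.
from toolz import first, get, second

aux = {(0, 0): {'nlcdtrn': [3]},
       (0, 1): {'nlcdtrn': [0]}}

kept = dict(filter(lambda d: first(get('nlcdtrn', second(d))) != 0,
                   aux.items()))
assert list(kept) == [(0, 0)]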
def test_aux():
    inputs = {'cx': test.cx, 'cy': test.cy, 'acquired': test.acquired}
    outputs = segaux.aux(inputs, blackmagic.cfg)
    assert get('aux', outputs, None) is not None
def test_tile_bad_parameters(client):
    '''
    As a blackmagic user, when I don't send tx, ty, acquired, date & chips
    via HTTP POST, the HTTP status is 400 and the response body tells me
    the required parameters so that I can send a good request.
    '''
    tx = "not-an-integer"
    ty = test.ty
    acquired = test.acquired
    chips = test.chips
    date = test.training_date

    delete_tile(test.tx, test.ty)

    response = client.post('/tile',
                           json={'tx': tx,
                                 'ty': ty,
                                 'acquired': acquired,
                                 'chips': chips,
                                 'date': date})

    tiles = _ceph.select_tile(tx=test.tx, ty=test.ty)

    assert response.status == '400 BAD REQUEST'
    assert get('tx', response.get_json()) == tx
    assert get('ty', response.get_json()) == ty
    assert get('acquired', response.get_json()) == acquired
    assert get('date', response.get_json()) == date
    assert get('chips', response.get_json()) == count(chips)
    assert type(get('exception', response.get_json())) is str
    assert len(get('exception', response.get_json())) > 0
    assert len(list(tiles)) == 0
def test_segment_merlin_no_input_data(client):
    '''
    As a blackmagic user, when no input data is available to build a
    timeseries, an HTTP 500 is issued with a message indicating "no input
    data" so that I know change detection cannot run for this time & space.
    '''
    cx = test.cx
    cy = test.cy
    a = '1975/1976'

    delete_detections(test.cx, test.cy)

    response = client.post('/segment',
                           json={'cx': cx, 'cy': cy, 'acquired': a})

    chips = db.execute_statement(cfg=app.cfg,
                                 stmt=db.select_chip(cfg=app.cfg,
                                                     cx=test.cx,
                                                     cy=test.cy))
    pixels = db.execute_statement(cfg=app.cfg,
                                  stmt=db.select_pixels(cfg=app.cfg,
                                                        cx=test.cx,
                                                        cy=test.cy))
    segments = db.execute_statement(cfg=app.cfg,
                                    stmt=db.select_segments(cfg=app.cfg,
                                                            cx=test.cx,
                                                            cy=test.cy))

    assert response.status == '500 INTERNAL SERVER ERROR'
    assert get('cx', response.get_json()) == cx
    assert get('cy', response.get_json()) == cy
    assert get('acquired', response.get_json()) == a
    assert type(get('exception', response.get_json())) is str
    assert len(get('exception', response.get_json())) > 0
    assert len(list(chips)) == 0
    assert len(list(pixels)) == 0
    assert len(list(segments)) == 0
def test_segment_bad_parameters(client):
    '''
    As a blackmagic user, when I don't send cx, cy, & acquired range via
    HTTP POST, the HTTP status is 400 and the response body tells me the
    required parameters so that I can send a good request.
    '''
    # bad parameters
    cx = None
    cy = test.cy
    a = test.acquired

    delete_detections(test.cx, test.cy)

    response = client.post('/segment',
                           json={'cx': cx, 'cy': cy, 'acquired': a})

    chips = db.execute_statement(cfg=app.cfg,
                                 stmt=db.select_chip(cfg=app.cfg,
                                                     cx=test.cx,
                                                     cy=test.cy))
    pixels = db.execute_statement(cfg=app.cfg,
                                  stmt=db.select_pixels(cfg=app.cfg,
                                                        cx=test.cx,
                                                        cy=test.cy))
    segments = db.execute_statement(cfg=app.cfg,
                                    stmt=db.select_segments(cfg=app.cfg,
                                                            cx=test.cx,
                                                            cy=test.cy))

    assert response.status == '400 BAD REQUEST'
    assert get('cx', response.get_json()) == cx
    assert get('cy', response.get_json()) == cy
    assert get('acquired', response.get_json()) == a
    assert type(get('exception', response.get_json())) is str
    assert len(get('exception', response.get_json())) > 0
    assert len(list(chips)) == 0
    assert len(list(pixels)) == 0
    assert len(list(segments)) == 0
def _get_json(self, key):
    o = self.client.get_object(Bucket=self.bucket_name, Key=key)
    if get('ContentEncoding', o, None) == 'gzip':
        v = gzip.decompress(o['Body'].read()).decode('utf-8')
    else:
        v = o['Body'].read().decode('utf-8')
    return json.loads(v)
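# The conditional decompression above pairs with a write path that sets
# ContentEncoding='gzip' on the object. A hedged sketch using boto3; the
# bucket and key are hypothetical, and credentials are assumed configured.
import gzip
import json
import boto3

client = boto3.client('s3')
body = json.dumps({'cx': 100, 'cy': 200}).encode('utf-8')
client.put_object(Bucket='my-bucket',
                  Key='segments/100_200.json',
                  Body=gzip.compress(body),
                  ContentEncoding='gzip')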
def save(ctx, cfg):
    if get('test_cassandra_exception', ctx, None) is not None:
        raise Exception('test_cassandra_exception')
    else:
        save_chip(ctx, cfg)
        save_pixels(ctx, cfg)
        save_segments(ctx, cfg)
    return ctx
def assemble(pair):
    a, b = pair
    if a is not None:
        joined = get(on_left, a)
    else:
        joined = get(on_right, b)

    if a is not None:
        left_entries = get(left_self_columns, a)
    else:
        left_entries = (None,) * (len(t.lhs.fields) - len(on_left))

    if b is not None:
        right_entries = get(right_self_columns, b)
    else:
        right_entries = (None,) * (len(t.rhs.fields) - len(on_right))

    return joined + left_entries + right_entries
def _get_bin(self, key):
    o = self.client.get_object(Bucket=self.bucket_name, Key=key)
    if get('ContentEncoding', o, None) == 'gzip':
        v = gzip.decompress(o['Body'].read())
    else:
        v = o['Body'].read()
    return v
def rowfunc(t):
    """ Rowfunc provides a function that can be mapped onto a sequence.

    >>> accounts = TableSymbol('accounts', '{name: string, amount: int}')
    >>> f = rowfunc(accounts['amount'])
    >>> row = ('Alice', 100)
    >>> f(row)
    100

    See Also:
        compute<Rowwise, Sequence>
    """
    from toolz.curried import get
    indices = [t.parent.columns.index(col) for col in t.columns]
    return get(indices)
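# For reference, the curried get used by rowfunc partially applies until
# the sequence arrives (standard toolz.curried behavior):
from toolz.curried import get

picker = get([1, 0])                      # curried: awaiting the sequence
assert picker(('Alice', 100)) == (100, 'Alice')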
def parent_signin():
    barcode = request.forms['barcode']
    students = request.forms.getlist('students')

    # filter() returns an iterator in Python 3; realize it so we can test
    # for emptiness (the original `len(u) < 0` could never be true)
    u = list(filter(lambda v: v.id == barcode, data['users'].values()))
    if len(u) == 0:
        return template('signin',
                        message='Barcode ' + barcode + ' not recognized.')
    u = u[0]

    # check auth
    a = [u.name in s.authorized for s in t.get(students, data['students'])]
    if False in a:
        return template('signin', message='Not authorized for all students.')

    for s in students:
        data['students'][s].in_class = False
    return template('success', students=str(students), in_out='out')
def get(self, ind, default=None):
    return cytoolz.get(ind, self, default)
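# A minimal sketch of the delegation above; the Row class is invented to
# show what a mapping type gains from cytoolz.get: default-aware lookups
# that also accept lists of keys.
import cytoolz

class Row(dict):
    def get(self, ind, default=None):
        return cytoolz.get(ind, self, default)

r = Row(cx=100, cy=200)
assert r.get('cx') == 100
assert r.get('missing', -1) == -1
assert r.get(['cx', 'cy']) == (100, 200)   # cytoolz.get accepts key lists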
def _do_fit_step(dsk, next_token, step, cv, fields, tokens, params, Xs, ys,
                 fit_params, n_splits, error_score, step_fields_lk,
                 fit_params_lk, field_to_index, step_name, none_passthrough,
                 is_transform):
    sub_fields, sub_inds = map(list, unzip(step_fields_lk[step_name], 2))
    sub_fit_params = fit_params_lk[step_name]

    if step_name in field_to_index:
        # The estimator may change each call
        new_fits = {}
        new_Xs = {}
        est_index = field_to_index[step_name]

        for ids in _group_ids_by_index(est_index, tokens):
            # Get the estimator for this subgroup
            sub_est = params[ids[0]][est_index]
            if sub_est is MISSING:
                sub_est = step

            # If an estimator is `None`, there's nothing to do
            if sub_est is None:
                nones = dict.fromkeys(ids, None)
                new_fits.update(nones)
                if is_transform:
                    if none_passthrough:
                        new_Xs.update(zip(ids, get(ids, Xs)))
                    else:
                        new_Xs.update(nones)
            else:
                # Extract the proper subset of Xs, ys
                sub_Xs = get(ids, Xs)
                sub_ys = get(ids, ys)
                # Only subset the parameters/tokens if necessary
                if sub_fields:
                    sub_tokens = list(pluck(sub_inds, get(ids, tokens)))
                    sub_params = list(pluck(sub_inds, get(ids, params)))
                else:
                    sub_tokens = sub_params = None

                if is_transform:
                    sub_fits, sub_Xs = do_fit_transform(dsk, next_token, sub_est,
                                                        cv, sub_fields, sub_tokens,
                                                        sub_params, sub_Xs, sub_ys,
                                                        sub_fit_params, n_splits,
                                                        error_score)
                    new_Xs.update(zip(ids, sub_Xs))
                    new_fits.update(zip(ids, sub_fits))
                else:
                    sub_fits = do_fit(dsk, next_token, sub_est, cv, sub_fields,
                                      sub_tokens, sub_params, sub_Xs, sub_ys,
                                      sub_fit_params, n_splits, error_score)
                    new_fits.update(zip(ids, sub_fits))

        # Extract lists of transformed Xs and fit steps
        all_ids = list(range(len(Xs)))
        if is_transform:
            Xs = get(all_ids, new_Xs)
        fits = get(all_ids, new_fits)
    elif step is None:
        # Nothing to do
        fits = [None] * len(Xs)
        if not none_passthrough:
            Xs = fits
    else:
        # Only subset the parameters/tokens if necessary
        if sub_fields:
            sub_tokens = list(pluck(sub_inds, tokens))
            sub_params = list(pluck(sub_inds, params))
        else:
            sub_tokens = sub_params = None

        if is_transform:
            fits, Xs = do_fit_transform(dsk, next_token, step, cv, sub_fields,
                                        sub_tokens, sub_params, Xs, ys,
                                        sub_fit_params, n_splits, error_score)
        else:
            fits = do_fit(dsk, next_token, step, cv, sub_fields, sub_tokens,
                          sub_params, Xs, ys, sub_fit_params, n_splits,
                          error_score)
    return (fits, Xs) if is_transform else (fits, None)
def _do_featureunion(dsk, next_token, est, cv, fields, tokens, params, Xs, ys,
                     fit_params, n_splits, error_score):
    if 'transformer_list' in fields:
        raise NotImplementedError("Setting FeatureUnion.transformer_list "
                                  "in a gridsearch")

    # Note the trailing comma: `ignore` must be a tuple, not a bare string
    (field_to_index,
     step_fields_lk) = _group_subparams(est.transformer_list, fields,
                                        ignore=('transformer_weights',))
    fit_params_lk = _group_fit_params(est.transformer_list, fit_params)

    token = next_token(est)

    n_samples = _do_n_samples(dsk, token, Xs, n_splits)

    fit_steps = []
    tr_Xs = []
    for (step_name, step) in est.transformer_list:
        fits, out_Xs = _do_fit_step(dsk, next_token, step, cv, fields, tokens,
                                    params, Xs, ys, fit_params, n_splits,
                                    error_score, step_fields_lk, fit_params_lk,
                                    field_to_index, step_name, False, True)
        fit_steps.append(fits)
        tr_Xs.append(out_Xs)

    # Rebuild the FeatureUnions
    step_names = [n for n, _ in est.transformer_list]

    if 'transformer_weights' in field_to_index:
        index = field_to_index['transformer_weights']
        weight_lk = {}
        weight_tokens = list(pluck(index, tokens))
        for i, tok in enumerate(weight_tokens):
            if tok not in weight_lk:
                weights = params[i][index]
                if weights is MISSING:
                    weights = est.transformer_weights
                lk = weights or {}
                weight_list = [lk.get(n) for n in step_names]
                weight_lk[tok] = (weights, weight_list)
        weights = get(weight_tokens, weight_lk)
    else:
        lk = est.transformer_weights or {}
        weight_list = [lk.get(n) for n in step_names]
        weight_tokens = repeat(None)
        weights = repeat((est.transformer_weights, weight_list))

    out = []
    out_append = out.append
    fit_name = 'feature-union-' + token
    tr_name = 'feature-union-concat-' + token
    m = 0
    seen = {}
    for steps, Xs, wt, (w, wl), nsamp in zip(zip(*fit_steps), zip(*tr_Xs),
                                             weight_tokens, weights, n_samples):
        if (steps, wt) in seen:
            out_append(seen[steps, wt])
        else:
            for n in range(n_splits):
                dsk[(fit_name, m, n)] = (feature_union,
                                         step_names,
                                         [None if s is None else s + (n,)
                                          for s in steps],
                                         w)
                dsk[(tr_name, m, n)] = (feature_union_concat,
                                        [None if x is None else x + (n,)
                                         for x in Xs],
                                        nsamp + (n,),
                                        wl)
            seen[steps, wt] = m
            out_append(m)
            m += 1
    return [(fit_name, i) for i in out], [(tr_name, i) for i in out]