def pipeline(chip, tx, ty, date, acquired, cfg):
    """Build a per-chip context and push it through the training-data steps.

    Args:
        chip: sequence whose first/second elements become the chip x/y ('cx'/'cy')
        tx, ty: tile coordinates
        date: date string for the chip
        acquired: acquisition date range
        cfg: configuration passed to the cfg-aware steps

    Returns:
        whatever segaux.exit_pipeline produces for the threaded context
    """
    context = {'tx': tx,
               'ty': ty,
               'cx': first(chip),
               'cy': second(chip),
               'date': date,
               'acquired': acquired}

    # Steps run left-to-right, each receiving the previous step's result.
    steps = (partial(segments, cfg=cfg),
             segments_filter,
             partial(segaux.aux, cfg=cfg),
             segaux.aux_filter,
             segaux.combine,
             segaux.unload_segments,
             segaux.unload_aux,
             segaux.add_training_dates,
             add_average_reflectance,
             segaux.training_format,
             segaux.exit_pipeline)

    result = context
    for step in steps:
        result = step(result)
    return result
def segments():
    """HTTP handler: run the request payload through the segment stages.

    Reads request.json, applies each stage inside exception_handler (which
    maps stage failures to the given HTTP status), and hands the final
    value to respond(). Relies on module-level request/cfg and the stage
    functions being in scope.
    """
    # (http_status, stage name, stage function) — applied in order.
    stages = ((500, 'log_request', log_request),
              (400, 'parameters',  parameters),
              (500, 'timeseries',  partial(timeseries, cfg=cfg)),
              (500, 'nodata',      partial(nodata, cfg=cfg)),
              (500, 'detection',   partial(detection, cfg=cfg)),
              (500, 'delete',      partial(delete, cfg=cfg)),
              (500, 'save',        partial(save, cfg=cfg)))

    value = request.json
    for status, stage_name, stage_fn in stages:
        value = exception_handler(value,
                                  http_status=status,
                                  name=stage_name,
                                  fn=stage_fn)
    return respond(value)
def load_data(ctx, cfg):
    """Return ctx with a 'data' key holding the fully-processed chip data.

    Args:
        ctx: context dict; 'month' and 'day' are read for prediction dates
        cfg: configuration forwarded to cfg-aware steps

    Returns:
        a new dict (via assoc) — ctx plus the 'data' entry
    """
    # Pipeline stages, applied in order to ctx.
    stages = (partial(segments, cfg=cfg),
              partial(segaux.aux, cfg=cfg),
              segaux.combine,
              segaux.unload_segments,
              segaux.unload_aux,
              extract_segments,
              partial(segaux.prediction_dates,
                      month=get("month", ctx),
                      day=get("day", ctx)),
              segaux.average_reflectance,
              reformat)

    data = ctx
    for stage in stages:
        data = stage(data)

    return assoc(ctx, 'data', data)
def review_to_tagged_sents(sents):
    """Prefix each sentence's lowercased tokens with recent topic tags.

    For sentence i, the tags are the topic_tags of the cluster assignments
    of sentences max(0, i-3)..i (i.e. the current sentence and up to three
    preceding ones), followed by the sentence's tokens.

    Args:
        sents: sequence of sentence strings

    Returns:
        list of token lists, one per input sentence
    """
    # Fix: this body previously duplicated get_topic_seq's
    # vectorize -> normalize -> transform -> argmin pipeline verbatim;
    # reuse the helper instead so the computation lives in one place.
    clusters_for_sents = get_topic_seq(sents)

    res = []
    for i, sent in enumerate(sents):
        # Last 4 assignments among the first i+1 == a 4-wide trailing window.
        tags = [topic_tags[c] for c in clusters_for_sents[:i + 1][-4:]]
        res.append(tags + sent.lower().split())
    return res
def tiles():
    """HTTP handler: run the request payload through the tile training stages.

    Reads request.json, applies each stage inside exception_handler (which
    maps stage failures to the given HTTP status), and hands the final
    value to respond(). Relies on module-level request/cfg and the stage
    functions being in scope.
    """
    # (http_status, stage name, stage function) — applied in order.
    stages = ((500, 'log_request', log_request),
              (400, 'parameters',  parameters),
              (500, 'data',        partial(data, cfg=cfg)),
              (500, 'statistics',  statistics),
              (500, 'randomize',   partial(randomize, cfg=cfg)),
              (500, 'split_data',  split_data),
              (500, 'sample',      partial(sample, cfg=cfg)),
              (500, 'train',       partial(train, cfg=cfg)),
              (500, 'save',        partial(save, cfg=cfg)))

    value = request.json
    for status, stage_name, stage_fn in stages:
        value = exception_handler(value,
                                  http_status=status,
                                  name=stage_name,
                                  fn=stage_fn)
    return respond(value)
def likelihoods_by_sentence(sents):
    """For each sentence, report whether its hard cluster assignment is
    among the top-3 clusters suggested by the target topic distribution
    given only the preceding sentences.

    Args:
        sents: sequence of sentence strings

    Returns:
        list of bools, one per sentence
    """
    # vectorize -> normalize -> cluster distances -> likelihoods -> distribs
    vecs = clustering.normalize_vecs(clizer.vectorize_sents(sents))
    distances = clizer.clusterer.transform(vecs)
    sent_cluster_distribs = clustering.normalize_dists(normal_lik(distances, .5))

    hard_assignments = np.argmax(sent_cluster_distribs, axis=1)

    results = []
    for i, assignment in enumerate(hard_assignments):
        # Rank candidate clusters using only sentences before index i.
        ranked = np.argsort(
            get_topic_distribution(
                clizer=clizer,
                target_dist=clizer.target_dists['best'],
                sent_cluster_distribs=sent_cluster_distribs[:i],
                new_dists_opts=np.eye(clizer.n_clusters)))
        results.append(assignment in ranked[:3])
    return results
def predictions_route():
    """HTTP handler: run the request payload through the prediction stages.

    Reads request.json, applies each stage inside exception_handler (which
    maps stage failures to the given HTTP status), and hands the final
    value to respond(). Relies on module-level request/cfg and the stage
    functions being in scope.
    """
    # (http_status, stage name, stage function) — applied in order.
    stages = ((500, 'log_request',         log_request),
              (400, 'parameters',          parameters),
              (500, 'load_model',          partial(load_model, cfg=cfg)),
              (500, 'load_data',           partial(load_data, cfg=cfg)),
              (500, 'group_data',          group_data),
              (500, 'matrix',              matrix),
              (500, 'predictions',         partial(predictions, cfg=cfg)),
              (500, 'default_predictions', default_predictions),
              (500, 'delete',              partial(delete, cfg=cfg)),
              (500, 'save',                partial(save, cfg=cfg)))

    value = request.json
    for status, stage_name, stage_fn in stages:
        value = exception_handler(value,
                                  http_status=status,
                                  name=stage_name,
                                  fn=stage_fn)
    return respond(value)
def get_topic_seq(sents):
    """Assign each sentence to its nearest cluster.

    Args:
        sents: sequence of sentence strings

    Returns:
        numpy array of cluster indices (argmin over cluster distances),
        one per sentence
    """
    vecs = clizer.vectorize_sents(sents)
    normalized = clustering.normalize_vecs(vecs)
    distances = clizer.clusterer.transform(normalized)
    return np.argmin(distances, axis=1)