def text_processing():
    """Render the text-processing page for an optional submitted title/body.

    Reads the ``d_title`` and ``d_body`` form fields.  When at least one of
    them is non-empty, keywords are generated from the text and used to build
    two lists for the template:

    * ``categories`` -- entries of ``get_dataset_compatibility`` with a
      positive score, highest first, each as ``{"name", "similarity"}``.
    * ``related_datasets`` -- up to the first 20 results of
      ``get_related_datasets`` with a non-zero score, each loaded through
      ``iotools.load_dataset`` and tagged with a ``similarity`` string.

    When neither field is supplied, ``keywords``, ``categories`` and
    ``related_datasets`` are passed to the template as ``None``.
    """
    form = request.form
    title = form.get('d_title')
    body = form.get('d_body')

    # Name -> position in the list returned by iotools.load_datasets().
    dataset_ids = {
        entry['name']: position
        for position, entry in enumerate(iotools.load_datasets())
    }

    keywords = categories = datasets = None

    if title or body:
        keyword_groups = generate_dataset_keywords_dict(
            {"name": title, "long_desc": body, "short_desc": ""}
        )
        keywords = sorted(keyword_groups['all'])

        # Categories: keep only positive scores, best match first.
        compatibility = get_dataset_compatibility(keywords)
        ranked_categories = sorted(
            compatibility.items(), key=lambda pair: pair[1], reverse=True
        )
        categories = [
            {"name": category, "similarity": "%4.1f%%" % (score * 100,)}
            for category, score in ranked_categories
            if score > 0
        ]

        # Related datasets: only the first 20 results are considered, then
        # ordered by score (descending) before zero-score entries are dropped.
        candidates = get_related_datasets(keywords)[:20]
        ranked_datasets = sorted(
            candidates, key=lambda pair: pair[1], reverse=True
        )
        datasets = []
        for name, score in ranked_datasets:
            if score != 0:
                record = iotools.load_dataset(name)
                record['similarity'] = "%4.1f%%" % (score * 100,)
                datasets.append(record)

    return render_template(
        "text_processing.html",
        d_title=title,
        d_body=body,
        keywords=keywords,
        categories=categories,
        related_datasets=datasets,
        dataset_ids=dataset_ids,
    )
def get_related_datasets_html():
    """Render the related-datasets fragment for a posted keyword list.

    Reads the ``keywords`` form field, which holds one keyword per line, and
    passes the cleaned list to ``get_related_datasets``.  The second argument
    to that helper is ``True`` unless the form contains an ``old_style``
    field.  Only the first 20 results are rendered; each is loaded through
    ``iotools.load_dataset`` and given a whole-percent ``similarity`` string.

    The ``keywords`` field is read with a plain subscript, so a request that
    omits it fails at that lookup rather than rendering an empty list.
    """
    # Form posts may use CRLF line breaks, and the text often ends with a
    # trailing newline.  Splitting on '\n' alone would leave '\r' glued to
    # each keyword and produce empty-string keywords, so split on any line
    # boundary, trim surrounding whitespace and drop blanks.
    stripped_lines = (
        line.strip() for line in request.form['keywords'].splitlines()
    )
    keywords = [keyword for keyword in stripped_lines if keyword]

    new_style = 'old_style' not in request.form
    related_datasets = get_related_datasets(keywords, new_style)

    datasets = []
    for name, val in related_datasets[:20]:
        row = iotools.load_dataset(name)
        row['similarity'] = "%3.0f%%" % (val * 100,)
        datasets.append(row)

    return render_template('related-datasets.html', datasets=datasets)
def get_all_conflicts():
    """Render every dataset that has conflicts, grouped by category.

    Iterates the mapping returned by ``all_conflicts()`` (dataset name ->
    conflict list), loads each dataset through ``iotools.load_dataset``,
    stores the conflict list under its ``conflicts`` key, and buckets the
    record by its ``category`` value.  The resulting ``{category: [records]}``
    dict is passed to the ``all-datasets.html`` template as ``dataset_dict``.
    """
    by_category = {}
    for dataset_name, conflicts in all_conflicts().items():
        record = iotools.load_dataset(dataset_name)
        record['conflicts'] = conflicts
        # setdefault creates the bucket on first sight of a category while
        # keeping a plain dict for the template.
        by_category.setdefault(record['category'], []).append(record)
    return render_template('all-datasets.html', dataset_dict=by_category)