예제 #1
0
def text_processing():
    """Render the text-processing page for an optionally submitted title/body.

    Reads the ``d_title`` and ``d_body`` form fields.  When at least one of
    them is non-empty, keywords are generated from the text and used to
    compute (a) a list of categories with a non-zero similarity score and
    (b) a list of related datasets with a non-zero score.  When both fields
    are missing or empty, ``keywords``, ``categories`` and
    ``related_datasets`` are passed to the template as ``None``.

    Returns:
        The result of rendering ``text_processing.html``.
    """
    form = request.form
    title = form.get('d_title', None)
    body = form.get('d_body', None)

    # Name -> position lookup over every dataset returned by the loader;
    # built on every request, whether or not any text was submitted.
    dataset_ids = {
        entry['name']: position
        for position, entry in enumerate(iotools.load_datasets())
    }

    keywords = categories = datasets = None

    if title or body:
        keyword_groups = generate_dataset_keywords_dict(
            {"name": title, "long_desc": body, "short_desc": ""})
        keywords = sorted(keyword_groups['all'])

        # Categories: highest similarity first, non-positive scores dropped.
        ranked_categories = sorted(
            get_dataset_compatibility(keywords).items(),
            key=lambda pair: pair[1],
            reverse=True,
        )
        categories = [
            {"name": category, "similarity": "%4.1f%%" % (score * 100,)}
            for category, score in ranked_categories
            if score > 0
        ]

        # NOTE(review): only the first 20 entries are taken *before* sorting,
        # so this relies on get_related_datasets already returning its best
        # matches first -- confirm against that helper.
        candidates = get_related_datasets(keywords)[:20]
        datasets = []
        for dataset_name, score in sorted(candidates, key=lambda pair: pair[1], reverse=True):
            if score != 0:
                record = iotools.load_dataset(dataset_name)
                record['similarity'] = "%4.1f%%" % (score * 100,)
                datasets.append(record)

    return render_template(
        "text_processing.html",
        d_title=title,
        d_body=body,
        keywords=keywords,
        categories=categories,
        related_datasets=datasets,
        dataset_ids=dataset_ids,
    )
예제 #2
0
def get_related_datasets_html():
    """Render an HTML fragment listing datasets related to posted keywords.

    Reads the required ``keywords`` form field (one keyword per line) and
    the optional ``old_style`` flag; the "new style" mode is used unless
    ``old_style`` is present in the form.  At most the first 20 results of
    ``get_related_datasets`` are loaded, each annotated with a
    ``similarity`` percentage string.

    Returns:
        The result of rendering ``related-datasets.html``.
    """
    # HTML forms submit multi-line values with CRLF line endings, so a plain
    # split on '\n' would leave a stray '\r' on each keyword and prevent it
    # from matching.  Strip it while keeping one entry per line as before.
    raw_keywords = request.form['keywords']
    keywords = [line.rstrip('\r') for line in raw_keywords.split('\n')]

    new_style = 'old_style' not in request.form
    related_datasets = get_related_datasets(keywords, new_style)

    datasets = []
    # Results are used in the order returned; presumably best match first
    # -- verify against get_related_datasets.
    for name, val in related_datasets[:20]:
        row = iotools.load_dataset(name)
        row['similarity'] = "%3.0f%%" % (val * 100,)
        datasets.append(row)
    return render_template('related-datasets.html', datasets=datasets)
예제 #3
0
def get_all_conflicts():
    """Render every dataset that has conflicts, grouped by category.

    For each ``name -> conflict list`` pair returned by ``all_conflicts()``,
    the dataset is loaded, its conflict list is stored under the
    ``conflicts`` key, and the record is appended to the list for its
    ``category``.

    Returns:
        The result of rendering ``all-datasets.html`` with the grouped
        mapping passed as ``dataset_dict``.
    """
    by_category = {}
    for dataset_name, conflict_list in all_conflicts().items():
        record = iotools.load_dataset(dataset_name)
        record['conflicts'] = conflict_list
        # Create the category bucket on first sight, then append to it.
        by_category.setdefault(record['category'], []).append(record)

    return render_template('all-datasets.html', dataset_dict=by_category)