def registration_by_index(index):
    """Look up the dataset at position *index* and run registration on it.

    An out-of-range index leaves ``dataset`` as ``None`` -- presumably
    ``registration`` handles that case; verify against its contract.
    """
    dataset = None
    datasets = iotools.load_datasets_dict()
    if 0 <= index < len(datasets):
        dataset = datasets.values()[index]
    return registration(dataset)
def get_similarity_new_matrix_weighted():
    """Build the full pairwise similarity matrix over all datasets.

    Uses the weighted new-keywords metric with the standard weight
    function.  Returns an OrderedDict mapping name1 -> name2 -> score,
    rows and columns in repository order.
    """
    metric_args = [similarity.metrics.df_new_keywords_list_weighted,
                   similarity.metrics.sf_weight]
    repo = iotools.load_datasets_dict()
    matrix = OrderedDict()
    for left_name, left in repo.items():
        row = OrderedDict()
        for right_name, right in repo.items():
            row[right_name] = similarity.metrics.item_item_similarity(
                left, right, *metric_args)
        matrix[left_name] = row
    return matrix
def get_conflicts(function):
    """Apply *function* to every dataset and collect the failures.

    *function* must return a ``(result, message)`` pair; every dataset
    for which ``result`` is falsy is reported as ``(name, message)``.
    """
    failures = []
    for name, dataset in iotools.load_datasets_dict().items():
        ok, message = function(dataset)
        if not ok:
            failures.append((name, message))
    return failures
def view_by_index(index):
    """Render the detail page for the dataset at position *index*.

    An out-of-range index renders the template with ``dataset=None``.
    The category list is exposed to the template as an id -> name map.
    """
    dataset = None
    datasets = iotools.load_datasets_dict()
    if 0 <= index < len(datasets):
        dataset = datasets.values()[index]
    categories = dict(enumerate(similarity.get_categories()))
    return render_template('view-dataset.html', cats=categories, dataset=dataset)
def generate_keywords_dict():
    """Aggregate keyword occurrences across every dataset in the repository.

    Returns an OrderedDict keyed by field ('long_desc', 'short_desc',
    'name', 'all'); each field maps keyword -> list of
    (dataset_name, occurrence) pairs, with keywords sorted
    case-insensitively in ascending order.
    """
    fields = ['long_desc', 'short_desc', 'name', 'all']
    ret = OrderedDict((field, OrderedDict()) for field in fields)
    for dataset_name, dataset in iotools.load_datasets_dict().items():
        keywords = generate_dataset_keywords_dict(dataset)
        for field in fields:
            for keyword, occurrences in keywords[field].items():
                bucket = ret[field].setdefault(keyword, [])
                # tag each occurrence with the dataset it came from
                bucket += zip([dataset_name] * len(occurrences), occurrences)
    for field, by_keyword in ret.items():
        ret[field] = OrderedDict(
            sorted(by_keyword.items(), key=lambda item: item[0].lower()))
    return ret
def generate_network(file_name, matrix): g = nx.Graph() datasets = iotools.load_datasets_dict() for name, dataset in datasets.items(): info = { "category": dataset['category'], "subcategory": dataset['subcategory'], } g.add_node(name, info) for name1 in matrix: for name2 in matrix[name1]: weight = matrix[name1][name2] if weight > 0.25: g.add_edge(name1, name2, weight=weight) print len(g.edges()) nx.write_gexf(g, file_name)
def weight_all_datasets():
    """Score every dataset with the weighting function.

    Returns a list of (name, weight) pairs sorted by ascending weight.
    """
    scores = [(name, dataset_weighting_function(dataset))
              for name, dataset in iotools.load_datasets_dict().items()]
    scores.sort(key=lambda pair: pair[1])
    return scores
def generate_all_dataset_keywords_dict():
    """Map every dataset name to its per-field keyword dictionary,
    preserving repository order."""
    return OrderedDict(
        (name, generate_dataset_keywords_dict(dataset))
        for name, dataset in iotools.load_datasets_dict().items())
def get_datasets_in_category(category_name):
    """Return the list of datasets whose 'category' field equals
    *category_name*."""
    repo = iotools.load_datasets_dict()
    return [ds for ds in repo.values() if ds['category'] == category_name]