def keyword_neighbors(keyword):
    """Return keywords related to *keyword*, ranked by co-occurrence weight.

    For every dataset that contains *keyword*, count how many of those
    datasets each other keyword appears in, then scale each count by the
    keywords' relative weights.

    Args:
        keyword: a keyword present in the global keywords dict
            (``iotools.load_keywords_dict()['all']``).

    Returns:
        list[tuple]: ``(keyword2, weight)`` pairs sorted by weight,
        highest first.
    """
    keywords = iotools.load_keywords_dict()
    keyword_weight = weighting_function(keyword)
    # Datasets in which *keyword* occurs (first element of each entry).
    related_datasets = {x[0] for x in keywords['all'][keyword]}
    # Count, per co-occurring keyword, how many related datasets contain it.
    counts = {}
    for dataset in related_datasets:
        dataset_keywords = iotools.load_dataset_keywords_dict(dataset)['all']
        for keyword2 in dataset_keywords:
            counts[keyword2] = counts.get(keyword2, 0) + 1
    # Hoisted loop invariant: this log was recomputed on every iteration.
    # NOTE(review): if mylog(1) == 0 this divides by zero when *keyword*
    # occurs in exactly one dataset — confirm mylog's behavior upstream.
    log_num_datasets = mylog(len(related_datasets))
    # Division order matches the original (/ keyword_weight, then / log) so
    # float results are bit-identical.
    ret = {
        keyword2: 1.0 * weighting_function(keyword2) * mylog(val)
        / keyword_weight / log_num_datasets
        for keyword2, val in counts.items()
    }
    return sorted(ret.items(), key=lambda x: x[1], reverse=True)
def tag_cloud_text_new_keywords_simple():
    """Collect every dataset's keyword strings into one flat list.

    Returns:
        list: the keys of each dataset's ``'all'`` keyword dict,
        concatenated across all datasets; a keyword shared by *k*
        datasets appears *k* times.
    """
    ret = []
    # Only the per-dataset keyword dicts are needed, so iterate .values()
    # instead of unpacking .items() and discarding the dataset name.
    for keywords in iotools.load_dataset_keywords_dict().values():
        ret.extend(keywords['all'])
    return ret
def dataset_weighting_function(dataset):
    """Return the total weight of *dataset*: the sum of its keywords' weights.

    Args:
        dataset: a mapping with a ``'name'`` key identifying the dataset.

    Returns:
        The sum of the second element of every pair produced by
        ``weight_keywords`` for this dataset's ``'all'`` keywords.
    """
    all_keywords = iotools.load_dataset_keywords_dict(dataset['name'])['all']
    weighted_pairs = weight_keywords(all_keywords)
    return sum(pair[1] for pair in weighted_pairs)
def get_all_datasets_with_keywords():
    """Render the overview page of dataset categories and their keywords."""
    # Keyword arguments evaluate left-to-right, preserving the original
    # order of the two lookups.
    return render_template(
        'datasets-with-keywords.html',
        dataset_dict=similarity.get_category_dict(),
        keywords=iotools.load_dataset_keywords_dict(),
    )
def df_new_keywords_list_weighted(dataset):
    """Map each of *dataset*'s keywords to its weight.

    Args:
        dataset: a mapping with a ``'name'`` key identifying the dataset.

    Returns:
        dict: ``{keyword: weighting_function(keyword)}`` for every key in
        the dataset's ``'all'`` keyword dict.
    """
    all_keywords = iotools.load_dataset_keywords_dict(dataset['name'])['all']
    return {kw: weighting_function(kw) for kw in all_keywords}
def df_new_keywords_list(dataset):
    """Return the raw ``'all'`` keyword dict for *dataset*.

    Args:
        dataset: a mapping with a ``'name'`` key identifying the dataset.
    """
    return iotools.load_dataset_keywords_dict(dataset['name'])['all']