def write_all_unique_key_value_cnt(q: DatasetQuery):
    """Export per-category (key, value, count) attribute tables as CSV.

    For each category 1 through 5, fetches the nested mapping returned by
    ``q.get_all_unique_key_value_attrs`` (attribute key -> attribute value ->
    count), flattens it into ``[key, value, count]`` rows and passes the rows
    to ``DatasetQuery.export_csv`` together with the target file name.

    Args:
        q: Query object used to read the attribute counts.
    """
    analysis_path = "./analysis/"
    for category in range(1, 6):
        nested_counts = q.get_all_unique_key_value_attrs(category)
        # Flatten the two-level mapping into one row per (key, value) pair.
        flat_rows = [
            [attr_key, attr_value, occurrences]
            for attr_key, value_counts in nested_counts.items()
            for attr_value, occurrences in value_counts.items()
        ]
        csv_name = (
            analysis_path + "unique_attr_key_val_cnt" + str(category) + ".csv"
        )
        DatasetQuery.export_csv(flat_rows, csv_name)
def write_all_data(q: DatasetQuery):
    """Export the unique attribute keys of every category as text and CSV.

    For each category 1 through 5 the result of
    ``q.get_all_unique_key_attributes`` is written twice under
    ``./analysis/``: once via ``DatasetQuery.export_txt`` to
    ``Unique_attrs_cat<N>.txt`` and once, as its ``items()`` pairs, via
    ``DatasetQuery.export_csv`` to ``unique_attr_cnt<N>.csv``.

    Args:
        q: Query object used to read the unique attribute keys.
    """
    analysis_path = "./analysis/"
    for category in range(1, 6):
        # Query once per category; the same result feeds both exports.
        unique_attrs = q.get_all_unique_key_attributes(category)

        txt_name = analysis_path + "Unique_attrs_cat" + str(category) + ".txt"
        DatasetQuery.export_txt(unique_attrs, txt_name)

        csv_name = analysis_path + "unique_attr_cnt" + str(category) + ".csv"
        DatasetQuery.export_csv(unique_attrs.items(), csv_name)
def get_dataset_query():
    """Return a ``DatasetQuery`` built from the result of ``get_dataset()``."""
    return DatasetQuery(get_dataset())
def print_random_data(q: DatasetQuery, category: int):
    """Print one random record of ``category`` and fetch its primary image.

    Args:
        q: Query object that supplies the random record.
        category: Category passed through to ``q.get_random_data``.

    Returns:
        Whatever the record's ``get_image`` returns when called with the
        record's ``primary_image_url``.
    """
    sample = q.get_random_data(category)
    print(sample)
    return sample.get_image(sample.primary_image_url)
for key_attr, val_attrs in data.attributes.items(): sentences_matrix[key_attr].append(val_attrs) pbar.update(1) print("Done!") print("Creating Word2Vec models from globs...") with tqdm(total=len(sentences_matrix.items())) as pbar: for key, sentences in sentences_matrix.items(): models[key] = Word2Vec(sentences, min_count=1) pbar.update(1) print("Done!") if __name__ == '__main__': d = get_dataset() q = DatasetQuery(d) print("Extracting every key to key_list and build model vector...") n_first_key_to_cluster = 7 key_list = q.get_most_frequent_keys(1, n_first_key_to_cluster)[:, 0] # models.keys() is # dict_keys(['brand', 'inseam', 'size type', "bottoms size women's", 'material']) models: Dict[str, Word2Vec] = Embedding.extract_keys_vocab(d, 1, key_list) print("Building tree...") # Working from here head = -1 # Get the whole data. birch_tree = BirchTree(d, 1, models, head=head) tree = birch_tree.build_tree(verbose=False) # birch_tree.save_birch_tree_to_binary(TREE_FOLDER_PATH)