def getClustersWithCat():
    """Render the stored clusters of one category as a plain-text listing.

    The category is read from the ``category`` query-string parameter.
    Every matching cluster is printed with its 1-based position, its
    category and the tab-indented titles of its news posts.

    Errors are not raised: the text produced so far is returned, followed
    by the type and message of the exception.

    Returns:
        A ``text/plain`` ``Response``.
    """
    report = ''
    try:
        category = request.args.get('category')
        # This trace line only survives when the query below fails; it is
        # discarded as soon as the clusters have been fetched.
        report += 'got parameter category: %s\n' % (category,)
        clusters = Cluster.query(Cluster.category == category).fetch()
        report = ''
        for position, cluster in enumerate(clusters, 1):
            report += 'cluster %d:\n' % position
            report += 'category: %s\n' % (cluster.category,)
            for post in cluster.listNews:
                report += '\t%s\n' % (post.title,)
            report += '\n'
    except Exception as inst:
        report += 'Exception type: %s\n' % (type(inst),)
        # ``message`` only exists on Python 2 exceptions; fall back to the
        # exception itself so the handler cannot fail with AttributeError
        # and hide the original error.  The one-element tuple keeps the
        # formatting safe when the message is itself a tuple.
        report += 'Exception: %s\n' % (getattr(inst, 'message', inst),)
    return Response(report, mimetype='text/plain')
def getClustersServer():
    """Return a plain-text dump of the clusters stored for one category.

    The category is taken from the ``category`` query-string parameter.
    The dump starts with the category and the number of clusters, then
    lists each cluster (numbered from 1) with one tab-indented
    ``title<TAB>source_url`` line per news post.  When the query yields
    ``None`` the body is just ``'None clusters :/'``.

    Returns:
        A ``text/plain`` ``Response``.
    """
    wanted = request.args.get('category')
    found = Cluster.query(Cluster.category == wanted).fetch()
    if found is None:
        return Response('None clusters :/', mimetype='text/plain')

    chunks = [
        'Got category %s\n' % wanted,
        'Number of clusters %d\n' % len(found),
    ]
    for number, cluster in enumerate(found, 1):
        chunks.append('Cluster %d\n' % number)
        chunks.extend(
            '\t%s\t%s\n' % (post.title, post.source_url)
            for post in cluster.listNews
        )
    return Response(''.join(chunks), mimetype='text/plain')
def prepareClustering():
    """Delete every stored ``Cluster`` and rebuild them with ``getClusters``.

    The value returned by ``getClusters()`` is discarded; progress is only
    reported through debug log messages.

    Returns:
        An empty string.
    """
    logging.debug('preparing for refreshing..')
    # Only the keys are needed to delete the entities.
    staleKeys = Cluster.query().fetch(keys_only=True)
    ndb.delete_multi(staleKeys)
    getClusters()
    logging.debug('refreshed!')
    return ''
def getMYClusters():
    """Return the clusters of the requested category as JSON text.

    The category is read from the ``category`` query-string parameter.
    The body is a JSON object with a single ``listClusters`` key holding
    the ``serialize()`` result of every matching cluster.

    Returns:
        A ``Response`` carrying the JSON text (sent as ``text/plain``).
    """
    category = request.args.get('category')
    serialized = []
    for cluster in Cluster.query(Cluster.category == category).fetch():
        serialized.append(cluster.serialize())
    payload = json.dumps({'listClusters': serialized}, ensure_ascii=True)
    return Response(payload, mimetype='text/plain')
def getFilteredClustersDebug():
    """Plain-text debug view of the clusters filtered by wanted sources.

    Query-string parameters:
        wantedSources: comma separated source ids, e.g. ``"1,4,7"``.
            When absent no source is wanted.
        category: category of the clusters to load.

    Every cluster keeps only the news posts whose ``source_id`` is one of
    the wanted ids; clusters left without any post are dropped.  The
    filtered lists are assigned to the in-memory clusters only, nothing is
    stored.

    Returns:
        A ``text/plain`` ``Response`` with the parsed ids, the number of
        clusters *before* filtering, and the remaining clusters (numbered
        from 1) with one ``title<TAB>source_url`` line per post.

    Raises:
        ValueError: if a non-blank token of ``wantedSources`` is not an
            integer.
    """
    reqParam = request.args.get('wantedSources')
    # The value has the form "id1,id2,...".  Blank tokens (an empty
    # parameter, a trailing comma) are skipped instead of being handed to
    # int(), which would reject them.
    wantedIds = []
    if reqParam is not None:
        wantedIds = [int(token) for token in reqParam.split(',')
                     if token.strip()]
    result = 'Wanted sources: %s\n' % (wantedIds,)

    category = request.args.get('category')
    clusters = Cluster.query(Cluster.category == category).fetch()

    # Only clusters that still hold news after filtering end up here
    # (posts from sources the user does not want are left out).
    clustersToReturn = []
    for cluster in clusters:
        kept = [post for post in cluster.listNews
                if post.source_id in wantedIds]
        # A cluster without a single post after filtering is empty, so it
        # is not included.
        if kept:
            cluster.listNews = kept
            clustersToReturn.append(cluster)

    # debugging output
    result += 'Number of clusters %d\n' % len(clusters)
    for number, cluster in enumerate(clustersToReturn, 1):
        result += 'Cluster %d\n' % number
        for post in cluster.listNews:
            result += '\t%s\t%s\n' % (post.title, post.source_url)
    return Response(result, mimetype='text/plain')
def get_clusters(self):
    """Build the clusters described in the JSON input file.

    Reads ``self.filename`` and creates one ``Cluster`` per entry of its
    top-level ``"clusters"`` list.  Every cluster receives the result of
    ``self.get_dimensions()``.  The ``"type"``, ``"cardinality"`` and
    ``"density"`` entries are applied only when present and valid;
    otherwise a message is printed and the matching setter is not called
    (the messages say a default applies -- presumably ``Cluster``'s own).

    Returns:
        list: the created clusters; empty when the file has no, or an
        empty, ``"clusters"`` element.
    """
    # The file is only needed for parsing, so it is closed right away.
    with open(self.filename) as file:
        data = json.load(file)

    # .get() instead of [] so that an absent key reaches the
    # "Couldn't find" message rather than raising KeyError.
    clusters_json = data.get("clusters")
    if not clusters_json:
        print("Couldn't find 'clusters' element in input file")
        return []

    def read_level(cluster_json, key):
        """Return the 1..10 integer stored under ``key``, else None."""
        raw = cluster_json.get(key)
        if not raw:
            print("Couldn't find '%s' element for cluster" % key)
            return None
        level = int(raw)
        if 0 < level <= 10:
            return level
        print("Invalid %s. Has to be within 1 to 10. Defaulting to 5." % key)
        return None

    clusters = []
    for cluster_json in clusters_json:
        cluster = Cluster()
        cluster.set_dimensions(self.get_dimensions())

        type_name = cluster_json.get("type")
        if type_name:
            # Match the configured name against the known distributions.
            for distribution in Distribution:
                if distribution.name == type_name:
                    cluster.set_distribution(distribution)
                    break
            else:
                print("Couldn't map distribution type. Defaulting to 'gaussian'")
        else:
            print("Couldn't find 'type' element for cluster")

        cardinality = read_level(cluster_json, "cardinality")
        if cardinality is not None:
            cluster.set_cardinality(cardinality)

        density = read_level(cluster_json, "density")
        if density is not None:
            cluster.set_density(density)

        clusters.append(cluster)
    return clusters
def getMYNews():
    """Placeholder news endpoint; currently always answers with an empty body.

    All stored news posts and the clusters of the requested ``category``
    are fetched, and every post is converted to a ``NewsPostClient``, but
    none of these results is put into the response yet.

    Returns:
        An empty ``Response`` with mimetype ``application/javascript``.
    """
    category = request.args.get('category')
    storedPosts = NewsPost.query().fetch()
    # Fetched for parity with the other endpoints; not used yet.
    Cluster.query(Cluster.category == category).fetch()
    clientPosts = [
        NewsPostClient(
            url=post.url,
            host_page=post.host_page,
            title=post.title,
            description=post.description,
        )
        for post in storedPosts
    ]
    # TODO: serialize ``clientPosts`` into the response body.
    return Response('', mimetype='application/javascript')
def getFilteredClusters():
    """Return the clusters of a category, restricted to the wanted sources.

    Query-string parameters:
        wantedSources: comma separated source ids, e.g. ``"1,4,7"``.
            When absent no source is wanted.
        category: category of the clusters to load.

    Every cluster keeps only the news posts whose ``source_id`` is one of
    the wanted ids; clusters left without any post are dropped.  The
    filtered lists are assigned to the in-memory clusters only, nothing is
    stored.

    Returns:
        A ``Response`` whose body is a JSON object with a single
        ``listClusters`` key holding the ``serialize()`` result of every
        remaining cluster (sent as ``text/plain``).

    Raises:
        ValueError: if a non-blank token of ``wantedSources`` is not an
            integer.
    """
    reqParam = request.args.get('wantedSources')
    # Blank tokens (an empty parameter, a trailing comma) are skipped
    # instead of being handed to int(), which would reject them.
    wantedIds = []
    if reqParam is not None:
        wantedIds = [int(token) for token in reqParam.split(',')
                     if token.strip()]

    category = request.args.get('category')
    clusters = Cluster.query(Cluster.category == category).fetch()

    clustersToReturn = []
    for cluster in clusters:
        kept = [post for post in cluster.listNews
                if post.source_id in wantedIds]
        if kept:
            cluster.listNews = kept
            clustersToReturn.append(cluster)

    # TODO: sort the clusters by some parameter (maybe the cluster size).
    obj = {'listClusters': [cluster.serialize()
                            for cluster in clustersToReturn]}
    result = json.dumps(obj, ensure_ascii=True)
    return Response(result, mimetype='text/plain')
def _loadPickledDict(path):
    """Unpickle and return the object stored in the file at ``path``."""
    # Binary mode is what pickle expects, and the ``with`` block closes
    # the file even when unpickling fails.
    with open(path, 'rb') as pickled:
        return Unpickler(pickled).load()


def _mostVoted(votes):
    """Return ``(category, count)`` for the highest count in ``votes``.

    Ties go to the category met first when iterating ``votes``; an empty
    mapping yields ``('', 0)``.
    """
    if not votes:
        return '', 0
    winner = max(votes, key=votes.get)
    return winner, votes[winner]


def getClusters():
    """Cluster all crawled news posts, label the clusters and store them.

    Steps:
      1. take every news post from the crawler and load the three pickled
         naive-Bayes dictionaries;
      2. cluster the posts and order the clusters by size, largest first;
      3. give each cluster the category most of its posts are classified
         as (majority vote);
      4. store each cluster as a ``Cluster`` entity holding one
         ``NewsPostClient`` per post.

    Errors are not raised: the exception type and message are appended to
    the report instead.

    Returns:
        A ``text/plain`` ``Response`` listing the clusters (numbered from
        0) with their post titles, followed by the progress trace.
    """
    feedback = ''
    report = ''
    try:
        newsPosts = crawler.take_all_news_posts()

        # utility dicts for majority voting with naive bayes
        dict_words = _loadPickledDict(
            naivebayes_classification.str_dict_word_in_cat)
        dict_cats = _loadPickledDict(
            naivebayes_classification.str_dict_cat_count)
        dict_priors = _loadPickledDict(
            naivebayes_classification.str_dict_priors)
        feedback += 'took the newsposts \n'

        clusters, innerfeedback = clustering.cluster_news(newsPosts)
        feedback += '%s\n' % innerfeedback
        feedback += 'done the clustering\n'
        feedback += 'num of clusters: %d\n' % len(clusters)

        # Largest clusters first.
        clusters = sorted(clusters, key=lambda x: -len(x.posts))
        for i, c in enumerate(clusters):
            feedback += 'getting posts from cluster\n'
            posts = c.posts
            feedback += 'got the posts from cluster\n'
            report += 'cluster %d\n' % i

            # Majority voting: every post votes for its classified category.
            votes_cat = {}
            for post in posts:
                report += ' \t %s\n' % (post.title,)
                category = test_classifications.get_NB_category(
                    post.words, dict_words, dict_cats, dict_priors)
                votes_cat[category] = 1 + votes_cat.get(category, 0)
            maxCat, maxVotes = _mostVoted(votes_cat)
            feedback += ('^^^ CLUSTER CATEGORY: %s with maxVotes: %d\n'
                         % (maxCat, maxVotes))

            listNews = []
            feedback += ' number of posts in cluster %d\n' % len(posts)
            for post in posts:
                feedback += 'trying to create NewsPostClient\n'
                feedback += 'title: %s \n' % (post.title,)
                feedback += 'numWords: %d\n' % post.numWords
                feedback += 'url: %s\n' % (post.url,)
                newNews = NewsPostClient(
                    url=post.url,
                    host_page=post.host_page,
                    title=post.title,
                    numWords=post.numWords,
                    source_id=post.source_id,
                    source_url=post.source_url,
                    img_url=post.img_url,
                    description=post.description,
                )
                feedback += 'created NewsPostClient'
                listNews.append(newNews)
                feedback += 'appended newNews\n'

            Cluster(category=maxCat, listNews=listNews).put()
            report += '\n'
    except Exception as inst:
        feedback += 'Exception type: %s\n' % (type(inst),)
        # ``message`` only exists on Python 2 exceptions; fall back to the
        # exception itself so the handler cannot fail with AttributeError
        # and hide the original error.
        feedback += 'Exception: %s\n' % (getattr(inst, 'message', inst),)
    # The trace is appended exactly once, on success and on failure alike.
    report += feedback
    return Response(report, mimetype='text/plain')
def addCluster(self, r, x, y, v, theta, lambda0, color, is_point):
    """Create a ``Cluster`` and append it to ``self.clusters_list``.

    All arguments are handed to the ``Cluster`` constructor unchanged and
    in the order given.
    """
    new_cluster = Cluster(r, x, y, v, theta, lambda0, color, is_point)
    self.clusters_list.append(new_cluster)