def load_clusters(): """Load info from topics.txt file into Cluster, TermCluster tables""" # Delete whatever's in the db already Cluster.query.delete() TermCluster.query.delete() count_clusters = 0 for row in open("topics.csv"): row = row.rstrip().split(",") # Parse the txt into the appropriate data types for seeding cluster = int(row[1][-3:]) word = row[3].strip() # Check if word is in our list of key terms. If it is, add to # TermCluster table to allow for lookup later (see seed.py for TODO) if Term.check_for_term(word) is True: term_cluster_to_add = TermCluster(word=word, cluster_id=cluster) db.session.add(term_cluster_to_add) db.session.commit() # Check if a cluster is in our list of clusters. If it's not, add it. if Cluster.check_for_cluster(cluster) is False: cluster_to_add = Cluster(cluster_id=cluster) db.session.add(cluster_to_add) db.session.commit() # Print where we are and increment counter print "Topics.txt seeding row", count_clusters count_clusters += 1
def computeNeighborhood(p, setPoints, typeDistance, minPoints, eps):
    """Collect the eps-neighborhood of p from setPoints using the chosen
    distance measure; return a Cluster if it is dense enough, else None."""
    pointsOfCluster = []
    for q in setPoints:
        if typeDistance == 0:
            # Plain Euclidean distance test
            if p.is_in_neighborhoodByEUSimple(q, eps):
                pointsOfCluster.append(q)
        elif typeDistance == 1:
            # Euclidean distance weighted by relative speed
            if p.is_in_neighborhoodEURelativeSpeed(q, eps):
                pointsOfCluster.append(q)
    if len(pointsOfCluster) < minPoints:
        return None
    else:
        return Cluster(None, pointsOfCluster)
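# The two neighborhood tests above are methods on the point objects, which are
# not shown in this snippet. Below is a minimal sketch of what they might look
# like, assuming a hypothetical Point with x/y coordinates and a speed
# attribute (the attribute names and the speed weighting are guesses, not
# taken from the source):

import math

class Point(object):
    def __init__(self, id, x, y, speed=1.0):
        self.id = id
        self.x = x
        self.y = y
        self.speed = speed

    def is_in_neighborhoodByEUSimple(self, q, eps):
        # Plain Euclidean distance against the eps radius
        return math.hypot(self.x - q.x, self.y - q.y) <= eps

    def is_in_neighborhoodEURelativeSpeed(self, q, eps):
        # One plausible reading of the name: penalize the Euclidean distance
        # by the speed difference, so points moving at similar speeds are more
        # likely to count as neighbors; the real weighting may differ
        dist = math.hypot(self.x - q.x, self.y - q.y)
        return dist * (1.0 + abs(self.speed - q.speed)) <= eps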
def load_clusters(): """Load info from topics.txt file into Cluster, TermCluster tables File format: R row id,Topic XXX,R column ID,word where XXX represents a number between 0-400 R ids can be discarded during seeding Source: topic clustering data from Neurosynth, converted to long format in R prior to seeding. Notes: the words tracked in this clustering are not in perfect alignment with those tracked in studies_terms.txt. Approximately 2000 of the terms in studies_terms have a topical cluster, the remaining ~1000 do not. This number could be improved by stemming. Many of the words not tracked in clusters are multi-word phrases.""" # Delete whatever's in the db already Cluster.query.delete() TermCluster.query.delete() count_clusters = 0 topics_fileobj = open('seed_data/topics.csv') for row in topics_fileobj: row = row.rstrip().split(',') # Parse the txt into the appropriate data types for seeding cluster = int(row[1][-3:]) word = row[3].strip() # Check if word is in our list of key terms. If it is, add to # TermCluster table to allow for lookup later (see model.py for TODO) if Term.check_for_term(word) is True: term_cluster_to_add = TermCluster(word=word, cluster_id=cluster) db.session.add(term_cluster_to_add) db.session.commit() # Check if a cluster is in our list of clusters. If it's not, add it. if Cluster.check_for_cluster(cluster) is False: cluster_to_add = Cluster(cluster_id=cluster) db.session.add(cluster_to_add) db.session.commit() # Print where we are and increment counter print "Topics.txt seeding row", count_clusters count_clusters += 1 topics_fileobj.close()
def dbscan(setPoints, eps, minPoints):
    """Cluster setPoints with DBSCAN; return [clusters, noise points, point IDs]."""
    clusters = []
    noises = []
    IDs = []
    classifications = {}

    # Mark every point as unclassified (0) before clustering
    for pt in setPoints:
        IDs.append(pt.id)
        classifications[pt.id] = 0

    # Try to grow a cluster from each point that has not been classified yet
    for pt in setPoints:
        if classifications[pt.id] == 0:
            points = _expand_cluster(setPoints, pt, classifications, noises,
                                     eps, minPoints)
            if points:
                clusters.append(Cluster(None, points))

    return [clusters, noises, IDs]
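# _expand_cluster is the private helper that grows a cluster outward from a
# seed point; it is not shown in this snippet. Below is a sketch of the
# textbook DBSCAN expansion step, assuming classifications maps point id to
# 0 (unclassified), -1 (noise), or 1 (assigned), and that points expose x/y
# coordinates; the real helper's conventions may differ:

import math

def _expand_cluster(setPoints, p, classifications, noises, eps, minPoints):
    def neighbors(pt):
        # Plain Euclidean eps-neighborhood; the project may instead use one
        # of the distance variants from computeNeighborhood
        return [q for q in setPoints
                if math.hypot(pt.x - q.x, pt.y - q.y) <= eps]

    seeds = neighbors(p)
    if len(seeds) < minPoints:
        # Not a core point: record it as noise and report no cluster
        classifications[p.id] = -1
        noises.append(p)
        return None

    # p is a core point; claim its whole neighborhood for the new cluster
    cluster_points = list(seeds)
    for q in seeds:
        classifications[q.id] = 1

    # Breadth-first growth: every core point pulls its own neighbors in
    queue = list(seeds)
    while queue:
        q = queue.pop(0)
        q_neighbors = neighbors(q)
        if len(q_neighbors) >= minPoints:
            for r in q_neighbors:
                if classifications[r.id] == 0:
                    queue.append(r)
                if classifications[r.id] <= 0:  # unclassified or noise
                    classifications[r.id] = 1
                    cluster_points.append(r)

    return cluster_points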
import sys
sys.path.append("../src")

from simulator import Simulator
from model import Cluster

# import stacktracer
# stacktracer.trace_start("trace.html", interval=5, auto=True)  # Set auto flag to always update file!

model = Cluster(2)
sim = Simulator(model)
sim.setVerbose(None)
# sim.setTerminationTime(10.0)
sim.setStateSaving("custom")
sim.simulate()