Exemplo n.º 1
0
from retrieval import article
import sys

__author__="Dan"
__date__ ="$16-Mar-2010 21:40:24$"

# Script entry point: load the articles published within the last week that
# already have a cluster id, load their models, and build a list of
# ClusterModel objects for every approved proposed merger.
if __name__ == "__main__":
    # Reference dates are built by adding negative timedeltas to "now".
    todays_date = datetime.datetime.today()
    # NOTE(review): two_week_difference is not used in the visible part of this block.
    two_week_difference = datetime.timedelta(days=-14)
    week_difference = datetime.timedelta(days=-7)
    # NOTE(review): the name says "two_day" but the offset is -3 days -- confirm which is intended.
    two_day_difference = datetime.timedelta(days=-3)
    # Dates rendered as YYYY-MM-DD so they can be embedded in the SQL text below.
    week_ago_date_mysql = (todays_date + week_difference).strftime("%Y-%m-%d")
    # NOTE(review): days_ago_date_mysql is not used in the visible part of this block.
    days_ago_date_mysql = (todays_date + two_day_difference).strftime("%Y-%m-%d")
    
    # The condition keeps articles that have a cluster id, have at least one
    # row in articleswithterms, and were published on or after the week-ago date.
    # Presumably db_get_all_articles uses this string as a WHERE clause -- verify.
    article_list = article.db_get_all_articles("NOT isnull(clusterid) AND EXISTS (SELECT articleid FROM articleswithterms WHERE articleid = articles.id) AND `datepublished` >= '%(weekago)s'" % {"weekago" : week_ago_date_mysql})
    models,inv_index = articleunigram.db_load_models(article_list)

    # Only mergers flagged approved = 1 are processed.
    mergers_query = "SELECT * FROM proposedmergers WHERE `approved` = 1"
    db = database.connect_to_database()
    # DictCursor: rows are accessed by column name (row['id'], cl['clusterid']) below.
    cur = db.cursor(cursorclass = MySQLdb.cursors.DictCursor)
    # NOTE(review): clusterer is not used in the visible part of this block.
    clusterer = ClusterMaker(inv_index = inv_index)
    cur.execute(mergers_query)
    result = cur.fetchall()
    # Maps merger id -> list of ClusterModel objects, one per cluster id linked to that merger.
    proposed_mergers = dict()
    for row in result:
        # One additional query per approved merger; %d requires row['id'] to be numeric.
        clusters_query = "SELECT `clusterid` FROM proposedmergers_clusters WHERE `mergerid` = %d" % (row['id'])
        # The cursor is reused here; `result` was already fully fetched above.
        cur.execute(clusters_query)
        clusters = cur.fetchall()
        proposed_mergers[row['id']] = [clustermodel.ClusterModel(inv_index = inv_index,id = cl['clusterid'],load_full_article = True) for cl in clusters]
    for p in proposed_mergers.items():
Exemplo n.º 2
0
            print "Setting cluster type to " + str(self.cluster_type)
    
    def get_cluster_info(self, e=None):
        """Record and print the cluster selection reported by event ``e``.

        When ``e`` is falsy (including the default ``None``) nothing happens.
        Otherwise the selection index from ``e.GetSelection()`` is converted
        to ``int`` and passed to ``self.lb_clusters.GetSelection``; the
        integer form of that result is stored in ``self.selected_cluster``
        and printed.
        """
        if not e:
            return
        # NOTE(review): the stock wx.ListBox.GetSelection() takes no argument;
        # confirm that lb_clusters really accepts the event's index here.
        event_index = int(e.GetSelection())
        self.selected_cluster = int(self.lb_clusters.GetSelection(event_index))
        # A parenthesised single-argument print produces the same output as
        # the Python 2 print statement and is also valid Python 3 syntax.
        print(str(self.selected_cluster))
    
    def start_clustering(self, e=None):
        """Start a clustering run on a ``clustering.ClustererThread``.

        A falsy ``e`` (including the default ``None``) makes this a no-op.
        Otherwise a ``clustering.ClusterMaker`` is created for
        ``self.cluster_type`` and handed to a ``clustering.ClustererThread``
        together with ``self.models``, ``self.inv_index``, the text of
        ``self.tc_threshold`` parsed as a float, ``self.cluster_method`` and
        ``self.update`` as the ``on_change`` callback. The thread is then
        started.
        """
        if not e:
            return
        maker = clustering.ClusterMaker(cluster_type=self.cluster_type)
        worker = clustering.ClustererThread(
            maker,
            self.models,
            self.inv_index,
            float(self.tc_threshold.GetValue()),
            self.cluster_method,
            on_change=self.update,
        )
        worker.start()
            
    def update(self, clusters=None, new_cluster=None, remove_model=None):
        """Refresh the cluster widgets after a clustering change.

        clusters: when truthy, its length is written (as a string) to
            ``self.tc_num_clusters`` and the value is stored on
            ``self.clusters``.
        new_cluster: when truthy, its ``str()`` form is appended to
            ``self.lb_clusters``.
        remove_model: accepted but not used by this method.
        """
        if clusters:
            count_field = self.tc_num_clusters
            count_text = str(len(clusters))
            count_field.SetValue(count_text)
            count_field.Update()
            self.clusters = clusters
        if not new_cluster:
            return
        cluster_list = self.lb_clusters
        cluster_list.AppendAndEnsureVisible(str(new_cluster))
        cluster_list.Update()

# NOTE(review): theVar is not referenced anywhere in the visible part of this file.
theVar = 1

# Module-level launcher: these statements are not guarded by
# `if __name__ == "__main__"`, so they run whenever this module is executed
# or imported.
# The wx application object is created before the frame is constructed.
# False is wx.App's first positional argument (`redirect` in the wxPython docs).
app = wx.App(False)
# Fetch the articles matching the condition "NOT trainingcluster = 0" and
# load their models together with the inverted index.
man_articles = article.db_get_all_articles("NOT trainingcluster = 0")
models,inv_index = articleunigram.db_load_models(man_articles)
# NOTE(review): frame.Show() is not called here; presumably ClusterMonitor
# shows itself during construction -- confirm.
frame = ClusterMonitor(None,'Article Browser',models,inv_index)
# Hand control to the wx event loop.
app.MainLoop()