Example #1
def get_authors(self):
    '''
    Returns the authors of the documents that appear in this cluster.
    '''
    ws = WarehouseServer()
    authors = set(ws.get_document_authors(self.document_dict.keys()))
    return list(authors)
Example #2
def get_authors(self):
    '''
    Returns the authors of the documents that appear in this cluster.
    '''
    ws = WarehouseServer()
    authors = set(ws.get_document_authors(self.document_dict.keys()))
    return list(authors)
Example #3
    def test_author_classification_egypt_dataset(self):
        TestAuthor.drop_collection()    
        ws = WarehouseServer()      
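        # Copy every author with more than 200 tweets into a temporary TestAuthor collection.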
        for author in ws.get_authors(type=Author):
            if len(author.tweets) > 200:
                t = TestAuthor()
                t.screen_name = author.screen_name
                t.tweets = author.tweets
                t.save()
            
        
        authors = ws.get_authors(type=TestAuthor)
        for author in authors:
            print '-----------------------'
            print author.screen_name
            vector = author.update_feature_vector()
            print vector
        
        classifier = TreeClassifier()
        attributes = ["retweets", "links", "retweeted", "replies", "mentions", "ff-ratio", "class"]
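        # Build the training matrix from the pre-labelled TrainingAuthor collection,
        # train the decision tree, then classify each test author below.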
        train_set = numpy.array([author.get_feature_vector_with_type() for author in TrainingAuthor.objects])

        classifier.train(train_set, attributes)
        
        for author in authors:
            prediction = "No prediction"
            if len(author.feature_vector) > 0:
                prediction = classifier.classify(author.get_feature_vector_with_type())
            print author.screen_name
            print prediction
            print '----------------------'
            
        TestAuthor.drop_collection()   
Example #4
def output_clusters_to_file(clusters, rownames, filename):
    '''
    DEPRECATED
    This method takes as input a set of clusters and generates 
    a very simplistic representation of these clusters in text form
    in a file. 
    '''
    
    ws = WarehouseServer()
    out = file(filename, 'w')
    out.write("Clustering results")
    out.write('\n')
    i = 0 
    for cluster in clusters:
        out.write('\n')
        out.write('***********************************************************')
        out.write('\n')
        out.write("Cluster"+str(i))
        out.write('\n')
        for document in cluster:
            out.write( ws.get_document_by_id(rownames[document]).content)
            out.write('\n')
        i += 1
Example #5
def output_clusters_to_file(clusters, rownames, filename):
    '''
    DEPRECATED
    This method takes as input a set of clusters and generates 
    a very simplistic representation of these clusters in text form
    in a file. 
    '''

    ws = WarehouseServer()
    out = file(filename, 'w')
    out.write("Clustering results")
    out.write('\n')
    i = 0
    for cluster in clusters:
        out.write('\n')
        out.write(
            '***********************************************************')
        out.write('\n')
        out.write("Cluster" + str(i))
        out.write('\n')
        for document in cluster:
            out.write(ws.get_document_by_id(rownames[document]).content)
            out.write('\n')
        i += 1
Example #6
# -*- coding: utf-8 -*-
'''
Created on 23 Jan 2012

@author: george

My playground!
'''
import unittest, os
from analysis.index import Index
from database.warehouse import WarehouseServer
from database.model.tweets import TwoGroupsTweet

BASE_PATH = os.path.expanduser("~/virtualenvfyp/pythia/data/")
index_path = BASE_PATH + "test_index"
ws = WarehouseServer()
sample_docs = ws.get_n_documents(100, type=TwoGroupsTweet)

index = Index(index_path)
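# Add each sample document to the index and finalize it before the search test below runs.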
for doc in sample_docs:
    index.add_document(doc)
index.finalize()

class TestPlayground(unittest.TestCase):
  
    def test_searching(self):        
        results = index.search_by_term("sales")
        
        calculated = []
        for doc in results:
            calculated.append(doc.get('id'))
Example #7
'''
Created on 26 Jan 2012

@author: george
'''
import unittest, datetime
from analysis.clustering.dbscan import DBSCANClusterer
from database.warehouse import WarehouseServer
from collections import OrderedDict

###########################################
# GLOBALS                                 #
###########################################
ws = WarehouseServer()
epsilon = 2.0
min_pts = 2.0
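# Sample 2-D points: roughly two dense groups, a duplicated pair, and an isolated outlier,
# for exercising DBSCAN.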
points = []
points.append([1,1])
points.append([1.5,1])
points.append([1.8,1.5])
points.append([2.1,1])
points.append([3.1,2])
points.append([4.1,2])
points.append([5.1,2])
points.append([10,10])
points.append([11,10.5])
points.append([9.5,11])
points.append([9.9,11.4])
points.append([15.0, 17.0])
points.append([15.0, 17.0])
points.append([7.5, -5.0])
Example #8
'''
Created on 22 Jan 2012

@author: george
'''
import datetime, os
from crawlers.CrawlerFactory import CrawlerFactory
from database.model.tweets import EgyptTweet
from analysis.index import Index
from database.warehouse import WarehouseServer

BASE_PATH = os.path.expanduser("~/virtualenvfyp/pythia/data/")
ws = WarehouseServer()
index_path = os.path.join(BASE_PATH,"egypt_index")
if not os.path.exists(index_path):
    try:
        os.makedirs(index_path)
    except os.error:
        raise Exception(index_path + " could not be created.")  
    
#Save the tweets in the db
f = CrawlerFactory()
t = f.get_crawler("topsy")

search_hashtags = "#25jan OR #jan25 OR #egypt OR #tahrir OR #fuckmubarak OR #mubarak \
                   OR #suez OR #DownWithMubarak OR #NOSCAF OR #SCAF OR #cairo"
t.search_for(search_hashtags)
from_date=datetime.datetime(2011, 01, 27, 23, 55, 0)
to_date=datetime.datetime(2011, 01, 29, 0, 0, 0)
t.search_between(from_date=from_date, 
                 to_date=to_date, 
Example #9
'''
Created on 21 Mar 2012

@author: george
'''

from database.warehouse import WarehouseServer
from database.model.tweets import EvaluationTweet
from analysis.clustering.kmeans import OrangeKmeansClusterer
from evaluation.evaluators import ClusteringEvaluator

ws = WarehouseServer()
documents = ws.get_all_documents(type=EvaluationTweet)

oc = OrangeKmeansClusterer(k=35, ngram=1)
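# Evaluate the 35-cluster k-means run using BCubed precision, recall, and F-score.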
ebe = ClusteringEvaluator(documents)
bcubed_precision, bcubed_recall, bcubed_f = ebe.evaluate(clusterer=oc)
print bcubed_precision, bcubed_recall, bcubed_f
Example #10
@author: george
'''
import datetime
from crawlers.CrawlerFactory import CrawlerFactory
from database.model.tweets import *
from database.model.agents import *
from mongoengine import *
import tools.utils
from urlparse import urlparse
from database.warehouse import WarehouseServer

f = CrawlerFactory()
twitter = f.get_crawler("twitter")
#twitter.login()
ws = WarehouseServer()

from_date = datetime.datetime(2011, 1, 25, 0, 0, 0)
to_date = datetime.datetime(2011, 1, 26, 0, 00, 0)
items = ws.get_documents_by_date(from_date, to_date, limit=100)
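# Collect the distinct screen names of the tweet authors in this window.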
screen_names = []
for tweet in items:
    screen_names.append(tweet.author_screen_name)
screen_names = set(screen_names)
print len(screen_names)
# A terrible hack to save the screen_names of users who are mentioned in tweets
# but are not yet in the database. They will be considered after all authors
# have been stored.
mentions_of_not_stored_users = []

for author_name in screen_names:
Example #11
'''
Created on 24 Mar 2012

@author: george
'''

import datetime, unittest 
from database.warehouse import WarehouseServer
from analysis.clustering.kmeans import OrangeKmeansClusterer
from tools.utils import aggregate_data
from matplotlib.dates import num2date  # @UnresolvedImport
from visualizations.graphs import D3Timeline


ws = WarehouseServer()
from_date = datetime.datetime(2011, 1, 26, 0, 0, 0)
to_date = datetime.datetime(2011, 1, 27, 0, 0, 0) 
items = ws.get_documents_by_date(from_date, to_date, limit=3000)

oc = OrangeKmeansClusterer(k=100, ngram=1)
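# Add the documents and run the clustering (no PCA step).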
oc.add_documents(items)
oc.run("orange_clustering_test", pca=False)

top_clusters = []
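# Skip empty clusters and clusters whose documents all share the same timestamp.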
for cluster in oc.clusters:
    documents = cluster.get_documents().values()
    if len(documents) == 0 : continue
    dates = [doc.date for doc in documents]
    delta = max(dates) - min(dates)
    delta_seconds = delta.total_seconds()
    if delta_seconds == 0: continue
Example #12
# -*- coding: utf-8 -*-
'''
Created on 23 Jan 2012

@author: george

My playground!
'''
import unittest, os
from analysis.index import Index
from database.warehouse import WarehouseServer
from database.model.tweets import TwoGroupsTweet

BASE_PATH = os.path.expanduser("~/virtualenvfyp/pythia/data/")
index_path = BASE_PATH + "test_index"
ws = WarehouseServer()
sample_docs = ws.get_n_documents(100, type=TwoGroupsTweet)

index = Index(index_path)
for doc in sample_docs:
    index.add_document(doc)
index.finalize()


class TestPlayground(unittest.TestCase):
    def test_searching(self):
        results = index.search_by_term("sales")

        calculated = []
        for doc in results:
            calculated.append(doc.get('id'))
Example #13
'''
Created on 22 Jan 2012

@author: george
'''
import datetime, os
from crawlers.CrawlerFactory import CrawlerFactory
from database.model.tweets import EgyptTweet
from analysis.index import Index
from mongoengine import *
from database.warehouse import WarehouseServer

BASE_PATH = os.path.expanduser("~/virtualenvfyp/pythia/data/")
ws = WarehouseServer()
index_path = os.path.join(BASE_PATH, "egypt_index")
if not os.path.exists(index_path):
    try:
        os.makedirs(index_path)
    except os.error:
        raise Exception(index_path + " could not be created.")

#Save the tweets in the db
f = CrawlerFactory()
t = f.get_crawler("topsy")

search_hashtags = "#25jan OR #jan25 OR #egypt OR #tahrir OR #fuckmubarak OR #mubarak \
                   OR #suez OR #DownWithMubarak OR #NOSCAF OR #SCAF OR #cairo"

t.search_for(search_hashtags)
##Last update ended at 2011-01-27 09:00:00
from_date = datetime.datetime(2011, 01, 24, 0, 0, 0)
Example #14
'''
Created on 22 Mar 2012

@author: george

This script allows us to annotate known events with their labels.
'''
import datetime
from database.warehouse import WarehouseServer
from mongoengine import connect
connect("pythia_db")
from evaluation.evaluators import AbstractEvaluator
ws = WarehouseServer()

from_date=datetime.datetime(2011, 01, 25, 12, 0, 0)
to_date=datetime.datetime(2011, 01, 25, 12, 5, 0)
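# Fetch the tweets from this five-minute window and hand them to the evaluator for annotation.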
tweet_list = ws.get_documents_by_date(from_date, to_date)
ce = AbstractEvaluator(tweet_list)
ce.annotate_dataset()
Example #15
'''
Created on 21 Mar 2012

@author: george
'''
import numpy
from database.warehouse import WarehouseServer
from analysis.classification.tree import TreeClassifier
from database.model.agents import TrainingAuthor
from evaluation.evaluators import ClassificationEvaluator

ws = WarehouseServer()
authors = ws.get_all_documents(type=TrainingAuthor)
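# Evaluate a decision-tree classifier over the five author classes,
# presumably with 10-fold cross-validation (K=10).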
ce = ClassificationEvaluator(authors, ["Celebrity", "Media", "Journalists", "Activists", "Commoner"])
metrics = ce.evaluate(classifier=TreeClassifier(), K=10)
print metrics