示例#1
0
    def get_rooted_network(cls, root_user, postgres_handle,
                           go_back_this_many_weeks=2, start_here='now',
                           distance=100):
        """Load the follow graph around ``root_user`` into memory.

        Returns an ``OrderedDict`` mapping a user id to the ``set`` of ids
        that user follows.  The root user is inserted first; every other
        entry comes from the weekly ``twitter_user_following_<suffix>``
        tables, limited to the ids returned by
        ``cls.get_following_following_ids(root_user, distance=distance)``
        and to users with more than 10 followers.

        Parameters:
            root_user: object exposing ``id`` and ``following_ids``.
            postgres_handle: object whose ``execute_query(sql, params)``
                returns rows indexable by ``'id'`` and ``'following_ids'``.
            go_back_this_many_weeks: size of the look-back window; one more
                suffix than this is requested from ``time_utils``.
            start_here: a datetime, or the string ``'now'`` to use
                ``datetime.now()``.
            distance: forwarded to ``cls.get_following_following_ids``.

        Weekly tables are visited in the order ``time_utils`` yields them,
        and the first row seen for a given user id wins.
        """
        print('Loading network in memory!')
        from collections import OrderedDict

        graph = OrderedDict()
        graph[root_user.id] = set(root_user.following_ids)

        # Resolve the 'now' sentinel, then step back to the window start.
        anchor = datetime.now() if start_here == 'now' else start_here
        window = timedelta(days=go_back_this_many_weeks * 7)
        week_tags = time_utils.year_weeknum_strs(
            anchor - window, go_back_this_many_weeks + 1)

        #see these:
        # - http://www.postgresql.org/docs/current/static/queries-with.html
        # - http://archives.postgresql.org/pgsql-novice/2009-01/msg00092.php
        # First %s becomes the named driver placeholder for the id array,
        # second %s is the weekly table suffix.
        sql_template = """
        WITH only_these_ids as (
            select id from unnest(%s) as id
        )
        select u.id, f.following_ids
        from twitter_user u
        join twitter_user_following_%s f on u.id = f.twitter_user_id 
        join only_these_ids on only_these_ids.id = u.id
        where u.followers_count > 10 
        ;
        """
        candidate_ids = cls.get_following_following_ids(
            root_user, distance=distance)
        query_args = {'following_following_ids': candidate_ids}

        for week_tag in week_tags:
            print('Starting %s query!' % week_tag)
            week_sql = sql_template % (
                '%(following_following_ids)s', week_tag)
            rows = postgres_handle.execute_query(week_sql, query_args)
            print('Done w/ %s query!' % week_tag)
            for row in rows:
                user_id = row['id']
                if user_id in graph:
                    # Already recorded (root user or an earlier week).
                    continue
                graph[user_id] = set(row['following_ids'])
        return graph
示例#2
0
    def get_rooted_network(cls,
                           root_user,
                           postgres_handle,
                           go_back_this_many_weeks=2,
                           start_here='now',
                           distance=100):
        """Return an ordered ``{user_id: set(following_ids)}`` map.

        The map starts with ``root_user`` and is then extended, one weekly
        ``twitter_user_following_<suffix>`` table at a time, with users that
        have more than 10 followers and whose id is among those returned by
        ``cls.get_following_following_ids(root_user, distance=distance)``.
        A user id already present in the map is never overwritten.

        ``start_here`` may be a datetime or the literal ``'now'`` (meaning
        ``datetime.now()``).  The table suffixes come from
        ``time_utils.year_weeknum_strs`` called with the date
        ``go_back_this_many_weeks`` weeks before ``start_here`` and a count
        of ``go_back_this_many_weeks + 1``.
        """
        print('Loading network in memory!')
        from collections import OrderedDict
        members = OrderedDict()
        members[root_user.id] = set(root_user.following_ids)

        if start_here == 'now':
            start_here = datetime.now()
        days_back = go_back_this_many_weeks * 7
        first_week_date = start_here - timedelta(days=days_back)
        suffixes = time_utils.year_weeknum_strs(
            first_week_date, go_back_this_many_weeks + 1)

        #see these:
        # - http://www.postgresql.org/docs/current/static/queries-with.html
        # - http://archives.postgresql.org/pgsql-novice/2009-01/msg00092.php
        base_query = """
        WITH only_these_ids as (
            select id from unnest(%s) as id
        )
        select u.id, f.following_ids
        from twitter_user u
        join twitter_user_following_%s f on u.id = f.twitter_user_id 
        join only_these_ids on only_these_ids.id = u.id
        where u.followers_count > 10 
        ;
        """
        allowed_ids = cls.get_following_following_ids(
            root_user, distance=distance)
        bind_values = {'following_following_ids': allowed_ids}

        # The id list stays a bound parameter; only the table suffix is
        # spliced into the SQL text.
        id_placeholder = '%(following_following_ids)s'
        for suffix in suffixes:
            print('Starting %s query!' % suffix)
            records = postgres_handle.execute_query(
                base_query % (id_placeholder, suffix), bind_values)
            print('Done w/ %s query!' % suffix)
            for record in records:
                key = record['id']
                if key not in members:
                    members[key] = set(record['following_ids'])
        return members
示例#3
0
################################################
##twitter_user_following
################################################
# DDL template for one weekly table of following ids, plus a BEFORE UPDATE
# trigger that runs ts_modifieddate() on it.  %(postfix)s is the table suffix.
twitter_user_following = """
create table twitter_user_following_%(postfix)s(
    createddate timestamp not null default now(),
    modifieddate timestamp not null default now(),
    
    twitter_user_id text unique not null references twitter_user(id),
    following_ids text[] not null
);
CREATE TRIGGER twitter_user_following_modified_%(postfix)s BEFORE UPDATE
ON twitter_user_following_%(postfix)s FOR EACH ROW
EXECUTE PROCEDURE ts_modifieddate();
"""
# Create one table per suffix returned by time_utils for (now, 50),
# committing after each so every table is persisted independently.
for year_week_st in time_utils.year_weeknum_strs(datetime.now(), 50):
    create_sql = twitter_user_following % dict(postfix=year_week_st)
    postgres_handle.execute_query(create_sql, return_results=False)
    postgres_handle.connection.commit()

################################################
##twitter_reduction
################################################    
twitter_reduction = """
create table twitter_reduction(
    createddate timestamp not null default now(),
    modifieddate timestamp not null default now(),
    
    id serial unique,
    root_user_id text not null references twitter_user(id),
    user_ids text[] not null,
    x_coordinates real[] not null,
示例#4
0
from datetime import datetime, timedelta
import psycopg2
from smarttypes.utils.postgres_handle import PostgresHandle
postgres_handle = PostgresHandle(smarttypes.connection_string)


################################################
##get rid of old connections
##we have db dumps, so we do have an archive
##if ever needed
################################################

retention_days = 30 * 4  # about 4 months
delete_before_this_date = datetime.now() - timedelta(days=retention_days)

# Delete users whose following ids were last loaded before the cutoff.
# NOTE(review): if the weekly twitter_user_following_* tables reference
# twitter_user(id) without ON DELETE CASCADE, this delete can be rejected
# while old weekly tables still hold those users -- confirm the ordering.
sql = """
delete from twitter_user 
where last_loaded_following_ids < %(delete_before_this_date)s;"""
postgres_handle.execute_query(
    sql, {'delete_before_this_date': delete_before_this_date},
    return_results=False)
postgres_handle.connection.commit()

# Drop the weekly tables for the suffixes time_utils returns when asked for
# 20 entries going backwards from one week before the cutoff (helper
# semantics are not visible here -- confirm).
# IF EXISTS keeps the script re-runnable: a table already dropped by an
# earlier run, or never created, no longer aborts the cleanup.
# The suffix is formatted into the statement because a table name cannot be
# passed as a bound query parameter.
sql = """drop table if exists twitter_user_following_%(postfix)s;"""
for year_week_st in time_utils.year_weeknum_strs(
        delete_before_this_date - timedelta(days=7), 20, forward=False):
    postgres_handle.execute_query(
        sql % {'postfix': year_week_st}, return_results=False)
    # Commit per table so each drop is persisted on its own.
    postgres_handle.connection.commit()
示例#5
0
################################################
##get rid of old connections
##we have db dumps, so we do have an archive
##if ever needed
################################################

retention_days = 30 * 4  #about 4 months
delete_before_this_date = datetime.now() - timedelta(days=retention_days)

# Remove users whose following ids were last loaded before the cutoff date.
# NOTE(review): if the weekly twitter_user_following_* tables reference
# twitter_user(id) without ON DELETE CASCADE, this delete can be rejected
# while old weekly tables still hold those users -- confirm the ordering.
sql = """
delete from twitter_user 
where last_loaded_following_ids < %(delete_before_this_date)s;"""
postgres_handle.execute_query(
    sql, {'delete_before_this_date': delete_before_this_date},
    return_results=False)
postgres_handle.connection.commit()

# Drop the weekly tables older than the cutoff.  The suffixes are whatever
# time_utils returns for 20 entries going backwards from one week before the
# cutoff (helper semantics are not visible here -- confirm).
# "if exists" makes the drop tolerant of tables that a previous run already
# removed or that were never created, so the script can be run repeatedly.
sql = """drop table if exists twitter_user_following_%(postfix)s;"""
oldest_kept_week = delete_before_this_date - timedelta(days=7)
for year_week_st in time_utils.year_weeknum_strs(oldest_kept_week,
                                                 20,
                                                 forward=False):
    # Table names cannot be bound as parameters, hence the string formatting.
    postgres_handle.execute_query(sql % {'postfix': year_week_st},
                                  return_results=False)
    postgres_handle.connection.commit()