Exemplo n.º 1
0
def populate_account_ids(path):
    """Fill in missing Twitter ids in the ``accounts`` table.

    Reads every ``(id_str, screen_name)`` row, looks up the screen names
    whose id is NULL or empty through ``screen_name_lookup`` and writes the
    returned ids back. Rows are matched on screen name case-insensitively.

    Args:
        path: Path of the SQLite database file.

    Returns:
        None. Returns early, without calling ``screen_name_lookup``, when no
        account is missing an id.
    """
    print('\nPopulating empty account ids...')
    con = sqlite.connect(path)
    try:
        cur = con.cursor()

        # Collect the screen names of accounts whose id is NULL or empty.
        # Rows without a screen name are skipped: there is nothing to look up.
        cur.execute('SELECT id_str, screen_name FROM accounts;')
        scr_names = [
            screen_name
            for id_str, screen_name in cur.fetchall()
            if (id_str is None or id_str == '') and screen_name
        ]
        if not scr_names:
            # Nothing to fill in; avoid a pointless remote lookup.
            return

        # Fetch users from Twitter.
        users = screen_name_lookup(scr_names)

        # (id, lower-cased screen name) per user. Users lacking either value
        # are dropped so that ``.lower()`` is never called on None.
        updates = [
            (u.get('id_str'), u.get('screen_name').lower())
            for u in users
            if u.get('id_str') and u.get('screen_name')
        ]

        # COLLATE NOCASE makes the screen-name match case-insensitive.
        sql = "UPDATE accounts SET id_str=? WHERE screen_name=? COLLATE NOCASE;"
        cur.executemany(sql, updates)

        # Single commit for the whole batch instead of one per row.
        con.commit()
    finally:
        # Always release the connection, including on errors.
        con.close()
Exemplo n.º 2
0
def _build_table_sql(table_name, spec):
    """Build the SQL that creates one table from its ``DB_SPEC`` entry.

    Args:
        table_name: Table name, used verbatim in the generated SQL.
        spec: The table's spec dict (optional keys ``'primary_key'``,
            ``'indexes'``, ``'fields'``, ``'foreign_keys'``) or None when
            the table has no spec.

    Returns:
        A ``(create_sql, extra_statements)`` tuple. ``create_sql`` is the
        ``CREATE TABLE IF NOT EXISTS`` statement; ``extra_statements`` is a
        list of ``CREATE UNIQUE INDEX`` statements to run after it.
    """
    spec = spec or {}
    primary_key = spec.get('primary_key')
    indexes = spec.get('indexes')
    fields = spec.get('fields')
    foreign_keys = spec.get('foreign_keys')

    columns = []           # column definitions
    pk_constraints = []    # table-level PRIMARY KEY clause (composite keys)
    fk_constraints = []    # table-level FOREIGN KEY clauses
    extra_statements = []  # statements executed after CREATE TABLE

    # Primary key: a single column is declared inline, several columns
    # become a table-level constraint.
    if primary_key is not None:
        if len(primary_key) == 1:
            # Datatype is looked up by the name as written in the spec and
            # defaults to 'TEXT'; the column name itself is lower-cased.
            dtype = DB_DATATYPES.get(primary_key[0], 'TEXT')
            columns.append('{f} {dt} PRIMARY KEY'.format(
                f=primary_key[0].lower(), dt=dtype))
        elif len(primary_key) > 1:
            # The key columns themselves are expected to be declared through
            # 'fields' or 'foreign_keys'; only the constraint is added here.
            fs = ','.join(f.lower() for f in primary_key)
            pk_constraints.append('PRIMARY KEY ({fs})'.format(fs=fs))

    # Indexes: a value of None declares a new UNIQUE column; otherwise the
    # value lists existing columns to cover with a named unique index.
    if indexes is not None:
        for ind, fs in indexes.items():
            if fs is None:
                dtype = DB_DATATYPES.get(ind, 'TEXT')
                columns.append('{f} {dt} UNIQUE'.format(
                    f=ind.lower(), dt=dtype))
            else:
                extra_statements.append(
                    'CREATE UNIQUE INDEX IF NOT EXISTS {n} ON {t} ({cols});'.format(
                        n=ind, t=table_name, cols=','.join(fs)))

    # Plain fields.
    if fields is not None:
        for field in fields:
            dtype = DB_DATATYPES.get(field, 'TEXT')
            columns.append('{f} {dt}'.format(f=field.lower(), dt=dtype))

    # Foreign keys: the column is always created; the constraint is added
    # only when both the referenced table and column are specified.
    if foreign_keys is not None:
        for field, params in foreign_keys.items():
            ref_table = params.get('refers_to_table', None)
            ref_col = params.get('refers_to_column', None)
            dtype = DB_DATATYPES.get(field, 'TEXT')
            field = field.lower()
            columns.append('{f} {dt}'.format(f=field, dt=dtype))

            # Check for None *before* lower-casing so that an incomplete
            # reference is skipped instead of raising AttributeError.
            # @TO-DO: should probably raise an error here
            if ref_table is not None and ref_col is not None:
                fk_constraints.append(
                    'FOREIGN KEY({f}) REFERENCES {rt}({rc})'.format(
                        f=field, rt=ref_table.lower(), rc=ref_col.lower()))

    # Table constraints must follow every column definition.
    cols_spec = ', '.join(columns + pk_constraints + fk_constraints)
    create_sql = "CREATE TABLE IF NOT EXISTS {t} ({c});".format(
        t=table_name, c=cols_spec)
    return create_sql, extra_statements


def _create_tables(path):
    """Create every table in ``DB_TABLES_LIST`` that does not exist yet.

    Tables are created in list order and committed one at a time. On
    failure the offending SQL is printed, the transaction is rolled back
    and the exception is re-raised.
    """
    con = sqlite.connect(path)
    try:
        cur = con.cursor()
        # Use DB_TABLES_LIST to ensure proper order of table creation.
        for table_name in DB_TABLES_LIST:
            sql, extra_statements = _build_table_sql(
                table_name, DB_SPEC.get(table_name, None))
            try:
                cur.execute(sql)
                for stmt in extra_statements:
                    cur.execute(stmt)
                con.commit()
            except Exception:
                print("Error ensuring existence of necessary table")
                print(sql)
                con.rollback()
                raise
    finally:
        con.close()


def _insert_teams(cur, acct_table):
    """Insert (or replace) one ``teams`` row per account of type 'official'.

    ``acct_table`` is expected to be lower-cased already.
    """
    team_sql = 'INSERT OR REPLACE INTO teams(team_name, league) VALUES(?, ?);'
    for t in acct_table:
        # Only official/team accounts define a team.
        if t[INDEX_TYPE] != 'official':
            continue
        name = t[INDEX_NAME]
        cluster = t[INDEX_CLUSTER]
        try:
            cur.execute(team_sql, (name, cluster))
        except Exception:
            # Print name and cluster that caused the exception.
            print('handling {t}, {c}'.format(t=name, c=cluster))
            raise


def _fill_missing_ids(acct_table):
    """Fill ``INDEX_ID`` in place for accounts whose id is None.

    Screen names of id-less accounts are sent to ``screen_name_lookup``.
    Results are matched back on the lower-cased screen name, because
    ``acct_table`` is lower-cased and the lookup may return names in a
    different case.
    """
    print('        Collecting account without IDs...')
    missing = [acc[INDEX_SCREEN_NAME] for acc in acct_table
               if acc[INDEX_ID] is None]
    if not missing:
        # Nothing to look up; skip the remote call.
        return

    print('        Querying Twitter for missing IDs...')
    ids_by_name = {}
    for user in screen_name_lookup(missing):
        screen_name = user.get('screen_name')
        if screen_name is not None:
            ids_by_name[screen_name.lower()] = user.get('id_str')

    # Put results from Twitter into the table.
    for acc in acct_table:
        if acc[INDEX_SCREEN_NAME] in ids_by_name:
            acc[INDEX_ID] = ids_by_name[acc[INDEX_SCREEN_NAME]]


def _insert_accounts(cur, acct_table):
    """Insert (or replace) one ``accounts`` row per entry of ``acct_table``."""
    account_sql = 'INSERT OR REPLACE INTO accounts(id_str, cluster, type, screen_name, team_name, description) VALUES(?, ?, ?, ?, ?, ?);'
    for acc in acct_table:
        id_str = acc[INDEX_ID]
        cluster = acc[INDEX_CLUSTER]
        acc_type = acc[INDEX_TYPE]
        screen_name = acc[INDEX_SCREEN_NAME]
        name = acc[INDEX_NAME]
        description = acc[INDEX_DESCRIPTION]
        try:
            cur.execute(account_sql, (id_str, cluster, acc_type, screen_name,
                                      name, description))
        except Exception:
            # Print the data that caused the exception.
            print('handling: ')
            print('    id_str: ', id_str)
            print('    cluster: ', cluster)
            print('    type: ', acc_type)
            print('    screen_name:', screen_name)
            print('    name: ', name)
            print('    description: ', description)
            raise


def initialize_db(path):
    """Create the schema and seed teams and accounts in the SQLite database.

    Steps, in order:
      1. Create every table listed in ``DB_TABLES_LIST`` from ``DB_SPEC``.
      2. Insert a team for every 'official' account in ``ACCOUNTS``.
      3. Look up ids for accounts that have none, then insert all accounts.
      4. Call ``populate_account_ids`` and ``create_views`` on the database.

    Args:
        path: Path of the SQLite database file.

    Raises:
        Exception: Any error raised while executing SQL is re-raised after
            the failing statement or data has been printed.
    """
    print('Initializing database...')
    # NOTE(review): SQLite applies ``PRAGMA foreign_keys`` to the issuing
    # connection only, so this setting does not carry over to the
    # connections opened below. The step is kept unchanged (it also makes
    # sure the database file exists); enabling enforcement for the inserts
    # would be a behavior change to decide on separately.
    con = sqlite.connect(path)
    try:
        con.cursor().execute('PRAGMA foreign_keys = ON;')
        con.commit()
    finally:
        con.close()

    # Ensure required tables are in place.
    print('    Verifying tables...')
    _create_tables(path)

    # ----------------------------------
    # Ensure teams and accounts are populated in DB
    # ----------------------------------
    print('    Populating teams...')
    # Lower-cased working copy of ACCOUNTS (None entries are preserved).
    acct_table = [[(None if s is None else s.lower()) for s in acc]
                  for acc in ACCOUNTS]

    con = sqlite.connect(path)
    try:
        cur = con.cursor()

        _insert_teams(cur, acct_table)
        # Commit all team additions to the DB.
        con.commit()

        print('    Verifying accounts table...')
        _fill_missing_ids(acct_table)

        print('        Inserting missing accounts...')
        _insert_accounts(cur, acct_table)
        # Commit all account additions to the DB.
        con.commit()
    finally:
        # Release the connection even when an insert fails.
        con.close()

    populate_account_ids(path)

    create_views(path)
Exemplo n.º 3
0
    # NOTE(review): this is the body of a function whose header is not
    # visible here. It fills in missing account ids and then collects all
    # account ids from the database at DB_PATH.

    # connect to DB
    con = sqlite.connect(DB_PATH)
    cur = con.cursor()

    # Query DB for list of accounts
    cur.execute('SELECT id_str, screen_name FROM accounts;')

    # Keep each row as an (id_str, screen_name) tuple
    accounts = cur.fetchall()
    accounts = [(x[0], x[1]) for x in accounts]

    # check for empty accounts
    for acc in accounts:
        # Only a NULL id triggers a lookup; an empty-string id does not.
        if acc[0] is None:
            print('\n' + ('*'*70))
            # NOTE(review): the message says screen_name, but the value
            # that is missing here is id_str.
            print('Missing screen_name in accounts')
            # One lookup call per account that lacks an id.
            acc_list = screen_name_lookup([acc[1]])
            # NOTE(review): indexing [0] raises IndexError if the lookup
            # returns an empty list -- confirm screen_name_lookup's contract.
            acc_id = acc_list[0].get('id_str', None)

            # update DB with account id
            d = (acc_id, acc[1])
            # Exact match on screen_name (no COLLATE NOCASE in this query).
            sql = 'UPDATE accounts SET id_str=? WHERE screen_name=?'
            print('Updating database with:')
            print('    SQL: ', sql)
            print('    data:', d)
            cur.execute(sql, d)
            # Commit after each individual update.
            con.commit()

    # Get list of account ids from DB
    cur.execute('SELECT id_str FROM accounts;')
    account_ids = cur.fetchall()
    # Flatten the one-column rows into a plain list of ids.
    account_ids = [x[0] for x in account_ids]