示例#1
0
def get_paths ():
    """
    PURPOSE: Determine the path names of critical directories,
    create the data directories if they do not already exist, and
    publish the results as environment variables.

    Environment variables read:
      BSF_LENGTH -- 'long' selects the data/input-long and
                    data/output-long directories; any other value (or
                    an unset variable) selects the '-short' ones.
      BSF_ENV    -- 'production' selects the fixed production location
                    of the Rails public directory; otherwise
                    <home>/bsf/public is used.
      HOME       -- home directory for the non-production Rails path.

    Environment variables set:
      BSF_ROOT, BSF_INPUT, BSF_OUTPUT, BSF_DETAILED, BSF_RAILS_PUBLIC

    Returns None.
    """
    import os

    # abspath() collapses the '..' components textually, so this is the
    # directory one level ABOVE the directory that contains this file.
    dir_root = os.path.abspath(__file__ + "/../../")

    # Get the path names of the directories the script will work with.
    # .get() keeps an unset BSF_LENGTH on the default (short) data set
    # instead of raising KeyError.
    if os.environ.get('BSF_LENGTH') == 'long':
        length = 'long'
    else:
        length = 'short'
    dir_detailed = dir_root + '/data/detailed'  # Shared by short and long
    dir_input = dir_root + '/data/input-' + length
    dir_output = dir_root + '/data/output-' + length

    # Get the path name of the public directory in the rails app.
    if os.environ.get('BSF_ENV') == 'production':
        dir_rails_public = '/home/doppler/webapps/bsf/current/public'
    else:
        # HOME is only needed outside production; fall back to the
        # platform's notion of the home directory when it is not set.
        dir_home = os.environ.get('HOME') or os.path.expanduser('~')
        dir_rails_public = dir_home + '/bsf/public'
    # Exported right away, ahead of the project import further down.
    os.environ['BSF_RAILS_PUBLIC'] = dir_rails_public

    # Print the names of the directories
    print("Input data directory: " + dir_input)
    print("Output data directory: " + dir_output)
    print("Directory for detailed fund data: " + dir_detailed)
    print("Directory containing 'public' files in Rails: " + dir_rails_public)

    # Create the directories if they do not already exist.
    # (The Rails public directory is not created here.)
    from modules import common
    common.create_dir(dir_input)
    common.create_dir(dir_output)
    common.create_dir(dir_detailed)

    # Save the remaining results in environment variables
    # (BSF_RAILS_PUBLIC was already exported above).
    os.environ['BSF_ROOT'] = dir_root
    os.environ['BSF_INPUT'] = dir_input
    os.environ['BSF_OUTPUT'] = dir_output
    os.environ['BSF_DETAILED'] = dir_detailed
示例#2
0
def download ():
    """
    Download detailed data on funds from Yahoo Finance.

    Steps, in order:
      1. Write the unfiltered list of funds to
         $BSF_OUTPUT/fund_unfiltered.csv (db.print_csv).
      2. Filter the list of funds (db.filter_by_fundtype,
         db.filter_by_obj, db.filter_by_name) and renumber it.
      3. For every symbol returned by db.get_symbols, save the Yahoo
         Finance profile page, holdings page and quote CSV under
         $BSF_DETAILED/<symbol>/.

    Requires the BSF_OUTPUT and BSF_DETAILED environment variables
    (KeyError if either is missing).  Progress is printed after the
    10th fund and after every 100th fund.

    Returns None.
    """
    import time, os
    import psycopg2.extras
    from modules import db
    from modules import common

    dir_output = os.environ['BSF_OUTPUT']
    file_csv_output = dir_output + '/fund_unfiltered.csv'

    # Each phase uses its own connection; try/finally guarantees the
    # connection is closed even when a step raises.
    # NOTE(review): the 60 passed to db.connect is presumably a timeout
    # in seconds -- confirm against modules.db.
    conn = db.connect(60)
    try:
        cur2 = conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor)
        print("Creating unfiltered list of funds at:")
        print(file_csv_output)
        db.print_csv(conn, cur2, file_csv_output)
    finally:
        conn.close()

    print("Filtering the list of funds")
    conn = db.connect(60)
    try:
        cur1 = conn.cursor()

        # Filter the list of funds
        db.filter_by_fundtype(conn, cur1)
        db.filter_by_obj(conn, cur1)
        db.filter_by_name(conn, cur1)

        db.renumber(conn, cur1)  # Reset ID numbers, DOES NOT WORK
    finally:
        conn.close()

    print("************************************************")
    print("Downloading the detailed data on all stock funds")
    print("NOTE: This may be a VERY long process.")

    # Get list of symbols
    conn = db.connect(60)
    try:
        cur2 = conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor)
        list_symbols = db.get_symbols(cur2)
    finally:
        conn.close()

    i_max = len(list_symbols)  # Total number of funds
    dir_detailed = os.environ['BSF_DETAILED']  # Same for every symbol
    start = time.time()

    # i counts the funds completed so far (1-based).
    for i, symbol in enumerate(list_symbols, 1):
        dir_symbol = dir_detailed + '/' + symbol
        common.create_dir(dir_symbol)

        url1 = 'http://finance.yahoo.com/q/pr?s=' + symbol + '+Profile'
        url2 = 'http://finance.yahoo.com/q/hl?s=' + symbol + '+Holdings'
        url3 = 'http://finance.yahoo.com/d/quotes.csv?s=' + symbol + '&f=l1'

        file1 = dir_symbol + '/profile.html'
        file2 = dir_symbol + '/holdings.html'
        file3 = dir_symbol + '/quote.csv'

        # NOTE(review): the meaning of the last two arguments is defined
        # by common.download_file and is not visible here -- confirm.
        common.download_file(url1, file1, 164, .2)
        common.download_file(url2, file2, 164, .2)
        common.download_file(url3, file3, 20, .002)

        # Progress report at the 10th fund and at every 100th fund.
        if i == 10 or i % 100 == 0:
            print("Download completion: " + str(i) + '/' + str(i_max))
            t_elapsed = time.time() - start
            # Guard the divisions explicitly rather than hiding every
            # possible error behind a bare except.
            if t_elapsed > 0:
                rate_s = i / t_elapsed  # Funds/second
                remain_s = (i_max - i) / rate_s
                remain_m = round(remain_s / 60, 1)
                print("Minutes remaining: " + str(remain_m))

    print("Finished downloading detailed data on stock funds")
    print("*************************************************")