示例#1
0
def command():
    """Entry point: parse the command line and dispatch.

    Without a CSV import argument the GUI is started through ``main()`` and
    its result is returned. Otherwise the database is connected (the named
    one if a name was given, the default one if not) and the movies in the
    CSV file are imported. Invalid import data is reported by printing the
    exception.
    """
    options = command_line_args()
    csv_path = options.import_csv

    # No import requested: hand over to the GUI.
    if not csv_path:
        return main()

    # An empty string is a valid SQLAlchemy non-default value for the database name.
    db_name = options.database
    use_named_database = db_name == '' or db_name

    if options.verbosity >= 1:
        print(f"Running {__file__}")
        print(f'Loading movies from {csv_path}')
        print(f"Adding movies to database '{db_name}'."
              if use_named_database
              else "Adding movies to the default database.")

    # The name is passed positionally only when one was supplied.
    connect_args = (db_name,) if use_named_database else ()
    database.connect_to_database(*connect_args)

    try:
        impexp.import_movies(csv_path)
    except impexp.MoviedbInvalidImportData as exc:
        print(exc)
示例#2
0
def start_up():
    """Initialize the program.

    Starts the logger, creates the application configuration object and
    opens the default database.
    """
    # moviedb-#84 Load the config object
    # Start the logger.
    root_dir, program_name = os.path.split(__file__)
    # splitext copes with file names that contain no dot or several dots;
    # unpacking str.split('.') into two names raised ValueError for those.
    program, _ = os.path.splitext(program_name)
    start_logger(root_dir, program)

    # Initialize application configuration data.
    config.app = config.Config(program, VERSION)

    # Open the default database
    database.connect_to_database()
示例#3
0
def loaded_database(hamlet, solaris, dreams, revanche):
    """Connect to a ':memory:' database and load four movies and three tags."""
    database.connect_to_database(filename=':memory:')

    # noinspection PyProtectedMember
    movie_records = []
    for fields in (hamlet, solaris, dreams, revanche):
        movie_records.append(database.Movie(**fields))

    # noinspection PyProtectedMember
    with database._session_scope() as session:
        session.add_all(movie_records)

    # Each tag is created and then linked to its movies, in this order.
    tag_links = (
        ('blue', (('Hamlet', 1996),)),
        ('yellow', (('Revanche', 2008),)),
        ('green', (('Revanche', 2008), ('Solaris', 1972))),
    )
    for tag, movie_keys in tag_links:
        database.add_tag(tag)
        for title, year in movie_keys:
            database.add_movie_tag_link(
                tag, database.MovieKeyTypedDict(title=title, year=year))
示例#4
0
def get_location_distribution_one_brand(table, brand):
    """ Count, per sale location, the cars of one brand in a table.

    For every location returned by database.get_locations(table), the rows
    of the table whose Model contains the brand and whose Location contains
    that location are counted. The counts are returned as a pandas Series
    indexed by location name.
    """
    # connection used for all per-location queries
    _cursor, connection = database.connect_to_database()

    # all the different locations present in the specified table
    all_locations = database.get_locations(table)

    names = []
    counts = []
    for _index, row in all_locations.iterrows():
        place = row.location
        # rows matching both the brand and the current location
        sql = ("SELECT * FROM " + table
               + " WHERE Model LIKE '%%%" + brand
               + "%%%' AND Location LIKE '%%%" + place
               + "%%%';")
        matches = pandas.read_sql_query(sql, connection)
        names.append(smart_str(place))
        counts.append(len(matches))

    return pandas.Series(counts, index=names)
示例#5
0
def get_location_distribution_all_brands(table):
    """ Calculate the distribution of all brands based on sale locations.

    For every location returned by database.get_locations(table), the rows
    of the table whose Location contains that location are counted. A pandas
    Series with these counts, indexed by location name, is returned.
    """
    # get all the different locations present in the specified table
    locations = database.get_locations(table)

    # Connect once and reuse the connection for every location instead of
    # opening a new connection on each loop iteration.
    _cursor, con = database.connect_to_database()

    # calculate the distribution
    index_names = []
    distribution = []
    for _index, location in locations.iterrows():
        # find all the rows for each location
        query = "SELECT * FROM " + table + " WHERE Location LIKE '%%%" \
            + location.location + "%%%';"
        result = pandas.read_sql_query(query, con)

        index_names.append(smart_str(location.location))
        distribution.append(len(result))

    return pandas.Series(distribution, index=index_names)
示例#6
0
def get_distribution_one_brand(tablename, brand):
    """ Build the model frequency distribution for one car brand.

    Selects the rows of the given table whose Model contains the brand and
    whose CARTYPES matches 'STATIONCAR', simplifies the model names and
    passes them to create_distribution with the columns 'Model', 'Number'
    and 'Percentage'. An empty list is returned when no row matches.
    """
    _cursor, connection = database.connect_to_database()
    sql = ("SELECT * FROM " + tablename
           + " WHERE Model LIKE '%%%" + brand
           + "%%%' AND CARTYPES LIKE 'STATIONCAR'")
    rows = pandas.read_sql_query(sql, connection)

    model_names = rows.Model
    total = len(model_names)
    # nothing to distribute when the brand has no rows
    if not total:
        return []

    # dont distinguish models on stuff like 5doors or 4doors, and engine size
    simplified = simplify_model_names(model_names)
    return create_distribution(
        simplified, ['Model', 'Number', 'Percentage'], total)
示例#7
0
def create_car_brand_table(conn):
    """ The method creates a database table of car brands.

    It requests the front page over the given HTTP connection, reads the
    car brands from the "Alle mærker" option group and stores them in a
    freshly created table called 'Brands' (an existing 'Brands' table is
    deleted first). The method is called each time the method
    download_data_to_database is called.
    """
    conn.request("GET", "/")
    res = conn.getresponse()
    content = res.read()
    parsed_html = BeautifulSoup(content, from_encoding='utf8')
    cur, con = database.connect_to_database()

    # delete the old table
    if database.check_if_table_exist(cur, 'Brands'):
        database.delete_table(cur, 'Brands')
    cur.execute("CREATE TABLE Brands(Brand CHAR(100))")

    # extract car brands and insert to database table
    brands_tags = parsed_html.find("optgroup", {"label": "Alle mærker"})
    for child in brands_tags.children:
        # A child without a single string would otherwise be stored as the
        # text 'None'.
        if child.string is None:
            continue
        brand = smart_str(child.string)
        if any(c.isalpha() for c in brand):
            # Let the driver quote the scraped text instead of formatting it
            # into the SQL, so a quote in a brand name cannot break the
            # statement.
            # NOTE(review): assumes the cursor uses the '%s' ("format")
            # paramstyle, as MySQLdb does - confirm against
            # database.connect_to_database.
            cur.execute("INSERT INTO Brands(Brand) VALUES(%s)", (brand,))
    database.commit(cur, con)
示例#8
0
def handle_tweet(tweet_object, n_tweets):
    """Insert a converted tweet into the 'tweets' table.

    Tweets flagged by test_for_retweet() are ignored. ``n_tweets`` is
    accepted but not used here.
    """
    tweet, user = convert_tweet_object(tweet_object)

    # Flagged tweets are not stored.
    if test_for_retweet(tweet):
        return

    record = create_output(tweet, user)
    # Connect to database
    connection = connect_to_database('tweets01')
    insert_single(connection, 'tweets', record)
示例#9
0
def main(rootdir, name, blacklist=None):
    """Index the files under ``rootdir`` for the device called ``name``.

    Sets up the tables, registers the device, indexes the files (passing
    ``blacklist`` through to index_files) and commits. The connection is
    closed whether or not indexing succeeds.
    """
    # Connect before entering the try block: if the connection cannot be
    # opened there is nothing to close, and the finally clause must not fail
    # on an unbound name and hide the real error.
    conn = connect_to_database()
    try:
        setup_tables(conn)
        device_id = add_device(conn, (name, ))
        index_files(conn, rootdir, device_id, blacklist=blacklist)
        conn.commit()
    finally:
        conn.close()
    def test_check_if_table_exist(self):
        """ check_if_table_exist is False for "test" and True for the fake table. """
        cursor, _connection = database.connect_to_database()

        # a non-existing table must not be reported
        self.assertFalse(database.check_if_table_exist(cursor, "test"))

        # the fake table is expected to be present
        self.assertTrue(
            database.check_if_table_exist(cursor, "AllCars010115"))
 def test_delete_table(self):
     """ Test the delete_table function of the database module. """
     cursor, _connection = database.connect_to_database()

     # drop the fake table and verify that it is no longer reported
     database.delete_table(cursor, 'AllCars010115')
     self.assertFalse(
         database.check_if_table_exist(cursor, 'AllCars010115'))

     # create the table again for the rest of the tests
     self.setUp()
示例#12
0
def get_most_expensive_cars(table):
    """ Find the most expensive car(s) in the given table.

    Returns a pandas DataFrame containing the entire database row of every
    car whose price equals the highest price in the table.
    """
    _cursor, con = database.connect_to_database()
    # Adjacent string literals keep the SQL free of the source indentation
    # that a backslash continuation inside a literal embeds in the query.
    query = ("SELECT t.* FROM " + table + " t WHERE t.price = "
             "(select max(subt.price) from " + table + " subt);")
    return pandas.read_sql_query(query, con)
示例#13
0
文件: lsa.py 项目: ochik100/Hops
    def singular_value_decomposition(self, n_components):
        """Run an SVD on the TF-IDF features and publish similarity data.

        Steps, as performed below:
          1. Build a RowMatrix from the transposed 'features' vectors of
             self.tfidf and call computeSVD with ``n_components``.
          2. Store V in self.vt and the result of cosine_similarity on V in
             self.similarity_matrix.
          3. For every row of the similarity matrix keep the indices of its
             six largest values and join them onto self.tfidf by 'id'.
          4. Write every row of the joined self.tfidf to the 'beers' node of
             the database returned by database.connect_to_database().

        Side effects: sets self.vt, self.similarity_matrix,
        self.five_most_similar_beers, self.token and self.db, replaces
        self.tfidf with the joined frame and prints debug output.
        Nothing is returned.
        """
        # mat = RowMatrix(self.spark_context.parallelize(np.asarray(self.tfidf.select(
        #     'id', 'features').rdd.map(lambda row: row[1].toArray()).collect()).T))

        # Keep only the feature vector (second selected column) of each row.
        rdd = self.tfidf.select(
            'id', 'features').rdd.map(lambda row: row[1].toArray())
        rdd.persist(ps.StorageLevel.MEMORY_AND_DISK)
        mat = RowMatrix(self.rdd_transpose(rdd))

        # rdd_ = self.tfidf.select(
        #     'id', 'features').rdd
        # rdd_ = rdd_.map(lambda row: row[1].toArray())
        svd = computeSVD(mat, n_components, computeU=True)
        # Debug output (Python 2 print statements).
        print svd.U.numCols(), svd.U.numRows()
        print type(svd.V)
        self.vt = svd.V
        self.similarity_matrix = cosine_similarity(svd.V.toArray())

        # argsort ascending, reversed, first six: the indices of the six
        # largest similarities per row, best first. The first index is used
        # as 'id' - presumably the row itself, since self-similarity should
        # be the maximum; TODO confirm.
        self.five_most_similar_beers = self.sql_context.createDataFrame(
            map(lambda x: np.argsort(x)[::-1][:6].tolist(),
                self.similarity_matrix),
            ['id', 'first', 'second', 'third', 'fourth', 'fifth'])

        self.tfidf = self.tfidf.join(self.five_most_similar_beers, ['id'],
                                     'inner')

        self.token, self.db = database.connect_to_database()

        # use default arguments to avoid closure of the environment of the token and db variables
        def save_to_firebase(x, token=self.token, db=self.db):
            # Fields copied from the row into the stored record.
            data = {
                'brewery_name': x.brewery_name,
                'beer_name': x.beer_name,
                'state': x.state,
                'beer_style': x.beer_style,
                'first': x.first,
                'second': x.second,
                'third': x.third,
                'fourth': x.fourth,
                'fifth': x.fifth,
                'top1': x.top1,
                'top2': x.top2,
                'top3': x.top3,
                'top4': x.top4,
                'top5': x.top5,
                'top6': x.top6,
                'top7': x.top7
            }
            # name = {'brewery_name': x.brewery_name, 'beer_name': x.beer_name}
            # The record is stored under beers/<row id>.
            db.child('beers').child(x.id).set(data, token)
            # db.child('beer_names').child(x.id).set(name, token)
            # sleep.(0.1)

        # Save each row of the joined frame.
        self.tfidf.rdd.foreach(lambda x: save_to_firebase(x))
示例#14
0
def get_fastest_cars(table):
    """ Find the fastest car(s) in the given table.

    The method finds the car(s) with the lowest positive ``kmt`` value
    (0-100 km/t acceleration time); rows with kmt <= 0 are ignored.
    The result is returned as a pandas DataFrame containing the
    entire database row of that car.
    """
    _cursor, con = database.connect_to_database()
    # Adjacent string literals keep the SQL free of the source indentation
    # that a backslash continuation inside a literal embeds in the query.
    query = ("SELECT t.* FROM " + table + " t WHERE t.kmt = "
             "(select min(subt.kmt) from " + table + " subt where "
             "(subt.kmt > 0));")
    return pandas.read_sql_query(query, con)
示例#15
0
def test_init_database_access_with_existing_database(tmpdir):
    """Reattach to an existing database and check that date_last_accessed differs from date_created."""

    # Slow test: 5% chance of this test being run.
    # randint(0, 99) is uniform over 100 values, so only 0..4 may pass the
    # guard; '> 5' let six values through.
    if random.randint(0, 99) >= 5:
        return

    path = tmpdir.mkdir('tmpdir').join(database.database_fn)
    database.connect_to_database(filename=path)

    # Reattach to the same database
    database.connect_to_database(filename=path)

    with database._session_scope() as session:
        current = (session.query(database.MoviesMetaData.value)
                   .filter(database.MoviesMetaData.name == 'date_last_accessed')
                   .one())
        previous = (session.query(database.MoviesMetaData.value)
                    .filter(database.MoviesMetaData.name == 'date_created')
                    .one())
    assert current != previous
示例#16
0
def get_most_ecofriendly_cars(table):
    """ Find the most eco friendly car(s) in a given table.

    These are the car(s) with the highest positive ``kml`` value (KMs on a
    liter petrol). The result is returned as a pandas DataFrame containing
    the entire database row of that car.
    """
    _cursor, con = database.connect_to_database()
    # Adjacent string literals keep the SQL free of the source indentation
    # that a backslash continuation inside a literal embeds in the query.
    query = ("SELECT t.* FROM " + table + " t WHERE t.kml = "
             "(select max(subt.kml) from " + table + " subt where "
             "(subt.kml > 0));")
    return pandas.read_sql_query(query, con)
示例#17
0
def get_cheapest_cars(table):
    """ Find the cheapest car(s) in the given table.

    The method finds the car(s) with the lowest price above 0 DKK in the
    given table. The result is returned as a pandas DataFrame containing
    the entire database row of that car.

    NOTE(review): only ``price > 0`` is filtered; nothing here excludes
    leasing cars - confirm whether the table is already free of them.
    """
    _cursor, con = database.connect_to_database()
    # Adjacent string literals keep the SQL free of the source indentation
    # that a backslash continuation inside a literal embeds in the query.
    query = ("SELECT t.* FROM " + table + " t WHERE t.price = "
             "(select min(subt.price) from " + table + " subt where "
             "(subt.price > 0));")
    return pandas.read_sql_query(query, con)
def create_test_table():
    """ Create a test table to have specific data to test on.

    Does nothing when 'testTable' already exists. Otherwise the table is
    created, five fixed car rows are inserted and the changes are committed.
    """
    cur, con = database.connect_to_database()
    if database.check_if_table_exist(cur, 'testTable'):
        return

    database.query(
        cur,
        "CREATE TABLE testTable (Model CHAR(100), Link CHAR(100), "
        "Description MEDIUMTEXT, Kms INT(20), Year INT(4), "
        "Kml FLOAT(20), Kmt FLOAT(20), Moth CHAR(30), Trailer CHAR(30), "
        "Location CHAR(50), Price INT(20))")

    # One template for every row replaces five copies of the same statement.
    insert_template = (
        "INSERT INTO testTable(Model, Link, Description, Kms, Year, "
        "Kml, Kmt, Moth, Trailer, Location, Price) VALUES('%s', '%s', '%s', "
        "'%s','%s','%s','%s','%s','%s','%s','%s')")

    # Column order: Model, Link, Description, Kms, Year, Kml, Kmt, Moth,
    # Trailer, Location, Price.
    rows = (
        ('Audi A1', 'www.test.dk/audi1', 'Audi A1 description text',
         '100', '2000', '2', '6', '0', 'No trailer', 'Sjælland', '50000'),
        ('Audi A2', 'www.test.dk/audi2', 'Audi A2 description text',
         '200', '2001', '3', '7', '0', 'No trailer', 'Fyn', '60000'),
        ('Audi A3', 'www.test.dk/audi3', 'Audi A3 description text',
         '300', '2002', '4', '8', '0', 'No trailer', 'Bornholm', '70000'),
        ('Ford Mondeo', 'www.test.dk/ford1', 'Ford Mondeo description text',
         '400', '2003', '5', '12', '0', 'No trailer', 'Jylland', '8000'),
        ('Mercedes SLK', 'www.test.dk/mercedes1',
         'Mercedes SLK description text',
         '500', '2004', '2', '4', '0', 'No trailer', 'Fyn', '100000'),
    )
    for row in rows:
        database.query(cur, insert_template % row)

    database.commit(cur, con)
示例#19
0
def add_entry():
    """ Store the submitted song for the submitting user, then redirect to 'main'. """
    # Read the three form fields in order, with commas removed from each.
    username, link, song = (
        util.remove_commas_from_string(request.form[field])
        for field in ("name", "ytLink", "songName"))

    # The month is set to 12 only when Christmas mode is on and the form
    # carries the christmasSong field.
    if CHRISTMAS_MODE and "christmasSong" in request.form:
        month = 12
    else:
        month = None

    with database.connect_to_database() as db:
        user_id = database.get_userid(db, username)
        database.add_song(db, link, song, user_id, month=month)

    return redirect(url_for('main'))
示例#20
0
def get_distribution_all_brands(tablename):
    """ Build the frequency distribution of all car brands in a table.

    Reads every row of the given table, extracts the brands from the Model
    column and passes them to create_distribution with the columns 'Brand',
    'Number' and 'Percentage'; that result is returned.
    """
    _cursor, connection = database.connect_to_database()
    rows = pandas.read_sql_query("select * from " + tablename, connection)

    # the model column and how many entries it has
    model_column = rows.Model
    total = len(model_column)

    brands = extract_brands(model_column)
    return create_distribution(
        brands, ['Brand', 'Number', 'Percentage'], total)
示例#21
0
def main(playlist=PLAYLIST_CSV):
    """Load the songs of a playlist into the database and log a summary.

    Every playlist entry is added with add_song (single quotes are removed
    from the title first); entries marked as played are flagged with
    set_song_played. The number of songs before and after is logged.
    """
    entries = read_playlist(playlist)
    with connect_to_database() as db:
        size_before = get_songs_size(db)
        for url, played_literal, raw_title, user in entries:
            # the played flag is stored as a Python literal
            was_played = ast.literal_eval(played_literal)
            assert isinstance(was_played, bool)
            user_id = get_userid(db, user)
            title = raw_title.replace("'", '')
            if add_song(db, url, title, user_id):
                outcome = "Added "
            else:
                outcome = "Did not add "
            logging.info("%s %s by %s", outcome, title, user)
            if was_played:
                set_song_played(db, url)
                logging.info("Set song played: %s", title)
        size_after = get_songs_size(db)

    logging.info(
        "Populated database with playlist. Added %d songs. Database size: %d -> %d",
        size_after - size_before, size_before, size_after)
示例#22
0
def create_price_year_scatter(table, brand):
    """ The method creates a scatter plot.

    The method creates a scatter plot showing the coherence
    between prices and age (in production year) of the cars present
    on the given table, and saves it as 'img/price_year_scatter'.
    ``brand`` is either the literal 'all brands' or a text that the Model
    column must contain. Nothing is returned.
    """
    _cursor, con = database.connect_to_database()
    query = "SELECT * FROM " + table
    if brand != 'all brands':
        query += " WHERE Model LIKE '%%%" + brand + "%%%'"
    result = pandas.read_sql_query(query, con)
    y = result.Price
    x = result.Year

    # default x- and y-ranges cut off outliers for plots of all brands; they
    # are also kept when a specific brand has no rows, because min()/max()
    # of an empty column would raise ValueError
    xmin, xmax = 1950, 2016
    ymin, ymax = 0, 2500000

    # for specific brands, use the minimum and maximum values
    if brand != 'all brands' and len(result) > 0:
        xmin, xmax = min(x.values), max(x.values)
        ymin, ymax = min(y.values), max(y.values)

    plt.plot(x, y, 'o')
    plt.xlim(xmin, xmax)
    plt.ylim(ymin, ymax)
    plt.xlabel('Year')
    plt.ylabel('Price')
    plt.tight_layout()
    plt.savefig('img/price_year_scatter', transparent=True)
    plt.clf()
示例#23
0
def log(error="Undefined Error", error_type = "GENERIC", severity="INFO"):
    """Insert one entry into the ``logs`` table; never raises on failure.

    Parameters:
        error: the message text to store.
        error_type: category stored alongside the message.
        severity: severity label stored alongside the message.

    The row is written as (None, error_type, error, current local time,
    severity). Any failure (missing database module, connection or insert
    error) is reported on stdout instead of being raised.
    """
    current_time = time.asctime(time.localtime(time.time()))

    # Stays None until a connection really exists, so the cleanup below
    # never touches an unbound name when the import or the connect fails.
    db_connection = None
    try:
        from database import connect_to_database
        db_connection = connect_to_database()
        db = db_connection.cursor()

        db.execute('INSERT INTO logs VALUES (?, ?, ?, ?, ?)', (None, error_type, error, current_time, severity,) )
        db_connection.commit()

        #For debug purposes also print
        print ("REMOVE ME IN PROD: " + error)

    except Exception as err:
        #This will change for debugging it's print
        # str() keeps the report itself from failing on non-string details.
        print("Unable to add error to DB: " + str(err))

    finally:
        if db_connection is not None:
            db_connection.close()
示例#24
0
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import sys
import datetime
from flask import Flask, render_template, request
from suggestions import get_suggested_products
from database import connect as connect_to_database
from database.models import Client, Product, Feedback, Reunion

# Flask application object on which this module's routes are registered.
app = Flask(__name__)

# connect to mongodb
# Runs at import time, i.e. as soon as this module is loaded.
connect_to_database()

@app.route('/')
def index():
    """Serve the detail view of one fixed client at the site root."""
    landing_client_id = '003b000000KqjT8AAJ'
    return client_detail(landing_client_id)

@app.route('/list')
def list_clients():
    """Render 'list_clients.html' with the result of Client.objects()."""
    return render_template('list_clients.html', clients=Client.objects())

@app.route('/clients/<client_id>')
def client_detail(client_id):
    client = Client.objects.get(contact_id=client_id)

    # suggestions
    raw_suggestions = get_suggested_products(client)
    suggestions = map(lambda x: (Product.objects.get(product_id=x[0]), x[1]), raw_suggestions)
示例#25
0
    return

    
    

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='Scrape data from NYMag.')
    parser.add_argument('--scrape_url', '-s', dest='scrape_url', 
                        default=None, help='url to scrape restaurants from')
    parser.add_argument('--fetch_reviews', '-f', dest='fetch_reviews', type=int, 
                        default=None, help='number of reviews to fetch')
    parser.add_argument('--clear', '-c', dest='clean', type=int, 
                        default=None, help='clean up the database')
    args = parser.parse_args()

    db, connection = connect_to_database()

    try:
        url = args.scrape_url
        if url != None:
            get_new_restaurants(db, url)

        reviews_to_fetch = args.fetch_reviews
        if reviews_to_fetch != None:
            get_reviews(db, reviews_to_fetch)

        num_to_clean = args.clean
        if num_to_clean != None:
            clean(db, num_to_clean)

    except (pymongo.errors.ConnectionFailure, pymongo.errors.InvalidName) as err:
示例#26
0
def calculate_best_offer(table, model):
    """ Calculate the best offer for a given car model.

    Every whitespace-separated word of ``model`` must occur in the Model
    column. Cars priced 0 or 175000 and above are left out. For the
    remaining cars a linear regression is fitted with the description rank
    (from analyze_description), mileage and year as predictors and the
    price as response. The car whose actual price lies furthest below its
    predicted price (the most negative difference) is the best offer.

    Returns a tuple (best_offer, difference): the database row of the best
    offer as a pandas Series and its price difference. When ``model`` is
    empty, no row matches, or no car passes the price filter, an empty
    pandas Series and None are returned.
    """
    keywords = model.split()
    if not keywords:
        return pandas.Series([]), None

    # get all the offers for the specified model
    _cursor, con = database.connect_to_database()
    query = "SELECT * FROM " + table + " WHERE Model LIKE '%%%" \
        + keywords[0] + "%%%'"
    for keyword in keywords[1:]:
        query += " AND Model LIKE '%%%" + keyword + "%%%'"

    result = pandas.read_sql_query(query, con)
    if len(result) == 0:
        return pandas.Series([]), None

    # Collect [rank, kms, year, price] for every car that passes the price
    # filter, and remember which row of ``result`` each entry came from so
    # the regression result can be mapped back to the right car.
    data = []
    row_labels = []
    for label, car in result.iterrows():
        if car.Price != 0 and car.Price < 175000:
            rank = analyze_description(car.Description)
            data.append([rank, car.Kms, car.Year, car.Price])
            row_labels.append(label)

    if not data:
        return pandas.Series([]), None

    columns = ['rank', 'kms', 'year', 'price']
    dat = pandas.DataFrame.from_records(data, columns=columns)

    # create regression plot
    graphics.create_3D_regression_plot(dat, model)

    # y (price) will be the response, and X (rank, kms, year) will be the
    # predictors
    X = dat.iloc[:, [0, 1, 2]]
    y = dat.iloc[:, [3]]

    # add a constant term to the predictors to fit the intercept of the
    # linear model
    X = sm.add_constant(X)

    # calculate the linear regression model with price as y-value to get
    # the prediction values
    reg_model = sm.OLS(y, X).fit()
    predictions = reg_model.predict()

    # differences between the actual and the predicted prices; the minimum
    # is the best offer
    differences = y.price.values - predictions
    smallest = np.amin(differences, axis=0)
    best_position = differences.argmin(axis=0)

    # Map the position within the filtered data back to the query result.
    best_offer = result.loc[row_labels[best_position]]

    return best_offer, smallest
示例#27
0
def connection():
    """Connect the database module using the ':memory:' filename."""
    memory_filename = ':memory:'
    database.connect_to_database(filename=memory_filename)
示例#28
0
    parser.add_argument(
        'dest_root', type=str, help='the dest root location')
    args = parser.parse_args()


    print(
        ('Transferring data from device: {src_dev} at {src_root}\n' + 
         '                    to device: {dst_dev} at {dst_root}')
        .format(
            src_dev=args.source_device,
            src_root=args.source_root,
            dst_dev=args.dest_device,
            dst_root=args.dest_root))

    try:
        conn = connect_to_database()
        print(add_device(conn, (args.dest_device,)))
        transfer_unique(conn, args.source_device, args.source_root, args.dest_device, args.dest_root)
    finally:
        conn.close()

"""
select hash from (
    select count(*) as num_collisions, files.hash
    from files
    join devices
    on files.device_id = devices.id
    where files.hash is not null
    and devices.name = '4000B'
    group by files.hash
)
示例#29
0
import database

# Instrument whose information is displayed below.
instrument = 'Kora'
# Connect before entering the try block; the finally clause disconnects.
database.connect_to_database()

try:
  database.display_instrument_info(instrument)
except KeyError:
  # KeyError from display_instrument_info - presumably an unknown instrument.
  print('Oh no! This instrument does not exist.')
else:
  # Runs only when the try block raised no exception.
  print(instrument)
# Write your code below:
finally:
  # Runs in every case, after the except or else branch.
  database.disconnect_from_database()
示例#30
0
def download_data_to_database(limit=None):
    """ The method scrapes data of cars at Bilbasen.dk.

    This method is the most important one of this module, as this is the
    one crawling bilbasen.dk to extract data of cars currently on sale.
    The method creates a new table each time it is called, and the table
    is named 'AllCars' followed by the string returned by get_date(). If
    the table already exist, it will be deleted.
    The method takes an optional parameter, defining how many pages to
    crawl. If no parameter is given, the page count read from the
    pagination of the first result page is used.
    The 'Brands' table is rebuilt first through create_car_brand_table.
    On a database error the error is printed and the process exits with
    status 1.
    """
    # define the default parameters for the URI
    Brand = ""  # add forwardslash e.g. "/ford"
    Model = ""  # add forwardslash e.g. "/ka"
    Fuel = "1"  # 0: all, 1: petrol etc. ("benzin mv")
    Kmlfrom = "1"
    Fromyear = "0"
    Pricefrom = "20000"
    Priceto = "1000000"
    Mileageto = "0"
    Zipcode = "0000"
    IncludeEngrosCVR = "True"
    IncludeSellForCustomer = "True"
    IncludeWithoutVehicleRegistrationTax = "False"
    IncludeLeasing = "False"
    HpFrom = ""
    HpTo = ""
    page = "1"
    Cartypes="Stationcar"

    # example of a complete search URI:
    #http://www.bilbasen.dk/brugt/bil/ford/focus?Fuel=1&YearFrom=2011&PriceTo=175000&ZipCode=0&IncludeEngrosCVR=True&IncludeSellForCustomer=True&IncludeWithoutVehicleRegistrationTax=True&IncludeLeasing=True&Cartypes=Stationcar&HpFrom=&HpTo=
    # construct the URI
    query = "/brugt/bil" + Brand + Model + "?Fuel=" + Fuel + "&KmlFrom=" + \
        Kmlfrom + "&YearFrom=" + Fromyear + "&PriceFrom=" + Pricefrom + \
        "&PriceTo=" + Priceto + "&MileageTo=" + Mileageto + "&ZipCode=" + \
        Zipcode + "&IncludeEngrosCVR=" + IncludeEngrosCVR + \
        "&IncludeSellForCustomer=" + IncludeSellForCustomer + \
        "&IncludeWithoutVehicleRegistrationTax=" + \
        IncludeWithoutVehicleRegistrationTax + "&IncludeLeasing=" + \
        IncludeLeasing + "&Cartypes=" + Cartypes +"&HpFrom=" + HpFrom + "&HpTo=" + HpTo + \
        "&page=" + page

    # connect to bilbasen.dk, download brands and do a GET request for the URI
    conn = connect()
    create_car_brand_table(conn)
    conn.request("GET", query)
    res = conn.getresponse()

    # read the HTML and parse it with BeautifulSoup
    content = res.read()
    parsed_html = BeautifulSoup(content, from_encoding='utf8')

    # create the table in the database
    try:
        cur, con = database.connect_to_database()
        # NOTE(review): the connection is used as a context manager -
        # presumably commit on success and rollback on error; confirm
        # against the driver in use.
        with con:
            date = get_date()
            tablename = 'AllCars' + date
            if database.check_if_table_exist(cur, tablename):
                database.delete_table(cur, tablename)
            command = "CREATE TABLE " + tablename + "(Model CHAR(100), " + \
                "Link CHAR(100), Description MEDIUMTEXT, Kms INT(20), " + \
                "Year INT(4), Kml FLOAT(50), Kmt FLOAT(50), " + \
                "Moth CHAR(30), Trailer CHAR(30), Location CHAR(50), " + \
                "Price INT(20))"
            cur.execute(command)

            # extract total number of pages to crawl: the pagination item
            # that follows the '...' item holds the last page number, with
            # '.' removed before conversion
            # NOTE(review): npages stays None when no '...' item is found,
            # and uls is None when the pagination list is missing, which
            # makes uls.find_all fail.
            npages = None
            uls = parsed_html.find('ul', {'class': 'pagination pull-right'})
            count = 0
            lis = uls.find_all('li')
            for li in lis:
                if li.text == '...':
                    npagesstring = lis[count+1].text
                    npages = int(npagesstring.replace('.', ''))
                count += 1

            # without an explicit limit, crawl the number of pages found above
            if limit is None:
                limit = npages
            else:
                limit = int(limit)
            print "pages to crawl: %s" % limit

            # loop through the pages; page is kept as a string because it
            # is concatenated into the URI
            while int(page) <= limit:
                print "crawling page " + page

                # extract all cars of the different listing types
                rllplus = parsed_html.find_all(
                    'div', attrs={'class': 'row listing listing-plus'})
                rllexclusive = parsed_html.find_all(
                    'div', attrs={'class': 'row listing listing-exclusive'})
                rlldiscount = parsed_html.find_all(
                    'div', attrs={'class': 'row listing listing-discount'})

                # extract the data and insert the row in the database table
                for listing in rllplus:
                    car = extract_car_info('plus', listing)
                    insert_car_to_table(car, tablename, cur)

                for listing in rllexclusive:
                    car = extract_car_info('exclusive', listing)
                    insert_car_to_table(car, tablename, cur)

                for listing in rlldiscount:
                    car = extract_car_info('discount', listing)
                    insert_car_to_table(car, tablename, cur)

                # go to next page
                incr = int(page)
                incr += 1
                page = str(incr)

                # same URI as above, with the new page number
                query = "/brugt/bil" + Brand + Model + "?Fuel=" + Fuel + \
                    "&KmlFrom=" + Kmlfrom + "&YearFrom=" + Fromyear + \
                    "&PriceFrom=" + Pricefrom + "&PriceTo=" + Priceto + \
                    "&MileageTo=" + Mileageto + "&ZipCode=" + Zipcode + \
                    "&IncludeEngrosCVR=" + IncludeEngrosCVR + \
                    "&IncludeSellForCustomer=" + IncludeSellForCustomer + \
                    "&IncludeWithoutVehicleRegistrationTax=" + \
                    IncludeWithoutVehicleRegistrationTax + \
                    "&IncludeLeasing=" + IncludeLeasing + "&Cartypes=" + \
                    Cartypes + "&HpFrom=" + HpFrom + "&HpTo=" + HpTo + "&page=" + page

                # fetch and parse the next page; this also happens once
                # after the last page that is crawled
                conn.request("GET", query)
                res = conn.getresponse()
                content = res.read()
                parsed_html = BeautifulSoup(content, from_encoding='utf8')

    # in case of database error (Python 2 except syntax): report and exit
    except mdb.Error, e:
        print "Error %d: %s" % (e.args[0], e.args[1])
        sys.exit(1)
def parse_and_execute_actions(args):
    """Run the action selected on the command line.

    The actions ``'none'``, ``'create-config'`` and ``'setup-database'`` are
    handled without opening a database connection. Every other action first
    validates and loads the configuration, connects to the database, makes
    sure the current machine is registered and then dispatches on
    ``args.action`` (``'start'``, ``'cancel'``, ``'finish'``, ``'check'``,
    ``'render'`` or ``'free'``). An unrecognised action falls through
    without doing anything beyond the machine registration.

    Args:
        args: Parsed command-line namespace. ``args.action`` selects the
            branch; the remaining attributes read depend on that branch
            (for example ``args.config``, ``args.project_name``,
            ``args.some_batches``).

    Returns:
        None.

    Raises:
        Whatever the called ``config``, ``database`` or ``project`` helpers
        raise. The database connection is closed before such an exception
        propagates.
    """
    if args.action == 'none':
        logging.info('Nothing to do, leaving now...')
        return

    if args.action == 'create-config':
        if args.default:
            config.create_default_config(args.config)
        elif args.interactive:
            config.create_config_interactively(args.config)
        else:
            logging.warning('No way to create the config file was specified. '
                            'Choose --default or --interactive.')
        return

    # Everything below needs a valid, loaded configuration.
    config.validate_config(args.config)
    cfg = config.load_config(args.config)

    if args.action == 'setup-database':
        database.setup_database(cfg)
        return

    db, cursor = database.connect_to_database(cfg)

    # try/finally guarantees the connection is released even when one of the
    # project operations raises; previously it was only closed on success.
    try:
        if not project.is_machine_registered(cfg, db, cursor):
            project.register_render_machine(cfg, db, cursor)

        if args.action == 'start':
            if not project.check_if_project_name_valid(args):
                logging.error('Project name "{}" is not a valid choice!'.format(
                    args.project_name))
            elif project.check_if_project_name_taken(args, db, cursor):
                logging.error('Project name "{}" is already taken!'.format(
                    args.project_name))
            else:
                project.start_project(args, cfg, db, cursor)

        elif args.action == 'cancel':
            # TODO add security check
            if args.project:
                project.cancel_project(args, cfg, db, cursor)
            elif args.frames:
                project.cancel_frames(args, cfg, db, cursor)
            else:
                logging.warning(
                    'Missing additional parameter, specify whether the project (--project) or frames '
                    '(--frames <...>) should be deleted.')

        elif args.action == 'finish':
            project.finish_project(args, cfg, db, cursor)

        elif args.action == 'check':  # status report
            if not project.check_if_project_name_valid(args):
                logging.error('Project name "{}" is not a valid choice!'.format(
                    args.project_name))
            elif not project.check_if_project_name_taken(args, db, cursor):
                logging.error('Project name "{}" does not exist!'.format(
                    args.project_name))
            else:
                project.get_project_info(args, cfg, db, cursor)

                # ``--history`` currently produces no extra output but still
                # takes precedence over (i.e. suppresses) the frame list.
                if not args.history and args.frame_list:
                    print(project.get_project_frame_list(args, cfg, db, cursor))

        elif args.action == 'render':
            if args.one_batch:
                project.render_frames(args, cfg, db, cursor)
            elif args.some_batches > 0:
                for _ in range(args.some_batches):
                    project.render_frames(args, cfg, db, cursor)
            else:
                # No batch limit given: keep rendering until nothing is left.
                while project.has_project_open_frames(args, cfg, db, cursor):
                    project.render_frames(args, cfg, db, cursor)

        elif args.action == 'free':
            if args.free_failed:
                project.free_failed_frames(args, cfg, db, cursor)
            elif args.free_waiting:
                project.free_waiting_frames(args, cfg, db, cursor)
    finally:
        database.close_connection(db, cursor)
示例#32
0
文件: app.py 项目: ghl3/BarkovChain
from semantic import load_corpus, load_lsi, cosine
from semantic import update_vector, important_words

from assets import gather_assets

# Module start-up: everything below runs once, at import time, in this order.

# If necessary, download the trained
# model from Amazon S3
gather_assets()

# Load the semantic models. The unpacked names are module-level globals.
dictionary, corpus, tfidf, bar_idx_map, idx_bar_map = load_corpus()
lsi, corpus_lsi_tfidf, lsi_index = load_lsi()

# Connect to the db (returns a database handle and a connection object).
mongo_db, mongo_connection = connect_to_database(table_name="barkov_chain")

# The Flask application object that the @app.route decorators below attach to.
app = Flask(__name__)

@app.route('/')
def index():
    """Serve the site root by rendering the ``index.html`` template."""
    landing_page = render_template('index.html')
    return landing_page

@app.route('/slides')
def slides():
    """Serve ``/slides`` by rendering the ``slides.html`` template."""
    slides_page = render_template('slides.html')
    return slides_page

@app.route('/api/initial_location', methods=['POST'] )
def api_initial_location():
    """
    Take lat/lon coordinates and return
 def tearDown(self):
     """Clean up after a test by dropping the stand-in ``AllCars010115`` table."""
     cursor, _connection = database.connect_to_database()
     drop_statement = "DROP TABLE AllCars010115"
     database.query(cursor, drop_statement)
 def setUp(self):
     """Prepare a test by creating the stand-in ``AllCars010115`` table."""
     cursor, _connection = database.connect_to_database()
     create_statement = "CREATE TABLE AllCars010115(Column1 INT, Column2 INT)"
     database.query(cursor, create_statement)
示例#35
0
        FROM files
        INNER JOIN devices
        ON files.device_id=devices.id
        WHERE files.type='{type}'
        AND devices.name='{device_name}'
        GROUP BY files.type, files.subtype;
        """.format(type=args.filetype, device_name=args.device_name)

    sql_select_examples = """
        SELECT relpath
        FROM files
        INNER JOIN devices
        ON files.device_id=devices.id
        WHERE files.type=?
        AND files.subtype=?
        AND devices.name='{device_name}'
        LIMIT 3;
        """.format(device_name=args.device_name)

    try:
        conn = connect_to_database(db_url=args.db_url)
        cur = conn.execute(sql_select_subtypes)
        for c in cur:
            print("type: '{}' subtype: '{}' count: {}".format(*c))
            cursor = conn.execute(sql_select_examples, c[:2])
            for c1 in cursor:
                print('\t{}'.format(c1[0]))

    finally:
        conn.close()
示例#36
0
def main():
    """Parse the command line and run the requested Foursquare steps.

    ``--match/-m N`` calls ``match_foursquare_id(db, api, N)`` and
    ``--tips/-t N`` calls ``add_tips_to_db(db, api, N)``; either, both or
    neither may be given.

    Returns 255 when ``get_api()`` raises ``KeyError``; otherwise returns
    None.

    NOTE: this is Python 2 code (``print`` statements, ``dict.iteritems``).
    """

    parser = argparse.ArgumentParser(description='Scrape data from NYMag.')
    parser.add_argument('--match', '-m', dest='match', type=int, default=None, 
                        help='Find the closest matching foursquare location to a nymag location')
    parser.add_argument('--tips', '-t', dest='tips', type=int, default=None, 
                        help='Add foursquare tips to the database')
    args = parser.parse_args()

    # Get the mongo database
    # NOTE(review): `connection` is not used in the visible part of this function.
    db, connection = connect_to_database()

    # Get the foursquare api
    try:
        api = get_api()
    except KeyError:
        # A KeyError here is reported as missing environment configuration.
        print "Cannot get foursquare info from environment"
        print "Ensure that your environment is properly setup by sourcing 'setup.sh'"
        return 255
    # NOTE(review): `auth_uri` is assigned but not read in the visible code.
    auth_uri = api.oauth.auth_url()

    # Each option defaults to None, so a step only runs when its flag was given.
    num_to_match = args.match
    if num_to_match != None:
        match_foursquare_id(db, api, num_to_match)

    tips_to_get = args.tips
    if tips_to_get != None:
        add_tips_to_db(db, api, tips_to_get)

    return

##############
    # NOTE(review): everything from here on sits after the unconditional
    # `return` above and is therefore unreachable. It looks like leftover
    # experimentation; `client` is not defined in the visible code.

    matches = get_foursquare_id(api, "art bar", longitude=-74.00355,
                                 latitude=40.738491)
    for match in matches:
        print match
    return

    #venues = get_nearby_venues(api,"40.728625","73.997684")
    #venues = get_nearby_venues(api,"44.3","37.2")
    venues = get_nearby_venues(api, 40.7, -74)

    print venues
    return

    for venue in venues:
        print venue
    return

    params = {}
    params['query'] = 'coffee'
    params['near'] = 'New York'
    params['intent'] = 'browse'
    params['limit'] = 15
    response = client.venues.search(params=params)

    geocode = response[u'geocode']
    venues = response[u'venues']

    for key, val in geocode['feature'].iteritems():
        print key, val
    return

    for venue in venues:
        print venue
        for key, val in venue.iteritems():
            print key, val