def command():
    """Parse the command line, then start the GUI or import a CSV file.

    When no ``import_csv`` argument was given, the GUI entry point ``main``
    is called and its result is returned. Otherwise the requested database
    is connected (the default one unless ``args.database`` was supplied) and
    the movies in the CSV file are imported. Invalid import data is printed
    instead of being raised.
    """
    args = command_line_args()
    csv_path = args.import_csv

    # Nothing to import: run the GUI.
    if not csv_path:
        return main()

    # An empty string is a valid SQLAlchemy non-default value for the database name.
    use_named_database = args.database == '' or args.database

    if args.verbosity >= 1:
        print(f"Running {__file__}")
        print(f'Loading movies from {args.import_csv}')
        print(f"Adding movies to database '{args.database}'."
              if use_named_database
              else "Adding movies to the default database.")

    # Pass the name only when one was requested so the callee's default applies otherwise.
    connect_args = (args.database,) if use_named_database else ()
    database.connect_to_database(*connect_args)

    try:
        impexp.import_movies(csv_path)
    except impexp.MoviedbInvalidImportData as exc:
        print(exc)
def start_up():
    """Initialize the program.

    Starts the logger, creates the application configuration object in
    ``config.app`` and opens the default database.
    """
    # moviedb-#84 Load the config object

    # Start the logger.
    root_dir, program_name = os.path.split(__file__)
    # splitext() also copes with file names that contain no dot or several
    # dots, where unpacking split('.') into two names raised ValueError.
    program, _ = os.path.splitext(program_name)
    start_logger(root_dir, program)

    # Initialize application configuration data.
    config.app = config.Config(program, VERSION)

    # Open the default database
    database.connect_to_database()
def loaded_database(hamlet, solaris, dreams, revanche):
    """Connect to an in-memory database and fill it with movies and tags.

    Each argument is unpacked as keyword arguments for ``database.Movie``.
    After the movies are stored, the tags 'blue', 'yellow' and 'green' are
    created and linked to Hamlet (1996), Revanche (2008) and Solaris (1972)
    as listed in ``tag_links`` below.
    """
    database.connect_to_database(filename=':memory:')

    movie_records = []
    for movie_kwargs in (hamlet, solaris, dreams, revanche):
        movie_records.append(database.Movie(**movie_kwargs))

    # noinspection PyProtectedMember
    with database._session_scope() as session:
        session.add_all(movie_records)

    # (tag, ((title, year), ...)) in the order the tags and links are created.
    tag_links = (
        ('blue', (('Hamlet', 1996),)),
        ('yellow', (('Revanche', 2008),)),
        ('green', (('Revanche', 2008), ('Solaris', 1972))),
    )
    for tag, movie_keys in tag_links:
        database.add_tag(tag)
        for title, year in movie_keys:
            database.add_movie_tag_link(
                tag, database.MovieKeyTypedDict(title=title, year=year))
def get_location_distribution_one_brand(table, brand):
    """
    Count the cars of one brand per sale location.

    For every location returned by ``database.get_locations(table)`` the
    rows of ``table`` whose Model contains ``brand`` and whose Location
    contains that location are selected and counted.

    Returns a pandas Series of the counts, indexed by location name.
    """
    _cur, con = database.connect_to_database()

    # all the different locations present in the specified table
    locations = database.get_locations(table)

    place_names = []
    car_counts = []
    for _, row in locations.iterrows():
        place = row.location
        # NOTE(review): the statement is assembled by string concatenation;
        # confirm that table, brand and location never hold untrusted text.
        query = ("SELECT * FROM " + table
                 + " WHERE Model LIKE '%%%" + brand
                 + "%%%' AND Location LIKE '%%%" + place + "%%%';")
        matches = pandas.read_sql_query(query, con)
        place_names.append(smart_str(place))
        car_counts.append(len(matches))

    return pandas.Series(car_counts, index=place_names)
def get_location_distribution_all_brands(table):
    """
    Calculate the distribution of all brands based on sale locations.

    For every location returned by ``database.get_locations(table)`` the
    rows of ``table`` whose Location contains that location are counted.

    Returns a pandas Series of the counts, indexed by location name.
    """
    # get all the different locations present in the specified table
    locations = database.get_locations(table)

    # One connection serves every per-location query; the previous code
    # opened a new connection on each loop iteration.
    cur, con = database.connect_to_database()

    # calculate the distribution
    index_names = []
    distribution = []
    for _, location in locations.iterrows():
        # find all the rows for each location
        query = "SELECT * FROM " + table + " WHERE Location LIKE '%%%" \
            + location.location + "%%%';"
        result = pandas.read_sql_query(query, con)
        index_names.append(smart_str(location.location))
        distribution.append(len(result))

    return pandas.Series(distribution, index=index_names)
def get_distribution_one_brand(tablename, brand):
    """
    Build a frequency distribution of the models of one car brand.

    Selects the rows of ``tablename`` whose Model contains ``brand`` and
    whose CARTYPES matches 'STATIONCAR', simplifies the model names and
    passes them to ``create_distribution`` with the columns 'Model',
    'Number' and 'Percentage'.

    Returns the result of ``create_distribution``, or an empty list when no
    row matches.
    """
    _cur, con = database.connect_to_database()
    sql = ("SELECT * FROM " + tablename
           + " WHERE Model LIKE '%%%" + brand
           + "%%%' AND CARTYPES LIKE 'STATIONCAR'")
    rows = pandas.read_sql_query(sql, con)

    model_names = rows.Model
    total = len(model_names)
    if total == 0:
        # nothing to distribute
        return []

    # do not distinguish models on details such as door count or engine size
    simplified = simplify_model_names(model_names)
    return create_distribution(
        simplified, ['Model', 'Number', 'Percentage'], total)
def create_car_brand_table(conn):
    """
    Create the database table 'Brands' from the brands listed on the site.

    Requests "/" through ``conn`` (an HTTP connection), parses the returned
    HTML and stores every option of the "Alle mærker" option group that
    contains at least one letter as a row of a freshly created 'Brands'
    table. An existing 'Brands' table is deleted first.
    """
    conn.request("GET", "/")
    res = conn.getresponse()
    content = res.read()
    parsed_html = BeautifulSoup(content, from_encoding='utf8')

    cur, con = database.connect_to_database()

    # delete the old table
    if database.check_if_table_exist(cur, 'Brands'):
        database.delete_table(cur, 'Brands')
    cur.execute("CREATE TABLE Brands(Brand CHAR(100))")

    # extract car brands and insert to database table
    brands_tags = parsed_html.find("optgroup", {"label": "Alle mærker"})
    for child in brands_tags.children:
        brand = smart_str(child.string)
        if any(c.isalpha() for c in brand):
            # The brand text is scraped from a web page, so let the driver
            # quote it instead of interpolating it into the statement.
            # NOTE(review): assumes a DB-API driver with the 'format'
            # paramstyle (e.g. MySQLdb) - confirm.
            cur.execute("INSERT INTO Brands(Brand) VALUES(%s)", (brand,))
    database.commit(cur, con)
def handle_tweet(tweet_object, n_tweets):
    """Store one tweet in the 'tweets' table unless it is a retweet.

    ``tweet_object`` is converted with ``convert_tweet_object``; when
    ``test_for_retweet`` accepts the tweet nothing is stored. ``n_tweets``
    is not used here.
    """
    tweet, user = convert_tweet_object(tweet_object)
    if test_for_retweet(tweet):
        return

    record = create_output(tweet, user)

    # Connect to database
    connection = connect_to_database('tweets01')
    insert_single(connection, 'tweets', record)
def main(rootdir, name, blacklist=None):
    """Index the files below ``rootdir`` for the device called ``name``.

    Sets up the tables, registers the device, indexes the files (passing
    ``blacklist`` through to ``index_files``) and commits. The connection is
    closed whether or not indexing succeeds.
    """
    # Connect outside the try block: if connecting fails there is nothing to
    # close, and the original failure must not be masked by an unbound
    # ``conn`` in the finally clause.
    conn = connect_to_database()
    try:
        setup_tables(conn)
        device_id = add_device(conn, (name, ))
        index_files(conn, rootdir, device_id, blacklist=blacklist)
        conn.commit()
    finally:
        conn.close()
def test_check_if_table_exist(self):
    """
    Check database.check_if_table_exist for a missing and an existing table.
    """
    cursor, _connection = database.connect_to_database()

    # a table named "test" is expected to be reported as missing
    self.assertFalse(database.check_if_table_exist(cursor, "test"))

    # the fake table is expected to be reported as existing
    self.assertTrue(database.check_if_table_exist(cursor, "AllCars010115"))
def test_delete_table(self):
    """
    Check that database.delete_table removes the fake table.
    """
    fake_table = 'AllCars010115'
    cursor, _connection = database.connect_to_database()

    # delete the test table and verify that it is gone
    database.delete_table(cursor, fake_table)
    still_there = database.check_if_table_exist(cursor, fake_table)
    self.assertFalse(still_there)

    # create the table again for the rest of the tests
    self.setUp()
def get_most_expensive_cars(table):
    """
    Select the row(s) of ``table`` whose price equals the highest price.

    Returns the complete matching rows as a pandas DataFrame.
    """
    _cur, con = database.connect_to_database()
    sql = ("SELECT t.* FROM {0} t WHERE t.price = "
           "(select max(subt.price) from {0} subt);").format(table)
    return pandas.read_sql_query(sql, con)
def singular_value_decomposition(self, n_components):
    """Run an SVD on the tf-idf features and publish the most similar items.

    The 'features' vectors of ``self.tfidf`` are transposed into a
    RowMatrix and decomposed with ``n_components`` components. The cosine
    similarity of the rows of V is stored in ``self.similarity_matrix``;
    for each row the six highest ranked indices become the columns 'id',
    'first' ... 'fifth' of ``self.five_most_similar_beers``, which is joined
    onto ``self.tfidf``. Every row of the joined frame is then written
    below the 'beers' node obtained from ``database.connect_to_database``.

    NOTE(review): the top ranked index is used as 'id', presumably because
    every item is most similar to itself - confirm.
    """
    feature_rows = self.tfidf.select(
        'id', 'features').rdd.map(lambda row: row[1].toArray())
    feature_rows.persist(ps.StorageLevel.MEMORY_AND_DISK)
    mat = RowMatrix(self.rdd_transpose(feature_rows))

    svd = computeSVD(mat, n_components, computeU=True)
    print("%s %s" % (svd.U.numCols(), svd.U.numRows()))
    print(type(svd.V))
    self.vt = svd.V
    self.similarity_matrix = cosine_similarity(svd.V.toArray())

    # indices of the six largest similarities per row, best first
    ranked = [np.argsort(scores)[::-1][:6].tolist()
              for scores in self.similarity_matrix]
    self.five_most_similar_beers = self.sql_context.createDataFrame(
        ranked, ['id', 'first', 'second', 'third', 'fourth', 'fifth'])
    self.tfidf = self.tfidf.join(
        self.five_most_similar_beers, ['id'], 'inner')

    self.token, self.db = database.connect_to_database()

    # use default arguments to avoid closure of the environment of the
    # token and db variables
    def save_to_firebase(x, token=self.token, db=self.db):
        fields = ('brewery_name', 'beer_name', 'state', 'beer_style',
                  'first', 'second', 'third', 'fourth', 'fifth',
                  'top1', 'top2', 'top3', 'top4', 'top5', 'top6', 'top7')
        data = dict((field, getattr(x, field)) for field in fields)
        db.child('beers').child(x.id).set(data, token)

    self.tfidf.rdd.foreach(save_to_firebase)
def get_fastest_cars(table):
    """
    Select the row(s) of ``table`` with the lowest positive ``kmt`` value.

    Rows whose ``kmt`` is not greater than 0 are ignored when the minimum
    is determined. Returns the complete matching rows as a pandas
    DataFrame.
    """
    _cur, con = database.connect_to_database()
    sql = ("SELECT t.* FROM {0} t WHERE t.kmt = "
           "(select min(subt.kmt) from {0} subt where "
           "(subt.kmt > 0));").format(table)
    return pandas.read_sql_query(sql, con)
def test_init_database_access_with_existing_database(tmpdir):
    """Attach to an existing database and check that date_last_accessed differs from date_created."""
    # Slow test: 5% chance of this test being run.
    # randint() includes both end points, so only the five values 0..4 let
    # the test run; the former '> 5' check let six values (6%) through.
    if random.randint(0, 99) >= 5:
        return

    path = tmpdir.mkdir('tmpdir').join(database.database_fn)
    database.connect_to_database(filename=path)

    # Reattach to the same database
    database.connect_to_database(filename=path)

    # noinspection PyProtectedMember
    with database._session_scope() as session:
        current = (session.query(database.MoviesMetaData.value)
                   .filter(database.MoviesMetaData.name == 'date_last_accessed')
                   .one())
        previous = (session.query(database.MoviesMetaData.value)
                    .filter(database.MoviesMetaData.name == 'date_created')
                    .one())
        assert current != previous
def get_most_ecofriendly_cars(table):
    """
    Select the row(s) of ``table`` with the highest positive ``kml`` value.

    Rows whose ``kml`` is not greater than 0 are ignored when the maximum
    is determined. Returns the complete matching rows as a pandas
    DataFrame.
    """
    _cur, con = database.connect_to_database()
    sql = ("SELECT t.* FROM {0} t WHERE t.kml = "
           "(select max(subt.kml) from {0} subt where "
           "(subt.kml > 0));").format(table)
    return pandas.read_sql_query(sql, con)
def get_cheapest_cars(table):
    """
    Select the row(s) of ``table`` with the lowest price above 0.

    Rows whose price is not greater than 0 are ignored when the minimum is
    determined. Returns the complete matching rows as a pandas DataFrame.
    """
    _cur, con = database.connect_to_database()
    sql = ("SELECT t.* FROM {0} t WHERE t.price = "
           "(select min(subt.price) from {0} subt where "
           "(subt.price > 0));").format(table)
    return pandas.read_sql_query(sql, con)
def create_test_table():
    """
    Create the table 'testTable' with five known cars, unless it exists.

    NOTE(review): the creation, the inserts and the commit are all treated
    as guarded by the existence check - confirm against the original
    layout.
    """
    cur, con = database.connect_to_database()
    if database.check_if_table_exist(cur, 'testTable'):
        return

    create_sql = (
        "CREATE TABLE testTable (Model CHAR(100), Link CHAR(100), "
        "Description MEDIUMTEXT, Kms INT(20), Year INT(4), "
        "Kml FLOAT(20), Kmt FLOAT(20), Moth CHAR(30), Trailer CHAR(30), "
        "Location CHAR(50), Price INT(20))")
    database.query(cur, create_sql)

    insert_sql = (
        "INSERT INTO testTable(Model, Link, Description, Kms, Year, "
        "Kml, Kmt, Moth, Trailer, Location, Price) VALUES('%s', '%s', '%s', "
        "'%s','%s','%s','%s','%s','%s','%s','%s')")

    # one tuple per car, in the column order of the INSERT statement
    cars = (
        ('Audi A1', 'www.test.dk/audi1', 'Audi A1 description text',
         '100', '2000', '2', '6', '0', 'No trailer', 'Sjælland', '50000'),
        ('Audi A2', 'www.test.dk/audi2', 'Audi A2 description text',
         '200', '2001', '3', '7', '0', 'No trailer', 'Fyn', '60000'),
        ('Audi A3', 'www.test.dk/audi3', 'Audi A3 description text',
         '300', '2002', '4', '8', '0', 'No trailer', 'Bornholm', '70000'),
        ('Ford Mondeo', 'www.test.dk/ford1', 'Ford Mondeo description text',
         '400', '2003', '5', '12', '0', 'No trailer', 'Jylland', '8000'),
        ('Mercedes SLK', 'www.test.dk/mercedes1',
         'Mercedes SLK description text',
         '500', '2004', '2', '4', '0', 'No trailer', 'Fyn', '100000'),
    )
    for car in cars:
        database.query(cur, insert_sql % car)

    database.commit(cur, con)
def add_entry():
    """
    Store the submitted song and redirect to the 'main' page.

    Reads "name", "ytLink" and "songName" from the request form (with commas
    removed). The song is stored with ``month=12`` when CHRISTMAS_MODE is on
    and the form contains "christmasSong", otherwise with ``month=None``.
    """
    form = request.form
    username = util.remove_commas_from_string(form["name"])
    link = util.remove_commas_from_string(form["ytLink"])
    song = util.remove_commas_from_string(form["songName"])

    festive = CHRISTMAS_MODE and "christmasSong" in form
    month = 12 if festive else None

    with database.connect_to_database() as db:
        user_id = database.get_userid(db, username)
        database.add_song(db, link, song, user_id, month=month)

    return redirect(url_for('main'))
def get_distribution_all_brands(tablename):
    """
    Build a frequency distribution of all car brands in ``tablename``.

    Reads every row of the table, reduces the Model column to brands with
    ``extract_brands`` and passes them to ``create_distribution`` with the
    columns 'Brand', 'Number' and 'Percentage'.

    Returns the result of ``create_distribution``.
    """
    _cur, con = database.connect_to_database()
    rows = pandas.read_sql_query("select * from " + tablename, con)

    # the count is taken before the models are reduced to brands
    model_column = rows.Model
    total = len(model_column)
    brands = extract_brands(model_column)

    return create_distribution(
        brands, ['Brand', 'Number', 'Percentage'], total)
def main(playlist=PLAYLIST_CSV):
    """Populate the database with the songs of a playlist file.

    Every entry read by ``read_playlist`` is added with ``add_song`` (single
    quotes are stripped from the title) and marked as played when its
    'played' field evaluates to True. The number of songs added and the
    database size before and after are logged.
    """
    entries = read_playlist(playlist)
    with connect_to_database() as db:
        size_before = get_songs_size(db)

        for url, played_text, raw_title, user in entries:
            was_played = ast.literal_eval(played_text)
            assert isinstance(was_played, bool)

            user_id = get_userid(db, user)
            title = raw_title.replace("'", '')
            added = add_song(db, url, title, user_id)
            outcome = "Added " if added else "Did not add "
            logging.info("%s %s by %s", outcome, title, user)

            if was_played:
                set_song_played(db, url)
                logging.info("Set song played: %s", title)

        size_after = get_songs_size(db)
        logging.info(
            "Populated database with playlist. Added %d songs. Database size: %d -> %d",
            size_after - size_before, size_before, size_after)
def create_price_year_scatter(table, brand):
    """
    Save a scatter plot of price against production year.

    Uses every row of ``table`` when ``brand`` is 'all brands', otherwise
    the rows whose Model contains ``brand``. The plot is written to
    'img/price_year_scatter' and the current figure is cleared afterwards.
    """
    _cur, con = database.connect_to_database()

    every_brand = brand == 'all brands'
    if every_brand:
        query = 'SELECT * FROM ' + table
    else:
        query = "SELECT * FROM " + table + \
            " WHERE Model LIKE '%%%" + brand + "%%%'"
    rows = pandas.read_sql_query(query, con)

    prices = rows.Price
    years = rows.Year

    if every_brand:
        # fixed ranges cut off outliers when all brands are plotted
        x_limits = (1950, 2016)
        y_limits = (0, 2500000)
    else:
        # a single brand is shown over its own value range
        x_limits = (min(years.values), max(years.values))
        y_limits = (min(prices.values), max(prices.values))

    plt.plot(years, prices, 'o')
    plt.xlim(*x_limits)
    plt.ylim(*y_limits)
    plt.xlabel('Year')
    plt.ylabel('Price')
    plt.tight_layout()
    plt.savefig('img/price_year_scatter', transparent=True)
    plt.clf()
def log(error="Undefined Error", error_type="GENERIC", severity="INFO"):
    """Write one entry to the ``logs`` table; never raise on logging failure.

    Args:
        error: message text to store.
        error_type: category stored with the message.
        severity: severity label stored with the message.

    Any exception raised while importing the database module, connecting or
    inserting is reported on stdout instead of being propagated. The
    connection, if one was opened, is always closed.
    """
    current_time = time.asctime(time.localtime(time.time()))
    # Stays None until a connection exists, so the cleanup below cannot hit
    # an unbound name when the import or the connect call fails.
    db_connection = None
    try:
        from database import connect_to_database
        db_connection = connect_to_database()
        db = db_connection.cursor()
        db.execute('INSERT INTO logs VALUES (?, ?, ?, ?, ?)',
                   (None, error_type, error, current_time, severity,))
        db_connection.commit()
        # For debug purposes also print
        print("REMOVE ME IN PROD: " + error)
    except Exception as err:
        # str() keeps non-string exception arguments (e.g. error numbers)
        # from raising TypeError during the concatenation.
        for detail in err.args:
            print("Unable to add error to DB: " + str(detail))
    finally:
        if db_connection is not None:
            db_connection.close()
#!/usr/bin/env python # -*- coding: utf-8 -*- import sys import datetime from flask import Flask, render_template, request from suggestions import get_suggested_products from database import connect as connect_to_database from database.models import Client, Product, Feedback, Reunion app = Flask(__name__) # connect to mongodb connect_to_database() @app.route('/') def index(): return client_detail('003b000000KqjT8AAJ') @app.route('/list') def list_clients(): clients = Client.objects() return render_template('list_clients.html', clients=clients) @app.route('/clients/<client_id>') def client_detail(client_id): client = Client.objects.get(contact_id=client_id) # suggestions raw_suggestions = get_suggested_products(client) suggestions = map(lambda x: (Product.objects.get(product_id=x[0]), x[1]), raw_suggestions)
return if __name__ == "__main__": parser = argparse.ArgumentParser(description='Scrape data from NYMag.') parser.add_argument('--scrape_url', '-s', dest='scrape_url', default=None, help='url to scrape restaurants from') parser.add_argument('--fetch_reviews', '-f', dest='fetch_reviews', type=int, default=None, help='number of reviews to fetch') parser.add_argument('--clear', '-c', dest='clean', type=int, default=None, help='clean up the database') args = parser.parse_args() db, connection = connect_to_database() try: url = args.scrape_url if url != None: get_new_restaurants(db, url) reviews_to_fetch = args.fetch_reviews if reviews_to_fetch != None: get_reviews(db, reviews_to_fetch) num_to_clean = args.clean if num_to_clean != None: clean(db, num_to_clean) except (pymongo.errors.ConnectionFailure, pymongo.errors.InvalidName) as err:
def calculate_best_offer(table, model):
    """
    Calculate the best offer for a given car model.

    The rows of ``table`` whose Model contains every word of ``model`` are
    fetched. Rows with a price of 0 or of 175000 and above are left out.
    A linear regression is fitted with the description rank (from
    ``analyze_description``), the mileage and the year as predictors and
    the price as response. The best offer is the car whose actual price
    lies furthest below its predicted price.

    Returns a tuple ``(best_offer, difference)``: the row of the best offer
    and its (most negative) difference between actual and predicted price.
    When ``model`` is empty, no row matches or no row survives the price
    filter, ``(empty pandas Series, None)`` is returned.
    """
    keywords = model.split()
    if not keywords:
        # nothing to search for
        return pandas.Series([]), None

    # get all the offers for the specified model
    cur, con = database.connect_to_database()
    query = "SELECT * FROM " + table + " WHERE " + " AND ".join(
        "Model LIKE '%%%" + keyword + "%%%'" for keyword in keywords)
    if len(keywords) == 1:
        query += ";"
    result = pandas.read_sql_query(query, con)
    if len(result) == 0:
        return pandas.Series([]), None

    # create data array of [description_score, kms, year, price] and remember
    # which row of ``result`` each entry came from
    max_price = 175000
    data = []
    row_labels = []
    for label, car in result.iterrows():
        # do not include cars with price = 0 or above the price cap
        if car.Price != 0 and car.Price < max_price:
            rank = analyze_description(car.Description)
            data.append([rank, car.Kms, car.Year, car.Price])
            row_labels.append(label)
    if not data:
        # every row was filtered out; there is nothing to fit
        return pandas.Series([]), None

    columns = ['rank', 'kms', 'year', 'price']
    dat = pandas.DataFrame.from_records(data, columns=columns)

    # create regression plot
    graphics.create_3D_regression_plot(dat, model)

    # y (price) will be the response, and X (rank, kms, year) will be the
    # predictors
    X = dat.iloc[:, [0, 1, 2]]
    y = dat.iloc[:, [3]]

    # add a constant term to the predictors to fit the intercept of the
    # linear model
    X = sm.add_constant(X)

    # calculate the linear regression model with price as y-value to get the
    # prediction values
    reg_model = sm.OLS(y, X).fit()
    predictions = reg_model.predict()

    # differences between the actual and the predicted prices; the minimum
    # is the best offer
    differences = y.price.values - predictions
    smallest = np.amin(differences, axis=0)
    best_position = differences.argmin(axis=0)

    # Map the position within the filtered data back to the row of
    # ``result``; the former hard-coded counter returned an unrelated row.
    best_offer = result.loc[row_labels[best_position]]
    return best_offer, smallest
def connection():
    """Connect the database module to an in-memory database."""
    in_memory = ':memory:'
    database.connect_to_database(filename=in_memory)
parser.add_argument( 'dest_root', type=str, help='the dest root location') args = parser.parse_args() print( ('Transferring data from device: {src_dev} at {src_root}\n' + ' to device: {dst_dev} at {dst_root}') .format( src_dev=args.source_device, src_root=args.source_root, dst_dev=args.dest_device, dst_root=args.dest_root)) try: conn = connect_to_database() print(add_device(conn, (args.dest_device,))) transfer_unique(conn, args.source_device, args.source_root, args.dest_device, args.dest_root) finally: conn.close() """ select hash from ( select count(*) as num_collisions, files.hash from files join devices on files.device_id = devices.id where files.hash is not null and devices.name = '4000B' group by files.hash )
import database

# Instrument whose information is looked up below.
instrument = 'Kora'

database.connect_to_database()
try:
    database.display_instrument_info(instrument)
except KeyError:
    # the lookup did not know the instrument
    print('Oh no! This instrument does not exist.')
else:
    # only reached when the lookup raised nothing
    print(instrument)
# Write your code below:
finally:
    # runs on every path so the connection is always released
    database.disconnect_from_database()
def _build_search_query(page):
    """Return the bilbasen.dk search URI for result page ``page`` (a string)."""
    # default parameters for the URI
    brand = ""  # add forwardslash e.g. "/ford"
    model = ""  # add forwardslash e.g. "/ka"
    fuel = "1"  # 0: all 1: benzin mv
    kml_from = "1"
    from_year = "0"
    price_from = "20000"
    price_to = "1000000"
    mileage_to = "0"
    zipcode = "0000"
    include_engros_cvr = "True"
    include_sell_for_customer = "True"
    include_without_registration_tax = "False"
    include_leasing = "False"
    hp_from = ""
    hp_to = ""
    cartypes = "Stationcar"

    return ("/brugt/bil" + brand + model + "?Fuel=" + fuel
            + "&KmlFrom=" + kml_from + "&YearFrom=" + from_year
            + "&PriceFrom=" + price_from + "&PriceTo=" + price_to
            + "&MileageTo=" + mileage_to + "&ZipCode=" + zipcode
            + "&IncludeEngrosCVR=" + include_engros_cvr
            + "&IncludeSellForCustomer=" + include_sell_for_customer
            + "&IncludeWithoutVehicleRegistrationTax="
            + include_without_registration_tax
            + "&IncludeLeasing=" + include_leasing
            + "&Cartypes=" + cartypes
            + "&HpFrom=" + hp_from + "&HpTo=" + hp_to
            + "&page=" + page)


def download_data_to_database(limit=None):
    """
    Scrape the cars currently on sale at bilbasen.dk into a database table.

    The brand table is refreshed first. The cars are stored in a table named
    'AllCars' followed by the value of ``get_date()``; an existing table of
    that name is deleted and recreated.

    ``limit`` is the number of result pages to crawl. When it is None the
    page count is read from the pagination of the first result page; if it
    cannot be determined, only the first page is crawled.

    A database error is printed and terminates the program with exit
    status 1.
    """
    page = "1"

    # connect to bilbasen.dk, download brands and do a GET request for the URI
    conn = connect()
    create_car_brand_table(conn)
    conn.request("GET", _build_search_query(page))
    res = conn.getresponse()

    # read the HTML and parse it with BeautifulSoup
    content = res.read()
    parsed_html = BeautifulSoup(content, from_encoding='utf8')

    # create the table in the database
    try:
        cur, con = database.connect_to_database()
        with con:
            date = get_date()
            tablename = 'AllCars' + date
            if database.check_if_table_exist(cur, tablename):
                database.delete_table(cur, tablename)
            command = "CREATE TABLE " + tablename + "(Model CHAR(100), " + \
                "Link CHAR(100), Description MEDIUMTEXT, Kms INT(20), " + \
                "Year INT(4), Kml FLOAT(50), Kmt FLOAT(50), " + \
                "Moth CHAR(30), Trailer CHAR(30), Location CHAR(50), " + \
                "Price INT(20))"
            cur.execute(command)

            # extract total number of pages to crawl: it is the entry that
            # follows the '...' item of the pagination
            npages = None
            uls = parsed_html.find('ul', {'class': 'pagination pull-right'})
            lis = uls.find_all('li') if uls is not None else []
            for count, li in enumerate(lis):
                if li.text == '...':
                    npages = int(lis[count + 1].text.replace('.', ''))

            if limit is None:
                # Without a page count, crawl the page already downloaded
                # rather than comparing against None.
                limit = npages if npages is not None else 1
            else:
                limit = int(limit)
            print("pages to crawl: %s" % limit)

            # loop through the pages
            while int(page) <= limit:
                print("crawling page " + page)

                # extract all cars of the different listing types and
                # insert each as a row in the database table
                for listing_type in ('plus', 'exclusive', 'discount'):
                    listings = parsed_html.find_all(
                        'div',
                        attrs={'class': 'row listing listing-' + listing_type})
                    for listing in listings:
                        car = extract_car_info(listing_type, listing)
                        insert_car_to_table(car, tablename, cur)

                # go to next page; skip the download once the limit is reached
                page = str(int(page) + 1)
                if int(page) > limit:
                    break
                conn.request("GET", _build_search_query(page))
                res = conn.getresponse()
                content = res.read()
                parsed_html = BeautifulSoup(content, from_encoding='utf8')

    # in case of database error
    except mdb.Error as e:
        print("Error %d: %s" % (e.args[0], e.args[1]))
        sys.exit(1)
def parse_and_execute_actions(args):
    """Dispatch on ``args.action`` and run the matching project operation.

    'none' and 'create-config' are handled without a database. For every
    other action the configuration in ``args.config`` is validated and
    loaded; 'setup-database' then sets up the database and returns. The
    remaining actions ('start', 'cancel', 'finish', 'check', 'render',
    'free') run against an open database connection, which is closed again
    even when the action raises.
    """
    if args.action == 'none':
        logging.info('Nothing to do, leaving now...')
        return
    elif args.action == 'create-config':
        if args.default:
            config.create_default_config(args.config)
        elif args.interactive:
            config.create_config_interactively(args.config)
        else:
            logging.warning('No way to create the config file was specified. '
                            'Choose --default or --interactive.')
        return

    config.validate_config(args.config)
    cfg = config.load_config(args.config)

    if args.action == 'setup-database':
        database.setup_database(cfg)
        return

    db, cursor = database.connect_to_database(cfg)
    # try/finally: a failing action must not leave the connection open.
    try:
        if not project.is_machine_registered(cfg, db, cursor):
            project.register_render_machine(cfg, db, cursor)

        if args.action == 'start':
            if not project.check_if_project_name_valid(args):
                logging.error('Project name "{}" is not a valid choice!'.format(
                    args.project_name))
            elif project.check_if_project_name_taken(args, db, cursor):
                logging.error('Project name "{}" is already taken!'.format(
                    args.project_name))
            else:
                project.start_project(args, cfg, db, cursor)
        elif args.action == 'cancel':
            # TODO add security check
            if args.project:
                project.cancel_project(args, cfg, db, cursor)
            elif args.frames:
                project.cancel_frames(args, cfg, db, cursor)
            else:
                logging.warning(
                    'Missing additional parameter, specify whether the project (--project) or frames '
                    '(--frames <...>) should be deleted.')
        elif args.action == 'finish':
            project.finish_project(args, cfg, db, cursor)
        elif args.action == 'check':
            # status report
            if not project.check_if_project_name_valid(args):
                logging.error('Project name "{}" is not a valid choice!'.format(
                    args.project_name))
            elif not project.check_if_project_name_taken(args, db, cursor):
                logging.error('Project name "{}" does not exist!'.format(
                    args.project_name))
            else:
                project.get_project_info(args, cfg, db, cursor)
                if args.history:
                    pass
                elif args.frame_list:
                    print(project.get_project_frame_list(args, cfg, db, cursor))
        elif args.action == 'render':
            if args.one_batch:
                project.render_frames(args, cfg, db, cursor)
            elif args.some_batches > 0:
                for _ in range(args.some_batches):
                    project.render_frames(args, cfg, db, cursor)
            else:
                # keep rendering until the project has no open frames left
                while project.has_project_open_frames(args, cfg, db, cursor):
                    project.render_frames(args, cfg, db, cursor)
        elif args.action == 'free':
            if args.free_failed:
                project.free_failed_frames(args, cfg, db, cursor)
            elif args.free_waiting:
                project.free_waiting_frames(args, cfg, db, cursor)
    finally:
        database.close_connection(db, cursor)
from semantic import load_corpus, load_lsi, cosine from semantic import update_vector, important_words from assets import gather_assets # If necessary, download the trained # model from Amazon S3 gather_assets() # Load the semantic models dictionary, corpus, tfidf, bar_idx_map, idx_bar_map = load_corpus() lsi, corpus_lsi_tfidf, lsi_index = load_lsi() # Connect to the db mongo_db, mongo_connection = connect_to_database(table_name="barkov_chain") app = Flask(__name__) @app.route('/') def index(): return render_template('index.html') @app.route('/slides') def slides(): return render_template('slides.html') @app.route('/api/initial_location', methods=['POST'] ) def api_initial_location(): """ Take lat/lon coordinates and return
def tearDown(self):
    """
    Drop the "fake" table AllCars010115 to clean up the database.
    """
    cursor, _connection = database.connect_to_database()
    drop_statement = "DROP TABLE AllCars010115"
    database.query(cursor, drop_statement)
def setUp(self):
    """
    Create the "fake" table AllCars010115 with two INT columns to test on.
    """
    cursor, _connection = database.connect_to_database()
    create_statement = "CREATE TABLE AllCars010115(Column1 INT, Column2 INT)"
    database.query(cursor, create_statement)
FROM files INNER JOIN devices ON files.device_id=devices.id WHERE files.type='{type}' AND devices.name='{device_name}' GROUP BY files.type, files.subtype; """.format(type=args.filetype, device_name=args.device_name) sql_select_examples = """ SELECT relpath FROM files INNER JOIN devices ON files.device_id=devices.id WHERE files.type=? AND files.subtype=? AND devices.name='{device_name}' LIMIT 3; """.format(device_name=args.device_name) try: conn = connect_to_database(db_url=args.db_url) cur = conn.execute(sql_select_subtypes) for c in cur: print("type: '{}' subtype: '{}' count: {}".format(*c)) cursor = conn.execute(sql_select_examples, c[:2]) for c1 in cursor: print('\t{}'.format(c1[0])) finally: conn.close()
def main():
    """Match NYMag locations to foursquare and/or fetch foursquare tips.

    Command line options:
        --match/-m N  run ``match_foursquare_id`` with N
        --tips/-t N   run ``add_tips_to_db`` with N

    Returns 255 when ``get_api`` raises KeyError, otherwise None.
    """
    parser = argparse.ArgumentParser(description='Scrape data from NYMag.')
    parser.add_argument('--match', '-m', dest='match', type=int, default=None,
                        help='Find the closest matching foursquare location to a nymag location')
    parser.add_argument('--tips', '-t', dest='tips', type=int, default=None,
                        help='Add foursquare tips to the database')
    args = parser.parse_args()

    # Get the mongo database
    db, connection = connect_to_database()

    # Get the foursquare api
    try:
        api = get_api()
    except KeyError:
        print("Cannot get foursquare info from environment")
        print("Ensure that your environment is properly setup by sourcing 'setup.sh'")
        return 255

    # NOTE(review): the result was never used; the call is kept in case it
    # has an effect on the api object - confirm and drop if it has none.
    api.oauth.auth_url()

    num_to_match = args.match
    if num_to_match is not None:
        match_foursquare_id(db, api, num_to_match)

    tips_to_get = args.tips
    if tips_to_get is not None:
        add_tips_to_db(db, api, tips_to_get)
    # The unreachable experiments that followed the bare ``return`` here
    # (they referenced an undefined ``client``) have been removed.