def scrape(self):
    db = Db()
    page = requests.get(self.URL)
    soup = BeautifulSoup(page.content, 'html.parser')
    results = soup.find(class_='container')
    questions = results.find_all(class_='card')
    for card in questions:
        title = card.find('h5').text.strip(' \t\n\r')
        rawlocation = card.find('h6').text.strip(' \t\n\r')
        # Remove the "(Maps)" suffix from rawlocation. Note that
        # str.strip takes a *set of characters*, not a substring, so
        # strip('(Maps)') would also eat matching letters from the
        # location itself; replace() removes the exact substring.
        rawlocation = rawlocation.replace('(Maps)', '').strip()
        description = card.find('p', class_='card-text').text.strip(' \t\n\r')
        db.session.add(
            InitiativeImport(
                category=title,
                description=description,
                group="demand",
                source=self.URL,
                location=rawlocation,
            ))
    db.session.commit()
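# A quick illustration (plain stdlib Python, no project code involved) of
# why the replace() above is needed: str.strip interprets its argument as
# a set of characters to trim from both ends, not as a substring, so
# strip('(Maps)') can also strip leading letters of the place name itself.
# "Maastricht" is just a hypothetical example value.
assert 'Maastricht (Maps)'.strip('(Maps)') == 'tricht '
assert 'Maastricht (Maps)'.replace('(Maps)', '').strip() == 'Maastricht'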
def scrape():
    db = Db()
    # Read the task list from the JSON API.
    response = requests.get(
        'https://api-server-271218.appspot.com/v1/tasks?zipcode=')
    result = json.loads(response.content)
    questions = result['data']['tasks']
    for card in questions:
        db.session.add(
            InitiativeImport(
                name=card['firstName'],
                category=card['TaskType']['name'],
                description=card['description'],
                group="demand",
                source='https://www.gewoonmensendiemensenwillenhelpen.nl/ik-wil-helpen',
                source_id=card['id'],
                location=card['zipcode'] + ' ' + card['city'],
                frequency=card['when'],
            ))
    db.session.commit()
def scrape(self):
    db = Db()
    for company in self.zorgheldenautos:
        db.session.add(
            InitiativeImport(
                name=company,
                group="zorgheldenauto",
                source='https://www.auto.nl/zorgheldenauto',
            ))
    db.session.commit()
def scrape(self):
    db = Db()
    counter = 1
    while counter > 0:
        page = requests.get(self.URL + str(counter))
        soup = BeautifulSoup(page.content, 'html.parser')
        results = soup.find_all(class_='postpreview')
        if len(results) > 0:
            counter += 1
            for card in results:
                try:
                    title = card.find(
                        class_='heading3 heading3--semibold').text.strip(' \t\n\r')
                    name = card.find(class_='entity-content-title').text
                    description = card.find(
                        class_='paragraph').text.strip(' \t\n\r')
                    rawtheme = card.find(class_='postpreview-subtitle').text
                    link = card.find(class_='postpreview-content')
                    final_link = link['href']
                    source_id = final_link.split('/')[-2]
                    db.session.add(
                        InitiativeImport(
                            name=name + " - " + title,
                            description=description,
                            group=rawtheme,
                            source=final_link,
                            source_id=source_id,
                        ))
                except Exception:
                    # A card missing one of the expected elements is
                    # printed and skipped rather than aborting the run.
                    print(card)
        else:
            # No results on this page: stop paginating.
            counter = -1
    db.session.commit()
""" Created on Mon Apr 20 14:11:31 2020 @author: jskro """ import datetime as dt import re from .TreeParser import TreeParser # class for scraping static website from models import InitiativeImport, Db # initialize Db db = Db() """ steps 1. get all initiativen from wijamsterdam.nl 2. scrape each initiatief, collect in records 3. insert records into db table """ # Step 1 # get all "initiatieven" urls from wijamsterdam # SCRAPER is defined by: url, schemas, metadata url = 'https://wijamsterdam.nl/initiatieven' # schemas: defines fields to be scraped # schema: fieldname:{xpath,all,cast,transform} schemas = { 'initiatives': { 'xpath': '//*[@class="tile-list ideas-list"]/div/a[@href]', 'all': True, 'transform': lambda elements: [e.attrib.values() for e in elements] }
def test_display_products():
    database = Db()
    display = Display()
    rec_prod = [
        (31, "Ice Tea saveur Pêche", "d", 82, "Auchan",
         "https://fr-en.openfoodfacts.org/product/7622210601988/yahourt"),
        (32, "Ice Tea pêche", "d", 82, "Auchan",
         "https://fr-en.openfoodfacts.org/product/7622210601988/yahourt"),
        (33, "Thé glacé pêche intense", "d", 79, "Auchan",
         "https://fr-en.openfoodfacts.org/product/7622210601988/yahourt"),
        (34, "Thé infusé glacé, Thé noir parfum pêche blanche", "d", 84, "Auchan",
         "https://fr-en.openfoodfacts.org/product/7622210601988/yahourt"),
        (35, "Thé vert infusé glacé saveur Menthe", "d", 84, "Auchan",
         "https://fr-en.openfoodfacts.org/product/7622210601988/yahourt"),
        (36, "Thé noir évasion pêche & saveur hibiscus", "d", 79, "Auchan",
         "https://fr-en.openfoodfacts.org/product/7622210601988/yahourt"),
        (37, "Thé glacé pêche intense", "d", 79, "Auchan",
         "https://fr-en.openfoodfacts.org/product/7622210601988/yahourt"),
        (38, "FROSTIES", "d", 1569, "Auchan",
         "https://fr-en.openfoodfacts.org/product/7622210601988/yahourt"),
        (39, "Sucre glace", "d", 1674, "Auchan",
         "https://fr-en.openfoodfacts.org/product/7622210601988/yahourt"),
        (40, "fuze tea pêche intense (thé glacé)", "d", 79, "Auchan",
         "https://fr-en.openfoodfacts.org/product/7622210601988/yahourt"),
    ]
    prod_displayed = display.display_products(38, rec_prod)
    # The selected product (id 38) must not appear among the displayed ones.
    for prod in prod_displayed:
        assert prod[0] != 38
def test_get_infos_product():
    database = Db()
    rec_prod = database.get_infos_product(2)
    for prod in rec_prod:
        assert prod[6] == 2
def test_get_infos_category():
    DB_CONF["db"] = "off_base"
    database = Db()
    rec_cat = database.get_infos_category()
    assert rec_cat[1][1] == "Yaourts"
import json
import os

from dotenv import load_dotenv
from engineio.payload import Payload
from flask import Flask, request
from flask_cors import CORS
from flask_socketio import SocketIO
from sqlalchemy import exc

from models import Db

Payload.max_decode_packets = 20

load_dotenv()
app = Flask(__name__, static_folder='./build/static')
app.config['SEND_FILE_MAX_AGE_DEFAULT'] = 0
app.config['TEMPLATES_AUTO_RELOAD'] = True
app.config['SQLALCHEMY_DATABASE_URI'] = os.getenv('DATABASE_URL')
app.config['SQLALCHEMY_TRACK_MODIFICATIONS'] = False

cors = CORS(app, resources={r"/*": {"origins": "*"}})
db = Db(app)
clients = []
socketio = SocketIO(
    app,
    cors_allowed_origins="*",
    json=json,
)


@app.route("/LoginorRegister", methods=["GET", "POST"])
def login():
    print("Login")
    data = json.loads(request.data.decode())
    # loginOrRegister is defined elsewhere in the project.
    return loginOrRegister(data)
def __init__(self):
    self.database = Db()
    self.control = Control()