Example #1
    def scrape(self):
        db = Db()
        page = requests.get(self.URL)

        soup = BeautifulSoup(page.content, 'html.parser')
        results = soup.find(class_='container')

        questions = results.find_all(class_='card')
        for card in questions:
            title = card.find('h5').text.strip()
            rawlocation = card.find('h6').text.strip()
            # remove the '(Maps)' suffix; str.strip treats its argument as a
            # character set, so use replace to delete the literal substring
            rawlocation = rawlocation.replace('(Maps)', '').strip()

            description = card.find('p', class_='card-text').text.strip()

            db.session.add(
                InitiativeImport(
                    category=title,
                    description=description,
                    group="demand",
                    source=self.URL,
                    location=rawlocation,
                ))

        db.session.commit()
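
A note on the fix above: str.strip takes a set of characters rather than a
substring, so the original strip('(Maps)') could also eat letters that
legitimately begin or end the location text. A quick self-contained
illustration (hypothetical input string, not data from the site):

print('spam (Maps)'.strip('(Maps)'))                 # 'm ' - text mangled
print('spam (Maps)'.replace('(Maps)', '').strip())   # 'spam' - as intended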
Example #2
    def scrape(self):
        db = Db()

        # read data
        response = requests.get(
            'https://api-server-271218.appspot.com/v1/tasks?zipcode=')
        result = response.json()

        questions = result['data']['tasks']
        for card in questions:
            db.session.add(
                InitiativeImport(
                    name=card['firstName'],
                    category=card['TaskType']['name'],
                    description=card['description'],
                    group="demand",
                    source=('https://www.gewoonmensendiemensenwillenhelpen.nl'
                            '/ik-wil-helpen'),
                    source_id=card['id'],
                    location=card['zipcode'] + ' ' + card['city'],
                    frequency=card['when'],
                ))

        db.session.commit()
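
One caveat in this example: the loop assumes every task dict carries all of
the listed keys, so a single malformed record raises KeyError and aborts the
import before the commit. A minimal defensive variant of the loop body (the
field names match the example above; skipping bad records is an assumption,
not what the original code does):

        for card in result['data']['tasks']:
            try:
                row = InitiativeImport(
                    name=card['firstName'],
                    category=card['TaskType']['name'],
                    description=card['description'],
                    group="demand",
                    source_id=card['id'],
                    location=card['zipcode'] + ' ' + card['city'],
                    frequency=card['when'],
                )
            except (KeyError, TypeError):
                continue  # skip records missing an expected field
            db.session.add(row)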
Example #3
    def scrape(self):
        db = Db()
        for company in self.zorgheldenautos:
            db.session.add(
                InitiativeImport(
                    name=company,
                    group="zorgheldenauto",
                    source='https://www.auto.nl/zorgheldenauto',
                ))

        db.session.commit()
Example #4
    def scrape(self):
        db = Db()
        counter = 1
        while True:
            page = requests.get(self.URL + str(counter))
            soup = BeautifulSoup(page.content, 'html.parser')
            results = soup.find_all(class_='postpreview')

            if len(results) > 0:
                counter += 1
                for card in results:
                    try:
                        title = card.find(
                            class_='heading3 heading3--semibold').text.strip()
                        name = card.find(class_='entity-content-title').text
                        description = card.find(
                            class_='paragraph').text.strip()
                        rawtheme = card.find(
                            class_='postpreview-subtitle').text
                        link = card.find(class_='postpreview-content')
                        final_link = link['href']
                        source_id = final_link.split('/')[-2]

                        db.session.add(
                            InitiativeImport(
                                name=name + " - " + title,
                                description=description,
                                group=rawtheme,
                                source=final_link,
                                source_id=source_id,
                            ))
                    except Exception:
                        # dump the card that failed to parse and move on
                        print(card)
            else:
                break

        db.session.commit()
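
The counter bookkeeping in the original (start at 1, bump per page, flip the
counter negative as an exit flag) is easy to misread. The same
fetch-pages-until-one-comes-back-empty pattern reads more directly with
itertools.count; a sketch under the same assumptions as the example
(self.URL accepts a page number suffix, an empty result list marks the end):

import itertools

def scrape(self):
    db = Db()
    for page_number in itertools.count(start=1):
        page = requests.get(self.URL + str(page_number))
        soup = BeautifulSoup(page.content, 'html.parser')
        results = soup.find_all(class_='postpreview')
        if not results:
            break  # an empty page means we ran past the last one
        for card in results:
            ...  # parse each card and db.session.add() as above
    db.session.commit()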
Example #5
"""
Created on Mon Apr 20 14:11:31 2020

@author: jskro
"""
import datetime as dt
import re

from .TreeParser import TreeParser  # class for scraping static website
from models import InitiativeImport, Db

# initialize Db
db = Db()
"""
    steps
    1. get all initiativen from wijamsterdam.nl
    2. scrape each initiatief, collect in records
    3. insert records into db table 
"""
# Step 1
# get all "initiatieven" urls from wijamsterdam
# SCRAPER is defined by: url, schemas, metadata
url = 'https://wijamsterdam.nl/initiatieven'
# schemas: defines fields to be scraped
# schema: fieldname:{xpath,all,cast,transform}
schemas = {
    'initiatives': {
        'xpath': '//*[@class="tile-list ideas-list"]/div/a[@href]',
        'all': True,
        'transform': lambda elements: [e.attrib.values() for e in elements]
    }
}
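
The snippet cuts off after the schema definition, and the TreeParser
internals are not shown. As a rough illustration of what a
fieldname:{xpath, all, cast, transform} schema can drive, here is a
self-contained interpreter sketch built on lxml (the meaning given to each
key is an assumption, not the project's actual TreeParser code):

import requests
from lxml import html

def apply_schema(page_url, schemas):
    # hypothetical interpreter for the schema layout defined above
    tree = html.fromstring(requests.get(page_url).content)
    record = {}
    for field, spec in schemas.items():
        matches = tree.xpath(spec['xpath'])
        if not spec.get('all', False):
            matches = matches[:1]               # keep only the first match
        if 'transform' in spec:
            matches = spec['transform'](matches)
        if 'cast' in spec:
            matches = [spec['cast'](m) for m in matches]
        record[field] = matches
    return record

# apply_schema(url, schemas)['initiatives'] would then hold the href
# attribute values picked out by the transform lambda above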
Example #6
def test_display_products():
    database = Db()
    display = Display()
    rec_prod = [
        (
            31,
            "Ice Tea saveur Pêche",
            "d",
            82,
            "Auchan",
            "https://fr-en.openfoodfacts.org/product/7622210601988/yahourt",
        ),
        (
            32,
            "Ice Tea pêche",
            "d",
            82,
            "Auchan",
            "https://fr-en.openfoodfacts.org/product/7622210601988/yahourt",
        ),
        (
            33,
            "Thé glacé pêche intense",
            "d",
            79,
            "Auchan",
            "https://fr-en.openfoodfacts.org/product/7622210601988/yahourt",
        ),
        (
            34,
            "Thé infusé glacé, Thé noir parfum pêche blanche",
            "d",
            84,
            "Auchan",
            "https://fr-en.openfoodfacts.org/product/7622210601988/yahourt",
        ),
        (
            35,
            "Thé vert infusé glacé saveur Menthe",
            "d",
            84,
            "Auchan",
            "https://fr-en.openfoodfacts.org/product/7622210601988/yahourt",
        ),
        (
            36,
            "Thé noir évasion pêche & saveur hibiscus",
            "d",
            79,
            "Auchan",
            "https://fr-en.openfoodfacts.org/product/7622210601988/yahourt",
        ),
        (
            37,
            "Thé glacé pêche intense",
            "d",
            79,
            "Auchan",
            "https://fr-en.openfoodfacts.org/product/7622210601988/yahourt",
        ),
        (
            38,
            "FROSTIES",
            "d",
            1569,
            "Auchan",
            "https://fr-en.openfoodfacts.org/product/7622210601988/yahourt",
        ),
        (
            39,
            "Sucre glace",
            "d",
            1674,
            "Auchan",
            "https://fr-en.openfoodfacts.org/product/7622210601988/yahourt",
        ),
        (
            40,
            "fuze tea pêche intense (thé glacé)",
            "d",
            79,
            "Auchan",
            "https://fr-en.openfoodfacts.org/product/7622210601988/yahourt",
        ),
    ]
    prod_displayed = display.display_products(38, rec_prod)
    for prod in prod_displayed:
        assert prod[0] != 38
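
The assertion only pins down one property of Display.display_products: the
rows returned for id 38 must not include the row whose first column is 38.
A minimal implementation consistent with that contract (a sketch, not the
project's actual Display class):

class Display:
    def display_products(self, excluded_id, records):
        # keep every product row except the one with the excluded id
        return [row for row in records if row[0] != excluded_id]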
Example #7
def test_get_infos_product():
    database = Db()
    rec_prod = database.get_infos_product(2)
    for prod in rec_prod:
        assert prod[6] == 2
Example #8
def test_get_infos_category():
    DB_CONF["db"] = "off_base"
    database = Db()
    rec_cat = database.get_infos_category()

    assert rec_cat[1][1] == "Yaourts"
Example #9
import json
import os

from dotenv import load_dotenv
from engineio.payload import Payload
from flask import Flask, request
from flask_cors import CORS
from flask_socketio import SocketIO
from sqlalchemy import exc
from models import Db

Payload.max_decode_packets = 20

load_dotenv()

app = Flask(__name__, static_folder='./build/static')
app.config['SEND_FILE_MAX_AGE_DEFAULT'] = 0
app.config['TEMPLATES_AUTO_RELOAD'] = True
app.config['SQLALCHEMY_DATABASE_URI'] = os.getenv('DATABASE_URL')
app.config['SQLALCHEMY_TRACK_MODIFICATIONS'] = False

cors = CORS(app, resources={r"/*": {"origins": "*"}})

db = Db(app)

clients = []

socketio = SocketIO(
    app,
    cors_allowed_origins="*",
    json=json,
)


@app.route("/LoginorRegister", methods=["GET", "POST"])
def login():
    print("Login")
    data = json.loads(request.data.decode())
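    # loginOrRegister is a project helper defined elsewhere in this codebase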
    return loginOrRegister(data)
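
The clients list above is declared but never used in this excerpt. With
Flask-SocketIO it would typically be maintained from connection handlers; a
sketch under that assumption (connect/disconnect are standard Flask-SocketIO
events, the bookkeeping itself is hypothetical):

@socketio.on('connect')
def handle_connect():
    clients.append(request.sid)      # request.sid is set by Flask-SocketIO

@socketio.on('disconnect')
def handle_disconnect():
    if request.sid in clients:
        clients.remove(request.sid)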
Example #10
    def __init__(self):
        self.database = Db()
        self.control = Control()