Пример #1
0
    def getProcessedSet(self):
        """Build the query selecting ``LatestTopic`` rows for this batch.

        A new engine and session are created from the configuration on
        every call. The result of ``filter_by`` is returned as-is; this
        method does not execute it and does not close the session or
        dispose of the engine, so the returned object stays bound to the
        session created here.

        Returns:
            The ``LatestTopic`` query filtered by ``batch_id == self.batch_id``.
        """
        # TODO: refactor - same code is in scrapeforum, move out to a DB object
        configurator = Configurator()
        db_settings = configurator.getDb()

        connection_url = configurator.getDbConnectionString()
        # The config stores the flag as text; only the exact string 'True' enables echo.
        echo_sql = db_settings['echo'] == 'True'
        db_engine = create_engine(connection_url, echo=echo_sql)
        db_session = sessionmaker(bind=db_engine)()
        # TODO - end

        topics = db_session.query(LatestTopic)
        return topics.filter_by(batch_id=self.batch_id)
Пример #2
0
    def __init__(self):
        """Store the highest ``LatestTopic.batch_id`` as ``self.batch_id``.

        A temporary engine and session are created from the configuration,
        used for a single ``max(batch_id)`` query, and released again
        before the constructor returns, whether or not the query succeeded.

        Raises:
            Any exception raised while reading the configuration, creating
            the engine or running the query propagates unchanged.
        """
        # Pre-bind both handles so the cleanup below is safe even when the
        # setup fails before they are created; otherwise the cleanup itself
        # would raise and hide the original error.
        engine = None
        session = None
        try:
            # TODO: refactor - same code is in scrapeforum, move out to a DB object
            config = Configurator()
            db = config.getDb()

            connectionString = config.getDbConnectionString()
            engine = create_engine(connectionString, echo=(db['echo'] == 'True'))
            Session = sessionmaker(bind=engine)
            session = Session()
            # TODO - end

            # TODO add filter for 'success = 1'
            self.batch_id = session.query(func.max(LatestTopic.batch_id).label("max_batch_id")).one().max_batch_id
            # TODO: report error (failures currently just propagate to the caller)
        finally:
            try:
                if session is not None:
                    session.close()
            finally:
                # Dispose of the engine even if closing the session failed.
                if engine is not None:
                    engine.dispose()
Пример #3
0
    else:
        try:
            return BeautifulSoup(html.read())
        except Exception as e:
            logger.error("Parsing HTML failed. Error:\n" + repr(e))
            return None


#################
# Scraping body #
#################

# Read configuration
try:
    config = Configurator()
    db = config.getDb()
    URL = config.getURL()["startURL"]
    logger.debug("URL is set to " + URL)
except KeyError:
    logger.fatal("Unable to read configuration from '../config/config.ini'. Make sure you are running the script from 'scrapeforum' folder.")
    # Nothing below can run without the configuration (URL would be
    # undefined), so stop here with the real cause instead of failing
    # later with an unrelated NameError.
    raise

# TODO decide if it is needed or it's too much spam
# Send notification email about start
Mailer(constants.EMAIL_TYPE_NOTIFICATION).send("Scraping started", "Scraping started for " + URL + " at " + datetime.now().strftime('%Y-%m-%d %H:%M:%S'))

# Connect to database
try:
    # Set up DB connection
    connectionString = config.getDbConnectionString()
    logger.debug("Trying to connect to DB with connectionString: " + connectionString)
    engine = create_engine(connectionString, echo=(db['echo'] == 'True'))