Пример #1
0
def parse(job, config):
    """Run the site-specific parser for one job and report the outcome.

    The host part of ``job['url']`` (the text between ``//`` and the next
    ``/``) selects a callable from ``PARSERS``. That callable is invoked with
    a freshly created Chrome driver and the URL.

    Args:
        job: Mapping providing the keys ``'id'``, ``'source_id'`` and
            ``'url'``. ``'id'`` and ``'source_id'`` are read before error
            handling starts, so a failed lookup of either one propagates to
            the caller.
        config: Object exposing the ``JS_FAILED`` and ``JS_FINISHED`` status
            values.

    Returns:
        A dict with the keys ``'id'``, ``'source_id'``, ``'status'``,
        ``'content'`` and ``'message'``. On success ``'status'`` is
        ``config.JS_FINISHED`` and ``'content'`` holds the parser's return
        value. On any ``Exception`` ``'status'`` stays ``config.JS_FAILED``
        and ``'message'`` holds the error text followed by the traceback.

    Exceptions that do not derive from ``Exception`` (``KeyboardInterrupt``,
    ``SystemExit``) propagate to the caller after the driver has been shut
    down.
    """
    driver = None
    result = {
        'id': job['id'],
        'source_id': job['source_id'],
        'status': config.JS_FAILED,
        'content': '',
        'message': ''
    }
    try:
        url = job['url']
        prefix = url.split('//')[1].split('/')[0]
        if prefix not in PARSERS:
            raise Exception('No parser for %s.' % url)
        # Only start a browser once we know a parser exists for this host.
        driver = util.create_chrome_driver()
        result['content'] = PARSERS[prefix](driver, url)  # Dispatch according to url
        result['status'] = config.JS_FINISHED
    except Exception as e:
        result['message'] = '%s\n%s' % (e, traceback.format_exc())
    finally:
        if driver is not None:
            try:
                driver.quit()
            except Exception as e:
                # A failed shutdown must not change the job's status, but it
                # should not vanish silently either: note it in the message.
                if result['message']:
                    result['message'] += '\n'
                result['message'] += 'driver.quit() failed: %s' % e
    # Returning here rather than inside ``finally`` keeps KeyboardInterrupt
    # and SystemExit from being swallowed.
    return result
Пример #2
0
def parse(jobs):
    """Parse a batch of jobs with one shared Chrome driver.

    For every job the host part of ``job['url']`` (the text between ``//``
    and the next ``/``) selects a callable from ``parsers``, which is invoked
    with the shared driver and the URL.

    Args:
        jobs: Iterable of mappings, each providing the keys ``'id'``,
            ``'source_id'`` and ``'url'``. ``'id'`` and ``'source_id'`` are
            read outside the per-job error handling, so a failed lookup of
            either one aborts the whole batch.

    Returns:
        A list with one dict per processed job, in input order. Every dict
        has ``'id'``, ``'source_id'``, ``'message'`` and ``'status'``.
        Successful jobs have status ``js_finished`` and additionally carry
        ``'content'`` (the parser's return value). Failed jobs have status
        ``js_failed`` and ``'message'`` holds the error text followed by the
        traceback.

    The driver is created when the first job is reached, so an empty batch
    starts no browser. It is always shut down, even when an error aborts the
    batch.
    """
    result = []
    driver = None
    try:
        for job in jobs:
            if driver is None:
                driver = util.create_chrome_driver()
            entry = {
                'id': job['id'],
                'source_id': job['source_id'],
                'message': ''
            }
            result.append(entry)
            try:
                url = job['url']
                prefix = url.split('//')[1].split('/')[0]
                if prefix not in parsers:
                    raise Exception('Parser not found for %s' % url)
                content = parsers[prefix](driver, url)
                entry['status'] = js_finished
                entry['content'] = content
            except Exception as e:
                # A single bad job must not abort the remaining ones.
                entry['status'] = js_failed
                entry['message'] = '%s\n%s' % (e, traceback.format_exc())
    finally:
        # Release the browser even if something escaped the per-job handler.
        if driver is not None:
            driver.quit()
    return result
Пример #3
0
# coding: utf-8

import sys

sys.path.append('../')
import util

# Host names covered by this module.
# NOTE(review): presumably a dispatcher matches a URL's host against this
# list to route it to parse() below -- confirm against the loader.
prefixes = ['www.miumiu.com']


def parse(driver, url):
    """Collect the product links from a listing page.

    Args:
        driver: Browser driver; it must provide ``get(url)`` and be accepted
            by ``util.find_elements_by_css_selector``.
        url: Address of the page to load.

    Returns:
        The whitespace-stripped ``href`` values of all elements matching
        ``div.product > div > a``, joined with ``';'``. Elements whose
        ``href`` is missing or blank are skipped. The result is an empty
        string when nothing matches.
    """
    driver.get(url)
    anchors = util.find_elements_by_css_selector(driver,
                                                 'div.product > div > a')
    # get_attribute() yields None for an absent attribute; calling .strip()
    # on it would fail the whole page because of one malformed anchor.
    hrefs = (anchor.get_attribute('href') for anchor in anchors)
    return ';'.join(href.strip() for href in hrefs if href and href.strip())


if __name__ == '__main__':
    # Manual run: print the links found on the URL given as first argument.
    # Check the argument before launching a browser so a missing URL does
    # not leave a stray Chrome process behind.
    if len(sys.argv) < 2:
        sys.exit('usage: %s URL' % sys.argv[0])
    driver = util.create_chrome_driver()
    try:
        print(parse(driver, sys.argv[1]))
    finally:
        # Shut the browser down even when parsing fails.
        driver.quit()