Пример #1
0
def add_market():
    """Ask the user for a product directory and register it as a new market.

    A native directory chooser is shown. The directory's base name becomes
    the market name and the path is stored with backslash separators.

    Returns:
        The new market dict (``name``, ``directory``) when it was added.
        ``None`` when the dialog was cancelled or a market with that name is
        already registered; in both cases a ``notify`` event is emitted to
        the requesting client.
    """
    root = tkinter.Tk()
    try:
        root.withdraw()
        path = filedialog.askdirectory(parent=root,
                                       initialdir="/",
                                       title='请选择上传产品目录')
    finally:
        # The hidden root window only exists to parent the dialog; without
        # destroy() every call would leave another Tk instance alive.
        root.destroy()

    if not path:
        msg = {
            'type': 'primary',
            'content': 'No directory of market was selected.'
        }
        emit('notify', msg, room=request.sid)
        return

    name = os.path.basename(path)
    path = path.replace('/', '\\')
    market = {'name': name, 'directory': path}
    markets = current_app.data.markets

    if name in markets:
        # ``market`` is a plain dict, so the name must be read by key
        # (attribute access raised AttributeError here).
        msg = {
            'type': 'warning',
            'content': 'The Market of ' + name + ' was already in system!'
        }
        emit('notify', msg, room=request.sid)
        return

    markets[name] = market
    JSON.serialize(markets, '.', 'storage', 'markets.json')
    return market
Пример #2
0
    def serialize(self, visitors):
        """Persist visitors grouped by day.

        Every visitor record is bucketed by its ``date`` value and each
        bucket is written to ``visitors_<date>.json`` under the ``visitors``
        sub-folder of ``self.confi_dir``.
        """
        by_date = {}
        for visitor in visitors:
            by_date.setdefault(visitor['date'], []).append(visitor)

        for day, group in by_date.items():
            filename = 'visitors_' + day + '.json'
            JSON.serialize(group, self.confi_dir, 'visitors', filename)
Пример #3
0
 def save_tracking_ids(self):
     """Write ``self.tracking_ids`` to the per-account tracking file.

     The ``datetime`` of every entry is converted with ``to_atom_string()``;
     ``status`` is copied as is and ``emails`` only when present. The file
     name is derived from the first word of ``self.lname``.
     """
     filename = 'inquiry_tracking_ids_' + self.lname.split(' ')[0] + '.json'
     config_root = self.market['directory'] + '_config'
     snapshot = {}
     for tracking_id, info in self.tracking_ids.items():
         entry = snapshot[tracking_id] = {}
         entry['datetime'] = info['datetime'].to_atom_string()
         entry['status'] = info['status']
         if 'emails' in info:
             entry['emails'] = info['emails']
     JSON.serialize(snapshot, config_root, [], filename)
Пример #4
0
def get_alibaba(node):
    """Return the cached ``Alibaba`` session, creating and logging it in once.

    ``node`` has the form ``<market>[...][:<login name>]@<host>``. With no
    login name, or when it equals the market's own ``lname``, the market's
    main credentials are used; otherwise the credentials of every account
    whose ``lname`` contains the login name are tried in order (last match
    wins).

    Raises:
        KeyError: the market is not present in ``markets.json``.
        AttributeError: no account matched, so there is nothing to log in.
    """
    if app_data['alibaba'] is not None:
        return app_data['alibaba']

    text = node.split('@')[0]
    parts = text.split(':')
    market_name = parts[0].split('[')[0]
    lname = parts[1] if len(parts) == 2 else None
    market = JSON.deserialize('.', 'storage', 'markets.json')[market_name]

    if lname is None or lname == market['lname']:
        app_data['alibaba'] = Alibaba(market['lid'],
                                      market['lpwd'],
                                      headless=headless,
                                      browser=get_browser())
    else:
        for account in market['accounts']:
            # Log names only: the account dict also carries the password.
            print(lname, account['lname'])
            if lname in account['lname']:
                # Sub-accounts log in with their own id and password (the
                # same convention get_p4p follows); the previous code read
                # the id from an undefined name ``alibaba``.
                app_data['alibaba'] = Alibaba(account['lid'],
                                              account['lpwd'],
                                              headless=headless,
                                              browser=get_browser())
    app_data['alibaba'].login()
    return app_data['alibaba']
Пример #5
0
def get_products(market, paths):
    """Describe the product folders found below ``paths`` of a market.

    Returns a dict with:
      * ``folders``    - sub-directories of ``<directory>/<paths...>``;
      * ``files``      - for each of those folders, its directory listing;
      * ``attributes`` - ``<folder>_<pid>`` mapped to the content of every
        ``*_attributes.json`` found in the mirrored ``<directory>_config``
        tree (empty when that tree does not exist).
    """
    product_dir = os.path.join(market['directory'], *paths)
    folders = []
    for entry in os.listdir(product_dir):
        if os.path.isdir(os.path.join(product_dir, entry)):
            folders.append(entry)

    files = {
        name: os.listdir(os.path.join(product_dir, name))
        for name in folders
    }

    attrs = {}
    root_config = market['directory'] + '_config'
    path_config = os.path.join(root_config, *paths)
    if os.path.exists(path_config):
        for folder in os.listdir(path_config):
            folder_path = os.path.join(path_config, folder)
            # Skip entries that vanished meanwhile and plain files.
            if not os.path.exists(folder_path) or os.path.isfile(folder_path):
                continue
            for file_name in os.listdir(folder_path):
                if not file_name.endswith('_attributes.json'):
                    continue
                pid = file_name.split('_')[0]
                sub_paths = paths[:]
                sub_paths.append(folder)
                attrs[folder + '_' + pid] = JSON.deserialize(
                    root_config, sub_paths, file_name)

    return dict(folders=folders, files=files, attributes=attrs)
Пример #6
0
def get_visitor(node):
    """Return the cached ``Visitor`` helper, building it on first use.

    ``node`` has the form ``<market>[...][:<login name>]@<host>``. Without a
    login name (or when it equals the market's ``lname``) the helper is
    built for the market itself; otherwise for every account whose ``lname``
    contains the login name (last match wins).
    """
    if 'visitor' in app_data:
        return app_data['visitor']

    label = node.split('@')[0]
    market_name = label.split(':')[0].split('[')[0]
    pieces = label.split(':')
    lname = pieces[1] if len(pieces) == 2 else None
    market = JSON.deserialize('.', 'storage', 'markets.json')[market_name]

    use_main_account = lname is None or lname == market['lname']
    if use_main_account:
        app_data['visitor'] = Visitor(market,
                                      headless=headless,
                                      browser=get_browser())
    else:
        for account in market['accounts']:
            print(lname, account)
            if lname not in account['lname']:
                continue
            app_data['visitor'] = Visitor(market,
                                          account,
                                          headless=headless,
                                          browser=get_browser())
    return app_data['visitor']
Пример #7
0
def remove_market(market):
    """Delete ``market`` from ``markets.json``.

    Returns ``True`` when the market was stored and has been removed.
    Otherwise a warning ``notify`` event is emitted to the requesting client
    and ``False`` is returned.
    """
    stored = JSON.deserialize('.', 'storage', 'markets.json')
    if market['name'] not in stored:
        warning = {
            'type': 'warning',
            'content': ('Market ' + market['name'] +
                        ' was not found. Try Refesh Your Browser!'),
        }
        emit('notify', warning, room=request.sid)
        return False

    del stored[market['name']]
    JSON.serialize(stored, '.', 'storage', 'markets.json')
    return True
Пример #8
0
def get_market(name):
    """Return the stored market called ``name``, or ``None`` if unknown.

    ``None`` is also returned when ``markets.json`` could not be loaded;
    other callers in this code base treat a ``None`` result from
    ``JSON.deserialize`` as "no markets yet", and the membership test on
    ``None`` used to raise ``TypeError``.
    """
    markets = JSON.deserialize('.', 'storage', 'markets.json')
    if not markets:
        return None
    return markets.get(name)
Пример #9
0
def get_p4p(node):
    """Return the cached ``P4P`` controller, building it on first use.

    ``node`` has the form ``<market>[...][:<login name>]@<host>``. Without a
    login name (or when it equals the market's ``lname``) the market's main
    credentials are used; otherwise those of every account whose ``lname``
    contains the login name (last match wins).
    """
    cached = app_data['p4p']
    if cached is not None:
        return cached

    label = node.split('@')[0]
    market_name = label.split(':')[0].split('[')[0]
    pieces = label.split(':')
    lname = pieces[1] if len(pieces) == 2 else None
    market = JSON.deserialize('.', 'storage', 'markets.json')[market_name]

    def build(lid, lpwd):
        # One place for the constructor arguments shared by both branches.
        return P4P(market,
                   lid,
                   lpwd,
                   broker_url=app.conf.broker_url,
                   browser=get_browser(),
                   headless=headless)

    if lname is None or lname == market['lname']:
        lid, lpwd, lname = market['lid'], market['lpwd'], market['lname']
        app_data['p4p'] = build(lid, lpwd)
    else:
        for account in market['accounts']:
            if lname not in account['lname']:
                continue
            app_data['p4p'] = build(account['lid'], account['lpwd'])

    return app_data['p4p']
Пример #10
0
def settings():
    """Render the settings page with all stored markets.

    An empty mapping is passed to the template when ``markets.json`` could
    not be loaded (``JSON.deserialize`` returned ``None``).
    """
    stored = JSON.deserialize('.', 'storage', 'markets.json')
    markets = {} if stored is None else stored
    page = render_template('settings/index.html', markets=markets)
    return make_response(page)
Пример #11
0
 def deserialize(self):
     """Load every ``visitors_*.json`` file and return the combined list.

     File names are taken from ``self.visitor_dir``; each matching file is
     read from the ``visitors`` sub-folder of ``self.confi_dir``.
     """
     visitors = []
     for name in os.listdir(self.visitor_dir):
         if name.startswith('visitors_') and name.endswith('.json'):
             visitors.extend(
                 JSON.deserialize(self.confi_dir, 'visitors', name))
     return visitors
Пример #12
0
    def crawl_product_ranking(self, keyword, pages):
        """Crawl result pages for ``keyword`` and store the ranking.

        Pages are fetched with ``self.next_page`` until ``self.current_page``
        reaches ``pages``; ``self.crawl_current_page`` fills ``records``.
        The result is written to ``products_ranking/<keyword>.json`` in the
        market's ``_config`` directory and returned.
        """
        self.current_page = 0
        found = []
        print(keyword, end=': ')

        while self.current_page < pages:
            page_html = self.next_page(keyword)
            self.crawl_current_page(page_html, records=found)
        print('length:', len(found), end=', ')
        print('done!')

        ranking = {
            'datetime': pendulum.now().to_datetime_string(),
            'records': found,
        }
        config_root = self.market['directory'] + '_config'
        JSON.serialize(ranking, config_root, 'products_ranking',
                       keyword + '.json')
        return ranking
Пример #13
0
 def load_keywords(self, tp):
     """Load the stored keyword list of type ``tp`` into ``keywords_list``.

     Does nothing for a falsy ``tp``. A missing file (``None`` from
     ``JSON.deserialize``) results in an empty list.
     """
     if not tp:
         return

     filename = 'p4p_keywords_list_' + tp + '.json'
     config_root = self.market['directory'] + '_config'
     loaded = JSON.deserialize(config_root, [], filename)
     self.keywords_list[tp] = [] if loaded is None else loaded
Пример #14
0
def deserialize(market, paths, filename, shallow=False):
    """Load ``filename`` from the market's ``_config`` tree.

    Args:
        market: market dict; ``market['directory'] + '_config'`` is the root.
        paths: list of sub-folder names below the root.
        filename: file to load.
        shallow: when true, load only ``<root>/<paths...>/<filename>``.

    Returns:
        With ``shallow`` the single loaded object. Otherwise a list with one
        entry per level, walking from ``paths`` up to the root. While the
        deepest folder name ends in ``' serie'`` and ``filename`` contains an
        underscore, the walk first retries in the same folder with the part
        of the name after the first underscore before moving up.

    The caller's ``paths`` list is left untouched; the walk works on a copy
    (previously it was popped down to an empty list as a side effect).
    """
    root = (market['directory'] + '_config')

    if shallow:
        return JSON.deserialize(root, paths, filename)

    remaining = list(paths)
    objects = []
    while True:
        objects.append(JSON.deserialize(root, remaining[:], filename))

        if not remaining:
            break

        if remaining[-1].lower().endswith(' serie') and '_' in filename:
            filename = filename.split('_')[1]
        else:
            remaining.pop()

    return objects
Пример #15
0
def markets(name):
    """Render the markets page for the market called ``name``.

    The template receives all stored markets plus the selected one.
    ``market`` is ``None`` when nothing is stored yet or when ``name`` is
    unknown (an unknown name used to raise ``KeyError``).
    """
    all_markets = JSON.deserialize('.', 'storage', 'markets.json')
    if all_markets is None:
        all_markets = {}
    market = all_markets.get(name)

    response = make_response(
        render_template('markets/index.html',
                        markets=all_markets,
                        market=market))
    return response
Пример #16
0
def get_products_ranking(node):
    """Return the cached ``ProductsRanking``, creating it on first use.

    The market name is the part of ``node`` before ``@``, cut at the first
    ``:`` and then at the first ``[``.
    """
    existing = app_data['products_ranking']
    if existing is not None:
        return existing

    market_name = node.split('@')[0].split(':')[0].split('[')[0]
    stored = JSON.deserialize('.', 'storage', 'markets.json')

    ranking = ProductsRanking(market=stored[market_name])
    app_data['products_ranking'] = ranking
    return ranking
Пример #17
0
    def check_balance(self):
        """Read the balance shown on the page and record any change.

        The balance text is taken from the edit-price dialog. When
        ``self.broker_url`` is set, the previous reading is kept in redis
        (key ``<market name>_p4p_balance``); otherwise it is kept on
        ``self.balance``. If the reading differs from the previous one, the
        difference (previous minus current, two decimals) is appended with a
        timestamp to ``p4p_balance_change_history_<date>.json.gz`` in the
        market's ``_config`` directory. In the redis variant the market's
        ``_p4p_sub_budget`` counter is adjusted as well.

        Nothing is recorded when the reading equals ``self.initial_balance``
        or when there is no previous reading.
        """
        balance = self.browser.find_element_by_css_selector(
            '.sc-manage-edit-price-dialog span[data-role="span-balance"]').text

        if self.broker_url:
            # getset stores the new reading and hands back the previous one.
            self.balance = self.redis.getset(self.market['name']+'_p4p_balance', balance)

            if float(balance) == self.initial_balance or self.balance is None:
                return
            else:
                # NOTE(review): decode() implies the redis client returns
                # bytes here - confirm the client is not created with
                # decode_responses=True.
                self.balance = self.balance.decode()
                if self.balance != balance:
                    diff = format(float(self.balance) - float(balance), '.2f')

                    # The sub-budget moves by the negated difference.
                    changes = 0 - float(diff)
                    sub_budget = self.redis.get(self.market['name'] + '_p4p_sub_budget')
                    if sub_budget is not None and float(sub_budget) > 0:
                        # Flag an overflow once the remaining sub-budget
                        # drops below zero.
                        if self.redis.incrbyfloat(self.market['name'] + '_p4p_sub_budget', changes) < 0:
                            self.redis.set(self.market['name'] + '_p4p_sub_budget_overflow', True)

                    time_str = arrow.now().format('YYYY-MM-DD HH:mm:ss')
                    date_str = time_str.split(' ')[0]
                    root = self.market['directory'] + '_config'
                    fn = 'p4p_balance_change_history_' + date_str + '.json.gz'
                    JSON.serialize([time_str, diff], root, [], fn, append=True)
        else:
            if self.balance is None:
                # First reading: remember it, nothing to compare against yet.
                self.balance = balance
            elif float(balance) == self.initial_balance:
                return
            elif self.balance != balance:
                diff = format(float(self.balance) - float(balance), '.2f')
                self.balance = balance

                time_str = arrow.now().format('YYYY-MM-DD HH:mm:ss')
                date_str = time_str.split(' ')[0]
                root = self.market['directory'] + '_config'
                fn = 'p4p_balance_change_history_'+date_str+'.json.gz'
                JSON.serialize([time_str, diff], root, [], fn, append=True)
Пример #18
0
    def crawl_keywords(self):
        """Scrape every page of the keyword table and store the result.

        While holding ``self.lock`` the keyword page is loaded and each table
        row is parsed into a dict (``id``, ``status``, ``kws``, ``my_price``,
        ``average_price``, ``match_level``, ``search_count``, ``buy_count``).
        A row that belongs to several comma-separated groups yields one dict
        per group, with ``group`` set accordingly. Paging stops when
        ``self.next_page()`` returns a falsy value.

        Returns:
            The list of keyword dicts, which is also written to
            ``p4p_keywords_list.json`` in the market's ``_config`` directory.

        Raises:
            AttributeError: a row lacks one of the expected elements (the
                regex searches return ``None`` in that case).
        """
        keywords = []
        with self.lock:
            self.load_url()

            while True:
                html = self.browser.find_element_by_css_selector(
                    "div.keyword-manage .bp-table-main-wraper>table tbody"
                ).get_attribute('outerHTML')
                tbody = pq(html)
                trs = tbody.find('tr')

                for tr in trs:
                    # Wrap the row once instead of once per cell lookup.
                    row = pq(tr)
                    kws = {}
                    kws['id'] = row.find('td:first-child input').val()
                    kws['status'] = row.find(
                        'td.bp-cell-status .bp-dropdown-main i'
                    ).attr('class').split('-').pop()
                    kws['kws'] = row.find('td.bp-cell-left').text().strip()
                    groups = row.find(
                        'td[data-role="table-col-tag"]').text().strip()
                    kws['my_price'] = row.find(
                        'td:nth-child(5) a').text().strip()
                    kws['average_price'] = row.find(
                        'td:nth-child(6)').text().strip()
                    # Raw strings: "\d" in a normal literal is an invalid
                    # escape sequence that newer Python versions warn about.
                    string = row.find('span.qs-star-wrap i').attr('class')
                    kws['match_level'] = re.search(
                        r'qsstar-(\d+)', string).group(1)
                    string = row.find('.bp-icon-progress-orange').html()
                    kws['search_count'] = re.search(
                        r':(\d+%)', string).group(1)
                    string = row.find('.bp-icon-progress-blue').html()
                    kws['buy_count'] = re.search(
                        r':(\d+%)', string).group(1)
                    for grp in groups.split(','):
                        obj = kws.copy()
                        obj['group'] = grp.strip()
                        keywords.append(obj)

                if not self.next_page():
                    break

        root = self.market['directory'] + '_config'
        fn = 'p4p_keywords_list.json'
        JSON.serialize(keywords, root, [], fn)
        return keywords
Пример #19
0
    def __init__(self, market, account=None, headless=True, browser=None):
        """Set up credentials, paths, the product index and recommendations.

        Args:
            market: market dict; provides ``directory`` and, when no account
                is given, ``lid``/``lpwd``/``lname``/``mobile``.
            account: optional account dict with the same four keys; takes
                precedence over the market's own credentials.
            headless: stored on ``self.headless``.
            browser: stored on ``self.browser``.

        ``self.products`` maps ``ali_id`` to the entries of
        ``product_list.json`` from the market's ``_config`` directory; it
        stays empty when that file cannot be loaded.
        """
        self.market = market
        credentials = account if account else market
        self.lid = credentials['lid']
        self.lpwd = credentials['lpwd']
        self.lname = credentials['lname']
        self.mobile = credentials['mobile']

        # Always a fresh dict, never the caller's ``account`` object.
        self.account = {
            'lid': self.lid,
            'lpwd': self.lpwd,
            'lname': self.lname,
            'mobile': self.mobile,
        }

        self.confi_dir = market['directory'] + '_config'
        self.visitor_dir = self.confi_dir + '//'+'visitors'

        self.alibaba = None
        self.browser = browser
        self.headless = headless

        self.products = {}
        root = market['directory']+'_config'
        # JSON.deserialize yields None for a missing file (other callers
        # check for it); treat that as "no products" instead of crashing.
        product_list = JSON.deserialize(root, '.', 'product_list.json') or []
        for p in product_list:
            self.products[p['ali_id']] = p

        rp = {
            'mink eyelash': {'ali_id': '60761530720', 'price': 1.5},
            'silk eyelash': {'ali_id': '60795757606', 'price': 0.8},
            'magnetic eyelash': {'ali_id': '60763607812', 'price': 1.1},
            'individual eyelash': {'ali_id': '60732345735', 'price': 2.3},
            'flat eyelash': {'ali_id': '60764332933', 'price': 3.9},
            'premade fans glue bonded': {'ali_id': '60762376749', 'price': 0.9},
            'premade fans heat bonded': {'ali_id': '60795873604', 'price': 1.9},
        }

        # For each product family: the products to recommend, in order.
        recommended_names = {
            'mink eyelash': ['mink eyelash', 'silk eyelash', 'magnetic eyelash'],
            'silk eyelash': ['silk eyelash', 'mink eyelash', 'magnetic eyelash'],
            'magnetic eyelash': ['magnetic eyelash', 'mink eyelash', 'silk eyelash'],
            'flat eyelash': ['flat eyelash', 'individual eyelash', 'premade fans glue bonded', 'premade fans heat bonded'],
            'individual eyelash': ['individual eyelash', 'flat eyelash', 'premade fans glue bonded', 'premade fans heat bonded'],
            'glue bonded': ['premade fans glue bonded', 'premade fans heat bonded', 'individual eyelash', 'flat eyelash'],
            'heat bonded': ['premade fans heat bonded', 'premade fans glue bonded', 'individual eyelash', 'flat eyelash'],
        }
        self.recommended = {
            family: [rp[name] for name in names]
            for family, names in recommended_names.items()
        }
        # 'premade fans' deliberately shares the 'glue bonded' list object.
        self.recommended['premade fans'] = self.recommended['glue bonded']
        self.recommended['default'] = self.recommended['mink eyelash'] + self.recommended['individual eyelash']

        # NOTE(review): the message contains literal backslash-n pairs, not
        # newlines - presumably expanded by whatever consumes it; confirm.
        self.mail_message = "Hi,\\nNice Day. This is Ada.\\nThanks for your visit to our products.\\nWould you pls tell us your WhatsApp number? I would like to send our product catalog and price list to you. Thanks\\nMy WhatsApp  is +8618563918130.\\n\\nRegards\\nAda"
Пример #20
0
 def load_tracking_ids(self):
     """Load stored tracking ids that are at most 24 hours old.

     ``self.tracking_ids`` is always reset. Each kept entry holds the parsed
     ``datetime``, the ``status`` and, when stored, the ``emails``.
     """
     filename = 'inquiry_tracking_ids_' + self.lname.split(' ')[0] + '.json'
     config_root = self.market['directory'] + '_config'
     stored = JSON.deserialize(config_root, [], filename)
     self.tracking_ids = {}
     if stored is None:
         return

     for tracking_id, saved in stored.items():
         moment = pendulum.parse(saved['datetime'])
         if moment.diff().in_hours() > 24:
             continue
         entry = self.tracking_ids[tracking_id] = {}
         entry['datetime'] = moment
         entry['status'] = saved['status']
         if 'emails' in saved:
             entry['emails'] = saved['emails']
Пример #21
0
def get_products_rankings(market, keywords):
    """Return the stored product rankings for the given keywords.

    For every keyword that has a ``<keyword>.json`` file in the
    ``products_ranking`` folder of the market's ``_config`` directory, the
    stored object is loaded, tagged with ``keyword`` and collected. Keywords
    without a stored ranking are skipped.
    """
    root = market['directory'] + '_config'
    products_rankings_dir = root + '//' + 'products_ranking'

    products_rankings = []
    for kw in keywords:
        filename = kw + '.json'

        if not os.path.isfile(products_rankings_dir + '//' + filename):
            continue

        # Pass the bare file name, like every other JSON.deserialize caller;
        # the already-joined path used to be passed and then joined with
        # root and folder a second time.
        obj = JSON.deserialize(root, 'products_ranking', filename)
        if obj is None:
            continue
        obj['keyword'] = kw
        products_rankings.append(obj)
    return products_rankings
Пример #22
0
def get_visitors(market, start_date='2018-08-01', end_date=None):
    """Return all stored visitors whose file date lies in the given range.

    ``start_date`` and ``end_date`` are parsed with ``pendulum.parse``; a
    missing ``end_date`` means "now". The date of each ``visitors_*.json``
    file is taken from its name and compared inclusively.
    """
    start = pendulum.parse(start_date)
    end = pendulum.parse(end_date) if end_date else pendulum.now()

    root = market['directory'] + '_config'
    visitor_dir = root + '//' + 'visitors'

    collected = []
    for name in os.listdir(visitor_dir):
        if not (name.startswith('visitors_') and name.endswith('.json')):
            continue

        file_date = pendulum.parse(
            re.search('visitors_(.*).json', name).group(1))
        if start <= file_date <= end:
            collected += JSON.deserialize(root, 'visitors', name)
    return collected
Пример #23
0
def create_app(debug=True):
    """Create and configure the Flask application.

    Registers the main blueprint and initialises socketio. In debug mode the
    shared application data is only built when ``WERKZEUG_RUN_MAIN`` is
    ``'true'``; otherwise the bare app is returned early.

    The shared data (``app.data``) holds a title-reservation lock, the stored
    markets, the Chrome options and an empty ``alibaba`` slot.
    """
    app = Flask(__name__)
    app.debug = debug
    # SECURITY: a secret committed to source control should not be used in
    # production. Allow overriding it through the environment; the old
    # value remains the fallback so existing deployments keep working.
    app.config['SECRET_KEY'] = os.environ.get('SECRET_KEY',
                                              'gjr39dkjn344_!67#')

    from .main import main as main_blueprint
    app.register_blueprint(main_blueprint)

    socketio.init_app(app)

    if app.debug and os.environ.get('WERKZEUG_RUN_MAIN') != 'true':
        return app

    data = SimpleNamespace()

    data.reserve_title_mutex = threading.Lock()

    # Load the stored markets; fall back to an empty mapping.
    markets = JSON.deserialize('.', 'storage', 'markets.json')
    if not markets:
        markets = {}
    data.markets = markets

    chrome_options_headless = webdriver.ChromeOptions()
    # Headless mode is currently switched off:
    # chrome_options_headless.add_argument('--headless')
    chrome_options_headless.add_argument('--disable-gpu')
    chrome_options_headless.add_argument('--disable-extensions')
    chrome_options_headless.add_argument('--disable-logging')
    chrome_options_headless.add_argument('--ignore-certificate-errors')
    data.chrome_options = chrome_options_headless

    data.alibaba = None
    app.data = data

    return app
Пример #24
0
 def add_keywords(self, tp, kws):
     """Append ``kws`` to the keyword list of type ``tp`` and persist it."""
     bucket = self.keywords_list[tp]
     bucket.append(kws)
     filename = 'p4p_keywords_list_' + tp + '.json'
     config_root = self.market['directory'] + '_config'
     JSON.serialize(bucket, config_root, [], filename)
Пример #25
0
def serialize(obj, market, paths, filename):
    """Store ``obj`` as ``filename`` below ``paths`` in the market's
    ``_config`` directory."""
    config_root = market['directory'] + '_config'
    JSON.serialize(obj, config_root, paths, filename)
Пример #26
0
def backgound_crawling_keywords(keyword, website, page_quantity, sid, socketio,
                                market):
    """Crawl keywords from ``website`` in a fresh browser and store the result.

    Progress is reported to client ``sid`` through ``notify`` events on the
    ``/markets`` namespace. The crawl result is merged into ``keywords.json``
    in the market's ``_config`` directory under a descriptive key and sent
    to the client as a ``keyword_crawling_result`` event.

    Args:
        keyword: search phrase, or for ``alibaba_sp`` the supplier URL.
        website: ``'alibaba'``, ``'alibaba_sp'``, ``'alibaba_sr'`` or
            ``'amazon'``.
        page_quantity: number of pages, passed on to the crawler.
        sid: socket.io session id of the requesting client.
        socketio: socket.io server used for emitting.
        market: market dict providing ``directory``.

    Raises:
        ValueError: ``website`` is not one of the supported values (checked
            before a browser is started).
    """
    filename = 'keywords.json'
    root = market['directory'] + '_config'

    if website not in ('alibaba', 'alibaba_sp', 'alibaba_sr', 'amazon'):
        raise ValueError('unsupported website: ' + repr(website))

    msg = {'type': "primary", 'content': "打开浏览器 ... ..."}
    socketio.emit('notify', msg, namespace='/markets', room=sid)

    chrome_options = webdriver.ChromeOptions()
    # Headless mode is currently switched off:
    # chrome_options.add_argument('--headless')
    chrome_options.add_argument('--disable-gpu')
    chrome_options.add_argument('--disable-extensions')
    chrome_options.add_argument('--disable-logging')
    chrome_options.add_argument('--ignore-certificate-errors')
    browser = webdriver.Chrome(chrome_options=chrome_options)

    pages = str(page_quantity)
    try:
        if website == 'alibaba':
            crawler_name = re.sub(' ', '_', keyword) + ' - ' + pages + '页 - 阿里'
            crawler = KwclrAlibaba(browser, keyword, page_quantity, sid,
                                   socketio)
        elif website == 'alibaba_sp':
            supplier = re.search(r'https://([^.]+)', keyword).group(1)
            category = 'all_products'
            if 'productgrouplist' in keyword:
                category = re.search(r'/([^/]+.html)', keyword).group(1)
            crawler_name = (supplier + ' - ' + category + ' - ' + pages +
                            '页 - 阿里(商家)')
            crawler = KwclrAliSp(browser, keyword, page_quantity, sid,
                                 socketio)
        elif website == 'alibaba_sr':
            # The pattern used to be the empty string, which put '_' between
            # every character; spaces are what the other branches replace.
            crawler_name = (re.sub(' ', '_', keyword) + ' - ' + pages +
                            '页 - 阿里(橱窗)')
            crawler = KwclrAliSr(browser, keyword, page_quantity, sid,
                                 socketio)
        else:  # 'amazon'
            crawler_name = (re.sub(' ', '_', keyword) + ' - ' + pages +
                            '页 - Amazon')
            crawler = KwclrAmazon(browser, keyword, page_quantity, sid,
                                  socketio)

        msg = {'type': 'primary', 'content': "开始爬取 ... ..."}
        socketio.emit('notify', msg, namespace='/markets', room=sid)

        result = crawler.start()

        msg = {'type': "primary", 'content': "爬取结束,关闭浏览器 ... ..."}
        socketio.emit('notify', msg, namespace='/markets', room=sid)
    finally:
        # Close the browser exactly once, also when crawling fails.
        browser.quit()

    msg = {'type': "primary", 'content': "保存结果 ... ..."}
    socketio.emit('notify', msg, namespace='/markets', room=sid)
    obj = JSON.deserialize(root, [], filename)
    if not obj:
        obj = {}
    obj[crawler_name] = result
    JSON.serialize(obj, root, [], filename)

    socketio.emit('keyword_crawling_result', {
        'key': crawler_name,
        'result': result
    },
                  namespace='/markets',
                  room=sid)
Пример #27
0
def execute(market):
    """Monitor the P4P keyword group of ``market`` for 30 seconds.

    A ``P4P`` controller is created with the market's main credentials, the
    market's group is monitored, and after 30 seconds everything in that
    group is turned off. The 'Eyelashes' market waits three extra seconds
    before it starts.

    Raises:
        ValueError: no keyword group is configured for ``market['name']``
            (previously an ``UnboundLocalError`` on ``group``).
    """
    print(market['name'])
    p4p = P4P(market, market['lid'], market['lpwd'])

    if market['name'] == 'Eyelashes':
        time.sleep(3)
        group = '直通车App'
    elif market['name'] == 'Tools':
        group = '0直通车'
    else:
        raise ValueError(
            'no P4P group configured for market ' + repr(market['name']))

    p4p.monitor(group=group)
    time.sleep(30)
    p4p.turn_all_off(group=group)


if __name__ == '__main__':
    # Run execute() for both markets in parallel daemon processes and wait
    # for them to finish.
    market_eyelash = JSON.deserialize('.', 'storage',
                                      'markets.json')['Eyelashes']
    market_tools = JSON.deserialize('.', 'storage', 'markets.json')['Tools']

    workers = []
    for target_market in (market_eyelash, market_tools):
        worker = Process(target=execute, args=[target_market])
        worker.daemon = True
        workers.append(worker)

    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()
    print('process is end')
Пример #28
0
 def set_keywords(self, tp, kws_list):
     """Replace the keyword list of type ``tp`` and persist it."""
     self.keywords_list[tp] = kws_list
     filename = 'p4p_keywords_list_' + tp + '.json'
     config_root = self.market['directory'] + '_config'
     JSON.serialize(self.keywords_list[tp], config_root, [], filename)
Пример #29
0
 def save_crawling_result(self, keywords):
     """Append a crawl result to the day's ``.json.gz`` result file.

     The date for the file name is the part before the first space of the
     first field of the first record. An empty result has no such record,
     so nothing is written (this used to raise ``IndexError``).
     """
     if not keywords:
         return
     root = self.market['directory'] + '_config'
     date_str = keywords[0][0].split(' ')[0]
     fn = 'p4p_keywords_crawl_result_'+date_str+'.json.gz'
     JSON.serialize(keywords, root, [], fn, append=True)
Пример #30
0
 def del_keywords(self, tp, kws):
     """Remove ``kws`` from the keyword list of type ``tp`` and persist it.

     Nothing is written when ``kws`` is not in the list.
     """
     bucket = self.keywords_list[tp]
     if kws not in bucket:
         return

     bucket.remove(kws)
     filename = 'p4p_keywords_list_' + tp + '.json'
     config_root = self.market['directory'] + '_config'
     JSON.serialize(self.keywords_list[tp], config_root, [], filename)