Пример #1
0
def VisitGoodsPage(mongo_collection, driver, key, brand):
    """Open a brand's listing URL, scroll the page and pass its HTML to ``dealWith``.

    Args:
        mongo_collection: Forwarded unchanged to ``dealWith``.
        driver: Browser driver; ``get``, ``quit``, ``execute_script`` and
            ``page_source`` are used on it.
        key: Forwarded unchanged to ``dealWith``.
        brand: Mapping that must contain ``'original_url'``; also forwarded
            to ``dealWith``.

    If the first ``driver.get`` raises ``WebDriverException`` the driver is
    quit, a replacement is obtained from ``loginTmall.login_tmall()`` and the
    URL is requested once more. The replacement is only bound to the local
    name; the caller's reference is not updated.
    """
    target_url = brand['original_url']

    try:
        driver.get(target_url)
    except WebDriverException as err:
        # Back off, log the failure, then retry once with a fresh session.
        time.sleep(10)
        Logger.info('Error!' + str(err))
        driver.quit()
        driver = loginTmall.login_tmall()
        driver.get(target_url)
        time.sleep(random.uniform(2, 4))

    time.sleep(random.uniform(0.5, 1))

    for banner in ('准备访问商品页面', '商品详细信息'):
        print(banner)

    # Scroll down in three steps with a random pause after each one.
    time.sleep(random.uniform(2, 4))
    for scroll_js in ("scrollTo(0,1000)",
                      "scrollTo(0,5000)",
                      "scrollTo(0,10000)"):
        driver.execute_script(scroll_js)
        time.sleep(random.uniform(1, 2))

    soup = BeautifulSoup(driver.page_source, 'lxml')
    dealWith(mongo_collection, soup, key, brand)

    print("done..")
Пример #2
0
class Login(object):
    """Start a ``ChromeDriver`` session on ``Config.url`` and log in.

    The class attributes ``username``, ``passwd``, ``submit`` and ``img`` are
    ``Location`` objects used by :meth:`login` to find the form fields, the
    submit button and the logo that is checked after logging in.
    """
    logger = Logger().logger
    driver = None
    username = Location("用户名输入框", "input[type=text]")
    passwd = Location("密码输入框", "input[type=password]")
    submit = Location("登录按钮", "button[type=submit]")
    img = Location("后台管理系统果壳logo", '//*[@id="root"]/div/section/header/div[1]/div/span[1]/img', "XPATH")

    def __init__(self):
        """Create the driver, maximise the window and open ``Config.url``.

        Raises:
            Exception: wraps any error raised during start-up. The error is
                logged and an already-created driver is quit first.
        """
        # Start the driver
        try:
            self.driver = ChromeDriver()
            self.driver.maximize_window()
            self.driver.get(Config.url)
            self.driver.set_page_load_timeout(Config.TIMEOUT)
            self.driver.set_script_timeout(10)
        except Exception as e:
            Log.error("driver初始化失败....\n系统信息: {} \n浏览器类型: {}\n详细信息: {}".format(
                Config.system, Config.BROWSER, str(e)))
            # Do not leave a half-initialised browser running.
            if self.driver:
                self.driver.quit()
            raise Exception(e)

    def login(self):
        """Fill in the credentials, submit, and verify the landing page.

        Returns:
            The logged-in driver instance.

        Raises:
            AssertionError: if the logo is not found after submitting, or the
                page title is not ``"GOOCKR CHARGING"``.
        """
        # Login steps
        self.driver.send(self.username, Config.USER)
        self.driver.send(self.passwd, Config.PWD)
        self.driver.click(self.submit)
        # Raised explicitly (rather than via `assert`) so the checks are not
        # stripped when Python runs with -O; type and message are unchanged.
        if not self.driver.exists(self.img):
            raise AssertionError("登录失败, 未找到大后台左上角logo")
        if not (self.driver.title == "GOOCKR CHARGING"):
            raise AssertionError(
                "打开GOOCKR CHARGING失败, 浏览器title不为'GOOCKR CHARGING',可能未进入GOOCKR CHARGING首页")
        return self.driver
Пример #3
0
class Login(object):
    """Start a ``ChromeDriver`` session on ``Config.url`` and verify the page.

    The form-filling steps are commented out in this class; :meth:`login`
    only checks the page title.
    """
    logger = Logger().logger
    driver = None

    # username = Location("用户名输入框", "input[type=text]")
    # passwd = Location("密码输入框", "input[type=password]")
    # submit = Location("登录按钮", ".btn-login")
    # logo = Location("大后台管理系统图标", "#leftpanel .img")

    def __init__(self):
        """Create the driver, maximise the window and open ``Config.url``.

        Raises:
            Exception: wraps any error raised during start-up. The error is
                logged and an already-created driver is quit first.
        """
        # Start the driver
        try:
            self.driver = ChromeDriver()
            self.driver.maximize_window()
            self.driver.get(Config.url)
            self.driver.set_page_load_timeout(Config.TIMEOUT)
            self.driver.set_script_timeout(10)
        except Exception as e:
            Log.error("driver初始化失败....\n系统信息: {} \n浏览器类型: {}\n详细信息: {}".format(
                Config.system, Config.BROWSER, str(e)))
            # Do not leave a half-initialised browser running.
            if self.driver:
                self.driver.quit()
            raise Exception(e)

    def login(self):
        """Verify that the opened page is the expected one.

        Returns:
            The driver instance.

        Raises:
            AssertionError: if the page title is not
                ``"微软 Bing 搜索 - 国内版"``.
        """
        # Login steps are disabled here
        # self.driver.send(self.username, Config.USER)
        # self.driver.send(self.passwd, Config.PWD)
        # self.driver.click(self.submit)
        # assert self.driver.exists(self.logo), "登录失败, 未找到大后台左上角logo"
        # Raised explicitly (rather than via `assert`) so the check is not
        # stripped when Python runs with -O; type and message are unchanged.
        if not (self.driver.title == "微软 Bing 搜索 - 国内版"):
            raise AssertionError(
                "打开bing失败, 浏览器title不为'微软 Bing 搜索 - 国内版',可能未进入bing首页")
        return self.driver
Пример #4
0
            auc = _eval(sess, model, test_len, fd, auc_op)
            logger.get_log().info(
                'Epoch %d DONE\tCost time: %.2f Train_loss: %.5f\tTest_loss: %.5f\tEval_AUC: %.5f'
                % (i, time.time() - start_time, loss_sum / batch_per_epoch,
                   auc['loss'], auc['auc']))

            start_time = time.time()
            sys.stdout.flush()

    print('*' * 16)
    print('best test_auc:', best_auc)
    res = best_auc
    best_auc = 0.0
    print('*' * 16)
    sys.stdout.flush()
    return res


if __name__ == '__main__':
    # Script entry point. `logger` is bound as a module-level name here.
    logger = Logger('fm_auc_diff')
    # Relative paths to the sample TFRecord file and the feature YAML config.
    sample = '../Data/adult_uci/raw_adult_sample.tfrecord'
    config = '../Config/feature_adult.yaml'

    # input_fn returns four values that are passed straight to train_epochs.
    fdtmp, idxtmp, train_lentmp, test_lentmp = input_fn(sample,
                                                        config,
                                                        rand_col=None)

    # Print whatever train_epochs returns, labelled 'auc: '.
    auc_value = train_epochs(fdtmp, idxtmp, train_lentmp, test_lentmp)
    print('auc: ', auc_value)
Пример #5
0
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time    : 2020-07-13 20:35
# @Author  : 刘开
import os
import xlrd

from Tools.logger import Logger
# Module-level logger shared by this file, obtained from the project's Logger
# helper ('execl' is presumably the logger name — confirm in Tools.logger).
logger = Logger(logger='execl').getlog()


class ExeclReader():
    def __init__(self, fielname, sheet):
        self.filename = fielname
        self.sheet = sheet

    def get_execl_name(self):

        #路径参数化
        base_dir = str(os.path.dirname(os.path.dirname(__file__)))  #返回路径名称
        base_dir = base_dir.replace('\\', '/')
        # 拼凑文件路径
        file_paht = base_dir + '/Data/' + self.filename
        logger.info('开始查找文件')

        #读取execl
        execl = xlrd.open_workbook(file_paht)
        #获取sheet页
        worksheet = execl.sheet_by_name(self.sheet)
        logger.info('通过sheet名称读取文件内容')
        #        读表头
Пример #6
0
# save_context returns a text logger plus the folders used below for
# checkpoints (MODELS_FOLDER) and for the Logger's log_dir (SUMMARIES_FOLDER).
text_logger, MODELS_FOLDER, SUMMARIES_FOLDER = save_context(
    __file__, KEY_ARGUMENTS)

# Seed the torch CPU, torch CUDA and NumPy random generators with fixed values.
torch.manual_seed(1234)
torch.cuda.manual_seed(1235)
np.random.seed(1236)
torch.backends.cudnn.deterministic = True
# NOTE(review): benchmark=True lets cuDNN pick algorithms by timing, which can
# vary between runs; if run-to-run reproducibility is the goal of the seeding
# above, this probably should be False — confirm intent.
torch.backends.cudnn.benchmark = True
# Use CUDA when available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
FLAGS.device = device

nlabels = FLAGS.y_dist.dim
batch_size = FLAGS.training.batch_size

checkpoint_io = CheckpointIO(checkpoint_dir=MODELS_FOLDER)
logger = Logger(log_dir=SUMMARIES_FOLDER)

itr = inputs.get_data_iter(batch_size)
GNet, GOptim = inputs.get_generator_optimizer()
DNet, DOptim = inputs.get_discriminator_optimizer()
# GNet_test starts as a deep copy of the generator. update_average is called
# with 0.0 — presumably an averaging factor that makes GNet_test mirror GNet
# exactly at start; confirm against update_average.
GNet_test = copy.deepcopy(GNet)
update_average(GNet_test, GNet, 0.0)
# The y/z distributions are built from the attributes of the FLAGS sub-configs.
ydist = get_ydist(**vars(FLAGS.y_dist))
zdist = get_zdist(**vars(FLAGS.z_dist))

# Register networks and optimizers with the checkpoint helper under these names.
checkpoint_io.register_modules(GNet=GNet,
                               GOptim=GOptim,
                               DNet=DNet,
                               DOptim=DOptim)
checkpoint_io.register_modules(GNet_test=GNet_test)
Пример #7
0
    dataBase = mongo_conn['db_ysld']  # Database

    key_list = []
    for cat in urls_collections_config.keys():
        key_list.append(cat)

    for key in key_list:
        collection_name = urls_collections_config[key][1]
        collection = dataBase[collection_name]  # collection

        name = urls_collections_config[key][3]
        url = urls_collections_config[key][0]
        category_id = urls_collections_config[key][2]
        category = key
        brand = {}
        brand['category_id'] = category_id

        brand['id'] = ''
        brand['original_url'] = url
        brand['name'] = name
        brand['category'] = category
        brand['store_id'] = 1


        Logger.info(name)
        Logger.info(url)
        VisitGoodsPage(collection, driver, key, brand)
        time.sleep(random.uniform(8, 15))
    driver.quit()
    mongo_conn.close()
    output.close()
Пример #8
0
import base64
from functools import wraps
from selenium.common.exceptions import ElementNotVisibleException
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

from Tools.logger import Logger
from Tools.web_tool import Tools
from config import Config

# Module-level logger used by the helpers in this file.
Log = Logger().logger


def wait(func):
    @wraps(func)
    def wrapper(*args, **kwargs):
        Log.info("当前Page: {} 操作: {} 控件名: {}".format(
            args[1].file, func.__name__,
            "->".join([str(x) for x in args[1:]])))
        try:
            WebDriverWait(args[0], Config.TIMEOUT).until(
                EC.element_to_be_clickable(
                    (getattr(By, args[1].method), args[1].value)))
            # WebDriverWait(args[0], Config.TIMEOUT).until(
            #     lambda x: x.find_element(getattr(By, args[1].method), args[1].value).send_keys("1")
            # )
        except Exception:
            Log.error("等待元素超时! \n文件名: {}\n函数名: {}\n控件名: {}".format(
                args[1].file, func.__name__, args[1].name))
            assert 0, "等待元素超时! \n文件名: {}\n函数名: {}\n控件名: {}".format(
Пример #9
0
# @Time    : 2020-07-13 21:46
# @Author  : 刘开
import ast
from Tools.logger import Logger
import requests
import json
from Tools.config import Config
from Tools.execlReader import ExeclReader
from Tools.config import Config
from Tools.TimeGlobal import *
# Module-level setup; everything below runs at import time.
config = Config()
# Debug output of year_time() (not defined in the visible code; presumably
# provided by the star import from Tools.TimeGlobal).
print(year_time())
# Parse the configured cookie text as a Python literal. The three arguments
# are presumably (file, section, option) — confirm against Config.get_value.
cookies = ast.literal_eval(config.get_value('cookie.conf', 'cookies',
                                            'cookie'))
# Debug output: the type produced by literal_eval.
print(type(cookies))
loger = Logger('SendRequest').getlog()


class SendRequest():

    # 去掉初始化
    @staticmethod
    def request_api(host, url, method, data, cookie):
        test_url = host + url
        # 做一个没加http://的判断
        if not test_url.startswith('http://'):
            test_url = 'http://' + test_url

        # 封装请求方法
        try:
            if method == 'GET':
Пример #10
0
class ChromeDriver(base()):
    """WebDriver subclass adding locator-based convenience helpers.

    Every ``ele`` argument is a locator object exposing ``method`` (the name
    of an attribute on ``By``) and ``value`` (the selector string). Methods
    decorated with ``@wait`` are wrapped by the ``wait`` decorator, which is
    defined outside this class.
    """
    logger = Logger().logger

    def __init__(self):
        """Start the browser.

        When ``Config.SYS`` is ``"linux"`` a 1920x1080 virtual display is
        started before the driver is created. Otherwise the driver is created
        with ``Config.DRIVER_PATH`` as ``executable_path``, calling
        ``Browser.set_browser()`` first if that setting is missing or None.
        """
        if Config.SYS == "linux":
            Browser.set_browser()
            # Create a virtual desktop
            display = Display(visible=0, size=(1920, 1080))
            display.start()
            super(ChromeDriver, self).__init__()
        else:
            # Headless mode
            # op = chrome_op()
            # op.add_argument("--headless")
            if getattr(Config, "DRIVER_PATH", None) is None:
                Browser.set_browser()
            super(ChromeDriver, self).__init__(executable_path=getattr(Config, "DRIVER_PATH", None))

    # For more methods see Selenium.jpg in the project root
    def get_element(self, ele):
        """
        Return the element found for the locator.
        :param ele: locator with ``method`` and ``value``
        :return: result of ``find_element``
        """
        return self.find_element(by=getattr(By, ele.method), value=ele.value)

    def get_elements(self, ele):
        """
        Return the elements found for the locator.
        :param ele: locator with ``method`` and ``value``
        :return: result of ``find_elements``
        """
        return self.find_elements(by=getattr(By, ele.method), value=ele.value)

    @wait
    def send(self, ele, text):
        """
        Type text into an input box.
        :param ele: locator of the input
        :param text: text passed to ``send_keys``
        :return: None
        """
        self.get_element(ele).send_keys(text)

    @wait
    def bounds(self, ele):
        """
        Return the element's bounds.
        :param ele: locator of the element
        :return: tuple ``(x, y, x + width, y + height)``
        """
        element = self.get_element(ele)
        location = element.location
        size = element.size
        bounds = (location["x"], location["y"], location["x"] + size["width"], location["y"] + size["height"])
        return bounds

    @wait
    def select(self, ele, text):
        """
        Choose an option of a drop-down by its visible text.
        :param ele: locator of the select element
        :param text: visible text of the option
        :return: None
        """
        select = Select(self.get_element(ele))
        select.select_by_visible_text(text)

    @wait
    def click(self, ele):
        """
        Click the element after a fixed one-second pause.
        :param ele: locator of the element
        :return: result of the element's ``click()``
        """
        time.sleep(1)
        return self.get_element(ele).click()

    @wait
    def clear(self, ele):
        """
        Clear the content of an input box.
        :param ele: locator of the input
        :return: result of the element's ``clear()``
        """
        return self.get_element(ele).clear()

    @wait
    def click_text(self, ele_list, name):
        """
        Click the first matched element whose text equals ``name``
        (e.g. navigation bars and menus).
        :param ele_list: locator matching the candidate elements
        :param name: exact text to look for
        :return: None
        :raises AssertionError: if no matched element has that text
        """
        elements = self.get_elements(ele_list)
        for e in elements:
            if e.text == name:
                e.click()
                break
        else:
            # Raised explicitly so the failure is not stripped under -O.
            raise AssertionError("未找到{}中的文本: {}".format(ele_list.name, name))

    def switch_handle(self):
        """
        Switch to a window other than the current one.

        If several other windows are open, the last handle in
        ``window_handles`` is chosen. The "no new window" message is logged
        only when no other window exists.
        :return: None
        """
        handles = self.window_handles
        current = self.current_window_handle
        others = [hand for hand in handles if hand != current]
        if others:
            self.switch_to.window(others[-1])
        else:
            self.logger.info("没有可用的新窗口!")

    def switch_frame(self, ele):
        """
        Enter the iframe found for the locator.
        :param ele: locator of the iframe element
        :return: None
        """
        frame = self.get_element(ele)
        self.switch_to.frame(frame)

    def switch_back(self):
        """
        Leave the iframe and return to the top-level document.
        :return: None
        """
        self.switch_to.default_content()

    def alert_confirm(self):
        """
        Accept (confirm) the currently open alert.
        :return: None
        """
        # `switch_to.alert` is a property returning the Alert object; it must
        # not be called.
        self.switch_to.alert.accept()

    def alert_refuse(self):
        """
        Dismiss (cancel) the currently open alert.
        :return: None
        """
        # `switch_to.alert` is a property returning the Alert object; it must
        # not be called.
        self.switch_to.alert.dismiss()

    @wait
    def get_text(self, ele):
        """
        Return the text of the HTML element.
        :param ele: locator of the element
        :return: the element's ``text``
        """
        return self.get_element(ele).text

    @wait
    def attr(self, ele, attribute):
        """
        Return an attribute of the HTML element.
        :param ele: locator of the element
        :param attribute: attribute name
        :return: result of ``get_attribute(attribute)``
        """
        return self.get_element(ele).get_attribute(attribute)

    def set_attr(self, ele, attribute, value):
        """
        Set a property of the HTML element through JavaScript.
        :param ele: locator of the element
        :param attribute: property path written after ``arguments[0].``
        :param value: value to assign; it is converted with ``str()``
        :return: None
        """
        element = self.get_element(ele)
        # Pass the value as a script argument instead of splicing it into a
        # quoted JS literal, so quotes or backslashes in it cannot break the
        # script. It is still assigned as a string, as before.
        self.execute_script("arguments[0].{}=arguments[1];".format(attribute), element, str(value))

    @wait
    def move(self, pos):
        """
        Move the pointer to an element.
        :param pos: locator of the element
        :return: None
        """
        positon = self.get_element(pos)
        # Move to the target element
        ActionChains(self).move_to_element(positon).perform()

    def exists(self, ele):
        """
        Report whether the element becomes visible within ``Config.TIMEOUT``.
        :param ele: locator of the element
        :return: True if the wait succeeds, False if it raises
        """
        try:
            WebDriverWait(self, Config.TIMEOUT).until(EC.visibility_of_element_located(
                (getattr(By, ele.method), ele.value)))
            return True
        except Exception:
            # Best-effort check: any driver error counts as "not there", but
            # KeyboardInterrupt/SystemExit are no longer swallowed.
            return False

    def is_show(self, ele):
        """
        Return the element's ``display`` property as evaluated in the browser.
        :param ele: locator of the element
        :return: result of the script
        """
        # NOTE(review): DOM elements normally expose `style.display`, not
        # `display`, so this presumably always evaluates to None — confirm the
        # intended property before relying on the result.
        return self.execute_script("return arguments[0].display;", self.get_element(ele))
def VisitGoodsPage(mongo_collection, driver, key, brand):
    """Crawl all result pages of a brand listing, passing each page to ``dealWith``.

    Args:
        mongo_collection: Forwarded to ``dealWith``; its ``name`` is passed to
            ``writeToCsv`` when a captcha redirect is detected.
        driver: Browser driver used to load, scroll and paginate.
        key: Forwarded unchanged to ``dealWith``.
        brand: Mapping with ``'original_url'`` (and ``'original_name'``, read
            when the last page is reached); also forwarded to ``dealWith``.

    Returns early (``None``) when the browser lands on a
    ``https://sec.taobao.com`` URL, when ``isNoItem`` is true, or when
    ``getMaxPage`` returns 0.
    """
    target_url = brand['original_url']

    try:
        driver.get(target_url)
    except WebDriverException as err:
        # Back off, log the failure, then retry once with a fresh session.
        time.sleep(10)
        Logger.info('Error!' + str(err))
        driver.quit()
        driver = loginTmall.login_tmall()
        driver.get(target_url)
        time.sleep(random.uniform(2, 4))

    time.sleep(random.uniform(0.5, 1))

    # Check whether we were redirected to the captcha page
    if 'https://sec.taobao.com' in driver.current_url:
        writeToCsv(target_url, brand, mongo_collection.name)
        time.sleep(random.uniform(2, 4))
        return

    # Check whether the listing has no items
    if isNoItem(driver):
        return

    max_page = getMaxPage(driver)
    if max_page == 0:
        return
    Logger.info('最大页数:' + str(max_page))
    for banner in ('准备访问商品页面', '商品详细信息'):
        print(banner)

    scroll_steps = ("scrollTo(0,1000)",
                    "scrollTo(0,5000)",
                    "scrollTo(0,10000)")

    # First page: scroll down in steps with random pauses, then parse.
    time.sleep(random.uniform(2, 4))
    for scroll_js in scroll_steps:
        driver.execute_script(scroll_js)
        time.sleep(random.uniform(1, 2))

    first_soup = BeautifulSoup(driver.page_source, 'lxml')
    dealWith(mongo_collection, first_soup, key, brand)

    last_page = int(max_page)
    next_page = 2
    while next_page <= last_page:
        time.sleep(2)

        # Wait up to 60 seconds for the "next page" link, then follow it.
        next_link = WebDriverWait(driver, 60).until(
            lambda d: d.find_element_by_xpath("//a[@class='ui-page-s-next']"))
        next_link.click()
        time.sleep(2)
        for scroll_js in scroll_steps:
            driver.execute_script(scroll_js)
            time.sleep(1)
        Logger.info(driver.current_url)

        page_soup = BeautifulSoup(driver.page_source, 'lxml')
        dealWith(mongo_collection, page_soup, key, brand)

        current_page = getCurrentPage(driver)
        Logger.info('完成当前页爬取:' + str(current_page))
        page_no = int(current_page)
        if page_no == last_page:
            Logger.info(brand['original_name'])
            Logger.info('''
                                    #########################################################################
                                    |                            最大页数爬取完毕                               |
                                    #########################################################################
                                ''')
        # The loop advances from the page the site reports, not from a counter.
        next_page = page_no + 1

    print("done..")
    # NOTE(review): `cursors` and `res` are not defined anywhere in this
    # function; unless they exist at module level these two lines raise
    # NameError — confirm they belong here.
    cursors.close()
    return res


if __name__ == '__main__':
    # Re-crawl entry point: log in, connect to MongoDB, then revisit every
    # entry listed in fail_url.txt.

    driver = loginTmall.login_tmall()
    time.sleep(1)
    mongo_conn = connect_mongo(mongodb_host, mongodb_port, mongodb_username,
                               mongodb_password)
    dataBase = mongo_conn['power']  # Database

    url_list = []
    with open('fail_url.txt', 'r', encoding='utf-8') as fc:
        # Each line is split on the literal '/*/' separator: field 0 is used
        # as the collection name, field 1 as the textual brand dict.
        for i in fc:
            temp = i.split('/*/')
            url_list.append(temp)
        # This loop is still inside the `with`, so the file stays open for
        # the whole crawl.
        for url_item in url_list:
            collection_name = url_item[0].strip()
            collection = dataBase[collection_name]
            # Drop the 'ObjectId' token, presumably so ObjectId('...') in the
            # stored text evaluates as a plain parenthesised string.
            brand_str = url_item[1].replace('ObjectId', '')
            # SECURITY NOTE(review): eval executes whatever expression is in
            # fail_url.txt; only safe if that file is fully trusted. Consider
            # ast.literal_eval if the text is always a plain literal.
            brand = eval(brand_str)

            Logger.info(brand['original_name'])
            Logger.info(brand['original_url'])
            VisitGoodsPage(collection, driver, brand['category'], brand)
            # Random pause between brands.
            time.sleep(random.uniform(5, 20))
    driver.quit()
    mongo_conn.close()
    # NOTE(review): `output` is not defined in the visible code — confirm it
    # exists at module level, otherwise this raises NameError.
    output.close()
Пример #13
0
class Suite(unittest.TestSuite):
    """TestSuite whose ``run`` executes each test up to ``retry + 1`` times.

    ``retry`` is read from the test's ``retry`` attribute, falling back to
    ``Config.RETRY``. Entries of ``result.failures`` / ``result.errors`` are
    accessed with ``.get("case_id")`` and ``['msg']``, so ``result`` is
    presumably a project-specific result class storing dict-like records —
    confirm against the runner.
    """

    logger = Logger().logger

    def run(self, result, debug=False):
        """Run the suite's tests with per-test retries.

        :param result: result collector; must provide ``shouldStop``,
            ``failures`` and ``errors``.
        :param debug: when true, call ``test.debug()`` instead of
            ``test(result)``.
        :return: the same ``result`` object.
        """
        topLevel = False

        # Mark this call as the outermost run so that class/module teardown
        # at the end happens exactly once.
        if getattr(result, '_testRunEntered', False) is False:
            result._testRunEntered = topLevel = True

        for index, test in enumerate(self):
            retry = getattr(test, "retry", Config.RETRY)
            if result.shouldStop:
                break

            # Attempts are numbered 1 .. retry + 1.
            for i in range(1, retry + 2):
                if _isnotsuite(test):
                    self._tearDownPreviousClass(test, result)
                    self._handleModuleFixture(test, result)
                    self._handleClassSetUp(test, result)
                    # result._previousTestClass = test.__class__
                    # If class or module set-up failed, skip this attempt and
                    # move on to the next one.
                    if (getattr(test.__class__, '_classSetupFailed', False)
                            or getattr(result, '_moduleSetUpFailed', False)):
                        continue

                self.logger.info("用例: {}正在尝试第{}次运行!".format(
                    test.__class__.__name__, i))
                if not debug:
                    test(result)
                else:
                    test.debug()
                if i < retry + 1:
                    # Retry decision. (Original author's note: this part is
                    # ugly, don't look too closely; improvements welcome.)
                    # Look up this test's case_id among recorded failures and
                    # errors; on a non-final attempt the matching record is
                    # removed so that only the last attempt is reported.
                    error, fail = None, None
                    fail_id = [x.get("case_id") for x in result.failures]
                    error_id = [x.get("case_id") for x in result.errors]
                    if test.case_id in fail_id:
                        fail = fail_id.index(test.case_id)
                    if test.case_id in error_id:
                        error = error_id.index(test.case_id)
                    # Recorded as both a failure and an error: stop retrying.
                    if error is not None and fail is not None:
                        break
                    elif error is not None:
                        self.logger.warning("用例: {} 第{}次失败 原因: {}".format(
                            test.__class__.__name__, i,
                            str(result.errors[error]['msg'])))
                        del result.errors[error]
                    elif fail is not None:
                        self.logger.warning("用例: {} 第{}次失败 原因: {}".format(
                            test.__class__.__name__, i,
                            str(result.failures[fail]['msg'])))
                        del result.failures[fail]
                    # NOTE(review): no branch breaks out when the attempt was
                    # recorded as neither failure nor error, so as written the
                    # loop appears to go on to the next attempt even after a
                    # pass — confirm this is intended.
                    result._previousTestClass = test.__class__
                    continue
            if self._cleanup:
                self._removeTestAtIndex(index)

        if topLevel:
            self._tearDownPreviousClass(None, result)
            self._handleModuleTearDown(result)
            result._testRunEntered = False
        return result