def __init__(self):
        """Prepare file paths, the file logger, Scrapy settings and GlobalLogging.

        The data files (log.txt, scrapy_log.txt, mysql_setting.txt) are
        located next to the executable when running frozen, otherwise next
        to this source file.
        """
        if not getattr(sys, 'frozen', False):
            base_dir = os.path.dirname(os.path.realpath(__file__))
        else:
            base_dir = os.path.dirname(sys.executable)

        self.log_file, self.scrapy_log_file, self.mysql_file = [
            os.path.join(base_dir, file_name)
            for file_name in ("log.txt", "scrapy_log.txt", "mysql_setting.txt")
        ]

        # File logger named "log", writing everything from DEBUG upwards.
        file_logger = logging.getLogger("log")
        file_handler = logging.FileHandler(self.log_file)
        self.logger = file_logger
        self.handler = file_handler
        file_logger.addHandler(file_handler)
        file_logger.setLevel(logging.DEBUG)

        # Write a banner with the start time at the top of this session's log.
        now = time.time()
        self.starttime = time.localtime(now)
        stamp = time.strftime('%Y-%m-%d %H:%M:%S', self.starttime)
        self.logger.info("\n\n---------------------------------\n%s" % stamp)

        self.settings = get_project_settings()

        # 1 while a spider is running, otherwise 0.
        self.flag = 0

        # Initialise the GlobalLogging settings: hand it self.getLog and
        # set its level to INFO.
        GlobalLogging.getInstance().setLoggingToHanlder(self.getLog)
        GlobalLogging.getInstance().setLoggingLevel(logging.INFO)
    def spider_closing(self, spider):
        """Handle a closed spider and shut down once no spiders remain.

        Logs the closure and removes ``spider`` from ``self.crawler_list``.
        When the list becomes empty, the running flag is cleared, the
        reactor is stopped and the database connection is closed.

        Args:
            spider: the spider that has just closed; it must be present in
                ``self.crawler_list`` (``list.remove`` raises ValueError
                otherwise).
        """
        GlobalLogging.getInstance().info("Spider closed: %s" % spider)
        self.crawler_list.remove(spider)

        if not self.crawler_list:
            self.flag = 0
            reactor.stop()
            # Closing the connection is best effort: any failure is only
            # logged so that shutdown is never interrupted.
            try:
                self.conn.close()
            except Exception as e:  # "except X, e" is a SyntaxError on Python 3
                GlobalLogging.getInstance().info(str(e))
示例#3
0
    def __init__(self):
        """Read the user settings file and log every non-blank line of it.

        "setting.txt" is looked up next to the executable when running
        frozen, otherwise next to this source file. The raw lines are kept
        in ``self.text``.
        """
        if not getattr(sys, 'frozen', False):
            base_dir = os.path.dirname(os.path.realpath(__file__))
        else:
            base_dir = os.path.dirname(sys.executable)

        settings_path = os.path.join(base_dir, "setting.txt")
        with open(settings_path, 'r') as settings_file:
            self.text = settings_file.readlines()

        for raw_line in self.text:
            stripped = raw_line.strip()
            if not stripped:
                continue
            GlobalLogging.getInstance().info(stripped)
示例#4
0
    def process_item(self, item, spider):
        """Classify and count crawled pages, enforcing the page/item limits.

        ``PassItem`` objects are only counted and recorded in the link
        matrix; they are always dropped. ``CrawledItem`` objects are
        de-duplicated, counted and returned so that a later pipeline can
        download them.

        A limit of 0 (``self.pagecount_max`` / ``self.itemcount_max``) means
        "no limit". Each cross-check against the *other* counter is applied
        only when that other limit is enabled; without this guard
        ``count >= 0`` is always true and every item would be dropped as
        soon as just one of the two limits is configured.

        Args:
            item: the scraped item; ``item['url']`` and ``item['referer']``
                are read.
            spider: the running spider; its ``linkmatrix`` records the link.

        Returns:
            The item, for a ``CrawledItem`` whose URL has not been seen yet.
            Items of any other type fall through and yield None.

        Raises:
            DropItem: for every ``PassItem``, for duplicate ``CrawledItem``
                URLs, and for anything arriving after a limit was reached.
        """
        if isinstance(item, PassItem):

            if self.pagecount_max != 0:  # a maximum page count is configured
                if self.pagecount == self.pagecount_max:
                    # Limit just reached: emit the message that stops the
                    # spider, then push the counter past the limit so that
                    # no further PassItem is accepted (and the message is
                    # logged only once).
                    GlobalLogging.getInstance().info(
                        "[stop_pagecount] reach max pagecount : {0}".format(
                            self.pagecount))
                    self.pagecount += 1
                    raise DropItem("PassItem: %s" % item['url'])

                item_limit_hit = (self.itemcount_max != 0
                                  and self.itemcount >= self.itemcount_max)
                if self.pagecount > self.pagecount_max or item_limit_hit:
                    # Either limit has already been reached or exceeded.
                    raise DropItem("PassItem: %s" % item['url'])

            # Record the link (url <- referer) in the spider's link matrix.
            spider.linkmatrix.addentirelink(item['url'], item['referer'])

            url = item['url'].strip('/')
            if url not in self.page_seen:  # count every distinct page once
                self.page_seen.add(url)
                self.pagecount += 1

            raise DropItem("PassItem: %s" % item['url'])

        elif isinstance(item, CrawledItem):

            if self.itemcount_max != 0:  # a maximum item count is configured
                if self.itemcount == self.itemcount_max:
                    # Limit just reached: emit the message that stops the
                    # spider, then push the counter past the limit so that
                    # no further CrawledItem is accepted.
                    GlobalLogging.getInstance().info(
                        "[stop_itemcount] reach max itemcount : {0}".format(
                            self.itemcount))
                    self.itemcount += 1
                    raise DropItem("Duplicate item found: %s" %
                                   item['url'])

                page_limit_hit = (self.pagecount_max != 0
                                  and self.pagecount >= self.pagecount_max)
                if self.itemcount > self.itemcount_max or page_limit_hit:
                    # Either limit has already been reached or exceeded.
                    raise DropItem("Duplicate item found: %s" % item['url'])

            # Record the forward link (url <- referer) in the link matrix.
            spider.linkmatrix.addforwardlink(item['url'], item['referer'])

            url = item['url'].strip('/')
            if url not in self.item_seen:  # first time this URL is scraped
                self.item_seen.add(url)
                self.itemcount += 1
                return item
            else:
                raise DropItem("Duplicate item found: %s" % item['url'])
示例#5
0
    def __init__(self, rule, contrl_conn, result_conn, stats_conn):
        """Store the rule and connections, then build and configure a Crawler.

        Args:
            rule: stored as ``self.rule``.
            contrl_conn: stored as ``self.ctrl_conn``.
            result_conn: stored as ``self.result_conn``.
            stats_conn: stored as ``self.stats_conn``.
        """
        self.rule = rule
        self.ctrl_conn, self.result_conn, self.stats_conn = (
            contrl_conn, result_conn, stats_conn)

        project_settings = get_project_settings()
        self.settings = project_settings
        crawler = Crawler(project_settings)
        self.crawler = crawler
        crawler.configure()

        # Have self.stop called automatically when the spider terminates.
        crawler.signals.connect(self.stop, signal=signals.spider_closed)

        self.spider = None

        # Initialise the GlobalLogging settings: hand it self.getLog and
        # set its level to INFO.
        GlobalLogging.getInstance().setLoggingToHanlder(self.getLog)
        GlobalLogging.getInstance().setLoggingLevel(logging.INFO)
    def updateDatabase(self):
        """Connect to MySQL and make sure the database and page table exist.

        The connection is attempted once and, on failure, retried up to 10
        more times with a 3 second pause between attempts. The open
        connection is kept in ``self.conn``. Afterwards the database
        ``self.database`` and the table ``self.webpages_table`` are created
        if they do not exist yet.

        Returns:
            False when no connection could be established or when creating
            the database/table raised. Nothing is returned explicitly on
            success (i.e. None).
            NOTE(review): callers testing the result for truthiness cannot
            tell success from failure -- confirm how the result is used.
        """
        conn_flag = 0  # set to 1 once the database connection succeeded
        number = 0  # number of retries performed so far (at most 10)

        while 1:
            try:
                self.conn = pymysql.connect(host=self.host,
                                            port=self.port,
                                            user=self.user,
                                            passwd=self.password)
                cur = self.conn.cursor()
                conn_flag = 1
            except Exception as e:
                GlobalLogging.getInstance().info(str(e))
            if conn_flag:
                break
            elif number == 10:
                return False
            else:
                number += 1
                GlobalLogging.getInstance().info(
                    'Try connecting to mysql again')
                time.sleep(3)

        try:
            # Create the database.
            # NOTE(review): database/table names are interpolated directly
            # into the SQL text (identifiers cannot be bound as parameters);
            # they are presumably trusted configuration values -- confirm.
            cur.execute('CREATE DATABASE IF NOT EXISTS %s '
                        'DEFAULT CHARSET utf8 COLLATE utf8_general_ci' %
                        self.database)
            cur.execute('USE %s' % self.database)

            # Create the web-page table.
            sql = '''CREATE TABLE IF NOT EXISTS %s (
            id INT(11) NOT NULL AUTO_INCREMENT,
            eventid CHAR(14) DEFAULT NULL,
            publishedtime VARCHAR(30) DEFAULT NULL,
            typename VARCHAR(255) DEFAULT NULL,
            source VARCHAR(255) DEFAULT NULL,
            title VARCHAR(255) DEFAULT NULL,
            summary LONGTEXT,
            content LONGTEXT,
            url VARCHAR(255) DEFAULT NULL,
            crawledtime TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
            PRIMARY KEY (id))''' % self.webpages_table
            cur.execute(sql)
            self.conn.commit()

        except Exception as e:  # "except X, e" is a SyntaxError on Python 3
            GlobalLogging.getInstance().info(str(e))
            return False
示例#7
0
    def process_item(self, item, spider):
        """Save a scraped page to disk and log the download statistics.

        Args:
            item: the scraped item; its 'url', 'title' and 'body' entries
                are read.
            spider: the running spider, used for its ``log`` method.

        Returns:
            The unchanged item, whether or not writing the file succeeded.
        """
        # Work done before a failure is not rolled back: the scraped-item
        # counter and the page_count entry are always updated, while the
        # statements after the failing one are skipped.
        try:
            self.index += 1
            page_url = item['url']
            self.page_count.setdefault(page_url.strip('/'), self.index)
            scraped_msg = "[stats] scrapeditem : {0}".format(self.index)
            GlobalLogging.getInstance().info(scraped_msg)

            target_path = self.getpath(title=item['title'])
            with open(target_path, "w") as out_file:
                out_file.write(item['body'])

            self.success += 1

            spider.log('downloaded item from {0}'.format(item['url']), INFO)
            success_msg = u"[success] downloaded {0}\n         url: {1}".format(
                item['title'], item['url'])
            GlobalLogging.getInstance().info(success_msg)
            download_msg = "[stats] downloaditem : {0}".format(self.success)
            GlobalLogging.getInstance().info(download_msg)
        except IOError as io_err:
            failure_msg = u"[fail] download, {1}: {2}\n         url: {0}".format(
                item['url'], io_err.strerror, io_err.filename)
            GlobalLogging.getInstance().info(failure_msg)

        return item
示例#8
0
 def min_value(self, key, value, spider=None):
     """Delegate to the parent ``min_value`` and log the resulting stat.

     The logged line is "[stats]" + the stats key + ":" + the value
     currently returned by ``self.get_value(key)``.
     """
     super(SpiderStatsCollector, self).min_value(key, value, spider)
     GlobalLogging.getInstance().info("".join((
         "[stats]",
         self._get_stats_key(spider, key),
         ":",
         str(self.get_value(key)),
     )))
示例#9
0
 def inc_value(self, key, count=1, start=0, spider=None):
     """Delegate to the parent ``inc_value`` and log the resulting stat.

     The logged line is "[stats]" + the stats key + ":" + the value
     currently returned by ``self.get_value(key)``.
     """
     super(SpiderStatsCollector, self).inc_value(key, count, start, spider)
     GlobalLogging.getInstance().info("".join((
         "[stats]",
         self._get_stats_key(spider, key),
         ":",
         str(self.get_value(key)),
     )))
示例#10
0
 def _set_value(self, key, value, spider):
     """Log a numeric stat as "[stats]<stats key>:<value>".

     Only int/float values whose key is not listed in ``self.ignore_keys``
     are logged; everything else is silently skipped.

     Args:
         key: name of the stat.
         value: the stat's value.
         spider: passed to ``self._get_stats_key`` to build the logged key.
     """
     if isinstance(value, (int, float)) and key not in self.ignore_keys:
         k = self._get_stats_key(spider, key)
         # value is numeric in this branch, so it must be converted before
         # being concatenated (str + int raises TypeError).
         GlobalLogging.getInstance().info("[stats]" + k + ":" + str(value))
示例#11
0
    def run(self):
        """Copy the selected build to the host and deploy it on the dc and am VMs.

        Steps, each of them logged through GlobalLogging:
          1. reload the configuration and copy the build's
             ArchiveManagerInstaller.msi to the host (up to 3 attempts);
          2. on the dc VM: power on, revert to the snapshot, log in, copy the
             host folder to the guest, run the configured programs and
             scripts, log out;
          3. the same sequence on the am VM.

        Any exception is logged and marks the run as failed. In every case
        the VIX host is disconnected (if it was created), the Run button is
        re-enabled and the log colour signal is emitted ("green" on success,
        "red" on failure).
        """
        succeeded = False
        # Created inside the try block; stays None if an earlier step fails
        # so that the cleanup below does not hit an unbound name.
        _vm_host = None
        try:
            get_allconfig()
            GlobalLogging.getInstance().info('-' * 50)
            GlobalLogging.getInstance().info('Copying the ' + build_number +
                                             ' build to host.')

            # Raw string: "\{" and "\A" are not valid escape sequences.
            release_package_full_path = (
                r'{rp}\{rpd}\ArchiveManagerInstaller.msi'.format(
                    rp=release_path, rpd=build_number))
            # Copy the latest msi package from the share path to the host;
            # on failure the copy is attempted up to 3 times in total.
            # NOTE(review): if all attempts fail, the run still continues
            # with the VM steps below -- confirm this is intended.
            for _ in range(3):
                copy_return = shell.SHFileOperation(
                    (0, shellcon.FO_COPY, release_package_full_path,
                     host_os_files_path_for_am,
                     shellcon.FOF_NOCONFIRMATION | shellcon.FOF_SILENT, None,
                     None))
                if copy_return[0] == 0:
                    GlobalLogging.getInstance().info(
                        "Copied the " + build_number + " build from " +
                        release_package_full_path + " to " +
                        host_os_files_path_for_am)
                    break
                GlobalLogging.getInstance().error(
                    "Copy error with error code:{error}".format(
                        error=copy_return))

            # new a VixHost instance
            _vm_host = VixHost()

            # open the dc vm
            GlobalLogging.getInstance().info('Opening the dc vm.')
            vm_dc = _vm_host.open_vm(dc_vmx_path)
            GlobalLogging.getInstance().info('Power on the dc vm.')
            vm_dc.power_on(launch_gui=True)
            GlobalLogging.getInstance().info('Power on the dc vm - Done.')
            # revert to a snapshot
            GlobalLogging.getInstance().info('Reverting to snapshot.')
            vm_dc_snapshot = vm_dc.snapshot_get_named(snapshot_name)
            vm_dc.snapshot_revert(snapshot=vm_dc_snapshot,
                                  options=VixVM.VIX_VMPOWEROP_LAUNCH_GUI)
            GlobalLogging.getInstance().info(
                'Reverted to snapshot Base - Done.')

            # Login to dc guest for guest operation
            GlobalLogging.getInstance().info('Waiting for tools.')
            vm_dc.wait_for_tools()
            GlobalLogging.getInstance().info('Login dc guest.')
            vm_dc.login(guest_login_name,
                        guest_login_password,
                        require_interactive=True)
            time.sleep(2.0)
            GlobalLogging.getInstance().info(
                'Login to dc guest for guest operation - Done.')

            # copy vix folder from host to guest for dc
            GlobalLogging.getInstance().info(
                'Copying vix folder from host to dc guest.')
            vm_dc.copy_host_to_guest(host_os_files_path_for_dc,
                                     guest_os_files_path)
            GlobalLogging.getInstance().info(
                'Copied vix folder from host to dc guest - Done.')

            # run program on dc vm
            dc_program_name = program_run_on_dc.split(',')
            # The third argument passed to proc_run is False only when the
            # "imme" setting equals the string "True".
            dc_program_block = not (program_run_on_dc_imme == str(True))
            for dc_program in dc_program_name:
                if dc_program:  # skip empty entries of the comma list
                    GlobalLogging.getInstance().info('Running program ' +
                                                     dc_program + '.')
                    vm_dc.proc_run(guest_os_files_path + "\\" + dc_program,
                                   None, dc_program_block)
                    GlobalLogging.getInstance().info('Running program ' +
                                                     dc_program + ' - Done.')

            # run script on dc vm
            dc_script = script_run_on_dc.split(',')
            dc_scr_block = not (script_run_on_dc_imme == str(True))
            for dc_scr in dc_script:
                if dc_scr:  # skip empty entries of the comma list
                    GlobalLogging.getInstance().info('Running script ' +
                                                     dc_scr + '.')
                    vm_dc.run_script(dc_scr, None, dc_scr_block)
                    GlobalLogging.getInstance().info('Running script ' +
                                                     dc_scr + ' - Done.')

            # logout guest of dc
            vm_dc.logout()
            GlobalLogging.getInstance().info('Logout dc guest - Done.')

            # open the am vm
            GlobalLogging.getInstance().info('Opening the am vm.')
            vm_am = _vm_host.open_vm(am_vmx_path)
            GlobalLogging.getInstance().info('Power on the am vm.')
            vm_am.power_on(launch_gui=True)
            GlobalLogging.getInstance().info('Power on the am vm - Done.')
            # revert to a snapshot
            GlobalLogging.getInstance().info('Reverting to snapshot.')
            vm_am_snapshot = vm_am.snapshot_get_named(snapshot_name)
            vm_am.snapshot_revert(snapshot=vm_am_snapshot,
                                  options=VixVM.VIX_VMPOWEROP_LAUNCH_GUI)
            GlobalLogging.getInstance().info(
                'Reverted to snapshot Base - Done.')

            # Login to am guest for guest operation
            GlobalLogging.getInstance().info('Waiting for tools.')
            vm_am.wait_for_tools()

            GlobalLogging.getInstance().info('Login am guest.')
            vm_am.login(guest_login_name,
                        guest_login_password,
                        require_interactive=True)
            time.sleep(2.0)
            GlobalLogging.getInstance().info(
                'Login to am guest for guest operation - Done.')

            # copy vix folder from host to guest for am
            GlobalLogging.getInstance().info(
                'Copying vix folder from host to am guest.')
            vm_am.copy_host_to_guest(host_os_files_path_for_am,
                                     guest_os_files_path)
            GlobalLogging.getInstance().info(
                'Copy vix folder from host to am guest - Done.')

            # run program on am vm
            am_program_name = program_run_on_am.split(',')
            am_program_block = not (program_run_on_am_imme == str(True))
            for am_program in am_program_name:
                if am_program:  # skip empty entries of the comma list
                    GlobalLogging.getInstance().info('Running program ' +
                                                     am_program + '.')
                    vm_am.proc_run(guest_os_files_path + "\\" + am_program,
                                   None, am_program_block)
                    GlobalLogging.getInstance().info('Running program ' +
                                                     am_program + ' - Done.')

            # run script on am vm
            am_script = script_run_on_am.split(',')
            am_scr_block = not (script_run_on_am_imme == str(True))
            for am_scr in am_script:
                if am_scr:  # skip empty entries of the comma list
                    GlobalLogging.getInstance().info('Running script ' +
                                                     am_scr + '.')
                    vm_am.run_script(am_scr, None, am_scr_block)
                    GlobalLogging.getInstance().info('Running script ' +
                                                     am_scr + ' - Done.')

            # logout guest of am
            vm_am.logout()
            GlobalLogging.getInstance().info('Logout am guest - Done.')
            succeeded = True

        except Exception:
            # Log the traceback; the outcome is reported in the finally block.
            GlobalLogging.getInstance().exception("Catch a exception.")
        finally:
            # Only disconnect when the host object was actually created.
            if _vm_host is not None:
                _vm_host.disconnect()
            self.run_button.setEnabled(True)
            self.run_button.setText("Run")
            if succeeded:
                self._change_logcolour_signal.emit("green")
                GlobalLogging.getInstance().info(build_number +
                                                 " installed successfully.")
            else:
                self._change_logcolour_signal.emit("red")
                GlobalLogging.getInstance().info(build_number +
                                                 " install failed.")
示例#12
0
                                               'program_run_on_am_imme')
    script_run_on_am_imme = config_parser.get('config',
                                              'script_run_on_am_imme')


def get_release_package_dirs():
    """Refresh the global list of entries found in the release directory.

    Reads the 'release_path' option from the 'config' section of the
    module-level ``config_parser`` and stores the directory listing of
    that path in the global ``release_package_dirs``.
    """
    global release_package_dirs
    release_package_dirs = os.listdir(
        config_parser.get('config', 'release_path'))


# Script entry point: load the configuration, define the module-level paths
# and start the Qt application with logging wired to a file and the window.
if __name__ == '__main__':
    # NOTE(review): this handle is never closed in the visible code;
    # presumably it is read elsewhere in the file -- confirm.
    f = open(r'.\config\help.txt', "r")
    config_parser = configparser.ConfigParser()

    config_parser.read(r'.\config\myapp.conf')

    # Populate the module-level configuration values and the list of
    # release directories before the window is created.
    get_allconfig()
    get_release_package_dirs()
    # Host-side source folders and the guest-side destination folder used
    # when copying files into the virtual machines.
    host_os_files_path_for_dc = r".\vix\dc"
    host_os_files_path_for_am = r".\vix\am"
    guest_os_files_path = r"C:\vix"

    app = QApplication(sys.argv)
    main_window = MainWindow()
    main_window.show()
    # Send log output to a file and to the main window, at INFO level.
    GlobalLogging.getInstance().setLoggingToFile(r'./logs/log.txt')
    GlobalLogging.getInstance().setLoggingToQTextBrowserHanlder(main_window)
    GlobalLogging.getInstance().setLoggingLevel(logging.INFO)
    # Run the Qt event loop and exit with its return code.
    sys.exit(app.exec_())