Example #1
 def __init__(self):
     clear()
     self.tip()
     self.log = MyLog()
     self.title = 'unknow'
     self.packageSize = 1024 * 1024
     self.mvPlayUrl = self.getMvPlayUrl()
Example #2
 def __init__(self, username, password):
     self.log = MyLog()  # create the logging object
     self.username = username
     self.password = password
     self.driver = webdriver.Chrome()
     self.driver.implicitly_wait(5)  # implicit wait of 5 seconds
     self.isLogin = 0
     self.uid = ""
Example #3
class TestTime(object):
    def __init__(self):
        self.log = MyLog()
        self.testTime()
        self.testLocaltime()
        self.testSleep()
        self.testStrftime()

    def testTime(self):
        self.log.info(u'Start testing the time.time() function')
        print(u'Current timestamp: time.time() = %f' % time.time())
        print(u'It returns a float: the number of seconds elapsed since the epoch (1970)')
        print('\n')

    def testLocaltime(self):
        self.log.info(u'Start testing the time.localtime() function')
        print(u'Current local time: time.localtime() = %s' % time.localtime())
        print(u'It returns a tuple of type struct_time')
        print('\n')

    def testSleep(self):
        self.log.info(u'Start testing the time.sleep() function')
        print(u'This is a timer: time.sleep(5)')
        print(u'Just close your eyes and count five seconds')
        time.sleep(5)
        print('\n')

    def testStrftime(self):
        self.log.info(u'Start testing the time.strftime() function')
        print(u'This function returns a formatted time string')
        print('time.strftime("%%Y-%%m-%%d %%X",time.localtime()) = %s' %
              time.strftime("%Y-%m-%d %X", time.localtime()))
        print('\n')
Example #4
class TestTime(object):
    def __init__(self):
        self.log = MyLog()
        self.testTime()
        self.testLocaltime()
        self.testSleep()
        self.testStrftime()

    def testTime(self):
        self.log.info(u'Start testing the time.time() function')
        print(u'Current timestamp: time.time() = %f' % time.time())
        print(u'It returns a float: the number of seconds elapsed since the epoch (1970)')
        print('\n')

    def testLocaltime(self):
        self.log.info(u'Start testing the time.localtime() function')
        print(u'Current local time: time.localtime() = %s' % str(time.localtime()))
        print(u'It returns a tuple of type struct_time')
        print('\n')

    def testSleep(self):
        self.log.info(u'Start testing the time.sleep() function')
        print(u'This is a timer: time.sleep(5)')
        print(u'Just close your eyes and count five seconds')
        time.sleep(5)
        print('\n')

    def testStrftime(self):
        self.log.info(u'Start testing the time.strftime() function')
        print(u'This function returns a formatted time string')
        print(u'time.strftime("%%Y-%%m-%%d %%X",time.localtime()) = %s' % time.strftime("%Y-%m-%d %X", time.localtime()))
        print('\n')
Example #5
	def __init__(self):
		clear()
		self.tip()
		self.log = MyLog()
		self.title = 'unknow'
		self.packageSize = 1024*1024
		self.mvPlayUrl = self.getMvPlayUrl()
Example #6
    def getFtpFileMain(self):
        while True:
            ml = MyLog()
            scanSN = ScanSernum()
            SN = scanSN.scanMain()
            snlist = []
            if "/" in SN:    # the input is a file containing serial numbers
                with open(SN, 'r') as snfile:
                    for asn in snfile.readlines():
                        if "FDO" in asn:
                            snlist.append(asn.strip())
            else:
                snlist.append(SN)
            myarea = ScanAreaType()
            logarea = myarea.askArea()
            logtype = myarea.asklogType().lower()
            if "&" in logtype:
                logtype = "FDO"    # copy both mview and mtype
            else:
                logtype = logtype[1:]    # the final logtype value is "type" or "view"
            logList = []
            for sn in snlist:
                pcsn = GetPCSN()
                pcsntuple = pcsn.getMain(sn)
                childsn = pcsntuple[0]
                parentsn = pcsntuple[1]
                self.chDir()
                getlogpath = GetLogPath()
                childFolderList = getlogpath.getMain(parentsn, childsn, logarea)
                input_str = '>>>>Start find SN: %s/%s test log in ftp server' % (childsn, parentsn)
                ml.info(input_str)
                print UseStyle(input_str, fore='blue')
                for childFolder in childFolderList:
                    if "KFCR" in childFolder:
                        KFCRYear = int(childFolder.split('/')[1])
                        KFCRWeek = int(childFolder.split('/')[3][2:])
                        # KFCR logs have been renamed while being copied to the ftp server since 2018/3/2
                        if KFCRYear <= 2018 and KFCRWeek < 10 or KFCRYear == 2017:
                            logtype = "FDO"    # both mtype and mview are in the zip file, e.g. FDO2114B0KD_1491454226.zip
                    myftp = LoginFTP()
                    ftp = myftp.ftpserver()
                    ftp.cwd('/')    # enter the FTP top-level folder
                    ftp.cwd(self.baseFtpFolder)
                    try:
                        ftp.cwd(childFolder)
                        for fileList in ftp.nlst():
                            if (childsn in fileList or parentsn in fileList) and logtype in fileList:
                                input_str = '--->Copy file:%s to %s' % (fileList, os.getcwd())
                                print UseStyle(input_str, fore='blue')
                                ml.info(input_str)
                                if not os.path.lexists(fileList):    # skip the download if the log already exists in /usr/auto/testlog
                                    mydlfile = DLFtpFile()
                                    mydlfile.downloadFile(ftp, fileList)
                                logList.append(fileList)
                    except Exception, err:    # keep the script running even if an error occurs
                        outStr = r"!!!No file in C:/Backup/BU3/%s in FTP server, ERR:%s" % (childFolder, err)
                        print UseStyle(outStr, fore='black', back='yellow')
                        if len(childFolderList) == 1: break
                    finally:
                        pass
Example #7
    def process_item(self, item, spider):
        m1 = MyLog()
        cityName = item['cityName'].encode('utf8')
        img = os.path.basename(item['img'])
        week = item['week'].encode('utf8')
        weather = item['weather'].encode('utf8')
        shidu = item['shidu'].encode('utf8')
        air = item['air'].encode('utf8')

        m1.info('Storing the item in MySQL')
        
        conn = MySQLdb.connect(
            host='localhost',
            port=3306,
            user='******',
            password='******',
            db='scrapyDB',
            charset='utf8'
        )
        cur = conn.cursor()
        cur.execute("insert into weather(cityName,img,week,weather,shidu,air) values(%s,%s,%s,%s,%s,%s)", (cityName,img,week,weather,shidu,air))
        cur.close()
        conn.commit()
        conn.close()

        m1.info('MySQL storage finished')
        return item
Example #8
 def process_item(self, item, spider):
     m1 = MyLog()
     today = time.strftime('%Y%m%d', time.localtime())
     fileName = 'weather' + today + '.json'
     m1.error('Start converting to JSON')
     with codecs.open(fileName, 'a', encoding='utf8') as fp:
         line = json.dumps(dict(item), ensure_ascii=False) + '\n'
         fp.write(line)
     m1.warn('Finished converting to JSON')
     return item
Example #9
 def process_item(self, item, spider):
     m1 = MyLog()
     today = time.strftime('%Y%m%d', time.localtime())
     fileName = 'weather' + today + '.txt'
     m1.info('Sync start')
     with open(fileName, 'a') as fp:
         fp.write(item['cityName'].encode('utf-8') + '\t')
         fp.write(item['weather'].encode('utf-8') + '\t')
         imgName = os.path.basename(item['img'])
         fp.write(imgName + '\t')
         if not os.path.exists(imgName):
             # download the weather icon only if it is not cached yet;
             # use a separate file handle so the outer fp is not shadowed
             with open(imgName, 'wb') as imgFp:
                 response = urllib2.urlopen(item['img'])
                 imgFp.write(response.read())
         fp.write(item['shidu'].encode('utf-8') + '\t')
         fp.write(item['air'].encode('utf-8') + '\n\n')
         time.sleep(1)
     m1.info('Sync finished')
     return item
Example #10
 def __init__(self):
     self.log=MyLog()
     self.testTime()
     self.testLocaltime()
     self.testSleep()
     self.testStrftime()
Example #11
def delete_proxy(self, proxy):
    # remove a dead proxy from the local proxy-pool service
    requests.get("http://127.0.0.1:5010/delete/?proxy={}".format(proxy))


def get_html(self, url):
    retry_count = 5
    proxy = self.get_proxy()
    print(proxy)
    while retry_count > 0:
        try:
            # fetch the page through the proxy
            html = requests.get(url, proxies={"http": "http://{}".format(proxy)})
            return html.text
        except Exception:
            retry_count -= 1
    # after 5 failed attempts, drop the proxy from the pool and retry with a new one
    self.delete_proxy(proxy)
    return self.get_html(url)


logger = MyLog()
def get_html(url):
    try:
        r = requests.get(url, timeout=30)
        r.raise_for_status()
        r.encoding = r.apparent_encoding
        return r.text
    except:
        logger.error('get_html error, page: ' + url)
        return "ERROR when getting html"
def get_content(url):
    print("Currently crawling page: " + url)
    soup = BeautifulSoup(get_html(url), 'lxml')
    try:
        forum_name = soup.find('p', attrs={'class': "nrbt"}).a.text
        print("Forum name: " + forum_name)
    except:
        logger.error("get_forum_name error, page: " + url)
        forum_name = None
    try:
        time = soup.find('p', attrs={'class': "fbsj"}).text[4:]
        time = datetime.strptime(time, '%Y-%m-%d %H:%M')
        print("Post publish time: " + str(time))
    except:
        logger.error("get_publish_time error, page: " + url)
        time = None
    try:
        topic = soup.find('div', attrs={'class': "nr_r_c"}).find('p', attrs={'class': "contitle"}).text
        print("Post topic: " + topic)
    except:
        logger.error("get_topic error, page: " + url)
        topic = None
    # all_neirong = []
    # for part in soup.find_all('div',attrs={"class":"neirong"}):
    #     # print(part.text)
    #     neirong_div = part.children
    #     neirong = ''
    #     try:
    #         neirong+=neirong.text
    #     except:
    #         None
    #     for i in neirong_div:
    #         try:
    #             img = i.find('img')
    #             if img:
    #                 neirong = neirong+'['+img.attrs['src'] + ']'
    #         except:
    #             None
    #             try:
    #                 text = i.text
    #                 # print('text'+text)
    #                 neirong = neirong+text
    #             except:
    #                 None
    #     # print("Content of this block: " + neirong)
    #     if neirong != None:
    #         all_neirong.append(neirong)


    # print(all_neirong)

# get_content("http://bbs.12365auto.com/postcontent.aspx?tID=47547&sId=1527&ppage=1&from=s")
get_content("http://bbs.12365auto.com/postcontent.aspx?tID=133692&sId=1147&ppage=1&from=s")
Example #12
#!/usr/bin/env python
# -*- coding: utf-8 -*-

from myLog import MyLog

if __name__ == '__main__':
    ml = MyLog()
    ml.debug("I am the debug message")
    ml.info("I am the info message")
    ml.warn("I am the warn message")
    ml.error("I am the error message")
    ml.critical("I am the critical message")
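
Every example on this page imports MyLog from a local myLog.py that is not shown in the listing. For reference, here is a minimal sketch of what such a helper might look like, assuming it is a thin wrapper around Python's standard logging module that exposes the debug/info/warn/error/critical methods used above; the log file name and message format are illustrative assumptions, not the original implementation.

# myLog.py -- hypothetical minimal implementation, for illustration only
import logging


class MyLog(object):
    def __init__(self, logFile='my.log'):
        self.logger = logging.getLogger('MyLog')
        self.logger.setLevel(logging.DEBUG)
        if not self.logger.handlers:  # avoid adding duplicate handlers on repeated MyLog() calls
            fmt = logging.Formatter('%(asctime)s %(levelname)s %(message)s')
            fileHandler = logging.FileHandler(logFile)
            fileHandler.setFormatter(fmt)
            streamHandler = logging.StreamHandler()
            streamHandler.setFormatter(fmt)
            self.logger.addHandler(fileHandler)
            self.logger.addHandler(streamHandler)

    def debug(self, msg):
        self.logger.debug(msg)

    def info(self, msg):
        self.logger.info(msg)

    def warn(self, msg):
        self.logger.warning(msg)

    def error(self, msg):
        self.logger.error(msg)

    def critical(self, msg):
        self.logger.critical(msg)

Any of the snippets above would run against this sketch, but each project's real myLog.py may configure handlers, levels, and formats differently.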
Example #13
__doc__ = """The PythonDemo.py is to generate the reports of the config files which had been modified in the online services of the servers。This file use the PyH module to convert the text to the html.
"""
__author__ = "Wang Rui"
__version__ = 'v0.1.0'
__date__ = '2017-09-09'

import argparse
import sys
import os
import re
from myLog import MyLog
# def execAnsible():
#     status = os.system('sh ~/svnrepos/1.sh')

ml = MyLog()
host = []
serviceName = ""
modifyFiles = ""
modifyContentBefore = ""
modifyContentAfter = ""

content_before = '+<Server port="5211" shutdown="SHUTDOWN"> + <Connector port="5201" protocol="HTTP/1.1" + <Connector port="5221" protocol="AJP/1.3" redirectPort="8443" />'

content_after = '-<Server port="5210" shutdown="SHUTDOWN"> - <Connector port="5200" protocol="HTTP/1.1" - <Connector port="5220" protocol="AJP/1.3" redirectPort="8443" />'


def getPath():
    pass

Example #14
#!/usr/bin/env python
#-*- coding: utf-8 -*-
__author__ = 'hstking [email protected]'

from myLog import MyLog

if __name__ == '__main__':
    ml = MyLog()
    ml.debug('I am debug message')
    ml.info('I am info message')
    ml.warn('I am warn message')
    ml.error('I am error message')
    ml.critical('I am critical message')
Example #15
class DownloadYinyuetaiMv(object):
    def __init__(self):
        clear()
        self.tip()
        self.log = MyLog()
        self.title = 'unknow'
        self.packageSize = 1024 * 1024
        self.mvPlayUrl = self.getMvPlayUrl()

    def getMvPlayUrl(self):
        '''Get the play URL of a Yinyuetai MV '''
        self.log.info('Getting the MV play URL')
        self.mvPlayUrl = raw_input(
            'Enter the play URL of an MV on Yinyuetai\ne.g. http://v.yinyuetai.com/video/615494:\n')
        self.checkMvPlayUrl(self.mvPlayUrl)

    def checkMvPlayUrl(self, url):
        '''Check whether the entered MV play URL is valid '''
        self.log.info('Checking the MV play URL')
        try:
            id = url.replace('http://v.yinyuetai.com/video/', '')
            idNum = int(id)
        except ValueError:
            self.log.error('The MV play URL is invalid, exiting')
        res = urllib2.urlopen(url, timeout=5)
        mat = re.compile(r'<h3 class="fl f18">(.*?)</h3>')
        self.title = re.findall(mat, res.read())[0]

        print('MV:%s' % self.title)

        downUrl = self.getMvDownloadUrl(id)
        self.downloadMv(downUrl)

    def getMvDownloadUrl(self, id):
        '''Get the download URL of the MV '''
        self.log.info('Getting the MV download URL')
        url = 'http://www.yinyuetai.com/insite/get-video-info?flex=true&videoId=' + id
        try:
            res = urllib2.urlopen(url, timeout=5)
        except:
            self.log.error('Failed to open the page')
        mat = re.compile(
            r'http://h.?.yinyuetai.com/uploads/videos/common/.*?\.flv')
        urls = re.findall(mat, res.read())
        return urls[-1]

    def downloadMv(self, url):
        '''Start downloading the MV '''
        fileName = './' + self.title + '.mp4'
        res = urllib2.urlopen(url, timeout=5)
        self.log.info('Start downloading MV %s' % fileName)
        rSize = int(dict(res.headers).get('content-length'))
        t1 = time.time()
        with open(fileName, 'wb') as fp:
            st = res.read(self.packageSize)
            offset = 0
            while st:
                fp.write(st)
                st = res.read(self.packageSize)
                offset += len(st)
                p = multiprocessing.Process(target=self.pLen,
                                            args=(
                                                fileName,
                                                offset,
                                                rSize,
                                            ))
                p.start()
        t2 = time.time()
        time.sleep(2)
        print(u'\nDownload took %ds in total\n' % (t2 - t1))

    def pLen(self, fileName, offset, rSize):
        if offset < rSize:
            print('%s\t%dbytes/%dbytes\r' % (fileName, offset, rSize)),
            time.sleep(1)

    def tip(self):
        print('|' + '-' * 40)
        print('|' + u'This is a script for downloading Yinyuetai MVs')
        print('|' + '-' * 40)
Example #16
def testLog():
    mylog = MyLog()
    mylog.debug('it is debug')
    mylog.error("I'm error")
Example #17
#!/usr/bin/env python
#-*- coding:utf-8 -*-
from myLog import MyLog

if __name__ == "__main__":
    ml = MyLog()
    ml.debug('debug')
    ml.info('info')
    ml.warn('warn')
    ml.error('error')
    ml.critical('critical')
Example #18
class DownloadYinyuetaiMv(object):
	def __init__(self):
		clear()
		self.tip()
		self.log = MyLog()
		self.title = 'unknow'
		self.packageSize = 1024*1024
		self.mvPlayUrl = self.getMvPlayUrl()

	def getMvPlayUrl(self):
		'''Get the play URL of a Yinyuetai MV '''
		self.log.info('Getting the MV play URL')
		self.mvPlayUrl = raw_input('Enter the play URL of an MV on Yinyuetai\ne.g. http://v.yinyuetai.com/video/615494:\n')
		self.checkMvPlayUrl(self.mvPlayUrl)
			

	def checkMvPlayUrl(self,url):
		'''Check whether the entered MV play URL is valid '''
		self.log.info('Checking the MV play URL')
		try:
			id = url.replace('http://v.yinyuetai.com/video/','')
			idNum = int(id)
		except ValueError:
			self.log.error('The MV play URL is invalid, exiting')
		res = urllib2.urlopen(url,timeout=5)
		mat = re.compile(r'<h3 class="fl f18">(.*?)</h3>')
		self.title = re.findall(mat,res.read())[0]

		print('MV:%s' %self.title)

		downUrl = self.getMvDownloadUrl(id)
		self.downloadMv(downUrl)

	def getMvDownloadUrl(self,id):
		'''Get the download URL of the MV '''
		self.log.info('Getting the MV download URL')
		url = 'http://www.yinyuetai.com/insite/get-video-info?flex=true&videoId=' + id
		try:
			res = urllib2.urlopen(url,timeout=5)
		except:
			self.log.error('Failed to open the page')
		mat = re.compile(r'http://h.?.yinyuetai.com/uploads/videos/common/.*?\.flv')
		urls = re.findall(mat,res.read())
		return urls[-1]

	def downloadMv(self,url):
		'''Start downloading the MV '''
		fileName = './' + self.title + '.mp4'
		res = urllib2.urlopen(url,timeout=5)
		self.log.info('Start downloading MV %s' %fileName)
		rSize = int(dict(res.headers).get('content-length'))
		t1 = time.time()
		with open(fileName,'wb') as fp:
			st = res.read(self.packageSize)
			offset = 0
			while st:
				fp.write(st)
				st = res.read(self.packageSize)
				offset += len(st)
				p = multiprocessing.Process(target=self.pLen,args=(fileName,offset,rSize,))
				p.start()
		t2 = time.time()
		time.sleep(2)
		print(u'\nDownload took %ds in total\n' %(t2 - t1))

	def pLen(self,fileName,offset,rSize):
		if offset < rSize:
			print('%s\t%dbytes/%dbytes\r' %(fileName,offset,rSize)),
			time.sleep(1)

	def tip(self):
		print('|' + '-'*40)
		print('|' + u'This is a script for downloading Yinyuetai MVs')
		print('|' + '-'*40)
Example #19
from myLog import MyLog
if __name__ == '__main__':
    ml = MyLog()
    ml.debug("1'm a debug message")
    ml.info("I'm an info message")
    ml.warn("I'm a warn message")
    ml.error("I'm an error message")
    ml.critical("I'm a critical message")
Example #20
# coding:utf-8

from myLog import MyLog

mylog = MyLog()


def testMylog():
    try:
        a = 1 / 0
    except ZeroDivisionError as e:
        mylog.error("def testMylog " + str(e))


if __name__ == '__main__':
    testMylog()
Example #21
class WBSpider(object):
    '''
    Attributes:
        username: the Weibo username
        password: the Weibo password
        driver: the browser (webdriver.Chrome here)
    '''
    def __init__(self, username, password):
        self.log = MyLog()  # create the logging object
        self.username = username
        self.password = password
        self.driver = webdriver.Chrome()
        self.driver.implicitly_wait(5)  # implicit wait of 5 seconds
        self.isLogin = 0
        self.uid = ""

    '''
    Destructor
    Closes the browser when the instance is destroyed.
    '''

    def __del__(self):
        self.driver.close()  # close the browser

    '''
    Log in to Weibo
    Sets isLogin to 1 on success, 0 otherwise
    '''

    def loginWeibo(self):
        # log in with username and password
        self.driver.get("http://login.sina.com.cn/")
        self.driver.implicitly_wait(5)
        elem_user = self.driver.find_element_by_name("username")  # locate the username input box
        elem_user.send_keys(self.username)  # send the username
        # locate the password input box
        elem_pwd = self.driver.find_element_by_name("password")
        elem_pwd.send_keys(self.password)  # send the password
        try:
            time.sleep(5)
            elem_pwd.send_keys(Keys.RETURN)  # press Enter to submit
            time.sleep(2)
            self.log.info('Login succeeded...')
            self.isLogin = 1  # login flag
        except:
            self.log.error("Login Error")
            self.isLogin = 0  # login flag

    '''
    Set the Uid of the Weibo user to crawl
    '''

    def setUid(self, Uid):
        self.uid = Uid

    '''
    Fetch weibo posts
    PageNum: the number of pages to crawl
    Returns: a list of weibo posts
    '''

    def getWeibo(self, PageNum):
        total = PageNum
        # validate preconditions
        if self.isLogin == 0:
            self.log.error("Not logged in to Weibo!")
            return
        if self.uid == "":
            self.log.error("The uid of the target user is empty, please set it!")
            return
        if PageNum < 0:
            self.log.error("Invalid page number")
            return
        # start crawling
        weiboList = []
        url = "http://weibo.com/" + self.uid
        self.driver.get(url)
        self.driver.implicitly_wait(5)
        # fetch the profile details
        self.log.debug("About to visit the profile page....." + str(url))
        self.log.info('Profile details')
        # user id
        print(u'User id: ' + self.uid)
        self.driver.implicitly_wait(5)
        # nickname
        str_name = self.driver.find_element_by_xpath(
            "//div[@class='pf_username']/h1")
        name = str_name.text  # str_name.text is a unicode string
        self.log.info("Nickname: " + str(name))
        self.driver.implicitly_wait(5)
        try:
            while (1):
                # scroll the page with selenium to locate the next-page ("下一页") button
                print("Crawling page " + str(total - PageNum + 1))
                next_page = None
                try:
                    next_page = self.driver.find_element_by_link_text('下一页')
                except:
                    next_page = None
                Count = 0
                while (next_page is None):
                    try:
                        next_page = self.driver.find_element_by_link_text(
                            '下一页')
                    except:
                        next_page = None
                    Count = Count + 1
                    print(Count)
                    time.sleep(3)
                    self.driver.execute_script(
                        "window.scrollTo(0, document.body.scrollHeight);")
                    time.sleep(3)
                    if Count == 200:
                        break
                # get the weibo post elements
                weiboelem = self.driver.find_elements_by_xpath(
                    "//div[@action-type='feed_list_item']/div[@node-type='feed_content']/div[@class='WB_detail']/div[@node-type='feed_list_content']"
                )
                # convert each element to text and append it to the weibo list
                for i in range(len(weiboelem)):
                    weiboList.append(weiboelem[i].text)
                # get the next-page button and click it; the button may fail to load and raise an exception
                if (next_page is None):
                    break
                if (PageNum == 0):
                    self.log.info("Reached the last page")
                    break
                # the next-page button may be covered and not directly clickable
                ActionChains(self.driver).move_to_element(next_page).click(
                    next_page).perform()
                next_page.click()
                PageNum = PageNum - 1
                self.driver.implicitly_wait(5)
        except:
            self.log.error("爬取异常")
        finally:
            return weiboList