Пример #1
0
    def getFtpFileMain(self):
        """Prompt for serial numbers and copy their matching test logs from the FTP server.

        Each pass of the outer loop:

        1. Reads a value through ``ScanSernum().scanMain()``. A value that
           contains "/" is opened as a text file and every line containing
           "FDO" is collected (stripped) as a serial number; any other value
           is used as the single serial number.
        2. Asks ``ScanAreaType`` for the log area and the log type. A log type
           containing "&" becomes "FDO"; otherwise its first character is
           dropped.
        3. For every serial number, resolves the (child, parent) pair through
           ``GetPCSN().getMain`` and the candidate folders through
           ``GetLogPath().getMain``, then logs in to the FTP server once per
           folder and downloads every listed file whose name contains the
           child or parent serial number and the log type, unless a path of
           that name already exists locally.

        Names of all matching files are appended to ``logList``.

        NOTE(review): nothing in the visible body leaves the ``while True``
        loop and ``logList`` is never returned or used, so this snippet looks
        truncated -- confirm against the full source.
        """
        while True:
            ml = MyLog()
            scanSN = ScanSernum()
            SN = scanSN.scanMain()
            snlist = []
            if "/" in SN:	#Input file
                with open(SN,'r') as snfile:
                   for asn in snfile.readlines():
                       if "FDO" in asn:
                           snlist.append(asn.strip())
            else:
                snlist.append(SN)
            myarea = ScanAreaType()
            logarea = myarea.askArea()
            logtype = myarea.asklogType().lower()
            if "&" in logtype:
                logtype = "FDO"   #Copy mview and mtype
            else:
                logtype = logtype[1:]   #the logtype finally value is type or view
            # Names of every matching file, whether downloaded now or already present.
	    logList = []
            for sn in snlist:
                    # getMain returns a sequence: index 0 is the child SN, index 1 the parent SN.
	            pcsn = GetPCSN()
	            pcsntuple = pcsn.getMain(sn)
	            childsn = pcsntuple[0]
	            parentsn = pcsntuple[1]
                    # presumably switches to the local download directory -- TODO confirm what chDir() does
	            self.chDir()
	            getlogpath = GetLogPath()
	            childFolderList = getlogpath.getMain(parentsn,childsn,logarea)
	            input_str = '>>>>Start find SN: %s/%s test log in ftp server'%(childsn, parentsn)
	            ml.info(input_str)
	            print UseStyle(input_str, fore='blue')
	            for childFolder in childFolderList:
	                if "KFCR" in childFolder:
                            # Year is path component 1; week is component 3 minus its two-character prefix.
	                    KFCRYear = int(childFolder.split('/')[1])
	                    KFCRWeek = int(childFolder.split('/')[3][2:])
	                    #Rename the KFCR log name during copying it to ftp server on 2018/3/2
                            # NOTE(review): this parses as (year <= 2018 and week < 10) or year == 2017,
                            # so early weeks of any year before 2018 also match -- confirm intent.
                            # NOTE(review): logtype is never reset, so once it becomes "FDO" here it
                            # stays "FDO" for all later folders and serial numbers -- confirm intent.
	                    if KFCRYear <= 2018 and KFCRWeek < 10 or KFCRYear == 2017:
	                        logtype = "FDO"    #Both mtype and mview will in zip file, eg:FDO2114B0KD_1491454226.zip
                        # A new FTP login is made for every folder; no quit()/close() is visible here.
	                myftp = LoginFTP()
	                ftp = myftp.ftpserver()
                        ftp.cwd('/')	#Enter the FTP top folder
	                ftp.cwd(self.baseFtpFolder)
	                try:
	                    ftp.cwd(childFolder)
	                    for fileList in ftp.nlst():
                                # Keep files named after either serial number that also contain the log type.
	                        if (childsn in fileList or parentsn in fileList) and logtype in fileList:
	                            input_str = '--->Copy file:%s to %s'%(fileList,os.getcwd())
	                            print UseStyle(input_str, fore='blue')
	                            ml.info(input_str)
				    if not os.path.lexists(fileList):    #Download only when no local path of this name exists yet (original note: /usr/auto/testlog)
	                                mydlfile = DLFtpFile()
	                                mydlfile.downloadFile(ftp, fileList)
	                            logList.append(fileList)
	                except Exception, err:	#The script still running although met error
	                    outStr = r"!!!No file in C:/Backup/BU3/%s in FTP server, ERR:%s"%(childFolder,err)
	                    print UseStyle(outStr, fore='black', back='yellow')
                            # This break only leaves the per-folder loop, not the outer while loop.
	                    if len(childFolderList) == 1: break
	                finally:
                            pass
Пример #2
0
    def process_item(self, item, spider):
        """Insert one scraped weather item into the MySQL ``weather`` table.

        Args:
            item: mapping with the keys ``cityName``, ``img``, ``week``,
                ``weather``, ``shidu`` and ``air``. Every value except
                ``img`` is UTF-8 encoded before storage; for ``img`` only the
                base name of the path/URL is stored.
            spider: the spider that produced the item (unused).

        Returns:
            The unchanged ``item``.

        Raises:
            Any exception raised by the database driver. The transaction is
            rolled back and the connection closed before it propagates.
        """
        m1 = MyLog()
        cityName = item['cityName'].encode('utf8')
        img = os.path.basename(item['img'])
        week = item['week'].encode('utf8')
        weather = item['weather'].encode('utf8')
        shidu = item['shidu'].encode('utf8')
        air = item['air'].encode('utf8')

        m1.info('进行mysql存储')

        # NOTE(review): connection settings are hard-coded here; consider
        # moving them to the project settings.
        conn = MySQLdb.connect(
            host='localhost',
            port=3306,
            user='******',
            password='******',
            db='scrapyDB',
            charset='utf8'
        )
        try:
            cur = conn.cursor()
            try:
                # Parameterized query: the driver escapes every value.
                cur.execute("insert into weather(cityName,img,week,weather,shidu,air) values(%s,%s,%s,%s,%s,%s)", (cityName,img,week,weather,shidu,air))
            finally:
                cur.close()
            conn.commit()
        except Exception:
            # Leave no half-finished transaction behind, then let the error surface.
            conn.rollback()
            raise
        finally:
            # Always release the connection, even when the insert fails.
            conn.close()

        m1.info('mysql存储完成')
        return item
Пример #3
0
class TestTime(object):
    """Console walkthrough of four functions of the ``time`` module.

    Creating an instance runs every demonstration immediately, in this order:
    ``testTime``, ``testLocaltime``, ``testSleep``, ``testStrftime``.
    """

    def __init__(self):
        self.log = MyLog()
        for demo in (self.testTime, self.testLocaltime,
                     self.testSleep, self.testStrftime):
            demo()

    def testTime(self):
        """Print the current timestamp returned by ``time.time()``."""
        self.log.info(u'开始测试time.time()函数')
        stamp = time.time()
        print(u'当前时间戳为:time.time()=%f' % stamp)
        print(u'这里返回的是一个浮点型的数值,它是从1970纪元后经过的浮点秒数')
        print('\n')

    def testLocaltime(self):
        """Print the ``struct_time`` returned by ``time.localtime()``."""
        self.log.info(u'开始测试time.localtime()函数')
        localText = str(time.localtime())
        print(u'当前本地时间为:time.localtime()= %s' % localText)
        print(u'这里返回的是一个struct_time结构的元组')
        print('\n')

    def testSleep(self):
        """Announce a pause, then block for five seconds with ``time.sleep``."""
        self.log.info(u'开始测试time.sleep()函数')
        print(u'这是个计时器:time.sleep(5)')
        print(u'闭上眼睛数上5s就可以')
        time.sleep(5)
        print('\n')

    def testStrftime(self):
        """Print the local time formatted by ``time.strftime``."""
        self.log.info(u'开始测试time.strftime()函数')
        print(u'这个函数返回的是一个格式化的时间')
        formatted = time.strftime("%Y-%m-%d %X", time.localtime())
        print(u'time.strftime("%%Y-%%m-%%d %%X",time.localtime())= %s' % formatted)
        print('\n')
Пример #4
0
class TestTime(object):
    """Console walkthrough of four functions of the ``time`` module.

    Creating an instance runs every demonstration immediately, in this order:
    ``testTime``, ``testLocaltime``, ``testSleep``, ``testStrftime``.
    """

    def __init__(self):
        # Logger used to announce each demonstration before it prints.
        self.log = MyLog()
        self.testTime()
        self.testLocaltime()
        self.testSleep()
        self.testStrftime()

    def testTime(self):
        """Print the current timestamp returned by ``time.time()``."""
        self.log.info(u'開始測試time.time()函數')
        print(u'現在時間戳為:time.time() = %f' % time.time())
        print(u'這裡返回的是一個浮點型的數值,它是從1970紀元後經過的浮點秒數')
        print('\n')

    def testLocaltime(self):
        """Print the ``struct_time`` returned by ``time.localtime()``."""
        self.log.info(u'開始測試time.localtime()函數')
        # struct_time is a tuple subclass, so passing it directly to "%" makes
        # Python treat its nine fields as nine format arguments and raise
        # TypeError. Convert it to text first.
        print(u'現在本地時間為:time.localtime() = %s' % str(time.localtime()))
        print(u'這裡返回的是一個struct_time結構的元組')
        print('\n')

    def testSleep(self):
        """Announce a pause, then block for five seconds with ``time.sleep``."""
        self.log.info(u'開始測試time.sleep()函數')
        print(u'這是個計時器:time.sleep(5)')
        print(u'閉上眼睛數上5秒就可以了')
        time.sleep(5)
        print('\n')

    def testStrftime(self):
        """Print the local time formatted by ``time.strftime``."""
        self.log.info(u'開始測試time.strftime()函數')
        print(u'這個函數返回的是一個格式化的時間')
        # "%%" keeps the literal percent signs of the format string in the output.
        print('time.strftime("%%Y-%%m-%%d %%X",time.localtime()) = %s' %
              time.strftime("%Y-%m-%d %X", time.localtime()))
        print('\n')
Пример #5
0
 def process_item(self, item, spider):
     """Append one weather item to today's text file and fetch its icon.

     A line with city name, weather, image file name, humidity (``shidu``)
     and air quality is appended to ``weather<YYYYMMDD>.txt``. The image
     referenced by ``item['img']`` is downloaded into the current directory
     unless a file with the same base name already exists.

     Args:
         item: mapping with the keys ``cityName``, ``weather``, ``img``,
             ``shidu`` and ``air``.
         spider: the spider that produced the item (unused).

     Returns:
         The unchanged ``item``.
     """
     m1 = MyLog()
     today = time.strftime('%Y%m%d', time.localtime())
     fileName = 'weather' + today + '.txt'
     m1.info('同步开始')
     with open(fileName, 'a') as fp:
         fp.write(item['cityName'].encode('utf-8') + '\t')
         fp.write(item['weather'].encode('utf-8') + '\t')
         imgName = os.path.basename(item['img'])
         fp.write(imgName + '\t')
         if not os.path.exists(imgName):
             # Download first so a failed request does not leave an empty
             # image file that would block every later retry.
             response = urllib2.urlopen(item['img'])
             try:
                 imgData = response.read()
             finally:
                 response.close()
             # Use a separate handle name: rebinding ``fp`` here made the
             # writes below target the already-closed image file.
             with open(imgName, 'wb') as imgFile:
                 imgFile.write(imgData)
         fp.write(item['shidu'].encode('utf-8') + '\t')
         fp.write(item['air'].encode('utf-8') + '\n\n')
         # One-second pause per item, kept from the original.
         time.sleep(1)
     m1.info('同步结束')
     return item
Пример #6
0
#!/usr/bin/env python
# -*- coding: utf-8 -*-

from myLog import MyLog

if __name__ == '__main__':
    # Emit one message per severity level, lowest first, to exercise MyLog.
    ml = MyLog()
    for level, text in (
            ("debug", "I am the debug message"),
            ("info", "I am the info message"),
            ("warn", "I am the warn message"),
            ("error", "I am the error message"),
            ("critical", "I am the critical message"),
    ):
        # Look the method up just before calling it, like the direct calls did.
        getattr(ml, level)(text)
Пример #7
0
#!/usr/bin/env python
#-*- coding: utf-8 -*-
__author__ = 'hstking [email protected]'

from myLog import MyLog

if __name__ == '__main__':
    # Smoke test for MyLog: send one message through every severity method.
    ml = MyLog()
    messages = [
        ('debug', 'I am debug message'),
        ('info', 'I am info message'),
        ('warn', 'I am warn message'),
        ('error', 'I am error message'),
        ('critical', 'I am critical message'),
    ]
    for methodName, message in messages:
        logMethod = getattr(ml, methodName)
        logMethod(message)
Пример #8
0
class DownloadYinyuetaiMv(object):
    """Interactive downloader for a single Yinyuetai MV.

    Creating an instance runs the whole workflow: clear the screen, print the
    banner, prompt for the MV play URL, validate it, resolve the download URL
    and save the video as ``./<title>.mp4``.

    Attributes:
        log: ``MyLog`` instance used for progress and error messages.
        title: MV title scraped from the play page ('unknow' until found).
        packageSize: number of bytes read per chunk while downloading.
        mvPlayUrl: the play URL entered by the user.
    """

    def __init__(self):
        clear()
        self.tip()
        self.log = MyLog()
        self.title = 'unknow'
        self.packageSize = 1024 * 1024
        self.mvPlayUrl = self.getMvPlayUrl()

    def getMvPlayUrl(self):
        '''Prompt for the MV play URL, process it and return it.

        Returns:
            The URL typed by the user. Returning it matters because
            ``__init__`` assigns this result to ``self.mvPlayUrl``; returning
            nothing used to overwrite the entered URL with ``None``.
        '''
        self.log.info('获取mv的播放地址')
        self.mvPlayUrl = raw_input(
            '输入音乐台中MV的播放地址\n如http://v.yinyuetai.com/video/615494:\n')
        self.checkMvPlayUrl(self.mvPlayUrl)
        return self.mvPlayUrl

    def checkMvPlayUrl(self, url):
        '''Validate the play URL, then fetch the title and download the MV.

        The URL is accepted when the text left after removing the
        ``http://v.yinyuetai.com/video/`` prefix is an integer. An invalid URL
        is logged and processing stops without any network access.

        Args:
            url: play URL entered by the user.
        '''
        self.log.info('检查mv播放地址')
        videoId = url.replace('http://v.yinyuetai.com/video/', '')
        try:
            int(videoId)
        except ValueError:
            self.log.error('输入的mv播放地址有误,退出程序')
            # Stop here: continuing would request a page for a bogus id.
            return
        res = urllib2.urlopen(url, timeout=5)
        try:
            page = res.read()
        finally:
            res.close()
        mat = re.compile(r'<h3 class="fl f18">(.*?)</h3>')
        titles = re.findall(mat, page)
        if titles:
            self.title = titles[0]
        else:
            # Keep the default title instead of failing with IndexError.
            self.log.error('无法解析MV标题')

        print('MV:%s' % self.title)

        downUrl = self.getMvDownloadUrl(videoId)
        if downUrl is None:
            # The failure has already been logged by getMvDownloadUrl.
            return
        self.downloadMv(downUrl)

    def getMvDownloadUrl(self, id):
        '''Return the download URL for the MV, or None when it cannot be found.

        Args:
            id: numeric video id as a string.

        Returns:
            The last ``.flv`` URL listed in the video-info response, or
            ``None`` when the request fails or no URL is present.
        '''
        self.log.info('获取mv下载地址')
        url = 'http://www.yinyuetai.com/insite/get-video-info?flex=true&videoId=' + id
        try:
            res = urllib2.urlopen(url, timeout=5)
        except Exception:
            self.log.error('网页连接错误')
            # Without a response there is nothing to parse.
            return None
        try:
            body = res.read()
        finally:
            res.close()
        mat = re.compile(
            r'http://h.?.yinyuetai.com/uploads/videos/common/.*?\.flv')
        urls = re.findall(mat, body)
        if not urls:
            self.log.error('未找到mv下载地址')
            return None
        return urls[-1]

    def downloadMv(self, url):
        '''Download the MV in chunks to ``./<title>.mp4`` and print the elapsed time.

        Args:
            url: direct download URL of the video.
        '''
        fileName = './' + self.title + '.mp4'
        res = urllib2.urlopen(url, timeout=5)
        self.log.info('开始下载MV %s' % fileName)
        # A missing Content-Length is treated as 0, which only disables the
        # progress output.
        rSize = int(dict(res.headers).get('content-length') or 0)
        t1 = time.time()
        offset = 0
        try:
            with open(fileName, 'wb') as fp:
                while True:
                    st = res.read(self.packageSize)
                    if not st:
                        break
                    fp.write(st)
                    # Count the chunk that was just written, so the progress
                    # figure matches the bytes on disk.
                    offset += len(st)
                    # Progress is printed from a short-lived child process, as
                    # in the original, so its one-second pause never delays
                    # the download loop.
                    p = multiprocessing.Process(target=self.pLen,
                                                args=(
                                                    fileName,
                                                    offset,
                                                    rSize,
                                                ))
                    p.start()
        finally:
            res.close()
        t2 = time.time()
        time.sleep(2)
        print(u'\n下载时间共%ds\n' % (t2 - t1))

    def pLen(self, fileName, offset, rSize):
        '''Print one progress line while the download is incomplete.

        Args:
            fileName: path of the file being written.
            offset: number of bytes written so far.
            rSize: total size reported by the server.
        '''
        if offset < rSize:
            # Under the Python 2 print statement the trailing comma suppresses
            # the newline, so '\r' keeps rewriting the same console line.
            print('%s\t%dbytes/%dbytes\r' % (fileName, offset, rSize)),
            time.sleep(1)

    def tip(self):
        '''Print the start-up banner.'''
        print('|' + '-' * 40)
        print('|' + u'这是一个下载音悦台MV的脚本')
        print('|' + '-' * 40)
Пример #9
0
class DownloadYinyuetaiMv(object):
	"""Interactive downloader for a single Yinyuetai MV.

	Creating an instance runs the whole workflow: clear the screen, print the
	banner, prompt for the MV play URL, validate it, resolve the download URL
	and save the video as ``./<title>.mp4``.

	Attributes:
		log: ``MyLog`` instance used for progress and error messages.
		title: MV title scraped from the play page ('unknow' until found).
		packageSize: number of bytes read per chunk while downloading.
		mvPlayUrl: the play URL entered by the user.
	"""

	def __init__(self):
		clear()
		self.tip()
		self.log = MyLog()
		self.title = 'unknow'
		self.packageSize = 1024*1024
		self.mvPlayUrl = self.getMvPlayUrl()

	def getMvPlayUrl(self):
		'''Prompt for the MV play URL, process it and return it.

		Returns:
			The URL typed by the user. Returning it matters because
			``__init__`` assigns this result to ``self.mvPlayUrl``; returning
			nothing used to overwrite the entered URL with ``None``.
		'''
		self.log.info('获取mv的播放地址')
		self.mvPlayUrl = raw_input('输入音乐台中MV的播放地址\n如http://v.yinyuetai.com/video/615494:\n')
		self.checkMvPlayUrl(self.mvPlayUrl)
		return self.mvPlayUrl

	def checkMvPlayUrl(self,url):
		'''Validate the play URL, then fetch the title and download the MV.

		The URL is accepted when the text left after removing the
		``http://v.yinyuetai.com/video/`` prefix is an integer. An invalid URL
		is logged and processing stops without any network access.

		Args:
			url: play URL entered by the user.
		'''
		self.log.info('检查mv播放地址')
		videoId = url.replace('http://v.yinyuetai.com/video/','')
		try:
			int(videoId)
		except ValueError:
			self.log.error('输入的mv播放地址有误,退出程序')
			# Stop here: continuing would request a page for a bogus id.
			return
		res = urllib2.urlopen(url,timeout=5)
		try:
			page = res.read()
		finally:
			res.close()
		mat = re.compile(r'<h3 class="fl f18">(.*?)</h3>')
		titles = re.findall(mat,page)
		if titles:
			self.title = titles[0]
		else:
			# Keep the default title instead of failing with IndexError.
			self.log.error('无法解析MV标题')

		print('MV:%s' %self.title)

		downUrl = self.getMvDownloadUrl(videoId)
		if downUrl is None:
			# The failure has already been logged by getMvDownloadUrl.
			return
		self.downloadMv(downUrl)

	def getMvDownloadUrl(self,id):
		'''Return the download URL for the MV, or None when it cannot be found.

		Args:
			id: numeric video id as a string.

		Returns:
			The last ``.flv`` URL listed in the video-info response, or
			``None`` when the request fails or no URL is present.
		'''
		self.log.info('获取mv下载地址')
		url = 'http://www.yinyuetai.com/insite/get-video-info?flex=true&videoId=' + id
		try:
			res = urllib2.urlopen(url,timeout=5)
		except Exception:
			self.log.error('网页连接错误')
			# Without a response there is nothing to parse.
			return None
		try:
			body = res.read()
		finally:
			res.close()
		mat = re.compile(r'http://h.?.yinyuetai.com/uploads/videos/common/.*?\.flv')
		urls = re.findall(mat,body)
		if not urls:
			self.log.error('未找到mv下载地址')
			return None
		return urls[-1]

	def downloadMv(self,url):
		'''Download the MV in chunks to ``./<title>.mp4`` and print the elapsed time.

		Args:
			url: direct download URL of the video.
		'''
		fileName = './' + self.title + '.mp4'
		res = urllib2.urlopen(url,timeout=5)
		self.log.info('开始下载MV %s' %fileName)
		# A missing Content-Length is treated as 0, which only disables the
		# progress output.
		rSize = int(dict(res.headers).get('content-length') or 0)
		t1 = time.time()
		offset = 0
		try:
			with open(fileName,'wb') as fp:
				while True:
					st = res.read(self.packageSize)
					if not st:
						break
					fp.write(st)
					# Count the chunk that was just written, so the progress
					# figure matches the bytes on disk.
					offset += len(st)
					# Progress is printed from a short-lived child process, as
					# in the original, so its one-second pause never delays
					# the download loop.
					p = multiprocessing.Process(target=self.pLen,args=(fileName,offset,rSize,))
					p.start()
		finally:
			res.close()
		t2 = time.time()
		time.sleep(2)
		print(u'\n下载时间共%ds\n' %(t2 - t1))

	def pLen(self,fileName,offset,rSize):
		'''Print one progress line while the download is incomplete.

		Args:
			fileName: path of the file being written.
			offset: number of bytes written so far.
			rSize: total size reported by the server.
		'''
		if offset < rSize:
			# Under the Python 2 print statement the trailing comma suppresses
			# the newline, so '\r' keeps rewriting the same console line.
			print('%s\t%dbytes/%dbytes\r' %(fileName,offset,rSize)),
			time.sleep(1)

	def tip(self):
		'''Print the start-up banner.'''
		print('|' + '-'*40)
		print('|' + u'这是一个下载音悦台MV的脚本')
		print('|' + '-'*40)
Пример #10
0
from myLog import MyLog
if __name__ == '__main__':
    # Smoke test for MyLog: one message per severity level, lowest first.
    ml = MyLog()
    # The debug text previously began with the digit "1" instead of the
    # letter "I", unlike the four messages that follow it.
    ml.debug("I'm a debug message")
    ml.info("I'm an info message")
    ml.warn("I'm a warn message")
    ml.error("I'm an error message")
    ml.critical("I'm a critical message")
Пример #11
0
class WBSpider(object):
    '''
    Selenium-driven scraper that logs in to Sina and collects a user's Weibo posts.

    Attributes:
        username: Weibo account name
        password: Weibo account password
        driver: the browser driver (Chrome)
        log: MyLog instance used for all log output
        isLogin: 1 after loginWeibo() finished without an exception, else 0
        uid: uid of the Weibo user to scrape; set it with setUid()
    '''
    def __init__(self, username, password):
        self.log = MyLog()  # logger for all messages of this spider
        self.username = username
        self.password = password
        self.driver = webdriver.Chrome()
        self.driver.implicitly_wait(5)  # wait up to 5 s when locating elements
        self.isLogin = 0
        self.uid = ""

    def __del__(self):
        '''Close the browser when the instance is destroyed.'''
        # The driver attribute is missing when __init__ failed before creating
        # it; guard so that destruction never raises AttributeError.
        driver = getattr(self, 'driver', None)
        if driver is not None:
            driver.close()

    def loginWeibo(self):
        '''
        Log in through http://login.sina.com.cn/ with the stored credentials.

        Sets isLogin to 1 when the form was submitted without an exception,
        otherwise to 0.
        NOTE(review): the page is not inspected afterwards, so isLogin == 1
        only means the submit step did not raise -- confirm that is enough.
        '''
        self.driver.get("http://login.sina.com.cn/")
        self.driver.implicitly_wait(5)
        elem_user = self.driver.find_element_by_name("username")  # user name input
        elem_user.send_keys(self.username)
        elem_pwd = self.driver.find_element_by_name("password")  # password input
        elem_pwd.send_keys(self.password)
        try:
            time.sleep(5)
            elem_pwd.send_keys(Keys.RETURN)  # submit the form with the Enter key
            time.sleep(2)
            self.log.info('登陆成功...')
            self.isLogin = 1
        except Exception:
            # The attribute is ``log``; the former ``self.Log`` raised
            # AttributeError inside this handler and hid the real failure.
            self.log.error("Login Error")
            self.isLogin = 0

    def setUid(self, Uid):
        '''Set the uid of the Weibo user whose posts should be scraped.'''
        self.uid = Uid

    def _findNextPage(self):
        '''Return the "next page" link element, or None when it cannot be located.'''
        try:
            return self.driver.find_element_by_link_text('下一页')
        except Exception:
            return None

    def getWeibo(self, PageNum):
        '''
        Scrape the text of the posts on the user's Weibo pages.

        Args:
            PageNum: how many times to follow the "next page" link; the first
                page is always scraped, and 0 means only the first page.

        Returns:
            A list with the text of every post collected, including whatever
            was gathered before a scraping error occurred. Returns None when
            the spider is not logged in, no uid is set, or PageNum is negative.
        '''
        total = PageNum
        # Preconditions
        if self.isLogin == 0:
            self.log.error("没有登录微博!")
            return
        if self.uid == "":
            self.log.error("待爬取的微博主的uid为空,请设置!")
            return
        if PageNum < 0:
            self.log.error("页数设置不合法")
            return
        # Open the user's home page
        weiboList = []
        url = "http://weibo.com/" + self.uid
        self.driver.get(url)
        self.driver.implicitly_wait(5)
        self.log.debug("准备访问个人网站....." + str(url))
        self.log.info('个人详细信息')
        # User id
        print(u'用户id: ' + self.uid)
        self.driver.implicitly_wait(5)
        # Nickname
        str_name = self.driver.find_element_by_xpath(
            "//div[@class='pf_username']/h1")
        name = str_name.text
        self.log.info("昵称:" + str(name))
        self.driver.implicitly_wait(5)
        try:
            while True:
                print("正在爬取第" + str(total - PageNum + 1) + "页")
                # The "next page" link only appears once the whole feed has
                # loaded, so keep scrolling to the bottom until it shows up,
                # giving up after 200 attempts.
                next_page = self._findNextPage()
                Count = 0
                while next_page is None:
                    next_page = self._findNextPage()
                    Count = Count + 1
                    print(Count)
                    time.sleep(3)
                    self.driver.execute_script(
                        "window.scrollTo(0, document.body.scrollHeight);")
                    time.sleep(3)
                    if Count == 200:
                        break
                # Collect the text of every post on the current page
                weiboelem = self.driver.find_elements_by_xpath(
                    "//div[@action-type='feed_list_item']/div[@node-type='feed_content']/div[@class='WB_detail']/div[@node-type='feed_list_content']"
                )
                weiboList.extend(elem.text for elem in weiboelem)
                # No link after all attempts: treat this as the last page
                if next_page is None:
                    break
                if PageNum == 0:
                    self.log.info("到达尾页")
                    break
                # The link can be covered by another element, so move to it
                # before clicking.
                # NOTE(review): the link is clicked twice (ActionChains, then
                # click()); the second click may hit a stale element -- confirm.
                ActionChains(self.driver).move_to_element(next_page).click(
                    next_page).perform()
                next_page.click()
                # The counter was previously misspelled ``Pagenum``, which
                # raised UnboundLocalError here and, swallowed by the broad
                # handler below, ended scraping after the first page.
                PageNum = PageNum - 1
                self.driver.implicitly_wait(5)
        except Exception:
            # Best effort: keep what has been collected so far.
            self.log.error("爬取异常")
        return weiboList
Пример #12
0
#!/usr/bin/env python
#-*- coding:utf-8 -*-
from myLog import MyLog

if __name__ == "__main__":
    # Each severity method is called with its own name as the message,
    # lowest level first.
    ml = MyLog()
    for level in ('debug', 'info', 'warn', 'error', 'critical'):
        getattr(ml, level)(level)