Example #1
def initializeSpiders():
    homePageList = ['http://finans.mynet.com/borsa/hisseler/', 'http://finans.mynet.com/borsa/hisseler/c-e/',
                    'http://finans.mynet.com/borsa/hisseler/f-j/', 'http://finans.mynet.com/borsa/hisseler/k-q/',
                    'http://finans.mynet.com/borsa/hisseler/r-z/']

    for homePage in homePageList:
        Spider(PROJECT_NAME, homePage, DOMAIN_NAME)
Example #2
def main(currentTime, startTime, web, weblevel, app, applevel, device,
         devicelevel, sys, syslevel, method):
    start_time = time.time()
    print("程序运行中....")
    #新建一个爬虫对象
    spider = Spider.Spider()
    #web应用漏洞
    page = web
    level = weblevel
    type = 1
    spider = SpiderMain(currentTime,
                        startTime,
                        spider,
                        page,
                        level=level,
                        type=type,
                        method=method)

    # Application vulnerabilities
    page = app
    level = applevel
    type = 2
    spider = SpiderMain(currentTime,
                        startTime,
                        spider,
                        page,
                        level=level,
                        type=type,
                        method=method)

    # Network device vulnerabilities
    page = device
    level = devicelevel
    type = 3
    spider = SpiderMain(currentTime,
                        startTime,
                        spider,
                        page,
                        level=level,
                        type=type,
                        method=method)

    # Operating system vulnerabilities
    page = sys
    level = syslevel
    type = 4
    spider = SpiderMain(currentTime,
                        startTime,
                        spider,
                        page,
                        level=level,
                        type=type,
                        method=method)

    spider.save_doc(currentTime, startTime)
    end_time = time.time()
    print("总共花费了%s" % str((end_time - start_time) / 60) + "分钟!")
Example #3
File: Main.py  Project: windform/Python
 def __init__(self):
     #,self.handle.application
     self.data = []
     httpd = make_server('', 1234, self.handle)
     print('Serving HTTP on port 1234...')
     # Instantiate the Application class
     self.app = Application()
     # Instantiate the Spider class
     self.spider = Spider()
     httpd.serve_forever()
Example #4
 def __init__(self, dbUser, dbPassword, homeWebPageInfoFile):
     """constructor
     """
     self._init(homeWebPageInfoFile)
     self.spider = Spider.Spider()
     self.parser = Parser.Parser()
     self.conn = Connection(conf.dbHost,
                            conf.dbName,
                            user=dbUser,
                            password=dbPassword)  #db instance
Example #5
def startSpider():
    print('WhiteList spider started!', file=sys.stderr)
    try:
        daemonize(PIDFILE,
                  stdout='/tmp/spider-log.log',
                  stderr='/tmp/spider-err.log')
    except RuntimeError as e:
        print(e, file=sys.stderr)
        raise SystemExit(1)

    io = IO.IO()
    spider = Spider.Spider(io)
    spider.start()
Example #6
	def __init__(self, word):
		'''
		Constructor to crawl web for a word 
		'''
		self.word = word
		sp = Spider(word, spread=2, limit=0.01)
		self.web = sp.crawl('Graph.shelve')	# Crawled web
		self.graph = Shelveopen('Graph.shelve')

		self.paths = []	# To store all paths
		self.scores = []	# To store corresponding pathscores

		self.clientfeatures = []	# Feature vector for client
		self.standardfeatures = []	# To compare against
Example #7
def main():
    # parse arguments
    args = parse_args()
    if args is None:
        exit()

    resultsFilePath = args.results_path
    #resultsFile = open(resultsFilePath, 'w')

    regionUrl = "http://www.stats.gov.cn/tjsj/tjbz/tjyqhdmhcxhfdm/2016/index.html"
    regionUrlStarter = "http://www.stats.gov.cn/tjsj/tjbz/tjyqhdmhcxhfdm/2016/"
    spider = Spider(regionUrl, regionUrlStarter)
    spider.processData()
    print(spider.provinceList)
    province_dict = spider.provinceList[0]

    with open(resultsFilePath, "w") as f:
        json.dump(province_dict, f, ensure_ascii=False)
        print("加载入文件完成...")
Example #8
def worker(config, verbose=False):
    """
    worker - start the spider

    Args:
        config      - the Config object
        verbose     - whether verbose logs should be printed

    Returns:
        None
    """
    # All spiders consume the global common queue of tasks
    spider = Spider.Spider(config.max_depth,
                           config.crawl_interval,
                           config.crawl_timeout,
                           config.target_url,
                           config.output_directory,
                           tasks,
                           verbose)
    spider.start()
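
# A hypothetical usage sketch (not from the original project): launch several
# workers that all consume the shared global `tasks` queue. SimpleNamespace
# stands in for the real Config object, and the attribute values are made up.
import threading
from types import SimpleNamespace

cfg = SimpleNamespace(max_depth=2, crawl_interval=1, crawl_timeout=10,
                      target_url='https://example.com',
                      output_directory='./output')
for _ in range(4):
    threading.Thread(target=worker, args=(cfg,), daemon=True).start()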
Example #9
def buildDataSet():

    books = open("data/item/book.txt", "r")
    bookList = []
    for book in books:
        bookList.append(book.rstrip())

    movies = open("data/item/movie.txt", "r")
    movieList = []
    for movie in movies:
        movieList.append(movie.rstrip())

    songs = open("data/item/music.txt", "r")
    songList = []
    for song in songs:
        songList.append(song.rstrip())

    spider = Spider.Spider()

    for item in bookList:
        urls = search(item + " book", 'com', 'en', '0', 'off', 1, 0, 11,
                      random.uniform(15.0, 45.0), True, {}, '')
        for url in urls:
            if (type(url) is str):
                print(spider.fetch(url, "book", item))

    for item in movieList:
        for url in search(item + " movie", 'com', 'en', '0', 'off', 1, 0, 11,
                          random.uniform(15.0, 45.0), True, {}, ''):
            if type(url) is str:
                print(spider.fetch(url, "movie", item))

    for item in songList:
        for url in search(item + " song", 'com', 'en', '0', 'off', 1, 0, 11,
                          random.uniform(15.0, 45.0), True, {}, ''):
            if type(url) is str:
                print(spider.fetch(url, "song", item))

    books.close()
    movies.close()
    songs.close()
Example #10
seeds = []  # list for seed URLs that the user inputs
usrInput = input("Enter seed URL or press ENTER to continue: ")

while (usrInput != "" and usrInput != " "):
    check = urllib.parse.urlparse(usrInput)
    if check.netloc != '' and check.scheme != '':
        seed = [0, usrInput]
        seeds.append(seed)
    else:
        print('Invalid URL')
    usrInput = input("Enter seed URL or press ENTER to continue: ")

keyword = input("Enter keyword to search for: ").lower()

spider = Spider.Spider(
    seeds, maxDepth, keyword
)  # Create a spider object initialised with the user's seed URLs and keyword
spider.crawl()  # Start crawling process

results = spider.results  # Get list of URL results from the Spider
adjList = spider.adjacencyList
c = 0.15  # This is the damping factor
pArray = [[0 for col in range(len(adjList))]
          for row in range(len(adjList))]  # Stores the page ranks matrix
vArray = [1 / len(adjList) for col in range(len(adjList))]  # PageRank Vector
temp = [0 for col in range(len(adjList))]
rankRes = []  # Stores the result hyperlinks and their corresponding PageRank centrality

# Calculate PageRank Matrix
for i in range(len(adjList)):
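    # --- Hypothetical completion: the snippet is truncated here, so the loop
    # body below is only a sketch, assuming adjList[i] holds the indices of the
    # pages that page i links to (c, pArray, vArray, results and rankRes above
    # are from the original snippet; everything else is an assumption) ---
    outlinks = adjList[i]
    for j in range(len(adjList)):
        if len(outlinks) > 0:
            link = 1 / len(outlinks) if j in outlinks else 0
        else:
            link = 1 / len(adjList)  # dangling page: spread rank evenly
        # Damping: follow a link with probability (1 - c), teleport with c
        pArray[j][i] = (1 - c) * link + c / len(adjList)

# Power iteration: repeatedly multiply the PageRank vector by the matrix
for _ in range(50):
    temp = [sum(pArray[row][col] * vArray[col] for col in range(len(adjList)))
            for row in range(len(adjList))]
    vArray = temp

# Pair each result URL with its rank, highest first (assumes results and
# adjList share the same ordering)
rankRes = sorted(zip(results, vArray), key=lambda pair: pair[1], reverse=True)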
Example #11
# -*- coding: gb18030 -*-
import Spider
import sys
import time
import random
import threading

s = Spider.Spider()
# Date
# year=int(sys.argv[1])
# month=int(sys.argv[2])
# day=int(sys.argv[3])
year = 2012
month = 12
day = 3
delta = s.timeDelta(year, month)


# Crawl one month at a time
def get(year, month, day):
    # Date
    date = s.handleDate(year, month, day)
    # Number of pages
    try:
        allNum = s.getAllNum(date)
    except Exception as e:
        print(e)
    while allNum == 200:
        print("I suspect there are not as many as 200 pages in one day. Let's try again!")
        time.sleep(random.random())
        allNum = s.getAllNum(date)
Example #12
File: main.py  Project: 936968629/crawl
import Spider
spi = Spider.Spider()
spi.getCont()
Example #13
File: Re.py  Project: tumitumi/Spider
 def __init__(self, url, page=1):
     self.Spider = Spider.Spider()
     self.url = url
     self.Filename = "./Page.htm"
     self.page = page
Example #14
"""
author: Yann Liu
target: scrawl the weather data from http://www.tianqihoubao.com/
usage:
    host: the ip of mysql, default 127.0.0.1
    username: the user of mysql, default root
    password:the password of mysql
    db: database that store weather data
    start_time: when to spider, default Jan,2015
    end_time: when to end, default Dec,2019
"""
from Spider import *

if __name__ == '__main__':
    spider = Spider(host='127.0.0.1',
                    username='******',
                    password='******',
                    db='weather_data',
                    start_time='201501',
                    end_time='201912')
    spider.run()
Example #15
spiderCnf['path'] = site.path
spiderCnf['maxTimeCount'] = 30
spiderCnf['webScanTimeout'] = task.web_scan_timeout
spiderCnf['endTime'] = time.time() + 1800
spiderCnf['maxnum'] = task.spider_url_count
spiderCnf['title'] = site.title
spiderCnf['ip'] = site.ip
spiderCnf['cookie'] = cookie
spiderCnf['webSearchSiteState'] = task.web_search_site_state
spiderCnf['webSearchSiteTimeout'] = task.web_search_site_timeout
spiderCnf['includeUrl'] = site.include_url
spiderCnf['excludeUrl'] = site.exclude_url
spiderCnf['downloadDir'] = SCANER_SPIDER_DOWNLOAD_DIR
#import plugins.lib.common
#argv['rec'] = plugins.lib.common.request_exception_counter(200)
spiderCnf['rec'] = None

# import Spider2 as Spider
import Spider
'''
if task.spider_type == 2:
    import Spider2 as Spider
else:
    import Spider
'''
spider = Spider.Spider(spiderCnf)
spider.start()

# url="http://192.168.5.135:8503/vulnerabilities/sqli/"
# spider.startTester(url)
Example #16
 def newSpider(self, env="", limit=""):
     return Spider.Spider(env, limit=limit)
Example #17
import Spider

s = Spider.Spider(666)
s.crawl()
Example #18
def main():
    start_url = argv[1]
    spider = Spider(levels=2)
    spider.build_web(start_url)
    spider.save()
Example #19
import Spider
import csv
from bs4 import BeautifulSoup
spider = Spider.Spider()
rows = []
for line in open('url.txt','r'):
    rows.append(line.rstrip('\n'))
for row in rows:
    soup = spider.getSoup(row)
    items = spider.getContent(soup)
    spider.createPersonCSV(items)
# soup = spider.getSoup('https://baike.baidu.com/item/%EF%BB%BF%E8%94%A1%E6%98%89/1020848')
# items = spider.getContent(soup)
# spider.printContent(items)
Example #20
class Engine:

    # Maximum keyword length
    MAXKEYWORDLEN = 16

    # Mapping table (inverted index)
    targetMap = {}

    # Target URL
    targetUrl = "http://www.csdn.net"

    # Target crawl depth
    targetDepth = 2

    # Spider
    spider = Spider.Spider(targetUrl, targetDepth)

    # File analyzer
    htmlIndexer = FileAnalyzer.HtmlIndexer()

    # Inverted index builder
    mapBuilder = MapBuilder.MapBuilder()

    # Regex matching the text before and after the keyword in the brief
    #briefPat = u"[\u4e00-\u9fa5]{"
    #maxBrief = 40
    briefPat = u"[\u4e00-\u9fa5]{0,40}"

    def __init__(self):
        # Fetch
        print "fetching......"
        for i in range(1, self.targetDepth + 1):
            self.spider.visitCurrent()
            print "depth: ", i, '/', self.spider.maxDepth, " done"
        # Build the index files
        print "indexing......"
        self.htmlIndexer.getHtml()
        self.htmlIndexer.startIndex()
        # Get the inverted index map
        print "mapping"
        self.targetMap = self.mapBuilder.getMap()

    def __getUrlAndWeight(self, word):
        res = []
        if (word in self.targetMap):
            res = self.targetMap[word]
        return res

    def __mergeUrlAndWeight(self, result):
        ans = []
        while 0 != len(result):
            temp = result[0]
            result.remove(temp)
            i = 0
            while i >= 0 and i < len(result):
                if (result[i][0] == temp[0]):
                    temp[1] += result[i][1]
                    result.remove(result[i])
                    i = i - 1
                i = i + 1
            ans.append(temp)
        return ans

    def __getBrief(self, targetWord, targetResult):
        resList = []
        for res in targetResult:
            try:
                filename = self.spider.path + res[0].replace(
                    '/', '_') + self.spider.HTMLEXT
                file = codecs.open(filename, "r", "UTF-8")
                content = file.read()
                '''length = self.maxBrief
                brief = ""
                while(length > 0):
                    brief = re.search(self.briefPat + str(length) + u'}' + targetWord + self.briefPat + str(length) + ur'}', content)
                    if (brief):
                        break
                    length -= 1'''
                brief = re.search(self.briefPat + targetWord + self.briefPat,
                                  content)
                if (brief):
                    string = brief.group()
                    res.append(string)
                    res.append(len(string.split(targetWord)[0]))
                    res.append(res[len(res) - 1] + len(targetWord) - 1)
                    resList.append(res)

                file.close()
            except:
                None
        return resList

    def getResult(self, targetWord):

        # Truncate the keyword
        targetWord = targetWord.decode('utf-8')
        if (len(targetWord) > self.MAXKEYWORDLEN):
            targetWord = targetWord[0:self.MAXKEYWORDLEN]

        result = []
        # Look up the whole search term as a keyword
        #targetWord = targetWord.decode('utf-8')
        #tempResult = self.__getUrlAndWeight(targetWord)
        #tempResult = self.__getBrief(targetWord, tempResult)
        #result += tempResult
        # Use the word-segmentation results as keywords
        #targetSplit = Analyzer.getChiSegList(targetWord, self.htmlIndexer.chiStopWordsList)

        #chiTargetSplit =
        #engTargetSplit =

        targetSplit = Analyzer.getChiSegList(
            Analyzer.getAllChiInStr(targetWord),
            self.htmlIndexer.chiStopWordsList) + Analyzer.getEngSegList(
                Analyzer.getAllEngInStr(targetWord),
                self.htmlIndexer.engStopWordsList)

        for word in targetSplit:
            tempResult = self.__getUrlAndWeight(word)
            tempResult = self.__getBrief(word, tempResult)
            result += tempResult
        # Merge entries that share the same URL
        mergedRes = self.__mergeUrlAndWeight(result)
        # Sort the results by weight
        mergedRes.sort(key=lambda uaw: uaw[1], reverse=True)
        '''for res in mergedRes:
            if(len(res) >= 3):
                mergedRes.remove(res)

        result = []'''
        for i in mergedRes:
            i[0] = 'http://' + i[0]
        return mergedRes

    def startSearch(self):
        while (1):
            print "请输入关键字############################################"
            key = raw_input()
            #key = key.decode('utf-8')

            result = self.getResult(key)

            writer = HtmlWriter.HtmlWriter()

            writer.write(result)

            for urlAndWeight in result:
                print urlAndWeight[0], urlAndWeight[1], urlAndWeight[2]
Example #21
File: main.py  Project: phend1/WebCrawler
# Python Web Crawler Tutorial - 17 - Running the Final Program - https://www.youtube.com/watch?v=ciwWSedS1XY&t=331s

import threading
from queue import Queue
from domain import *
from general import *
from Spider import *

PROJECT_NAME = 'thenewboston'
HOMEPAGE = 'https://thenewboston.com/'
DOMAIN_NAME = get_domain_name(HOMEPAGE)
QUEUE_FILE = PROJECT_NAME + '/' + PROJECT_NAME + '_queue_url.txt'
CRAWLED_FILE = PROJECT_NAME + '/' + PROJECT_NAME + '_crawled_url.txt'
NUMBER_OF_THREADS = 8  # There are a lot of factors for this one
thread_queue = Queue()
Spider(PROJECT_NAME, HOMEPAGE, DOMAIN_NAME)


# Create Worker Threads
# Threads will die when main exits
def create_spiders():
    # just loop 8 times, using _ to disregard the values
    for _ in range(NUMBER_OF_THREADS):
        thread = threading.Thread(target=crawl)
        thread.daemon = True
        thread.start()


# Execute next in the queue
def crawl():
    while True:
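        # --- Hypothetical body: the snippet is truncated here. A minimal
        # sketch of the worker loop, assuming Spider exposes a
        # crawl_page(thread_name, url) static method as in the tutorial this
        # file follows (that method name is an assumption, not shown above) ---
        url = thread_queue.get()
        Spider.crawl_page(threading.current_thread().name, url)
        thread_queue.task_done()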
Example #22
#!/usr/bin/env python
# coding: utf-8

# In[11]:

from Spider import *
from Domain import *
from Queue import *

# In[13]:

project_name = "Crawler"
home_page = "https://www.cognizant.com/"
domain_name = get_domain(home_page)
spider = Spider(project_name, home_page, domain_name)
spider.find_link(home_page)
spider.to_crawl()
Example #23
import Spider
import config
import load_data
import workflow
import urlGenerator
import io
import locateCareerPage

spidy = Spider.Spider()
spidy.crawl(config.employer_links)
spidy.get_job_description_all()

db = workflow.connect_to_database('sqlite:///data.db')
table_clean = workflow.create_table_clean(db)
table_raw = workflow.create_table_raw(db)

#workflow.update_table_clean(table_clean)

load_data.save_list_to_file(spidy.url_list, "clean_job_urls.txt")

db_company = workflow.connect_to_database('sqlite:///data_company_name.db')
table_companies = db_company['companies']
company_names = workflow.get_column_from_table(table_companies, 'name', True)

with open('site_blacklist.csv', 'r') as f:
    data = f.read()

blacklist_company_names = []
blacklist_company_url = []
Example #24
File: test.py  Project: BlitzTeam/Arachne
def testWalk():
    legs = configLegs(connexion=Connexion())
    spider = Spider(legs)
    spider.move()
Example #25
# encoding:utf-8
import Spider
import time
import threading
from ProxyPool import app
from Config import parse
from Spider import Spider

# parse holds the parameters read from the config
host = parse['pool']['host']
port = parse['pool']['port']
database_name = parse['database']['database_name']
sleep_time = int(parse['spider']['sleep_time'])
s = Spider(database_name)


# Fetch IPs periodically
def get_ip():
    while True:
        s()
        time.sleep(sleep_time)


thread = threading.Thread(target=get_ip)
thread.start()
app.run(host=host, port=port)
Example #26
from Spider import *
from Download import *

if __name__ == "__main__":
    spider = Spider()
    manager = DownloadManager(spider.getImagePaths())
Example #27
 def getOriginalPrice(self, url):
     spider = Spider.Spider(url)
     return Spider.searchOriginalPrice(spider.getHtmltree())
Example #28
                           motor_id[6], motor_id[7], motor_id[8]))
                    print("\t\t  |_____________|")
                    print("\t\t\t %d" % motor_id[9])
                    print("\t\t\t %d" % motor_id[10])
                    print("\t\t\t %d" % motor_id[11])

            elif line == "quit" or line == "":
                pass
            else:
                print("Unknown command")


if __name__ == "__main__":
    spider = None
    ctrl = dyn.create_controller(verbose=False,
                                 timeout=0.5,
                                 motor_range=[0, 20])
    spider = Spider(configLegs(ctrl.motors, simulator=False))

    gamepadThread = GamepadHandler(spider)
    gamepadThread.daemon = True
    gamepadThread.start()

    terminalThread = TerminalThread(spider)
    terminalThread.daemon = True
    terminalThread.start()

    while True:
        time.sleep(1.0)
        spider.move(startNow=False)
Example #29
    def searchOrderGoods(self):
        # XPath for parcels without sub-packages
        basepath = '//*[@id="normalorder"]//*[@class="merch_bord"]//table[@class="tabl_merch"]'

        # XPath for each piece inside parcels that have sub-packages
        subpath = '//*[@id="normalorder"]//*[@class="merch_bord"]//*[@class="sort_package_list"]/table'
        packages = self._htmltree.xpath(subpath)
        if len(packages) > 0:
            basepath = subpath

        # TODO: each order may contain several parcels and each parcel several sub-packages; only one of these cases is handled here

        books = self._htmltree.xpath(
            basepath + '//*[@class="tab_w1"]/*[@name="productname"]')
        titles = self._htmltree.xpath(
            basepath + '//*[@class="tab_w1"]/*[@name="productname"]/@title')
        hrefs = self._htmltree.xpath(
            basepath + '//*[@class="tab_w1"]/*[@name="productname"]/@href')
        prices = self._htmltree.xpath(basepath + '//*[@class="tab_w3"]')
        bonuses = self._htmltree.xpath(basepath + '//*[@class="tab_w2"]')
        amounts = self._htmltree.xpath(basepath + '//*[@class="tab_w6"]')
        sums = self._htmltree.xpath(basepath + '//*[@class="tab_w4"]')

        # Trade-in items or sub-volume info
        subbooks = self._htmltree.xpath(
            basepath + '//*[@class="tab_w1"]/*[@class="present"]')

        ordernr = self._htmltree.xpath(
            '//*[@id="normalorder"]//div[@id="divorderhead"][@class="order_news"]/p/text()'
        )
        parcel = self._htmltree.xpath(
            '//*[@id="normalorder"]//div[@class="business_package"]')
        ordertime = self._htmltree.xpath(
            '//*[@id="normalorder"]//div[@id="divorderhead"][@class="order_news"]//span[@class="order_news_hint"]/span'
        )
        others = self._htmltree.xpath(
            '//*[@id="normalorder"]//div[@class="ditail_frame_notop"]/table[@class="tabl_other"]'
        )
        endprice = self._htmltree.xpath(
            '//*[@id="normalorder"]//div[@class="price_total"]/span[1]')
        payment = self._htmltree.xpath(
            '//*[@id="normalorder"]//*[@class="order_detail_frame"]/ul[position()=4]/li'
        )

        # Domestic logistics info
        logispath = '//*[@id="normalorder"]//p[@class="p_space"]'
        logiscompany = self._htmltree.xpath(logispath + '/span[4]/span')
        logisnr = self._htmltree.xpath(logispath + '/span[7]/span')

        cncompany = ""
        if (logiscompany):
            cncompany = logiscompany[0].text
        cnnr = ""
        if (logisnr):
            cnnr = logisnr[0].text

        # International logistics info
        header = ""
        consignee = self._htmltree.xpath('//*[@id="label_name"]')[0].text
        for code, (en, cn, pattern) in get_transports_info().items():
            if re.match(pattern, consignee):
                header += u"【" + cn + u"】"
                break

        # Purchasing account
        for code, pattern in get_ddusers_info().items():
            if re.match(pattern, consignee):
                header += u"【" + code + u"】"
                break

        wb = openpyxl.Workbook()
        ws = wb.active
        j = 0
        for i, book in enumerate(books):
            # Pre-sale items
            res = book.xpath('../span[@class="c_red"]')
            if len(res) != 0:  # it is a pre-sale item
                ws.cell(row=i + j + 1, column=1,
                        value='[YS] ' + titles[i]).hyperlink = hrefs[i]
            else:
                ws.cell(row=i + j + 1, column=1,
                        value=titles[i]).hyperlink = hrefs[i]

            if len(prices[i].xpath('./text()')) != 0:  # no-discount case, e.g. order 35737447378
                ws.cell(row=i + j + 1, column=2, value=prices[i].text)
            else:
                res = prices[i].xpath('./span')
                ws.cell(row=i + j + 1, column=2, value=res[0].text)

            ws.cell(row=i + j + 1, column=3, value=bonuses[i].text)
            ws.cell(row=i + j + 1, column=4, value=amounts[i].text)

            # Save the subtotal as a number
            sum = re.findall(r'\d+.\d+', sums[i].text)[0]
            ws.cell(row=i + j + 1, column=5, value=sum)

            # Dangdang product number
            sn = Spider.split_ddsn(hrefs[i])

            # Book info needed for group buying
            # the group-buy sheet and the purchase sheet differ starting from the 7th column
            spider = Spider.Spider(hrefs[i])
            if self._tuan:
                titlesn = ws.cell(row=i + j + 1,
                                  column=1).value + ' [' + sn + ']'
                ws.cell(row=i + j + 1, column=1, value=titlesn)
                ws.cell(row=i + j + 1,
                        column=7,
                        value=self.getOriginalPrice(hrefs[i]))
                ws.cell(row=i + j + 1, column=8, value=sn)

                ws.cell(row=i + j + 1, column=9, value=spider.searchISBN())
                ws.cell(row=i + j + 1, column=10, value=spider.searchPress())

                adress = spider.searchSmallAndBigPicture()
                if adress:
                    ws.cell(row=i + j + 1, column=11, value=adress[0])
                    ws.cell(row=i + j + 1, column=12, value=adress[1])
            else:
                ws.cell(row=i + j + 1, column=7, value=sn)
                ws.cell(row=i + j + 1, column=8, value=spider.searchISBN())

            # Trade-in items or sub-volume info
            res = books[i].xpath('../br')
            subbook = books[i].xpath('../span[@class="present"]')
            for s, elem in enumerate(subbook):
                j += 1
                hgtitle = elem.xpath('../a/@title')
                hghref = elem.xpath('../a/@href')
                hgprice = prices[i].xpath('./span/text()')
                hgamount = amounts[i].xpath('./text()')
                hgsum = sums[i].xpath('./text()')

                stext = elem.xpath('./text()')
                if re.match(u'.*换购', stext[0]):  # trade-in item
                    ws.cell(row=i + j + 1,
                            column=1,
                            value='[HG] ' +
                            hgtitle[1 + s]).hyperlink = hghref[1 + s]
                else:  # sub-volume
                    ws.cell(row=i + j + 1,
                            column=1,
                            value='[FC] ' +
                            hgtitle[1 + s]).hyperlink = hghref[1 + s]

                ws.cell(row=i + j + 1, column=2, value=hgprice[s])
                if amounts[i].text:
                    ws.cell(row=i + j + 1, column=4, value=hgamount[1 + s])
                else:
                    ws.cell(row=i + j + 1, column=4, value=hgamount[s])
                ws.cell(row=i + j + 1,
                        column=5,
                        value=re.findall(r'\d+.\d+', hgsum[1 + s])[0])
                ws.cell(row=i + j + 1,
                        column=7,
                        value=Spider.split_ddsn(hghref[1 + s]))

        lastrow = len(books) + len(subbooks)

        if len(ordernr) != 0:  # ordinary order, not split into parcels
            # Order number, order time, payment method, tracking number, etc.
            nr = ''
            for n in ordernr:
                if n.strip() != '':
                    nr = n.strip()
                    break
            if len(ordertime) == 0:
                ws.cell(row=lastrow + 1,
                        column=1,
                        value=header + nr + payment[0].text + cncompany + cnnr)
            elif len(ordertime) == 1:
                ws.cell(row=lastrow + 1,
                        column=1,
                        value=header + nr + ordertime[0].text +
                        payment[0].text + cncompany + cnnr)
            elif len(ordertime) == 2:
                ws.cell(row=lastrow + 1,
                        column=1,
                        value=header + nr + ordertime[0].text +
                        ordertime[1].text + payment[0].text + cncompany + cnnr)
            # Final price
            if (endprice[0].text.find(u'\xa5')) >= 0:  # contains the ¥ sign
                ws.cell(row=lastrow + 1,
                        column=6,
                        value=endprice[0].text.replace(u'\xa5', u''))
            else:
                ws.cell(row=lastrow + 1, column=6, value=endprice[0].text)
            # Discounts
            bonus = others[0].xpath('.//span')
            for i, elem in enumerate(bonus):
                if i == 0:
                    if (bonus[0].text.find(u'\xa5')) >= 0:  # contains the ¥ sign
                        ws.cell(row=lastrow + 1,
                                column=5,
                                value=bonus[0].text.replace(u'\xa5', u''))
                    else:
                        ws.cell(row=lastrow + 1, column=5, value=bonus[0].text)
                else:
                    ws.cell(row=lastrow + 1 + i - 1,
                            column=3,
                            value=bonus[i].text)
        else:  # split into parcels
            for i, elem in enumerate(parcel):
                note = elem.xpath(
                    './/span[@class="business_package_bg"]/b/text()')
                nr = elem.xpath(
                    './/span[@class="business_package_bg"]/text()[1]')
                time = elem.xpath(
                    './/span[@class="business_package_bg"]//span[@class="t_time_n"]'
                )
                if len(logiscompany) >= i + 1:
                    ws.cell(row=lastrow + 1 + i,
                            column=1,
                            value=header + note[0] + nr[0] + time[0].text +
                            payment[0].text + logiscompany[i].text +
                            logisnr[i].text)
                else:
                    ws.cell(row=lastrow + 1 + i,
                            column=1,
                            value=header + note[0] + nr[0] + time[0].text +
                            payment[0].text)
                ws.cell(row=lastrow + 1 + i, column=6, value=endprice[i].text)
                bonus = others[i].xpath('.//span')
                ws.cell(row=lastrow + 1 + i, column=5, value=bonus[0].text)

        wb.save(get_excel_name())