class NisbetProduct(QtCore.QThread):
    """Worker thread that walks the nisbets.co.uk category menu.

    Progress is reported as HTML fragments through ``scrapProductData``.
    ``scrapCategory2Data`` is called by ``scrapData`` but is not defined in
    this part of the class; it is expected to be provided elsewhere.
    """

    scrapProductData = QtCore.pyqtSignal(object)
    stopThread = QtCore.pyqtSignal(int)

    def __init__(self):
        QtCore.QThread.__init__(self)
        self.isExiting = False
        self.totalProducts = 0
        self.logger = LogManager(__name__)
        self.spider = Spider()
        self.regex = Regex()
        dupCsvReader = Csv()
        # Column 0 of the existing CSV -- presumably the URLs that were
        # already scraped in a previous run (confirm against Csv.readCsvRow).
        self.dupCsvRows = dupCsvReader.readCsvRow('nisbets.csv', 0)
        self.csvWriter = Csv('nisbets.csv')
        self.mainUrl = 'http://www.nisbets.co.uk'
        # NOTE: 'Product Status' and 'Category1' used to be fused into a
        # single column by implicit string concatenation (missing comma).
        csvHeaderList = [
            'URL',
            'Product Code',
            'Product Technical Specifications',
            'Product Name',
            'Brand',
            'Product Price',
            'Product Short Description',
            'Product Long Description',
            'Image File Name',
            'User Manual File Name',
            'Exploded View File Name',
            'Spares Code',
            'Accessories',
            'Product Status',
            'Category1',
            'Category2',
            'Category3',
            'Category4',
        ]
        # Write the header only once per output file.
        if 'URL' not in self.dupCsvRows:
            self.csvWriter.writeCsvRow(csvHeaderList)
            self.dupCsvRows.append(csvHeaderList[0])
        self.utils = Utils()

    def run(self):
        """QThread entry point."""
        self.scrapData()

    def stop(self):
        """Ask the scraper to stop at its next ``isExiting`` check."""
        self.isExiting = True

    def scrapData(self):
        """Fetch the home page and scrape every second-level category link."""
        if self.isExiting:
            return
        self.scrapProductData.emit('<font color=green><b>Main URL: </b>%s</font>' % self.mainUrl)
        self.logger.debug('===== URL [' + self.mainUrl + '] =====')
        data = self.spider.fetchData(self.mainUrl)
        if data and len(str(data).strip()) > 0:
            data = self.regex.reduceNewLine(data)
            data = self.regex.reduceBlankSpace(data)
            category1Chunk = self.regex.getAllSearchedData(r'(?i)<li id="li-id-\d+">(.*?)</ul> </li>', data)
            if category1Chunk and len(str(category1Chunk).strip()) > 0:
                for category1Data in category1Chunk:
                    self._scrapCategory1(category1Data)
        self.scrapProductData.emit(
            '<font color=red><b>Finish Scraping Product data from %s</b></font>' % self.mainUrl)

    def _scrapCategory1(self, category1Data):
        """Scrape every second-level link found inside one top-level menu chunk."""
        category1 = self.regex.getSearchedData('(?i)<a href="[^"]*">([^<]*)</a>', category1Data)
        category2Chunk = self.regex.getAllSearchedData('(?i)<li><a href="([^"]*)">([^<]*)</a>', category1Data)
        if not (category2Chunk and len(str(category2Chunk).strip()) > 0):
            return
        for category2Data in category2Chunk:
            # Best effort: one failing category must not abort the whole run.
            try:
                self.scrapCategory2Data(self.mainUrl + category2Data[0], category1, category2Data[1])
            except Exception as x:
                self.logger.error(x)
class CsBrands(QThread):
    """Worker thread that scrapes the brand index of cs-catering-equipment.co.uk.

    Progress is reported as HTML fragments through ``notifyBrand``.
    ``scrapBrandInfo`` is called by ``scrapBrands`` but is not defined in
    this part of the class; it is expected to be provided elsewhere.
    """

    notifyBrand = pyqtSignal(object)

    def __init__(self):
        QThread.__init__(self)
        self.logger = LogManager(__name__)
        self.spider = Spider()
        self.regex = Regex()
        self.utils = Utils()
        dupCsvReader = Csv()
        # Rows already present in the output file (used for de-duplication).
        self.dupCsvRows = dupCsvReader.readCsvRow("cs_Brands.csv")
        self.csvWriter = Csv("cs_Brands.csv")
        self.mainUrl = "http://www.cs-catering-equipment.co.uk/brands"
        self.isExiting = False
        headerData = [
            "URL",
            "Parent Category",
            "Brand Category",
            "Brand Description",
            "Image File",
            "Product Codes in this category",
        ]
        # Write the header only once per output file.
        if headerData not in self.dupCsvRows:
            self.csvWriter.writeCsvRow(headerData)

    def run(self):
        """QThread entry point."""
        self.scrapBrands()
        self.notifyBrand.emit("<font color=red><b>Finished Scraping All Brands.</b></font>")

    def scrapBrands(self):
        """Fetch the brand index page and scrape every brand link on it."""
        self.notifyBrand.emit("<font color=green><b>Main URL: %s<b></font>" % self.mainUrl)
        self.notifyBrand.emit("<b>Try To scrap All Brands.<b>")
        data = self.spider.fetchData(self.mainUrl)
        if not data:
            return
        data = self.regex.reduceNewLine(data)
        data = self.regex.reduceBlankSpace(data)
        brandChunks = self.regex.getAllSearchedData(
            '(?i)<div class="man-group man-group-[a-z]">(.*?)</div>', data)
        if not brandChunks:
            return
        for brandChunk in brandChunks:
            brands = self.regex.getAllSearchedData('(?i)<a href="([^"]*)"[^>]*?>([^<]*)</a>', brandChunk)
            # Count defensively: the lookup result is only checked for
            # emptiness below, so it must not be passed to len() unguarded.
            brandCount = len(brands) if brands else 0
            self.notifyBrand.emit("<b>Total Brands Found: %s<b>" % str(brandCount))
            if not brands:
                continue
            for brand in brands:
                # Best effort: one failing brand must not abort the whole run.
                try:
                    self.scrapBrandInfo(brand[0], "Shop By Brand", brand[1])
                except Exception as x:
                    self.logger.error(x)
class CsTest(QThread):
    """Worker thread that scrapes products brand by brand from cs-catering-equipment.co.uk.

    Progress is reported as HTML fragments through ``notifyProduct``.
    ``scrapBrandInfo`` is called by ``scrapBrands`` but is not defined in
    this part of the class; it is expected to be provided elsewhere.
    """

    notifyProduct = pyqtSignal(object)

    def __init__(self):
        QThread.__init__(self)
        self.logger = LogManager(__name__)
        self.spider = Spider()
        self.regex = Regex()
        dupCsvReader = Csv()
        # Columns 0 and 1 of the existing CSV -- by the header written below
        # these are the URL and Product Code columns.
        self.dupCsvRows0 = dupCsvReader.readCsvRow('cs_product.csv', 0)
        self.dupCsvRows = dupCsvReader.readCsvRow('cs_product.csv', 1)
        self.csvWriter = Csv('cs_product.csv')
        self.mainUrl = 'http://www.cs-catering-equipment.co.uk/brands'
        self.utils = Utils()
        # Write the header only once per output file.
        if 'Product Code' not in self.dupCsvRows:
            self.csvWriter.writeCsvRow([
                'URL',
                'Product Code',
                'Product Name',
                'Manufacturer',
                'List Price',
                'Product Price',
                'Discount',
                'Product Short Description',
                'Product Long Description',
                'Product Technical Specifications',
                'Warranty',
                'Delivery',
                'Product Image',
                'Category 1',
                'Category 2',
                'Category 3',
                'Category 4',
                'Brand Image',
            ])
        self.totalProducts = len(self.dupCsvRows)

    def run(self):
        """QThread entry point."""
        self.scrapBrands()
        self.notifyProduct.emit('<font color=red><b>Finished Scraping All Brands.</b></font>')

    def scrapBrands(self):
        """Fetch the brand index page and scrape every brand link on it."""
        self.notifyProduct.emit('<font color=green><b>Main URL: %s<b></font>' % self.mainUrl)
        self.notifyProduct.emit('<b>Try To scrap All Brands.<b>')
        data = self.spider.fetchData(self.mainUrl)
        if not data:
            return
        data = self.regex.reduceNewLine(data)
        data = self.regex.reduceBlankSpace(data)
        brandChunks = self.regex.getAllSearchedData(
            '(?i)<div class="man-group man-group-[a-z]">(.*?)</div>', data)
        if not brandChunks:
            return
        for brandChunk in brandChunks:
            brands = self.regex.getAllSearchedData('(?i)<a href="([^"]*)"[^>]*?>([^<]*)</a>', brandChunk)
            # Count defensively: the lookup result is only checked for
            # emptiness below, so it must not be passed to len() unguarded.
            brandCount = len(brands) if brands else 0
            self.notifyProduct.emit('<b>Total Brands Found: %s<b>' % str(brandCount))
            if not brands:
                continue
            for brand in brands:
                # Best effort: one failing brand must not abort the whole run.
                try:
                    self.scrapBrandInfo(brand[0], 'Shop By Brand', brand[1])
                except Exception as x:
                    self.logger.error(x)
class MyLinkedInMembers(QThread):
    """Worker thread that pages through a LinkedIn group's member list.

    ``notifyLinkedIn`` carries HTML progress messages; ``notifyMembers``
    carries, per harvested page, a list of
    ``(memberId, memberName, memberTitle)`` tuples.
    """

    notifyLinkedIn = pyqtSignal(object)
    notifyMembers = pyqtSignal(object)
    cookieL = pyqtSignal(object)

    def __init__(self, spider, url, pageRange=None):
        """Store the shared spider and parse the optional page range.

        ``pageRange`` may look like ``"3-7"`` (inclusive range) or ``"4"``
        (single page).  Anything else leaves ``startPage``/``endPage`` as
        ``None``, meaning "follow every page".
        """
        QThread.__init__(self)
        self.spider = spider
        self.regex = Regex()
        self.url = url
        self.startPage = None
        self.endPage = None
        rangeText = str(pageRange).strip()
        if self.regex.isFoundPattern(r'(?i)(\d+)-(\d+)', rangeText):
            pageRangeFormat = self.regex.getSearchedDataGroups(r'(?i)(\d+)-(\d+)', rangeText)
            self.startPage = int(pageRangeFormat.group(1))
            self.endPage = int(pageRangeFormat.group(2))
        elif self.regex.isFoundPattern(r'(?i)(\d+)', rangeText):
            pageRangeFormat = self.regex.getSearchedDataGroups(r'(?i)(\d+)', rangeText)
            self.startPage = int(pageRangeFormat.group(1))
            self.endPage = self.startPage

    def run(self):
        """QThread entry point."""
        self.getMembers(self.url)
        self.notifyLinkedIn.emit('<font color=red><b>Finish scraping members.<b></font>')

    def getMembers(self, url, pageNumber=0):
        """Fetch one member page, harvest it and recurse into the next page.

        Page 0 is the landing page: it is only used to discover the "next"
        link and is not harvested.
        """
        print('Members URL: ' + url)
        self.notifyLinkedIn.emit('<font color=green><b>Start Scraping All Members.<b></font>')
        self.notifyLinkedIn.emit('<b>Wait For 15 seconds Break...<b>')
        time.sleep(15)
        self.notifyLinkedIn.emit('<b>15 seconds Break Finish.<b>')
        # URLs lifted from HTML attributes carry '&amp;'; turn it back into '&'.
        groupData = self.spider.fetchData(str(url).replace('&amp;', '&'))
        if not groupData:
            # Nothing fetched: there is nothing to harvest or to follow.
            return
        groupData = self.regex.reduceNewLine(groupData)
        groupData = self.regex.reduceBlankSpace(groupData)
        print(groupData)
        print('page number: ' + str(pageNumber))
        if pageNumber > 0:
            self.notifyMembers.emit(self._harvestMembers(groupData))

        urlNext = self.regex.getSearchedData(
            r'(?i)<a href="([^"]*)"[^>]*?>\s*?<strong>\s*?next', groupData)
        if not urlNext:
            return
        urlNext = self.regex.replaceData('(?i)&amp;', '&', urlNext)
        # Strip the page number so the wanted one can be appended below.
        urlNext = self.regex.replaceData(r'(?i)split_page=\d+', 'split_page=', urlNext)

        nextPage = self._nextPageNumber(pageNumber + 1)
        if nextPage is None:
            return
        self.notifyLinkedIn.emit('<b>Wait for 15 second break...</b>')
        time.sleep(15)
        print('sleep 15 s')
        self.notifyLinkedIn.emit('<b>15 second break finish!!!</b>')
        self.getMembers('http://www.linkedin.com' + urlNext + str(nextPage), nextPage)

    def _harvestMembers(self, groupData):
        """Return ``(id, name, title)`` tuples for every member on the page."""
        harvestedMembers = []
        allMembers = self.regex.getAllSearchedData(
            '(?i)<li class="member" id="member-[^"]*"[^>]*?>(.*?)</div>', groupData)
        for members in allMembers or []:
            memberId = self.regex.getSearchedData('(?i)data-li-memberId="([^"]*)"', members)
            memberName = self.regex.getSearchedData('(?i)data-li-fullName="([^"]*)"', members)
            memberTitle = self.regex.getSearchedData('(?i)<p class="headline">([^<]*?)</p>', members)
            memberTitle = self.regex.replaceData('(?i)&amp;', '&', memberTitle)
            harvestedMembers.append((memberId, memberName, memberTitle))
            self.notifyLinkedIn.emit('<b>Member ID: </b>%s <b>Member Name: </b>%s' % (
                memberId, memberName + ' (' + memberTitle + ')'))
        return harvestedMembers

    def _nextPageNumber(self, candidate):
        """Return the page to request next, or ``None`` once the range is done.

        Without a configured range every page is followed in order.  With a
        range, pages before ``startPage`` are skipped by jumping straight to
        it, and paging stops after ``endPage``.
        """
        if self.startPage is None or self.endPage is None:
            return candidate
        if self.startPage <= candidate <= self.endPage:
            return candidate
        if candidate < self.startPage:
            return self.startPage
        return None
class NisbetProduct(QtCore.QThread):
    """Worker thread that scrapes every product of nisbets.co.uk into nisbets.csv.

    The site is walked four category levels deep; for each product listed on
    a level-4 page one CSV row is written and its image / PDF documents are
    downloaded.  Progress is reported as HTML fragments through
    ``scrapProductData``.
    """

    scrapProductData = QtCore.pyqtSignal(object)
    stopThread = QtCore.pyqtSignal(int)

    def __init__(self):
        QtCore.QThread.__init__(self)
        self.isExiting = False
        self.logger = LogManager(__name__)
        self.spider = Spider()
        self.regex = Regex()
        dupCsvReader = Csv()
        # Column 0 of the existing CSV: by the header below this is the URL
        # column, used to skip products written by a previous run.
        self.dupCsvRows = dupCsvReader.readCsvRow("nisbets.csv", 0)
        self.csvWriter = Csv("nisbets.csv")
        self.mainUrl = "http://www.nisbets.co.uk"
        # NOTE: "Product Status" and "Category1" used to be fused into a
        # single column by implicit string concatenation (missing comma),
        # leaving the header one column short of the data rows.
        csvHeaderList = [
            "URL",
            "Product Code",
            "Product Technical Specifications",
            "Product Name",
            "Brand",
            "Product Price",
            "Product Short Description",
            "Product Long Description",
            "Image File Name",
            "User Manual File Name",
            "Exploded View File Name",
            "Spares Code",
            "Accessories",
            "Product Status",
            "Category1",
            "Category2",
            "Category3",
            "Category4",
        ]
        # Write the header only once per output file.
        if "URL" not in self.dupCsvRows:
            self.csvWriter.writeCsvRow(csvHeaderList)
            self.dupCsvRows.append(csvHeaderList[0])
        self.utils = Utils()

    def run(self):
        """QThread entry point."""
        self.scrapData()

    def stop(self):
        """Ask the scraper to stop at its next ``isExiting`` check."""
        self.isExiting = True

    def _fetchReduced(self, url):
        """Fetch ``url`` and collapse its whitespace; return ``None`` on an empty fetch."""
        data = self.spider.fetchData(url)
        if not data:
            return None
        data = self.regex.reduceNewLine(data)
        return self.regex.reduceBlankSpace(data)

    def _topCatLinks(self, data):
        """Return the ``(href, name)`` pairs of the page's "topCat" list (may be empty)."""
        chunk = self.regex.getSearchedData('(?i)<ul class="topCat clear-fix">(.*?)</ul>', data)
        if not chunk:
            return []
        return self.regex.getAllSearchedData('(?i)<a href="([^"]*)">([^<]*)<', chunk) or []

    def scrapData(self):
        """Fetch the home page and descend into every second-level category."""
        if self.isExiting:
            return
        self.scrapProductData.emit("<font color=green><b>Main URL: </b>%s</font>" % self.mainUrl)
        self.logger.debug("===== URL [" + self.mainUrl + "] =====")
        data = self._fetchReduced(self.mainUrl)
        if data:
            category1Chunk = self.regex.getAllSearchedData(r'(?i)<li id="li-id-\d+">(.*?)</ul> </li>', data)
            for category1Data in category1Chunk or []:
                category1 = self.regex.getSearchedData('(?i)<a href="[^"]*">([^<]*)</a>', category1Data)
                category2Chunk = self.regex.getAllSearchedData(
                    '(?i)<li><a href="([^"]*)">([^<]*)</a>', category1Data
                )
                for category2Data in category2Chunk or []:
                    self.scrapCategory2Data(self.mainUrl + category2Data[0], category1, category2Data[1])
        self.scrapProductData.emit(
            "<font color=red><b>Finish Scraping Product data from %s</b></font>" % self.mainUrl
        )

    def scrapCategory2Data(self, url, category1, category2):
        """Open a level-2 category page and descend into its level-3 links."""
        if self.isExiting:
            return
        self.scrapProductData.emit("<b>Category 2 URL: </b>%s" % url)
        self.logger.debug("== Category 2 URL [" + url + "] ==")
        data = self._fetchReduced(url)
        if not data:
            return
        for category3Data in self._topCatLinks(data):
            self.scrapCategory3Data(self.mainUrl + category3Data[0], category1, category2, category3Data[1])

    def scrapCategory3Data(self, url, category1, category2, category3):
        """Open a level-3 category page and descend into its level-4 links."""
        if self.isExiting:
            return
        self.scrapProductData.emit("<b>Category 3 URL: </b>%s" % url)
        self.logger.debug("== Category 3 URL [" + url + "] ==")
        data = self._fetchReduced(url)
        if not data:
            return
        for category4Data in self._topCatLinks(data):
            category4Url = self.mainUrl + category4Data[0]
            self.scrapCategory4Data(category4Url, category1, category2, category3, category4Data[1])

    def scrapCategory4Data(self, url, category1, category2, category3, category4):
        """Open a level-4 listing page and scrape every product row on it."""
        if self.isExiting:
            return
        self.scrapProductData.emit("<b>Category 4 URL: </b>%s" % url)
        self.logger.debug("== Category 4 URL [" + url + "] ==")
        data = self._fetchReduced(url)
        if not data:
            return
        categoryChunk = self.regex.getAllSearchedData(
            '(?i)<div class="product-list-row clear-after">(.*?)</fieldset>', data
        )
        categories = [category1, category2, category3, category4]
        for categoryData in categoryChunk or []:
            if self.isExiting:
                return
            self._scrapProduct(categoryData, categories)

    def _scrapProduct(self, categoryData, categories):
        """Scrape one listing row, download its files and write its CSV row.

        A product whose URL is already known is skipped -- only that product,
        the rest of the listing is still processed.
        """
        productInfo = self.regex.getSearchedDataGroups(
            '(?i)<h3 class="product-name"> <a href="([^"]*)"[^>]*?>([^<]*)</a>', categoryData
        )
        if not productInfo:
            self.logger.debug("== Product link not found in listing row. Skip it ==")
            return
        productUrl = self.mainUrl + productInfo.group(1)
        productName = productInfo.group(2)
        if productUrl in self.dupCsvRows:
            self.scrapProductData.emit(
                "<font color=green><b>Already exists this item in csv Skip it</b></font>"
            )
            self.logger.debug("========= Already exists this item Skip it ===========")
            return
        self.dupCsvRows.append(productUrl)

        productImage = ""
        productCode = ""
        productImageInfo = self.regex.getSearchedDataGroups(
            '(?i)<img class="primaryImage" src="([^"]*)" alt="([^"]*)"', categoryData
        )
        if productImageInfo:
            # The listing shows the "medium" rendition; fetch "xlarge" instead.
            image = self.regex.replaceData("(?i)medium", "xlarge", str(productImageInfo.group(1)))
            productImage = self.regex.getSearchedData("(?i)/([a-zA-Z0-9-_.]*)$", image)
            self.utils.downloadFile(self.mainUrl + image, "images/" + productImage)
            # The image's alt text is what gets stored as the product code.
            productCode = productImageInfo.group(2)

        productTechSpecs = self.regex.getSearchedData(
            '(?i)<p class="description">([^<]*)</p>', categoryData
        )
        brandName = self.regex.getSearchedData(
            '(?i)<img class="brand-image" src="[^"]*" alt="([^"]*)"', categoryData
        )
        price = self.regex.getSearchedData(
            '(?i)<div class="reduced-price"> <span class="[^"]*">([^<]*)</span>', categoryData
        )
        # Drop the leading character (presumably the currency sign).
        price = price.strip()[1:].strip() if price else ""
        productStatus = self.regex.getSearchedData(
            '(?i)<div class="availibility"> <img alt="([^"]*)"', categoryData
        )

        productDesc = ""
        productLongDesc = ""
        spareCodes = ""
        accessoryCode = ""
        userManual = ""
        explodedView = ""
        self.scrapProductData.emit(
            "<br /><font color=green><b>Product Details URL: </b>%s</font>" % productUrl
        )
        productChunk = self._fetchReduced(productUrl)
        if productChunk:
            productDesc = self.regex.getSearchedData(
                '(?i)<div class="productDesc"> <h1 class="[^"]*"[^>]*?>[^<]*?</h1>.*?<p>([^<]*)</p>',
                productChunk,
            )
            productLongDesc = self.regex.getSearchedData(
                '(?i)<div class="info-product[^>]*?>(.*?)</div>', productChunk
            )

            # The Ajax endpoints live next to the product page: keep the URL
            # up to and including its last '/'.
            otherUrl = self.regex.getSearchedData("(?i)(^.*?/)[a-zA-Z0-9._-]*?$", productUrl)
            self.logger.debug("== Common Product URL [" + otherUrl + "] ==")

            sparesUrl = otherUrl + "AjaxProductSpares.raction"
            self.logger.debug("== Spares URL [" + sparesUrl + "] ==")
            spareCodes = self._fetchCodes(sparesUrl)

            accessoriesUrl = otherUrl + "AjaxProductAccessories.raction"
            self.logger.debug("== Accessories URL [" + accessoriesUrl + "] ==")
            accessoryCode = self._fetchCodes(accessoriesUrl)

            docUrl = otherUrl + "AjaxProductDocuments.raction"
            self.logger.debug("== Document URL[" + docUrl + "] ==")
            documents = self.spider.fetchData(docUrl)
            if documents:
                userManual = self._downloadDocument(documents, "User Manual", "user_manual/")
                explodedView = self._downloadDocument(documents, "Exploded Diagram", "exploded_view/")

        csvData = [
            productUrl,
            productCode,
            productTechSpecs,
            productName,
            brandName,
            price,
            productDesc,
            productLongDesc,
            productImage,
            userManual,
            explodedView,
            spareCodes,
            accessoryCode,
            productStatus,
        ] + list(categories)
        self.csvWriter.writeCsvRow(csvData)
        self.logger.debug("Scraped data " + str(csvData))
        self.scrapProductData.emit("<div><b>Scraped Data: </b>%s<br /></div>" % str(csvData))

    def _fetchCodes(self, url):
        """Return the product codes listed at ``url`` joined by ", " ("" if none)."""
        page = self.spider.fetchData(url)
        if not page:
            return ""
        codes = self.regex.getAllSearchedData(
            '(?i)<p class="code"><span class="bold">Code:</span>([^<]*)</p>', page
        )
        return ", ".join(codes) if codes else ""

    def _downloadDocument(self, documents, label, folder):
        """Download the "Download <label>" PDF linked in ``documents``.

        Returns the local file name (whitespace replaced by "_"), or "" when
        the link or a usable file name is missing.
        """
        link = self.regex.getSearchedData(
            '(?i)<a class="document-icon" href="([^"]*)"[^>]*?>Download ' + label + "</a>",
            documents,
        )
        if not link:
            return ""
        documentUrl = self.mainUrl + self.regex.replaceData(" ", "%20", link)
        self.logger.debug(label + " URL: " + documentUrl)
        self.scrapProductData.emit("<b>" + label + " PDF URL: </b>%s" % documentUrl)
        fileName = self.regex.getSearchedData("(?i)/([a-zA-Z0-9-_. ]*)$", link)
        if not fileName:
            return ""
        fileName = self.regex.replaceData(r"\s+", "_", fileName.strip())
        self.scrapProductData.emit(
            "<font color=green><b>Downloading " + label + ": </b>%s <b>Please Wait...</b>" % fileName
        )
        self.utils.downloadFile(documentUrl, folder + fileName)
        return fileName
class CsCat(QThread):
    """Worker thread that scrapes the category tree of cs-catering-equipment.co.uk.

    Category rows go to cs_cat.csv and the per-category filter tables go to
    filter_cat.csv.  Progress is reported as HTML fragments through
    ``notifyCategory``.
    """

    notifyCategory = pyqtSignal(object)

    def __init__(self):
        QThread.__init__(self)
        self.logger = LogManager(__name__)
        self.spider = Spider()
        self.regex = Regex()
        dupCsvReader = Csv()
        # Rows already present in the output files (used for de-duplication).
        self.dupCsvRows = dupCsvReader.readCsvRow('cs_cat.csv')
        self.csvWriter = Csv('cs_cat.csv')
        dupFilterCsvReader = Csv()
        self.dupFilterCsvRows = dupFilterCsvReader.readCsvRow('filter_cat' + '.csv')
        self.csvW = Csv('filter_cat' + '.csv')
        self.mainUrl = 'http://www.cs-catering-equipment.co.uk/'
        self.totalCategory = 0

    def run(self):
        """QThread entry point."""
        self.scrapCategories()
        self.notifyCategory.emit('<font color=red><b>Finished Scraping All Categories.</b></font>')

    def scrapCategories(self):
        """Fetch the home page and scrape every top-level category link."""
        self.notifyCategory.emit('<b>Start scraping Category.</b>')
        self.notifyCategory.emit('<font color=green><b>Main URL: %s</b></font>' % self.mainUrl)
        data = self.spider.fetchData(self.mainUrl)
        if not data:
            return
        data = self.regex.reduceNewLine(data)
        data = self.regex.reduceBlankSpace(data)
        categories = self.regex.getAllSearchedData(
            '(?i)<a href="([^"]*)" class="level-top" title="([^"]*)"', data)
        if not categories:
            return
        self._countCategories(categories)
        homeCategoryName = 'Home'
        for category in categories:
            categoryName = unicode(category[1]).strip()
            self.scrapCategory(str(category[0]).strip(), homeCategoryName, categoryName)

    def scrapCategory(self, url, rootCategoryName, categoryName):
        """Scrape a top-level category page and descend into its sub-categories."""
        data = self._fetchCategoryPage(url)
        if data is None:
            return
        print('category 1')
        self._saveCategory(data, rootCategoryName, categoryName)
        for subCategory in self._subCategoryLinks(data):
            print(subCategory)
            self.scrapSubCategory(subCategory[0], categoryName, subCategory[1])

    def scrapSubCategory(self, url, rootCategoryName, categoryName):
        """Scrape a second-level category page and descend one level further."""
        data = self._fetchCategoryPage(url)
        if data is None:
            return
        self._saveCategory(data, rootCategoryName, categoryName)
        for subCategory in self._subCategoryLinks(data):
            self.scrapFinalCategory(subCategory[0], categoryName, subCategory[1])

    def scrapFinalCategory(self, url, rootCategoryName, categoryName):
        """Scrape a third-level category page (no further descent)."""
        data = self._fetchCategoryPage(url)
        if data is None:
            return
        self._saveCategory(data, rootCategoryName, categoryName)

    def _fetchCategoryPage(self, url):
        """Announce and fetch ``url``; return whitespace-reduced HTML or ``None``."""
        self.notifyCategory.emit('<font color=green><b>Start scraping URL: %s</b></font>' % url)
        data = self.spider.fetchData(url)
        if not data:
            return None
        data = self.regex.reduceNewLine(data)
        return self.regex.reduceBlankSpace(data)

    def _countCategories(self, found):
        """Add ``found`` to the running category total and announce the total."""
        self.totalCategory += len(found)
        self.notifyCategory.emit(
            '<font color=green><b>Total Category Found [%s]</b></font>' % unicode(self.totalCategory))

    def _subCategoryLinks(self, data):
        """Return the ``(href, title)`` sub-category links on a page (may be empty)."""
        subCategories = self.regex.getAllSearchedData(
            '(?i)<li> <a href="([^"]*)" title="([^"]*)"[^>]*?>[^<]*?</a> </li>', data)
        if not subCategories:
            return []
        self._countCategories(subCategories)
        return subCategories

    def _saveCategory(self, data, rootCategoryName, categoryName):
        """Write the page's filter table and its category row (unless already written)."""
        self.filterCategory(data, categoryName)
        categoryDesc = self.regex.getSearchedData(
            '(?i)<div class="category-description std">([^<]*)</div>', data)
        # A missing description is stored as '' so the row still compares
        # equal to what is read back from the CSV on the next run.
        categoryDesc = unicode(categoryDesc).strip() if categoryDesc else ''
        csvData = [rootCategoryName, categoryName, categoryDesc]
        if csvData in self.dupCsvRows:
            self.notifyCategory.emit(
                '<font color=green><b>Already Exits Category [%s] in csv file. Skip it.</b></font>' % categoryName)
            return
        self.csvWriter.writeCsvRow(csvData)
        self.dupCsvRows.append(csvData)
        self.notifyCategory.emit('<b>Scraped Data: %s</b>' % unicode(csvData))

    def filterCategory(self, data, categoryName):
        """Write the "Filter your results" table of a category page to filter_cat.csv.

        The first row is ``[categoryName, filter1, filter2, ...]``; each
        following row starts with '' and holds the n-th option of every
        filter, padded with '' where a filter has fewer options.
        """
        filterData = self.regex.getSearchedData(
            '(?i)<h4>Filter your results</h4> <dl id="narrow-by-list">(.*?)</dl>', data)
        if not filterData:
            self.notifyCategory.emit(
                '<font color=green><b>No Filter Found For Category[%s].</b></font>' % categoryName)
            return
        self.notifyCategory.emit('<b>Filter Data found writing to csv</b>')
        allFilters = self.regex.getAllSearchedData('(?i)<dt>([^<]*)</dt> <dd>(.*?)</dd>', filterData)
        topData = [categoryName]
        childData = []
        for allFilter in allFilters or []:
            topData.append(allFilter[0])
            print('Filter: ' + allFilter[0])
            filterName = self.regex.replaceData('(?i)<span class="price">', '', allFilter[1])
            filterName = self.regex.replaceData('(?i)</span>', '', filterName)
            filters = self.regex.getAllSearchedData('(?i)<a href=[^>]*>([^<]*)</a>', filterName)
            # Keep one (possibly empty) column per header so that option
            # values never slide under a neighbouring filter's name.
            childData.append(filters or [])

        if topData in self.dupFilterCsvRows:
            self.notifyCategory.emit(
                '<font color=green><b>Already scraped Filter For Category [%s]. Skip it.</b></font>' % categoryName)
            return
        self.csvW.writeCsvRow(topData)
        self.dupFilterCsvRows.append(topData)
        self.notifyCategory.emit(
            '<font color=green><b>Filters Found For Category [%s].</b></font> <br /><b>Filters are: %s</b>' % (
                unicode(categoryName), unicode(topData[1:])))

        maxLen = max(len(columnData) for columnData in childData) if childData else 0
        for row in range(maxLen):
            rowData = ['']
            for columnData in childData:
                rowData.append(columnData[row] if row < len(columnData) else '')
            print(rowData)
            self.csvW.writeCsvRow(rowData)
class NisbetCat(QtCore.QThread):
    """Worker thread that scrapes the nisbets.co.uk category tree into nisbetCat.csv.

    Three levels are visited (top link, sub category, sub-sub category); each
    yields one ``[parent, name, description]`` row.  Progress is reported as
    HTML fragments through ``scrapCategoryData``.
    """

    scrapCategoryData = QtCore.pyqtSignal(object)
    stopThread = QtCore.pyqtSignal(int)

    def __init__(self):
        QtCore.QThread.__init__(self)
        self.isExiting = False
        self.logger = LogManager(__name__)
        self.spider = Spider()
        self.regex = Regex()
        dupCsvReader = Csv()
        # Rows already present in the output file (used for de-duplication).
        self.dupCsvRows = dupCsvReader.readCsvRow('nisbetCat.csv')
        self.csvWriter = Csv('nisbetCat.csv')
        self.mainUrl = 'http://www.nisbets.co.uk'
        csvHeaderList = ['Parent Category', 'Category Name', 'Category Description']
        # Write the header only once per output file.
        if csvHeaderList not in self.dupCsvRows:
            self.csvWriter.writeCsvRow(csvHeaderList)
            self.dupCsvRows.append(csvHeaderList)

    def run(self):
        """QThread entry point."""
        self.scrapData()

    def stop(self):
        """Ask the scraper to stop at its next ``isExiting`` check."""
        self.isExiting = True

    def _fetchReduced(self, url):
        """Fetch ``url`` and collapse its whitespace; return ``None`` on an empty fetch."""
        data = self.spider.fetchData(url)
        if not data:
            return None
        data = self.regex.reduceNewLine(data)
        return self.regex.reduceBlankSpace(data)

    def _recordCategory(self, data, parentCat, category, logLabel):
        """Build the CSV row for a category page and write it unless already present."""
        description = self.regex.getSearchedData(
            '(?i)<p class="br5px padding10 mb0 mt10">([^<]*)</p>', data)
        csvData = [parentCat, category, description]
        self.logger.debug(logLabel + str(csvData))
        if csvData in self.dupCsvRows:
            self.scrapCategoryData.emit(
                '<font color=green><b>Already Scrapped Skip This Category</b></font>')
            return
        self.csvWriter.writeCsvRow(csvData)
        self.dupCsvRows.append(csvData)
        self.scrapCategoryData.emit('<b>Scraped Data: </b>%s<br />' % str(csvData))

    def _subCategoryLinks(self, data):
        """Return the ``(href, name)`` pairs of the page's "topCat" list (may be empty)."""
        subUrlsChunk = self.regex.getSearchedData('(?i)<ul class="topCat clear-fix">(.*?)</ul>', data)
        if not subUrlsChunk:
            return []
        return self.regex.getAllSearchedData('(?i)<a href="([^"]*)">([^<]*)<span', subUrlsChunk) or []

    def scrapData(self):
        """Fetch the home page and scrape every top-level category link."""
        if self.isExiting:
            return
        self.scrapCategoryData.emit('<font color=green><b>Main URL: </b>%s</font>' % self.mainUrl)
        self.logger.debug('===== URL [' + self.mainUrl + '] =====')
        data = self._fetchReduced(self.mainUrl)
        if data:
            links = self.regex.getAllSearchedData(
                r'(?i)<li id="li-id-\d+"> <a href="([^"]*)">([^<]*)</a>', data)
            for link in links or []:
                # Honour stop() between top-level categories as well.
                if self.isExiting:
                    return
                linkUrl = self.mainUrl + link[0]
                category = link[1]
                self.scrapCategoryData.emit('<b>Link URL: </b>%s' % linkUrl)
                self.logger.debug('===Link URL [' + linkUrl + '] ===')
                linkInfo = self._fetchReduced(linkUrl)
                if not linkInfo:
                    continue
                self._recordCategory(linkInfo, 'Home', category, 'Category ')
                for subUrl in self._subCategoryLinks(linkInfo):
                    self.scrapSubCat(self.mainUrl + subUrl[0], category, subUrl[1])
        self.scrapCategoryData.emit(
            '<font color=red><b>Finish Scraping Category data from %s</b></font>' % self.mainUrl)

    def scrapSubCat(self, url, parentCat, category):
        """Scrape a second-level category page and descend one level further."""
        if self.isExiting:
            return
        self.scrapCategoryData.emit('<b>Link URL: </b>%s' % url)
        self.logger.debug('== Sub URL [' + url + '] ==')
        data = self._fetchReduced(url)
        if not data:
            return
        self._recordCategory(data, parentCat, category, 'Sub Category ')
        for subUrl in self._subCategoryLinks(data):
            self.scrapSubSubCat(self.mainUrl + subUrl[0], category, subUrl[1])

    def scrapSubSubCat(self, url, parentCat, category):
        """Scrape a third-level category page (no further descent)."""
        if self.isExiting:
            return
        self.scrapCategoryData.emit('<b>Link URL: </b>%s' % url)
        self.logger.debug('== SUb SUb URL [' + url + '] ==')
        data = self._fetchReduced(url)
        if not data:
            return
        self._recordCategory(data, parentCat, category, 'Sub SUb Category ')
class MyLinkedIn(QThread):
    """Worker thread that logs in to LinkedIn and lists the user's groups.

    ``notifyLinkedIn`` carries HTML progress messages, ``notifyMember``
    carries the list of ``(groupName, membersUrl)`` tuples and ``cookieL``
    hands the logged-in spider to the receiver.
    """

    notifyLinkedIn = pyqtSignal(object)
    notifyMember = pyqtSignal(object)
    cookieL = pyqtSignal(object)

    def __init__(self, username, password):
        QThread.__init__(self)
        self.spider = Spider()
        self.regex = Regex()
        self.username = username
        self.password = password

    def run(self):
        """QThread entry point: scrape the groups only after a successful login."""
        if self.login():
            self.getAllGroups()

    def login(self):
        """Submit the login form; return True when the reply shows a sign-out link."""
        print("login start")
        self.notifyLinkedIn.emit("<b>Trying to login. Please wait...</b>")
        loginPageData = self.spider.fetchData("https://www.linkedin.com/uas/login?goback=&trk=hb_signin")
        loginPageData = self.regex.reduceNewLine(loginPageData)
        loginPageData = self.regex.reduceBlankSpace(loginPageData)

        # The form's hidden inputs have to be echoed back with the credentials.
        self.sessionRedirect = self.regex.getSearchedData(
            '(?i)<input type="hidden" name="session_redirect" value="([^"]*)"', loginPageData
        )
        self.token = self.regex.getSearchedData(
            '(?i)<input type="hidden" name="csrfToken" value="([^"]*)"', loginPageData
        )
        self.alias = self.regex.getSearchedData(
            '(?i)<input type="hidden" name="sourceAlias" value="([^"]*)"', loginPageData
        )

        loginParam = {
            "csrfToken": self.token,
            "isJsEnabled": "true",
            "session_key": self.username,
            "session_password": self.password,
            "session_redirect": self.sessionRedirect,
            "signin": "Sign In",
            "sourceAlias": self.alias,
            "source_app": "",
        }
        # Never write the real password to stdout: print a masked copy.
        printableParam = dict(loginParam)
        printableParam["session_password"] = "******"
        print(printableParam)
        print("start login")
        time.sleep(5)
        loginData = self.spider.login("https://www.linkedin.com/uas/login-submit", loginParam)
        loginData = self.regex.reduceNewLine(loginData)
        loginData = self.regex.reduceBlankSpace(loginData)

        if not self.regex.isFoundPattern('(?i)<li class="signout">', loginData):
            self.notifyLinkedIn.emit(
                "<font color=red><b>Something wrong with logging in. Please try again or check manually with this username/password</b></font>"
            )
            return False
        self.notifyLinkedIn.emit("<font color=green><b>Successfully Logged In.</b></font>")
        print("login success")
        self.cookieL.emit(self.spider)
        return True

    def getAllGroups(self):
        """Fetch the "my groups" page and emit each group's member-list URL."""
        print("start groups")
        self.notifyLinkedIn.emit("<font color=green><b>Start Scraping All Groups.</b></font>")
        self.notifyLinkedIn.emit("<b>Wait for 15 second break...</b>")
        time.sleep(15)
        self.notifyLinkedIn.emit("<b>15 second break finish!!!</b>")
        self.notifyLinkedIn.emit("<font color=green><b>Fetching data for scraping your groups.</b></font>")
        groupsUrl = "http://www.linkedin.com/myGroups?trk=hb_side_grps_top"
        groupsData = self.spider.fetchData(groupsUrl)
        self.notifyLinkedIn.emit("<font color=green><b>Data fetching complete for scraping your groups.</b></font>")
        if groupsData is not None and len(groupsData) > 0:
            print("starting groups")
            groupsData = self.regex.reduceNewLine(groupsData)
            groupsData = self.regex.reduceBlankSpace(groupsData)
            print(groupsData)
            # Group links look like <a href="/groups?gid=72881&trk=...">Name</a>.
            groupInfo = self.regex.getAllSearchedData(
                r'(?i)<a href="(/groups\?gid=[^"]*)"[^>]*>([^<]*)</a>', groupsData
            )
            if groupInfo is not None and len(groupInfo) > 0:
                members = []
                for group in groupInfo:
                    groupUrl = "http://www.linkedin.com" + str(group[0])
                    groupName = str(group[1])
                    self.notifyLinkedIn.emit("<b>Group Name: </b>%s <b>URL: </b>%s" % (groupName, groupUrl))
                    gid = self.regex.getSearchedData(r"(?i)gid=(\d+)", group[0])
                    print(gid)
                    if not gid:
                        # No numeric id: a member-list URL cannot be built.
                        continue
                    groupUrl = "http://www.linkedin.com/groups?members=&gid=" + gid + "&trk=anet_ug_memb"
                    members.append((groupName, groupUrl))
                self.notifyMember.emit(members)
        self.notifyLinkedIn.emit("<font color=red><b>Finish Scraping All Groups.</b></font>")
class MyLinkedIn(QThread):
    """Background thread that signs in to LinkedIn and lists the user's groups.

    Signals (each carries a single Python object):
        notifyLinkedIn: HTML-formatted progress/status text.
        notifyMember: list of ``(groupName, membersUrl)`` tuples, emitted once
            after the groups page has been parsed.
        cookieL: the ``Spider`` instance used for the login, emitted on
            success (presumably so the receiver can reuse its session --
            TODO confirm against the connected slot).
    """

    notifyLinkedIn = pyqtSignal(object)
    notifyMember = pyqtSignal(object)
    cookieL = pyqtSignal(object)

    def __init__(self, username, password):
        QThread.__init__(self)
        self.spider = Spider()
        self.regex = Regex()
        self.username = username
        self.password = password

    def run(self):
        """Thread entry point: scrape the groups only after a successful login."""
        if self.login():
            self.getAllGroups()

    def login(self):
        """Submit the LinkedIn login form with the stored credentials.

        Fetches the login page, copies its hidden ``session_redirect``,
        ``csrfToken`` and ``sourceAlias`` inputs into the POST body, and
        treats the presence of ``<li class="signout">`` in the response as
        proof of success.

        Returns:
            True when the sign-out marker is found, False otherwise
            (including when either HTTP round trip yields no data).
        """
        failureMessage = '<font color=red><b>Something wrong with logging in. Please try again or check manually with this username/password</b></font>'

        print('login start')
        self.notifyLinkedIn.emit('<b>Trying to login. Please wait...</b>')
        loginPageData = self.spider.fetchData(
            'https://www.linkedin.com/uas/login?goback=&trk=hb_signin')
        # getAllGroups() guards against an empty/None fetch result, so the
        # same can happen here; without the page there are no tokens to send.
        if not loginPageData:
            self.notifyLinkedIn.emit(failureMessage)
            return False
        loginPageData = self.regex.reduceNewLine(loginPageData)
        loginPageData = self.regex.reduceBlankSpace(loginPageData)

        ## <input type="hidden" name="session_redirect" value="" id="session_redirect-login"><input type="hidden" name="csrfToken" value="ajax:9073845200579364133" id="csrfToken-login"><input type="hidden" name="sourceAlias" value="0_7r5yezRXCiA_H0CRD8sf6DhOjTKUNps5xGTqeX8EEoi" id="sourceAlias-login">
        self.sessionRedirect = self.regex.getSearchedData(
            '(?i)<input type="hidden" name="session_redirect" value="([^"]*)"',
            loginPageData)
        self.token = self.regex.getSearchedData(
            '(?i)<input type="hidden" name="csrfToken" value="([^"]*)"',
            loginPageData)
        self.alias = self.regex.getSearchedData(
            '(?i)<input type="hidden" name="sourceAlias" value="([^"]*)"',
            loginPageData)

        loginParam = {
            'csrfToken': self.token,
            'isJsEnabled': 'true',
            'session_key': self.username,
            'session_password': self.password,
            'session_redirect': self.sessionRedirect,
            'signin': 'Sign In',
            'sourceAlias': self.alias,
            'source_app': ''
        }
        # Debug output must never contain the real password.
        print(dict(loginParam, session_password='******'))
        print('start login')
        time.sleep(5)

        loginData = self.spider.login(
            'https://www.linkedin.com/uas/login-submit', loginParam)
        isLoggedIn = False
        if loginData:
            loginData = self.regex.reduceNewLine(loginData)
            loginData = self.regex.reduceBlankSpace(loginData)
            isLoggedIn = self.regex.isFoundPattern('(?i)<li class="signout">',
                                                   loginData)

        if isLoggedIn:
            self.notifyLinkedIn.emit(
                '<font color=green><b>Successfully Logged In.</b></font>')
            print('login success')
            self.cookieL.emit(self.spider)
            return True

        self.notifyLinkedIn.emit(failureMessage)
        return False

    def getAllGroups(self):
        """Scrape the "my groups" page and emit each group's members URL.

        Every ``/groups?gid=...`` anchor found on the page is reported on
        ``notifyLinkedIn``; groups whose numeric gid can be extracted are
        collected as ``(groupName, membersUrl)`` and emitted together on
        ``notifyMember``.
        """
        print('start groups')
        self.notifyLinkedIn.emit(
            '<font color=green><b>Start Scraping All Groups.</b></font>')
        self.notifyLinkedIn.emit('<b>Wait for 15 second break...</b>')
        time.sleep(15)
        self.notifyLinkedIn.emit('<b>15 second break finish!!!</b>')
        self.notifyLinkedIn.emit(
            '<font color=green><b>Fetching data for scraping your groups.</b></font>'
        )
        groupsUrl = 'http://www.linkedin.com/myGroups?trk=hb_side_grps_top'
        groupsData = self.spider.fetchData(groupsUrl)
        self.notifyLinkedIn.emit(
            '<font color=green><b>Data fetching complete for scraping your groups.</b></font>'
        )

        if groupsData:
            print('starting groups')
            groupsData = self.regex.reduceNewLine(groupsData)
            groupsData = self.regex.reduceBlankSpace(groupsData)
            print(groupsData)
            ## <a href="/groups?gid=72881&trk=myg_ugrp_ovr" class="private" title="This group is members only">MySQL Professionals</a>
            groupInfo = self.regex.getAllSearchedData(
                r'(?i)<a href="(/groups\?gid=[^"]*)"[^>]*>([^<]*)</a>',
                groupsData)
            if groupInfo:
                members = []
                for group in groupInfo:
                    groupUrl = 'http://www.linkedin.com' + str(group[0])
                    groupName = str(group[1])
                    self.notifyLinkedIn.emit(
                        '<b>Group Name: </b>%s <b>URL: </b>%s' %
                        (groupName, groupUrl))
                    # http://www.linkedin.com/groups?members=&gid=65688&trk=anet_ug_memb
                    gid = self.regex.getSearchedData(r'(?i)gid=(\d+)', group[0])
                    print(gid)
                    if not gid:
                        # The href matched "gid=" but carried no digits; a
                        # members URL cannot be built for it.
                        continue
                    membersUrl = 'http://www.linkedin.com/groups?members=&gid=' + gid + '&trk=anet_ug_memb'
                    members.append((groupName, membersUrl))
                self.notifyMember.emit(members)

        self.notifyLinkedIn.emit(
            '<font color=red><b>Finish Scraping All Groups.</b></font>')
class BetrosProduct(QThread):
    """Background thread that scrapes the Bertos product catalogue.

    For every language link on the main page it walks
    category -> sub-category -> product table, downloads the referenced
    images/PDFs and appends one CSV row per product to
    ``<language>_bertos.csv``.

    Signals:
        notifyProduct: HTML-formatted progress/status text (single object).
    """

    notifyProduct = pyqtSignal(object)

    def __init__(self):
        QThread.__init__(self)
        self.logger = LogManager(__name__)
        self.spider = Spider()
        self.regex = Regex()
        self.mainUrl = 'http://www.bertos.com'
        self.utils = Utils()
        # Column order of every CSV row; index 4 ('Code') is the
        # duplicate-detection key used by scrapProducts().
        self.csvHeader = ['Home Category', 'Sub Category', 'Category Description', 'Category Image', 'Code',
                          'Product Code', 'Product Name', 'Product Description', 'Product Image File',
                          'Technical Sheet File', 'Exploded View File']
        self.totalProducts = 0

    def run(self):
        """Thread entry point: scrape everything, then announce completion."""
        self.scrapBertos()
        self.notifyProduct.emit('<font color=red><b>Finished Scraping All products.</b></font>')

    def scrapBertos(self, retry=0):
        """Scrape every language edition linked from ``self.mainUrl``.

        Args:
            retry: number of attempts already made at fetching the main
                page; the fetch is retried until this reaches 5.
        """
        self.notifyProduct.emit('<font color=green><b>Try to get all language links.</b></font>')
        self.logger.debug(self.mainUrl)
        data = self.spider.fetchData(self.mainUrl)
        if not data:
            # NOTE(review): the retry branch is attached to the main-page
            # fetch; the flattened original did not show which ``if`` this
            # ``else`` belonged to -- confirm.
            if retry < 5:
                return self.scrapBertos(retry + 1)
            return

        data = self.regex.reduceNewLine(data)
        data = self.regex.reduceBlankSpace(data)
        languages = self.regex.getAllSearchedData(
            r'(?i)<div class="[^"]*"><a href="([^"]*)"\s*?class="boxalingua">([^<]*)</a>', data)
        if not languages:
            return

        self.logger.debug('Total languages: %s' % str(len(languages)))
        self.notifyProduct.emit('<b>Total languages found[%s]</b>' % str(len(languages)))
        for language in languages:
            self.totalProducts = 0
            url = language[0]
            urlChunk = self.spider.fetchData(url)
            # NOTE(review): everything below is assumed to run only when the
            # language page was fetched; nesting was lost in the original.
            if not urlChunk:
                continue
            urlChunk = self.regex.reduceNewLine(urlChunk)
            urlChunk = self.regex.reduceBlankSpace(urlChunk)
            url = self.regex.getSearchedData(
                r'(?i)<a href="([^"]*)" onmouseover="vedi_po_cat\(2\)\s*?"', urlChunk)

            csvFile = str(language[1].strip()).lower() + '_' + 'bertos.csv'
            dupCsvReader = Csv()
            dupCsvRows = dupCsvReader.readCsvRow(csvFile)
            csvWriter = Csv(csvFile)
            if self.csvHeader not in dupCsvRows:
                dupCsvRows.append(self.csvHeader)
                csvWriter.writeCsvRow(self.csvHeader)

            self.notifyProduct.emit(
                '<font color=green><b>Try to get data for language [%s].</b></font>' % language[1])
            self.scrapCategory(url, dupCsvRows, csvWriter)
            self.notifyProduct.emit(
                '<font color=red><b>===== Finish scraping data for [%s] =====</b></font><br /><br />' % language[1])

    def scrapCategory(self, mainUrl, dupCsvRows, csvWriter):
        """Find all category links on *mainUrl* and scrape each one.

        Args:
            mainUrl: URL of the page containing the ``contenuto1`` block.
            dupCsvRows: rows already in the CSV (extended in place).
            csvWriter: ``Csv`` writer for the current language file.
        """
        url = mainUrl
        self.logger.debug('Main URL: ' + url)
        self.notifyProduct.emit('<font color=green><b>Main URL: %s</b></font>' % url)
        data = self.spider.fetchData(url)
        if not data:
            return
        data = self.regex.reduceNewLine(data)
        data = self.regex.reduceBlankSpace(data)
        data = self.regex.reduceNbsp(data)

        self.notifyProduct.emit('<b>Try to scrap all categories.</b>')
        categoryChunk = self.regex.getSearchedData(r'(?i)<div id="contenuto1">(.*?)</div>\s*?</div>', data)
        if not categoryChunk:
            return
        categories = self.regex.getAllSearchedData(r'(?i)<a href="([^"]*)"[^>]*?>([^<]*)</a>', categoryChunk)
        if not categories:
            return

        self.notifyProduct.emit('<b>Total Categories Found: %s</b>' % str(len(categories)))
        for category in categories:
            categoryName = category[1].strip()
            self.scrapSubCategory(str(category[0]).strip(), categoryName, dupCsvRows, csvWriter)

    def scrapSubCategory(self, url, categoryName, dupCsvRows, csvWriter):
        """Find all sub-category links on a category page and scrape each one.

        Args:
            url: category page URL.
            categoryName: name stored in the 'Home Category' column.
            dupCsvRows: rows already in the CSV (extended in place).
            csvWriter: ``Csv`` writer for the current language file.
        """
        self.logger.debug('Category URL: ' + url)
        self.notifyProduct.emit('<b>Try to scrap subcategories for: %s</b>' % categoryName)
        data = self.spider.fetchData(url)
        if not data:
            return
        data = self.regex.reduceNewLine(data)
        data = self.regex.reduceBlankSpace(data)

        subCategories = self.regex.getAllSearchedData(r'(?i)<li\s*?><a href="([^"]*)" title="([^"]*)"', data)
        if not subCategories:
            return
        self.notifyProduct.emit(
            '<font color=green><b>Total subcategories found %s.</b></font>' % str(len(subCategories)))
        for subCategory in subCategories:
            subCategoryName = subCategory[1].strip()
            self.scrapProducts(subCategory[0].strip(), categoryName, subCategoryName, dupCsvRows, csvWriter)

    def _fileNameFromUrl(self, fileUrl):
        """Return the trailing ``/name`` part of *fileUrl*, whitespace runs turned into '_'."""
        fileName = self.regex.getSearchedData('(?i)/([a-zA-Z0-9-_. ]*)$', fileUrl)
        return self.regex.replaceData(r'\s+', '_', fileName.strip())

    def _downloadIfMissing(self, fileUrl, folder, fileName, label):
        """Download *fileUrl* to ``folder/fileName`` unless the name is empty or the file exists."""
        if not fileName:
            return
        target = folder + '/' + fileName
        if os.path.exists(target):
            return
        self.notifyProduct.emit(
            '<font color=green><b>Downloading %s: </b>%s <b>Please Wait...</b></font>' % (label, fileName))
        # Only claim success when downloadFile() reports a complete file.
        if self.downloadFile(fileUrl, target):
            self.notifyProduct.emit(
                '<font color=green><b>Downloaded %s: %s.</b></font>' % (label, fileName))

    def scrapProducts(self, url, categoryName, subCategoryName, dupCsvRows, csvWriter):
        """Scrape every product row of a sub-category page into the CSV.

        Downloads the category image, then for each product row extracts
        code, model, name, description, image and PDFs, downloads the files
        and writes one CSV row. A product whose code (column 4) is already
        present in *dupCsvRows* is skipped; the remaining products on the
        page are still processed.

        Args:
            url: sub-category page URL.
            categoryName: value for the 'Home Category' column.
            subCategoryName: value for the 'Sub Category' column.
            dupCsvRows: rows already in the CSV (extended in place).
            csvWriter: ``Csv`` writer for the current language file.
        """
        self.logger.debug('Product URL: ' + url)
        self.notifyProduct.emit('<b>Product URL: %s.</b>' % url)
        data = self.spider.fetchData(url)
        if not data:
            return
        data = self.regex.reduceNewLine(data)
        data = self.regex.reduceBlankSpace(data)

        categoryDescription = self.regex.getSearchedData(
            r'(?i)<td class="prodottidescrizione1">\s*?<h1>[^<]*?</h1>(.*?)</td>', data)
        # Drop HTML comments first, then every remaining tag.
        categoryDescription = self.regex.replaceData('(?i)<!--.*?-->', '', categoryDescription)
        categoryDescription = self.regex.replaceData('(?i)<[^>]*>', '', categoryDescription)
        # Scheme + host of the page; relative file links are appended to it.
        productUrl = self.regex.getSearchedData('(?i)^(http://.*?)/', url)

        categoryImage = self.regex.getSearchedData(
            r'(?i)<div class="boximgcat" id="boximgcatid">\s*?<a rel="shadowbox" href="([^"]*)"', data)
        categoryImageName = self._fileNameFromUrl(categoryImage)
        self._downloadIfMissing(productUrl + categoryImage, 'category_image', categoryImageName,
                                'Category Image')

        productChunks = self.regex.getSearchedData(r'(?i)<table.*?class="prodottiriga"[^>]*?>(.*?)</table>', data)
        if not productChunks:
            return
        productChunk = self.regex.getAllSearchedData(r'(?i)<tr>(.*?</div>\s*?</td>)\s*?</tr>', productChunks)
        for products in productChunk or []:
            print('url: ' + url)
            code = self.regex.getSearchedData(r'(?i)Cod\. ([a-zA-Z0-9 /]+)', products).strip()
            # Skip only this product when its code is already recorded;
            # returning here would silently drop the rest of the page.
            if any(len(dup) > 4 and dup[4] == code for dup in dupCsvRows):
                self.notifyProduct.emit('<font color=green><b>Already exists. Skip it.</b></font>')
                continue

            model = self.regex.getSearchedData(r'(?i)Mod\. ([^<]*)<', products).strip()
            productName = self.regex.getSearchedData('(?i)<h1>([^<]*)</h1>', products).strip()
            self.notifyProduct.emit(
                '<font color=green><b>Product Name: %s.</b></font>' % productName)
            desc = self.regex.getSearchedData(
                r'(?i)<div id="prtdescrizione\d+" style="display:none">(.*?)<div class="prodotticomando">',
                products).strip()

            productImage = productUrl + self.regex.getSearchedData(
                r'(?i)<img src="/tpl/\.\.([^"]*)"', products).strip()
            # Rewrite the size token in the image URL to the 800x557 variant.
            productImage = self.regex.replaceData(r'(?i)k_\d+_\d+', 'k_800_557', productImage)
            productImageName = self._fileNameFromUrl(productImage)
            self._downloadIfMissing(productImage, 'product_image', productImageName, 'Product Image')

            techPdf = self.regex.getSearchedData(
                r'(?i)<td class="prodottiriga\d+">\s*?<div class="scarica">\s*?<a href="([^"]*)"',
                products).strip()
            techPdfName = self._fileNameFromUrl(techPdf)
            self._downloadIfMissing(techPdf, 'tech_pdf', techPdfName, 'Tech Pdf')

            explodedViewPdf = self.regex.getSearchedData(
                r'(?i)</a>\s*?</div>\s*?<div class="scarica">\s*?<a href="([^"]*\.pdf)">[^<]*?</a>',
                products).strip()
            explodedViewPdfName = self._fileNameFromUrl(explodedViewPdf)
            self._downloadIfMissing(explodedViewPdf, 'exploded_pdf', explodedViewPdfName, 'Exploded View Pdf')

            csvData = [categoryName, subCategoryName, categoryDescription, categoryImageName, code, model,
                       productName, desc, productImageName, techPdfName, explodedViewPdfName]
            print(csvData)
            if csvData not in dupCsvRows:
                csvWriter.writeCsvRow(csvData)
                dupCsvRows.append(csvData)
                self.notifyProduct.emit('<font color=green><b>Successfully write data to csv file.</b></font>')
            else:
                self.notifyProduct.emit('<font color=green><b>Already exists. Skip it.</b></font>')

    def downloadFile(self, url, downloadPath, retry=0):
        """Stream *url* to *downloadPath*, reporting progress on ``notifyProduct``.

        Args:
            url: URL of the file to fetch.
            downloadPath: destination path; its parent directory is created
                when missing.
            retry: currently unused; kept so the signature stays compatible.

        Returns:
            True when the full body was written, False otherwise. An
            incomplete file is removed so that callers guarding on
            ``os.path.exists`` will retry it on a later run.
        """
        print(url)
        self.notifyProduct.emit('<b>File URL: %s.</b>' % url)

        resp = None
        try:
            socket.setdefaulttimeout(10)
            opener = urllib2.build_opener(urllib2.HTTPRedirectHandler(),
                                          urllib2.HTTPHandler(debuglevel=0),
                                          urllib2.HTTPSHandler(debuglevel=0))
            opener.addheaders = [
                ('User-Agent', 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:14.0) Gecko/20100101 Firefox/14.0.1')]
            urllib2.install_opener(opener)
            resp = opener.open(url, timeout=30)
        except Exception as x:
            print(x)
        if resp is None:
            return False

        completed = False
        fileOpened = False
        try:
            print(resp.info())
            print('info.......')
            # A missing or non-numeric Content-Length means "size unknown"
            # (0.0) instead of an exception.
            contentLength = resp.info().get('Content-Length') or ''
            contentLength = self.regex.getSearchedData(r'(?i)^(\d+)', contentLength)
            totalSize = float(contentLength) if contentLength else 0.0

            directory = os.path.dirname(downloadPath)
            if directory and not os.path.exists(directory):
                try:
                    os.makedirs(directory)
                except Exception as x:
                    print(x)

            currentSize = 0
            CHUNK_SIZE = 32768
            totalSizeKB = totalSize / 1024 if totalSize > 0 else totalSize
            with open(downloadPath, 'wb') as dl_file:
                fileOpened = True
                print('everything ok............')
                while True:
                    try:
                        data = resp.read(CHUNK_SIZE)
                    except Exception as x:
                        # Read error: leave ``completed`` False.
                        print(x)
                        break
                    if not data:
                        # Clean EOF; complete only if nothing is missing
                        # (always true when the size is unknown).
                        completed = currentSize >= totalSize
                        break
                    currentSize += len(data)
                    dl_file.write(data)
                    if totalSize > 0:
                        percent = str(round(float(currentSize * 100) / totalSize, 2))
                        print('============> ' + percent + '% of ' + str(totalSize) + ' bytes')
                        notifyDl = '===> Downloaded ' + percent + '% of ' + str(totalSizeKB) + ' KB.'
                    else:
                        print('============> ' + str(currentSize) + ' bytes')
                        notifyDl = '===> Downloaded ' + str(currentSize) + ' bytes.'
                    self.notifyProduct.emit('<b>%s</b>' % notifyDl)
                    if totalSize > 0 and currentSize >= totalSize:
                        completed = True
                        break
        except Exception as x:
            print(x)
        finally:
            resp.close()

        if fileOpened and not completed:
            # Do not leave a truncated file behind.
            try:
                os.remove(downloadPath)
            except OSError as x:
                print(x)
        return completed
class WebTable(): def __init__(self): self.logger = LogManager(__name__) self.spider = Spider() self.browser = BrowserUtil() self.regex = Regex() self.utils = Utils() self.csvHeader = [ 'Category', 'Sub Category 1', 'Sub Category 2', 'Product Code', 'Product Name', 'Product ShortName', 'Product Description', 'List Price', 'Vendor Price', 'Availability', 'Power', 'Size', 'KW', 'Weight(kg)', 'Other Tech', 'Pdf File', 'Image File' ] self.totalProducts = 0 def scrapData(self): postParams = { '__ASYNCPOST': 'true', '__EVENTVALIDATION': '/wEWWwKSuN/3AgLi8PP9DgKzpIWvCQKQ3IFsAve1x5EPAu7Dza4GArPM1qoEAvjBhsQDAvjB6qkLAvjB/o4CAvjBwtMJApP48MoOApP4xK8GApP46EYCk/j8qwgCk/jA8AcCk/jU1Q4Ck/i4uQYCk/iMng0Ck/iQ4wQCk/jkyAMC15uNvgYC15uRgw0C15uluggC15uJnwcC15ud5A4C15vhyQUC15v1rg0C15vZ8wQC15ut1wMC15uxvAsC6rKvkwgC6rKz+AcC6rLHkAIC6rKr9AkC6rK/WQLqsoO+CALqspeDBwLqsvvoDgLqss/NBQLqstOSDQK0wsnaCgL4+7LBAQLP5JaqAQKc4P/CDQLl7berDgLurP6CDALvn+2eCwK4pIGBDwKvytzABgLTu7vHBgKFmtaAAwKn0anxCwKZwpi3CgLjlM+OAwLCoMjqAQLWq7m2BALlnqSNBwKbwPKfBgL5j7vvBAKRy8fpCAKI3rXQBwLBhpnRCwLgqNqjBQLEmsPUBgL26MCGDwL0wbKZDgL16ePjAQLhraHjBAKx7Y+rCwKu+uSNDQKDp4fFBwLnmpaQCQKU2LWMCALev//ADgK9osaHBALArtXWDgKhp8iCAwKCs5DBAgKPnOP3DwK0uumDDwKJ4eXWBAKK+5r7AwLj4sWCAQKJgZPYBQL2mPvKBgL/hob0BAKsyvbZDAKSoqqWDwLSwpnTCALN797vDL/8819r5pdL6i1kQizMsBPt83oZ', '__VIEWSTATE': 
'/wEPDwUKMTU5MjIyNTQ2OQ9kFgICAw9kFgQCAQ9kFgJmD2QWAgIBD2QWAmYPZBYCZg9kFgYCBw8QZBAVIwstQWxsIFllYXJzLQQyMDEzBDIwMTIEMjAxMQQyMDEwBDIwMDkEMjAwOAQyMDA3BDIwMDYEMjAwNQQyMDA0BDIwMDMEMjAwMgQyMDAxBDIwMDAEMTk5OQQxOTk4BDE5OTcEMTk5NgQxOTk1BDE5OTQEMTk5MwQxOTkyBDE5OTEEMTk5MAQxOTg5BDE5ODgEMTk4NwQxOTg2BDE5ODUEMTk4NAQxOTgzBDE5ODIEMTk4MQQxOTgwFSMLLUFsbCBZZWFycy0EMjAxMwQyMDEyBDIwMTEEMjAxMAQyMDA5BDIwMDgEMjAwNwQyMDA2BDIwMDUEMjAwNAQyMDAzBDIwMDIEMjAwMQQyMDAwBDE5OTkEMTk5OAQxOTk3BDE5OTYEMTk5NQQxOTk0BDE5OTMEMTk5MgQxOTkxBDE5OTAEMTk4OQQxOTg4BDE5ODcEMTk4NgQxOTg1BDE5ODQEMTk4MwQxOTgyBDE5ODEEMTk4MBQrAyNnZ2dnZ2dnZ2dnZ2dnZ2dnZ2dnZ2dnZ2dnZ2dnZ2dnZ2dnZ2RkAgkPZBYCZg9kFgICAQ8QDxYGHg1EYXRhVGV4dEZpZWxkBQ5wcm9qX2NvZGVfbmFtZR4ORGF0YVZhbHVlRmllbGQFCXByb2pfY29kZR4LXyFEYXRhQm91bmRnZBAVCQ4tQWxsIENhdGVnb3J5LQtDb2FsIE1pbmluZxNJbmR1c3RyaWFsIFByb2plY3RzMUluZnJhc3RydWN0dXJlIGFuZCBNaXNjZWxsYW5lb3VzIFByb2plY3RzICAmICBDUloPTWluaW5nIFByb2plY3RzMk5ldyBDb25zdHJ1Y3Rpb24gUHJvamVjdHMgYW5kICBJbmR1c3RyaWFsICBFc3RhdGVzEU51Y2xlYXIgIFByb2plY3RzJ1JpdmVyIFZhbGxleSBhbmQgSHlkcm9lbGVjdHJpYyBQcm9qZWN0cxBUaGVybWFsIFByb2plY3RzFQkOLUFsbCBDYXRlZ29yeS0EQ01JTgNJTkQDTUlTA01JTgNOQ1ADTlVDA1JJVgNUSEUUKwMJZ2dnZ2dnZ2dnZGQCCw8QDxYGHwAFCnN0YXRlX25hbWUfAQUKc3RhdGVfbmFtZR8CZ2QQFSULLUFsbCBTdGF0ZS0TQW5kYW1hbiBhbmQgTmljb2Jhcg5BbmRocmEgUHJhZGVzaBFBcnVuYWNoYWwgUHJhZGVzaAVBc3NhbQVCaWhhcgpDaGFuZGlnYXJoDENoaGF0dGlzZ2FyaBREYWRhciAmIE5hZ2FyIEhhdmVsaQ1EYW1hbiBhbmQgRGl1BURlbGhpA0dvYQdHdWphcmF0B0hhcnlhbmEQSGltYWNoYWwgUHJhZGVzaBFKYW1tdSBhbmQgS2FzaG1pcglKaGFya2hhbmQJS2FybmF0YWthBktlcmFsYQtMYWtzaGFkd2VlcA5NYWRoeWEgUHJhZGVzaAtNYWhhcmFzaHRyYQdNYW5pcHVyCU1lZ2hhbGF5YQdNaXpvcmFtCE5hZ2FsYW5kBk9ycmlzYQZPdGhlcnMLUG9uZGljaGVycnkGUHVuamFiCVJhamFzdGhhbgZTaWtraW0KVGFtaWwgTmFkdQdUcmlwdXJhDVV0dGFyIFByYWRlc2gLVXR0YXJha2hhbmQLV2VzdCBCZW5nYWwVJQstQWxsIFN0YXRlLRNBbmRhbWFuIGFuZCBOaWNvYmFyDkFuZGhyYSBQcmFkZXNoEUFydW5hY2hhbCBQcmFkZXNoBUFzc2FtBUJpaGFyCkNoYW5kaWdhcmgMQ2hoYXR0aXNnYXJoFERhZGFyICYgTmFnYXIgSGF2ZWxpDURhbWFuIGFuZCBEaXUFRGVsaGkDR29hB0d1amFyYXQHSGFyeWFuYRBIaW1hY2hhbCBQcmFkZXNoEUphbW1
1IGFuZCBLYXNobWlyCUpoYXJraGFuZAlLYXJuYXRha2EGS2VyYWxhC0xha3NoYWR3ZWVwDk1hZGh5YSBQcmFkZXNoC01haGFyYXNodHJhB01hbmlwdXIJTWVnaGFsYXlhB01pem9yYW0ITmFnYWxhbmQGT3JyaXNhBk90aGVycwtQb25kaWNoZXJyeQZQdW5qYWIJUmFqYXN0aGFuBlNpa2tpbQpUYW1pbCBOYWR1B1RyaXB1cmENVXR0YXIgUHJhZGVzaAtVdHRhcmFraGFuZAtXZXN0IEJlbmdhbBQrAyVnZ2dnZ2dnZ2dnZ2dnZ2dnZ2dnZ2dnZ2dnZ2dnZ2dnZ2dnZ2dnZGQCBQ9kFgJmD2QWAgIBD2QWAmYPZBYCZg9kFgICAQ88KwANAGQYAgUeX19Db250cm9sc1JlcXVpcmVQb3N0QmFja0tleV9fFgIFDEltYWdlQnV0dG9uMQUCc3MFCUdyaWRWaWV3MQ9nZJ2a7Ttf3vWdGuuLrnT2LMPjQW5x', 'btn': 'Search', 'ddlcategory': 'MIN', 'ddlstate': 'Gujarat', 'ddlstatus': 'UPEC', 'ddlyear': '2011', 'textbox2': '', 'ww': 'UpdatePanel3' } data = self.spider.fetchData( 'http://environmentclearance.nic.in/Search.aspx', postParams) print data data = self.spider.fetchData1( 'http://environmentclearance.nic.in/Search.aspx') print data # soup = BeautifulSoup(data) def scrapSubCat1(self, url): print 'url: ', url data = self.spider.fetchData(url) soup = BeautifulSoup(data) for cat in soup.find_all('td', {"class": re.compile("item_level")}): c = cat.find( "a", { "href": re.compile('rayons\.aspx\?value_path=.*?$'), "id": re.compile('ctl00_cph_center_dl_level_ctl\d+_a_level.*?$') }) if c: print c.string.strip() self.scrapSubCat2('http://www.diamond-europe.com/' + c.get("href")) def scrapSubCat2(self, url): print 'url1: ' + url data = self.spider.fetchData(url) soup = BeautifulSoup(data) for cat in soup.find_all('div', {'class': re.compile('bg_ombre')}): self.scrapProducts('http://www.diamond-europe.com/' + cat.find('a').get('href')) def scrapProducts(self, url): print 'url2', url data = self.spider.fetchData(url) soup = BeautifulSoup(data) results = soup.find('table', {'id': 'results'}) if results: for row in results.find_all('tr'): colRef = row.find('td', {'class': 'reference'}) if colRef: prCode = colRef.find('span', {'class': 'imp'}) price1 = colRef.find( 'span', { 'id': re.compile( 'ctl\d+_cph_center_r_articles_ctl\d+_l_prix_barre$' ) }) price2 = 
colRef.find('span', {'class', 'promo'}) print prCode.string.strip() print price1.string.strip() print price2.string.strip() coldesc = row.find('td', {'class': re.compile('description.*?$')}) if coldesc: pr = coldesc.find('a') print pr.string.strip() self.scrapProductDetails('http://www.diamond-europe.com/' + pr.get('href')) def scrapProductDetails(self, url): print 'Detail url: ' + url data = self.spider.fetchData(url) soup = BeautifulSoup(data) productDescS = soup.find('span', 'h1_nom_article') print productDescS.string.strip() productDesc = soup.find('div', {'id': 'article_right'}) print productDesc.text.strip() specs = soup.find('ul', {'id': 'spec_tech'}) if specs: print specs.contents """ __ASYNCPOST true __EVENTARGUMENT __EVENTTARGET ctl00$cph_center$menu_left1$lb_login __EVENTVALIDATION /wEWEwKOk7qrBAKG4eyLBALGw+PfBwK7jI7eDQL/2fqXBwLH9rmjDwLG2KLDCAKCvreACALPgYP1DQKqvLeACAKKtP7+DAL07MD3CwLksZZaAuSxmloCicn43Q8Cisn43Q8C/Iag2AMClcHvlQgCyNGw1Ax/PwzywfL/ooD/FU51memYxQ1U+Q== __LASTFOCUS __SCROLLPOSITIONX 0 __SCROLLPOSITIONY 0 __VIEWSTATE 
/wEPDwUINzkzMzQ5OTcPZBYCZg9kFgICAw9kFgICAQ9kFhICAw8WAh4LXyFJdGVtQ291bnQCBhYMZg9kFgICAQ8WBh4FdGl0bGUFB0VuZ2xpc2geBGhyZWYFDC9yYXlvbnMuYXNweB4Hb25jbGljawUcc2V0Q29va2llKCdsYW5ndWUnLCAnZW4tZ2InKRYCZg8WBB4Dc3JjBQ9+L2ltYWdlcy9lbi5wbmceA2FsdAUHRW5nbGlzaGQCAQ9kFgICAQ8WBh8BBQlGcmFuw6dhaXMfAgUZL3JheW9ucy5hc3B4P2xhbmd1ZT1mci1iZR8DBRxzZXRDb29raWUoJ2xhbmd1ZScsICdmci1iZScpFgJmDxYEHwQFD34vaW1hZ2VzL2ZyLnBuZx8FBQlGcmFuw6dhaXNkAgIPZBYCAgEPFgYfAQUHRGV1dHNjaB8CBRkvcmF5b25zLmFzcHg/bGFuZ3VlPWRlLWRlHwMFHHNldENvb2tpZSgnbGFuZ3VlJywgJ2RlLWRlJykWAmYPFgQfBAUPfi9pbWFnZXMvZGUucG5nHwUFB0RldXRzY2hkAgMPZBYCAgEPFgYfAQUKTmVkZXJsYW5kcx8CBRkvcmF5b25zLmFzcHg/bGFuZ3VlPW5sLWJlHwMFHHNldENvb2tpZSgnbGFuZ3VlJywgJ25sLWJlJykWAmYPFgQfBAUPfi9pbWFnZXMvbmwucG5nHwUFCk5lZGVybGFuZHNkAgQPZBYCAgEPFgYfAQUIRXNwYcOxb2wfAgUZL3JheW9ucy5hc3B4P2xhbmd1ZT1lcy1lcx8DBRxzZXRDb29raWUoJ2xhbmd1ZScsICdlcy1lcycpFgJmDxYEHwQFD34vaW1hZ2VzL2VzLnBuZx8FBQhFc3Bhw7FvbGQCBQ9kFgICAQ8WBh8BBQhJdGFsaWFubx8CBRkvcmF5b25zLmFzcHg/bGFuZ3VlPWl0LWl0HwMFHHNldENvb2tpZSgnbGFuZ3VlJywgJ2l0LWl0JykWAmYPFgQfBAUPfi9pbWFnZXMvaXQucG5nHwUFCEl0YWxpYW5vZAIFDw8WBB4EVGV4dAUESG9tZR4LTmF2aWdhdGVVcmwFDH4vaW5kZXguYXNweGRkAgcPDxYEHwYFB0RpYW1vbmQfBwUOfi9kaWFtb25kLmFzcHhkZAIJDw8WBB8GBQhTZXJ2aWNlcx8HBQ9+L3NlcnZpY2VzLmFzcHhkZAILDw8WCB8GBQhQcm9kdWN0cx8HBRR+L3JheW9ucy5hc3B4P3BhZ2U9MR4IQ3NzQ2xhc3MFB2N1cnJlbnQeBF8hU0ICAmRkAg0PDxYEHwYFBE5ld3MfBwULfi9uZXdzLmFzcHhkZAIPDw8WBB8GBQdDb250YWN0HwcFDn4vY29udGFjdC5hc3B4ZGQCEQ9kFgICAQ9kFgICAw9kFgJmD2QWBAIBDxBkZBYBZmQCBw8WBB4KQ29udGV4dEtleQUVUmVmZXJlbmNlfmVuLWdifkZhbHNlHg1Vc2VDb250ZXh0S2V5Z2QCEw9kFggCAQ9kFg4CAQ8PFgIeB1Zpc2libGVoZGQCAw9kFgJmD2QWAgIDDw8WAh8MZ2RkAgUPDxYCHwxoZGQCCRA8KwANAgAPFgIfDGhkDBQrABwFfDA6MCwwOjEsMDoyLDA6MywwOjQsMDo1LDA6NiwwOjcsMDo4LDA6OSwwOjEwLDA6MTEsMDoxMiwwOjEzLDA6MTQsMDoxNSwwOjE2LDA6MTcsMDoxOCwwOjE5LDA6MjAsMDoyMSwwOjIyLDA6MjMsMDoyNCwwOjI1LDA6MjYUKwACFgYfBgVyPGRpdiBjbGFzcz0ncHVjZV9tZW51X3JheW9uJyBzdHlsZT0nYmFja2dyb3VuZC1jb2xvcjojZmY0YzBiJz48L2Rpdj48c3BhbiBjbGFzcz0naXRlbV9tZW51X3JheW9uJz4tIENPT0tJTkc8L3NwYW4+HgdUb29sVGlwBQktIENP
T0tJTkcfBwUjfi9yYXlvbnMuYXNweD92YWx1ZV9wYXRoPVA0N0tIQzc0ODRkFCsAAhYGHwYFigE8ZGl2IGNsYXNzPSdwdWNlX21lbnVfcmF5b24nIHN0eWxlPSdiYWNrZ3JvdW5kLWNvbG9yOiNlMThjNDUnPjwvZGl2PjxzcGFuIGNsYXNzPSdpdGVtX21lbnVfcmF5b24nPi0gSE9UIFNOQUNLUyAtIFBBTklOSSAtIEZBU1QgRk9PRDwvc3Bhbj4fDQUhLSBIT1QgU05BQ0tTIC0gUEFOSU5JIC0gRkFTVCBGT09EHwcFI34vcmF5b25zLmFzcHg/dmFsdWVfcGF0aD04M0RDM0Y2Q0FEZBQrAAIWBh8GBZMBPGRpdiBjbGFzcz0ncHVjZV9tZW51X3JheW9uJyBzdHlsZT0nYmFja2dyb3VuZC1jb2xvcjojZWNhZTc1Jz48L2Rpdj48c3BhbiBjbGFzcz0naXRlbV9tZW51X3JheW9uJz4tIEZSRU5DSCBGUklFUyAtIFJPQVNUSU5HIC0gR1JJTExJTkcgJiBCQlE8L3NwYW4+Hw0FKi0gRlJFTkNIIEZSSUVTIC0gUk9BU1RJTkcgLSBHUklMTElORyAmIEJCUR8HBSN+L3JheW9ucy5hc3B4P3ZhbHVlX3BhdGg9RTY0NDk0MzdDM2QUKwACFgYfBgV4PGRpdiBjbGFzcz0ncHVjZV9tZW51X3JheW9uJyBzdHlsZT0nYmFja2dyb3VuZC1jb2xvcjojZjNjYmEzJz48L2Rpdj48c3BhbiBjbGFzcz0naXRlbV9tZW51X3JheW9uJz4tIEFTSUFOIENPT0tJTkc8L3NwYW4+Hw0FDy0gQVNJQU4gQ09PS0lORx8HBSN+L3JheW9ucy5hc3B4P3ZhbHVlX3BhdGg9OTc4QTE0QzhFNGQUKwACFgYfBgWJATxkaXYgY2xhc3M9J3B1Y2VfbWVudV9yYXlvbicgc3R5bGU9J2JhY2tncm91bmQtY29sb3I6I2ZiZTZkMSc+PC9kaXY+PHNwYW4gY2xhc3M9J2l0ZW1fbWVudV9yYXlvbic+LSBTVEVBTSAtIENPTlZFQ1RJT04gLSBNSUNST1dBVkU8L3NwYW4+Hw0FIC0gU1RFQU0gLSBDT05WRUNUSU9OIC0gTUlDUk9XQVZFHwcFI34vcmF5b25zLmFzcHg/dmFsdWVfcGF0aD02N0ZENkIzNjQ2ZBQrAAIWBh8GBXc8ZGl2IGNsYXNzPSdwdWNlX21lbnVfcmF5b24nIHN0eWxlPSdiYWNrZ3JvdW5kLWNvbG9yOiNmYzM0MjgnPjwvZGl2PjxzcGFuIGNsYXNzPSdpdGVtX21lbnVfcmF5b24nPi0gQ09PSyAmIENISUxMPC9zcGFuPh8NBQ4tIENPT0sgJiBDSElMTB8HBSN+L3JheW9ucy5hc3B4P3ZhbHVlX3BhdGg9TzBRTDhLSDA4VmQUKwACFgYfBgWNATxkaXYgY2xhc3M9J3B1Y2VfbWVudV9yYXlvbicgc3R5bGU9J2JhY2tncm91bmQtY29sb3I6I2UxN2Y1ZCc+PC9kaXY+PHNwYW4gY2xhc3M9J2l0ZW1fbWVudV9yYXlvbic+LSBSRUdFTkVSQVRJT04gLSBWQUNVVU0gLSBCQU5RVUVUSU5HPC9zcGFuPh8NBSQtIFJFR0VORVJBVElPTiAtIFZBQ1VVTSAtIEJBTlFVRVRJTkcfBwUjfi9yYXlvbnMuYXNweD92YWx1ZV9wYXRoPUU4NDM5Q0U0QTBkFCsAAhYGHwYFdzxkaXYgY2xhc3M9J3B1Y2VfbWVudV9yYXlvbicgc3R5bGU9J2JhY2tncm91bmQtY29sb3I6IzAyOTQ3ZSc+PC9kaXY+PHNwYW4gY2xhc3M9J2l0ZW1fbWVudV9yYXlvbic+LSBESVNIIFdBU0hFUlM8L3NwYW4+Hw0FDi0gRElTSCBXQVNIRVJTHwcFI34v
cmF5b25zLmFzcHg/dmFsdWVfcGF0aD03OE1YQk1KRkdLZBQrAAIWBh8GBXI8ZGl2IGNsYXNzPSdwdWNlX21lbnVfcmF5b24nIHN0eWxlPSdiYWNrZ3JvdW5kLWNvbG9yOiNhMDA4NmQnPjwvZGl2PjxzcGFuIGNsYXNzPSdpdGVtX21lbnVfcmF5b24nPi0gTEFVTkRSWTwvc3Bhbj4fDQUJLSBMQVVORFJZHwcFI34vcmF5b25zLmFzcHg/dmFsdWVfcGF0aD1TWjJOU1ZKUTc4ZBQrAAIWBh8GBYMBPGRpdiBjbGFzcz0ncHVjZV9tZW51X3JheW9uJyBzdHlsZT0nYmFja2dyb3VuZC1jb2xvcjojMDU3M2E1Jz48L2Rpdj48c3BhbiBjbGFzcz0naXRlbV9tZW51X3JheW9uJz4tIEdBU1RST05PUk0gUkVGUklHRVJBVElPTjwvc3Bhbj4fDQUaLSBHQVNUUk9OT1JNIFJFRlJJR0VSQVRJT04fBwUjfi9yYXlvbnMuYXNweD92YWx1ZV9wYXRoPUhTVkVROTZYRzRkFCsAAhYGHwYFeDxkaXYgY2xhc3M9J3B1Y2VfbWVudV9yYXlvbicgc3R5bGU9J2JhY2tncm91bmQtY29sb3I6IzAyYTBjNic+PC9kaXY+PHNwYW4gY2xhc3M9J2l0ZW1fbWVudV9yYXlvbic+LSBSRUZSSUdFUkFUSU9OPC9zcGFuPh8NBQ8tIFJFRlJJR0VSQVRJT04fBwUjfi9yYXlvbnMuYXNweD92YWx1ZV9wYXRoPVgxMzY3TTdEOVNkFCsAAhYGHwYFigE8ZGl2IGNsYXNzPSdwdWNlX21lbnVfcmF5b24nIHN0eWxlPSdiYWNrZ3JvdW5kLWNvbG9yOiM2Y2IyZGEnPjwvZGl2PjxzcGFuIGNsYXNzPSdpdGVtX21lbnVfcmF5b24nPi0gU0FORFdJQ0hFUyAtIFNBTEFERVMgLSBTVEFSVEVSUzwvc3Bhbj4fDQUhLSBTQU5EV0lDSEVTIC0gU0FMQURFUyAtIFNUQVJURVJTHwcFI34vcmF5b25zLmFzcHg/dmFsdWVfcGF0aD03REU4RUM0RTJDZBQrAAIWBh8GBXY8ZGl2IGNsYXNzPSdwdWNlX21lbnVfcmF5b24nIHN0eWxlPSdiYWNrZ3JvdW5kLWNvbG9yOiM5NWM3ZTUnPjwvZGl2PjxzcGFuIGNsYXNzPSdpdGVtX21lbnVfcmF5b24nPi0gV0lORSAtIEJFRVI8L3NwYW4+Hw0FDS0gV0lORSAtIEJFRVIfBwUjfi9yYXlvbnMuYXNweD92YWx1ZV9wYXRoPTZENDg3NDQzNEFkFCsAAhYGHwYFjAE8ZGl2IGNsYXNzPSdwdWNlX21lbnVfcmF5b24nIHN0eWxlPSdiYWNrZ3JvdW5kLWNvbG9yOiNiYmRiZjAnPjwvZGl2PjxzcGFuIGNsYXNzPSdpdGVtX21lbnVfcmF5b24nPi0gU09GVCBEUklOS1MgLSBBTENPSE9MIC0gQ09DS1RBSUxTPC9zcGFuPh8NBSMtIFNPRlQgRFJJTktTIC0gQUxDT0hPTCAtIENPQ0tUQUlMUx8HBSN+L3JheW9ucy5hc3B4P3ZhbHVlX3BhdGg9Q0RBRTQyMzRCRWQUKwACFgYfBgWHATxkaXYgY2xhc3M9J3B1Y2VfbWVudV9yYXlvbicgc3R5bGU9J2JhY2tncm91bmQtY29sb3I6IzY5N2RiOSc+PC9kaXY+PHNwYW4gY2xhc3M9J2l0ZW1fbWVudV9yYXlvbic+LSBJQ0UgQ1JFQU0gLSBTT1JCRVQgLSBHUkFOSVRBPC9zcGFuPh8NBR4tIElDRSBDUkVBTSAtIFNPUkJFVCAtIEdSQU5JVEEfBwUjfi9yYXlvbnMuYXNweD92YWx1ZV9wYXRoPTNUTlQwNkJYOTJkFCsAAhYGHwYFhwE8ZGl2IGNsYXNzPSdwdWNl
X21lbnVfcmF5b24nIHN0eWxlPSdiYWNrZ3JvdW5kLWNvbG9yOiNiMjI4MTQnPjwvZGl2PjxzcGFuIGNsYXNzPSdpdGVtX21lbnVfcmF5b24nPi0gU0VMRiBTRVJWSUNFIC0gQlVGRkVUIC1UQVBBUzwvc3Bhbj4fDQUeLSBTRUxGIFNFUlZJQ0UgLSBCVUZGRVQgLVRBUEFTHwcFI34vcmF5b25zLmFzcHg/dmFsdWVfcGF0aD0zT0tIWDA1NzFXZBQrAAIWBh8GBYYBPGRpdiBjbGFzcz0ncHVjZV9tZW51X3JheW9uJyBzdHlsZT0nYmFja2dyb3VuZC1jb2xvcjojZWQ5YTA1Jz48L2Rpdj48c3BhbiBjbGFzcz0naXRlbV9tZW51X3JheW9uJz4tIFBBU1RSWSAtIEJBS0VSWSAtIENIT0NPTEFURTwvc3Bhbj4fDQUdLSBQQVNUUlkgLSBCQUtFUlkgLSBDSE9DT0xBVEUfBwUjfi9yYXlvbnMuYXNweD92YWx1ZV9wYXRoPU41RjBUNVpWS1pkFCsAAhYGHwYFhQE8ZGl2IGNsYXNzPSdwdWNlX21lbnVfcmF5b24nIHN0eWxlPSdiYWNrZ3JvdW5kLWNvbG9yOiNhMzQ0YTgnPjwvZGl2PjxzcGFuIGNsYXNzPSdpdGVtX21lbnVfcmF5b24nPi0gTUVBVCAtIERFTElDQVRFU1NFTiAtIEZJU0g8L3NwYW4+Hw0FHC0gTUVBVCAtIERFTElDQVRFU1NFTiAtIEZJU0gfBwUjfi9yYXlvbnMuYXNweD92YWx1ZV9wYXRoPUE0MTFCODA3Q0FkFCsAAhYGHwYFhAE8ZGl2IGNsYXNzPSdwdWNlX21lbnVfcmF5b24nIHN0eWxlPSdiYWNrZ3JvdW5kLWNvbG9yOiNmZjAwMGYnPjwvZGl2PjxzcGFuIGNsYXNzPSdpdGVtX21lbnVfcmF5b24nPi0gUElaWkEgLSBQQVNUQSAtIFRBS0UgQVdBWTwvc3Bhbj4fDQUbLSBQSVpaQSAtIFBBU1RBIC0gVEFLRSBBV0FZHwcFI34vcmF5b25zLmFzcHg/dmFsdWVfcGF0aD01STZYNjZSNzYyZBQrAAIWBh8GBZwBPGRpdiBjbGFzcz0ncHVjZV9tZW51X3JheW9uJyBzdHlsZT0nYmFja2dyb3VuZC1jb2xvcjojYTY2YjExJz48L2Rpdj48c3BhbiBjbGFzcz0naXRlbV9tZW51X3JheW9uJz4tIENPRkZFRSBURUEgLSBWSUVOTkVTRSBQQVNUUklFUyAtSlVJQ0VTIE1JTEsgU0hBS0U8L3NwYW4+Hw0FMy0gQ09GRkVFIFRFQSAtIFZJRU5ORVNFIFBBU1RSSUVTIC1KVUlDRVMgTUlMSyBTSEFLRR8HBSN+L3JheW9ucy5hc3B4P3ZhbHVlX3BhdGg9MUZERkZQNUgzMmQUKwACFgYfBgV7PGRpdiBjbGFzcz0ncHVjZV9tZW51X3JheW9uJyBzdHlsZT0nYmFja2dyb3VuZC1jb2xvcjojYzBjYTBlJz48L2Rpdj48c3BhbiBjbGFzcz0naXRlbV9tZW51X3JheW9uJz4tIEZPT0QgUFJFUEFSQVRJT048L3NwYW4+Hw0FEi0gRk9PRCBQUkVQQVJBVElPTh8HBSN+L3JheW9ucy5hc3B4P3ZhbHVlX3BhdGg9NVFKNzQ0MzJTV2QUKwACFgYfBgV5PGRpdiBjbGFzcz0ncHVjZV9tZW51X3JheW9uJyBzdHlsZT0nYmFja2dyb3VuZC1jb2xvcjojNWQ2MzY3Jz48L2Rpdj48c3BhbiBjbGFzcz0naXRlbV9tZW51X3JheW9uJz4tIE5FVVRSQUwgLSBJTk9YPC9zcGFuPh8NBRAtIE5FVVRSQUwgLSBJTk9YHwcFI34vcmF5b25zLmFzcHg/dmFsdWVfcGF0aD1ISDI3OTg1Q1pUZBQrAAIWBh8GBX08
ZGl2IGNsYXNzPSdwdWNlX21lbnVfcmF5b24nIHN0eWxlPSdiYWNrZ3JvdW5kLWNvbG9yOiM0ZWJhYmMnPjwvZGl2PjxzcGFuIGNsYXNzPSdpdGVtX21lbnVfcmF5b24nPi0gQ0xFQU5JTkcgLSBIWUdJRU5FPC9zcGFuPh8NBRQtIENMRUFOSU5HIC0gSFlHSUVORR8HBSN+L3JheW9ucy5hc3B4P3ZhbHVlX3BhdGg9MU8wN09XMDA2M2QUKwACFgYfBgV/PGRpdiBjbGFzcz0ncHVjZV9tZW51X3JheW9uJyBzdHlsZT0nYmFja2dyb3VuZC1jb2xvcjojZmZiMjA1Jz48L2Rpdj48c3BhbiBjbGFzcz0naXRlbV9tZW51X3JheW9uJz4tIFZBQ1VVTSAmIFZFTlRJTEFUSU9OPC9zcGFuPh8NBRYtIFZBQ1VVTSAmIFZFTlRJTEFUSU9OHwcFI34vcmF5b25zLmFzcHg/dmFsdWVfcGF0aD0xSTRDQzcxM0hCZBQrAAIWBh8GBXg8ZGl2IGNsYXNzPSdwdWNlX21lbnVfcmF5b24nIHN0eWxlPSdiYWNrZ3JvdW5kLWNvbG9yOiNiMGIxYmInPjwvZGl2PjxzcGFuIGNsYXNzPSdpdGVtX21lbnVfcmF5b24nPi0gR04gQ09OVEFJTkVSUzwvc3Bhbj4fDQUPLSBHTiBDT05UQUlORVJTHwcFI34vcmF5b25zLmFzcHg/dmFsdWVfcGF0aD0yNDAxUzk1RDNHZBQrAAIWBh8GBY8BPGRpdiBjbGFzcz0ncHVjZV9tZW51X3JheW9uJyBzdHlsZT0nYmFja2dyb3VuZC1jb2xvcjojYjM4MzEwJz48L2Rpdj48c3BhbiBjbGFzcz0naXRlbV9tZW51X3JheW9uJz4tIERJTk5FUiBTRVJWSUNFIC0gRElTUExBWVMgLSBUUk9MTEVZUzwvc3Bhbj4fDQUmLSBESU5ORVIgU0VSVklDRSAtIERJU1BMQVlTIC0gVFJPTExFWVMfBwUjfi9yYXlvbnMuYXNweD92YWx1ZV9wYXRoPVg5VEY5REY0MzdkFCsAAhYGHwYFjwE8ZGl2IGNsYXNzPSdwdWNlX21lbnVfcmF5b24nIHN0eWxlPSdiYWNrZ3JvdW5kLWNvbG9yOiNmZmNjMDMnPjwvZGl2PjxzcGFuIGNsYXNzPSdpdGVtX21lbnVfcmF5b24nPi0gUkVDRVBUSU9OIC0gUk9PTSBTRVJWSUNFIC0gQlJFQUtGQVNUPC9zcGFuPh8NBSYtIFJFQ0VQVElPTiAtIFJPT00gU0VSVklDRSAtIEJSRUFLRkFTVB8HBSN+L3JheW9ucy5hc3B4P3ZhbHVlX3BhdGg9VVE0M1hMTlRBNWRkZAILDw8WBB8GBRJIaXN0b3JpY2FsIGVzdC9vcmQfDGhkZAIRDxYCHwxoFgJmD2QWAgIDDxBkZBYBZmQCEw9kFgICAQ8PFgQfBgUYSGlzdG9yaXF1ZSBkZXZpcyBjbGllbnRzHwxoZGQCAw9kFhICAw8PFgQfBgUMRmluZCBhbiBJdGVtHwcFHX4vZmFxL3JlY2hlcmNoZXJfYXJ0aWNsZS5hc3B4ZGQCBQ8PFgQfBgUjSG93IHRvIG1ha2UgYSAgZXN0aW1hdGUgLyAgYW4gT3JkZXIfBwUffi9mYXEvZmFpcmVfZGV2aXNfY29tbWFuZGUuYXNweGRkAgcPDxYEHwYFG0ZpbmQgYSAgZXN0aW1hdGUgLyBhbiBvcmRlch8HBSN+L2ZhcS9yZXRyb3V2ZXJfZGV2aXNfY29tbWFuZGUuYXNweGRkAgkPDxYEHwYFHVJlbW92ZSBhbiBpdG1lIG9mIGEgIGVzdGltYXRlHwcFGn4vZmFxL3JldGlyZXJfYXJ0aWNsZS5hc3B4ZGQCCw8PFgQfBgUUVG8gZXJhc2UgYW4gZXN0aW1hdGUfBwUXfi9mYXEvZWZhY2VyX2Rl
dmlzLmFzcHhkZAINDxYCHwIFRH4vYXJ0aWNsZXMuYXNweD9zZWFyY2hfdHlwZT1sZXZlbCZ2YWx1ZV9wYXRoPVA0N0tIQzc0ODQmc2k9YmEmcGFnZT0xFgJmDxYCHwQFFX4vaW1hZ2VzL2VuLWdiL2JhLmpwZ2QCDg8WAh8CBUd+L2FydGljbGVzLmFzcHg/c2VhcmNoX3R5cGU9bGV2ZWwmdmFsdWVfcGF0aD1QNDdLSEM3NDg0JnNpPXByb21vJnBhZ2U9MRYCAgEPFgIfBAUYfi9pbWFnZXMvZW4tZ2IvcHJvbW8uanBnZAIQDxYCHwIFRX4vYXJ0aWNsZXMuYXNweD9zZWFyY2hfdHlwZT1sZXZlbCZ2YWx1ZV9wYXRoPVA0N0tIQzc0ODQmc2k9bmV3JnBhZ2U9MRYCZg8WAh8EBRZ+L2ltYWdlcy9lbi1nYi9uZXcuanBnZAIRDxYCHwIFLX4vYXJ0aWNsZXMuYXNweD9zZWFyY2hfdHlwZT1oaXQmc2k9aGl0JnBhZ2U9MRYCZg8WAh8EBRZ+L2ltYWdlcy9lbi1nYi9oaXQuanBnZAIFDw8WAh8MZ2QWBgIDDxQrAAJkZGQCBQ8UKwACDxYEHgtfIURhdGFCb3VuZGcfAAISZGQWAmYPZBYMAgEPZBYCAgEPZBYIAgEPDxYCHwYFB0lENzAvUE1kZAIDDxYCHwYFLFZFTlRJTEFURUQgUkVGUklHRVJBVE9SIDcwMCBMLiAxIERPT1IgKEdOMi8xZAIFDxYCHwIFHn4vYXJ0aWNsZS5hc3B4P2FfaWQ9MTI1MTI4MzM4MhYCZg8WBB8FBSxWRU5USUxBVEVEIFJFRlJJR0VSQVRPUiA3MDAgTC4gMSBET09SIChHTjIvMR8EBWp+L21lcmNhdG9yX2RhdGEvanBlZy9HRC9DQVRBTE9HVUUgMjAxMC9SRUYgR0FTVFJPTk9STS9NRVJDQVRVUyBQSE9UT1MgMTMtMDctMTIvTk9VVkVBVSBET1NTSUVSL0lENzAtUE0uSlBHZAIHDw8WBh8IBRNsX3ByaXhfY2xpZW50IHByb21vHwYFDDEuNjI5LDAwIOKCrB8JAgJkZAICD2QWAgIBD2QWCAIBDw8WAh8GBQhEVDE3OC9QTWRkAgMPFgIfBgUoVkVOVC4gUkVGUklHLiBUQUJMRSAzIERPT1JTIEdOMS8xIDQwNSBMLmQCBQ8WAh8CBR5+L2FydGljbGUuYXNweD9hX2lkPTEyNTEyODM0MTYWAmYPFgQfBQUoVkVOVC4gUkVGUklHLiBUQUJMRSAzIERPT1JTIEdOMS8xIDQwNSBMLh8EBVt+L21lcmNhdG9yX2RhdGEvanBlZy9HRC9DQVRBTE9HVUUgMjAxMC9SRUYgR0FTVFJPTk9STS9NRVJDQVRVUyBQSE9UT1MgMTMtMDctMTIvRFQxNzgtUE0uSlBHZAIHDw8WBh8IBRNsX3ByaXhfY2xpZW50IHByb21vHwYFDDEuOTA5LDAwIOKCrB8JAgJkZAIDD2QWAgIBD2QWCAIBDw8WAh8GBQhBUDFOL0w4NmRkAgMPFgIfBgUrUkVGUklHRVIuIDg1MEwuIDEgRC4gNDB4NjAweDQwMC8yMHggNjAweDgwMGQCBQ8WAh8CBR5+L2FydGljbGUuYXNweD9hX2lkPTEyNTEyODM2MDMWAmYPFgQfBQUrUkVGUklHRVIuIDg1MEwuIDEgRC4gNDB4NjAweDQwMC8yMHggNjAweDgwMB8EBU9+L21lcmNhdG9yX2RhdGEvanBlZy9HRC9DQVRBTE9HVUUgMjAxMC9QQVRJU1NFUklFIFBBSU4gQ0hPQ09MQVQvQVAxTi1MNjQtODYuSlBHZAIHDw8WBh8IBRNsX3ByaXhfY2xpZW50IHByb21vHwYFDDIuNjg5LDAwIOKCrB8JAgJkZAIED2QWAgIBD2QWCAIBDw8WAh8GBQdEQzUwMi1OZGQCAw8WAh8G
BRxESVNILVdBU0hFUiBCQVNLRVQgNTAweDUwMG1tZAIFDxYCHwIFHn4vYXJ0aWNsZS5hc3B4P2FfaWQ9MTI1MTI4NDY0OBYCZg8WBB8FBRxESVNILVdBU0hFUiBCQVNLRVQgNTAweDUwMG1tHwQFQ34vbWVyY2F0b3JfZGF0YS9qcGVnL0dEL0NBVEFMT0dVRSAyMDEwL0xBVkFHRS9GQVNUIFdBU0gvREM1MDItTi5KUEdkAgcPDxYGHwgFE2xfcHJpeF9jbGllbnQgcHJvbW8fBgUMMS41ODksMDAg4oKsHwkCAmRkAgUPZBYCAgEPZBYIAgEPDxYCHwYFB0VGUC80NFJkZAIDDxYCHwYFIUVMRUNUUklDIE9WRU4gMnggNCBQSVpaQVMgMiBST09NU2QCBQ8WAh8CBR5+L2FydGljbGUuYXNweD9hX2lkPTEyNTEyNzgzMDcWAmYPFgQfBQUhRUxFQ1RSSUMgT1ZFTiAyeCA0IFBJWlpBUyAyIFJPT01THwQFTX4vbWVyY2F0b3JfZGF0YS9qcGVnL0dEL0NBVEFMT0dVRSAyMDEwL1BJWlpBIEVUIFBBU1RBL1JVU1RJQyBMSU5FL0VGUC02NlIuSlBHZAIHDw8WBh8IBRNsX3ByaXhfY2xpZW50IHByb21vHwYFDDEuNjI1LDAwIOKCrB8JAgJkZAIGD2QWAgIBD2QWCAIBDw8WAh8GBQVESzctMmRkAgMPFgIfBgUjSE9PRCBESVNIV0FTSEVSLCAgQkFTS0VUIDUwMHg1MDAgTU1kAgUPFgIfAgUefi9hcnRpY2xlLmFzcHg/YV9pZD0xMjUxMjc1NDA1FgJmDxYEHwUFI0hPT0QgRElTSFdBU0hFUiwgIEJBU0tFVCA1MDB4NTAwIE1NHwQFQX4vbWVyY2F0b3JfZGF0YS9qcGVnL0dEL0NBVEFMT0dVRSAyMDEwL0xBVkFHRS9GQVNUIFdBU0gvREs3LTIuSlBHZAIHDw8WBh8IBRNsX3ByaXhfY2xpZW50IHByb21vHwYFDDIuNTM2LDAwIOKCrB8JAgJkZAIHDxQrAAJkZGQCBw9kFgQCAQ9kFgJmD2QWCgIFDxYCHwxoZAIJDxYCHwxoZAINDxYCHwxoZAIRDw8WBB8GBQxPcGVuIC8gQ2xvc2UfDGhkZAITDw8WAh8MaGRkAgMPPCsACQBkGAMFKWN0bDAwJGNwaF9jZW50ZXIkZHBfYXJ0aWNsZXNfcmF5b25fYm90dG9tDxQrAARkZAIGAhJkBSNjdGwwMCRjcGhfY2VudGVyJGx2X2FydGljbGVzX3JheW9ucw88KwAKAgc8KwAGAAgCEmQFJmN0bDAwJGNwaF9jZW50ZXIkZHBfYXJ0aWNsZXNfcmF5b25fdG9wDxQrAARkZAIGAhJkzw5eBCgUF6HQH+o5L7mrNloYe3w= ctl00$ToolkitScriptManage... ctl00$cph_center$menu_left1$up_login|ctl00$cph_center$menu_left1$lb_login ctl00$cph_center$hf_value... ctl00$cph_center$menu_lef... ctl00$cph_center$menu_lef... C913327 ctl00$cph_center$menu_lef... sdfsdfsdf ctl00$cph_center$n_vei$cp... ctl00$cph_center$n_vei$hf... ctl00$ddl_search_type Reference ctl00$tb_search ctl00_ToolkitScriptManage... 
;;AjaxControlToolkit, Version=3.5.40412.0, Culture=neutral, PublicKeyToken=28f01b0e84b6d53e:en-GB:1547e793-5b7e-48fe-8490-03a375b13a33:de1feab2:f2c8e708:720a52bf:f9cec9bc:589eaa30:698129cf:7a92f56c:4a2c8239; hiddenInputToUpdateATBuff... 1 """ def loginDiamondEurope(self): params = { '__ASYNCPOST': 'true', '__EVENTARGUMENT': '', '__EVENTTARGET': 'ctl00$cph_center$menu_left1$lb_login', '__EVENTVALIDATION': '/wEWEwKOk7qrBAKG4eyLBALGw+PfBwK7jI7eDQL/2fqXBwLH9rmjDwLG2KLDCAKCvreACALPgYP1DQKqvLeACAKKtP7+DAL07MD3CwLksZZaAuSxmloCicn43Q8Cisn43Q8C/Iag2AMClcHvlQgCyNGw1Ax/PwzywfL/ooD/FU51memYxQ1U+Q==', '__LASTFOCUS': '', '__SCROLLPOSITIONX': '0', '__SCROLLPOSITIONY': '0', '__VIEWSTATE': '/wEPDwUINzkzMzQ5OTcPZBYCZg9kFgICAw9kFgICAQ9kFhICAw8WAh4LXyFJdGVtQ291bnQCBhYMZg9kFgICAQ8WBh4FdGl0bGUFB0VuZ2xpc2geBGhyZWYFDC9yYXlvbnMuYXNweB4Hb25jbGljawUcc2V0Q29va2llKCdsYW5ndWUnLCAnZW4tZ2InKRYCZg8WBB4Dc3JjBQ9+L2ltYWdlcy9lbi5wbmceA2FsdAUHRW5nbGlzaGQCAQ9kFgICAQ8WBh8BBQlGcmFuw6dhaXMfAgUZL3JheW9ucy5hc3B4P2xhbmd1ZT1mci1iZR8DBRxzZXRDb29raWUoJ2xhbmd1ZScsICdmci1iZScpFgJmDxYEHwQFD34vaW1hZ2VzL2ZyLnBuZx8FBQlGcmFuw6dhaXNkAgIPZBYCAgEPFgYfAQUHRGV1dHNjaB8CBRkvcmF5b25zLmFzcHg/bGFuZ3VlPWRlLWRlHwMFHHNldENvb2tpZSgnbGFuZ3VlJywgJ2RlLWRlJykWAmYPFgQfBAUPfi9pbWFnZXMvZGUucG5nHwUFB0RldXRzY2hkAgMPZBYCAgEPFgYfAQUKTmVkZXJsYW5kcx8CBRkvcmF5b25zLmFzcHg/bGFuZ3VlPW5sLWJlHwMFHHNldENvb2tpZSgnbGFuZ3VlJywgJ25sLWJlJykWAmYPFgQfBAUPfi9pbWFnZXMvbmwucG5nHwUFCk5lZGVybGFuZHNkAgQPZBYCAgEPFgYfAQUIRXNwYcOxb2wfAgUZL3JheW9ucy5hc3B4P2xhbmd1ZT1lcy1lcx8DBRxzZXRDb29raWUoJ2xhbmd1ZScsICdlcy1lcycpFgJmDxYEHwQFD34vaW1hZ2VzL2VzLnBuZx8FBQhFc3Bhw7FvbGQCBQ9kFgICAQ8WBh8BBQhJdGFsaWFubx8CBRkvcmF5b25zLmFzcHg/bGFuZ3VlPWl0LWl0HwMFHHNldENvb2tpZSgnbGFuZ3VlJywgJ2l0LWl0JykWAmYPFgQfBAUPfi9pbWFnZXMvaXQucG5nHwUFCEl0YWxpYW5vZAIFDw8WBB4EVGV4dAUESG9tZR4LTmF2aWdhdGVVcmwFDH4vaW5kZXguYXNweGRkAgcPDxYEHwYFB0RpYW1vbmQfBwUOfi9kaWFtb25kLmFzcHhkZAIJDw8WBB8GBQhTZXJ2aWNlcx8HBQ9+L3NlcnZpY2VzLmFzcHhkZAILDw8WCB8GBQhQcm9kdWN0cx8HBRR+L3JheW9ucy5hc3B4P3BhZ2U9MR4IQ3NzQ2xhc3MFB2N1cnJlbnQeBF8hU
0ICAmRkAg0PDxYEHwYFBE5ld3MfBwULfi9uZXdzLmFzcHhkZAIPDw8WBB8GBQdDb250YWN0HwcFDn4vY29udGFjdC5hc3B4ZGQCEQ9kFgICAQ9kFgICAw9kFgJmD2QWBAIBDxBkZBYBZmQCBw8WBB4KQ29udGV4dEtleQUVUmVmZXJlbmNlfmVuLWdifkZhbHNlHg1Vc2VDb250ZXh0S2V5Z2QCEw9kFggCAQ9kFg4CAQ8PFgIeB1Zpc2libGVoZGQCAw9kFgJmD2QWAgIDDw8WAh8MZ2RkAgUPDxYCHwxoZGQCCRA8KwANAgAPFgIfDGhkDBQrABwFfDA6MCwwOjEsMDoyLDA6MywwOjQsMDo1LDA6NiwwOjcsMDo4LDA6OSwwOjEwLDA6MTEsMDoxMiwwOjEzLDA6MTQsMDoxNSwwOjE2LDA6MTcsMDoxOCwwOjE5LDA6MjAsMDoyMSwwOjIyLDA6MjMsMDoyNCwwOjI1LDA6MjYUKwACFgYfBgVyPGRpdiBjbGFzcz0ncHVjZV9tZW51X3JheW9uJyBzdHlsZT0nYmFja2dyb3VuZC1jb2xvcjojZmY0YzBiJz48L2Rpdj48c3BhbiBjbGFzcz0naXRlbV9tZW51X3JheW9uJz4tIENPT0tJTkc8L3NwYW4+HgdUb29sVGlwBQktIENPT0tJTkcfBwUjfi9yYXlvbnMuYXNweD92YWx1ZV9wYXRoPVA0N0tIQzc0ODRkFCsAAhYGHwYFigE8ZGl2IGNsYXNzPSdwdWNlX21lbnVfcmF5b24nIHN0eWxlPSdiYWNrZ3JvdW5kLWNvbG9yOiNlMThjNDUnPjwvZGl2PjxzcGFuIGNsYXNzPSdpdGVtX21lbnVfcmF5b24nPi0gSE9UIFNOQUNLUyAtIFBBTklOSSAtIEZBU1QgRk9PRDwvc3Bhbj4fDQUhLSBIT1QgU05BQ0tTIC0gUEFOSU5JIC0gRkFTVCBGT09EHwcFI34vcmF5b25zLmFzcHg/dmFsdWVfcGF0aD04M0RDM0Y2Q0FEZBQrAAIWBh8GBZMBPGRpdiBjbGFzcz0ncHVjZV9tZW51X3JheW9uJyBzdHlsZT0nYmFja2dyb3VuZC1jb2xvcjojZWNhZTc1Jz48L2Rpdj48c3BhbiBjbGFzcz0naXRlbV9tZW51X3JheW9uJz4tIEZSRU5DSCBGUklFUyAtIFJPQVNUSU5HIC0gR1JJTExJTkcgJiBCQlE8L3NwYW4+Hw0FKi0gRlJFTkNIIEZSSUVTIC0gUk9BU1RJTkcgLSBHUklMTElORyAmIEJCUR8HBSN+L3JheW9ucy5hc3B4P3ZhbHVlX3BhdGg9RTY0NDk0MzdDM2QUKwACFgYfBgV4PGRpdiBjbGFzcz0ncHVjZV9tZW51X3JheW9uJyBzdHlsZT0nYmFja2dyb3VuZC1jb2xvcjojZjNjYmEzJz48L2Rpdj48c3BhbiBjbGFzcz0naXRlbV9tZW51X3JheW9uJz4tIEFTSUFOIENPT0tJTkc8L3NwYW4+Hw0FDy0gQVNJQU4gQ09PS0lORx8HBSN+L3JheW9ucy5hc3B4P3ZhbHVlX3BhdGg9OTc4QTE0QzhFNGQUKwACFgYfBgWJATxkaXYgY2xhc3M9J3B1Y2VfbWVudV9yYXlvbicgc3R5bGU9J2JhY2tncm91bmQtY29sb3I6I2ZiZTZkMSc+PC9kaXY+PHNwYW4gY2xhc3M9J2l0ZW1fbWVudV9yYXlvbic+LSBTVEVBTSAtIENPTlZFQ1RJT04gLSBNSUNST1dBVkU8L3NwYW4+Hw0FIC0gU1RFQU0gLSBDT05WRUNUSU9OIC0gTUlDUk9XQVZFHwcFI34vcmF5b25zLmFzcHg/dmFsdWVfcGF0aD02N0ZENkIzNjQ2ZBQrAAIWBh8GBXc8ZGl2IGNsYXNzPSdwdWNlX21lbnVfcmF5b24nIHN0eWxlPSdiYWNrZ3JvdW5kLWNvb
G9yOiNmYzM0MjgnPjwvZGl2PjxzcGFuIGNsYXNzPSdpdGVtX21lbnVfcmF5b24nPi0gQ09PSyAmIENISUxMPC9zcGFuPh8NBQ4tIENPT0sgJiBDSElMTB8HBSN+L3JheW9ucy5hc3B4P3ZhbHVlX3BhdGg9TzBRTDhLSDA4VmQUKwACFgYfBgWNATxkaXYgY2xhc3M9J3B1Y2VfbWVudV9yYXlvbicgc3R5bGU9J2JhY2tncm91bmQtY29sb3I6I2UxN2Y1ZCc+PC9kaXY+PHNwYW4gY2xhc3M9J2l0ZW1fbWVudV9yYXlvbic+LSBSRUdFTkVSQVRJT04gLSBWQUNVVU0gLSBCQU5RVUVUSU5HPC9zcGFuPh8NBSQtIFJFR0VORVJBVElPTiAtIFZBQ1VVTSAtIEJBTlFVRVRJTkcfBwUjfi9yYXlvbnMuYXNweD92YWx1ZV9wYXRoPUU4NDM5Q0U0QTBkFCsAAhYGHwYFdzxkaXYgY2xhc3M9J3B1Y2VfbWVudV9yYXlvbicgc3R5bGU9J2JhY2tncm91bmQtY29sb3I6IzAyOTQ3ZSc+PC9kaXY+PHNwYW4gY2xhc3M9J2l0ZW1fbWVudV9yYXlvbic+LSBESVNIIFdBU0hFUlM8L3NwYW4+Hw0FDi0gRElTSCBXQVNIRVJTHwcFI34vcmF5b25zLmFzcHg/dmFsdWVfcGF0aD03OE1YQk1KRkdLZBQrAAIWBh8GBXI8ZGl2IGNsYXNzPSdwdWNlX21lbnVfcmF5b24nIHN0eWxlPSdiYWNrZ3JvdW5kLWNvbG9yOiNhMDA4NmQnPjwvZGl2PjxzcGFuIGNsYXNzPSdpdGVtX21lbnVfcmF5b24nPi0gTEFVTkRSWTwvc3Bhbj4fDQUJLSBMQVVORFJZHwcFI34vcmF5b25zLmFzcHg/dmFsdWVfcGF0aD1TWjJOU1ZKUTc4ZBQrAAIWBh8GBYMBPGRpdiBjbGFzcz0ncHVjZV9tZW51X3JheW9uJyBzdHlsZT0nYmFja2dyb3VuZC1jb2xvcjojMDU3M2E1Jz48L2Rpdj48c3BhbiBjbGFzcz0naXRlbV9tZW51X3JheW9uJz4tIEdBU1RST05PUk0gUkVGUklHRVJBVElPTjwvc3Bhbj4fDQUaLSBHQVNUUk9OT1JNIFJFRlJJR0VSQVRJT04fBwUjfi9yYXlvbnMuYXNweD92YWx1ZV9wYXRoPUhTVkVROTZYRzRkFCsAAhYGHwYFeDxkaXYgY2xhc3M9J3B1Y2VfbWVudV9yYXlvbicgc3R5bGU9J2JhY2tncm91bmQtY29sb3I6IzAyYTBjNic+PC9kaXY+PHNwYW4gY2xhc3M9J2l0ZW1fbWVudV9yYXlvbic+LSBSRUZSSUdFUkFUSU9OPC9zcGFuPh8NBQ8tIFJFRlJJR0VSQVRJT04fBwUjfi9yYXlvbnMuYXNweD92YWx1ZV9wYXRoPVgxMzY3TTdEOVNkFCsAAhYGHwYFigE8ZGl2IGNsYXNzPSdwdWNlX21lbnVfcmF5b24nIHN0eWxlPSdiYWNrZ3JvdW5kLWNvbG9yOiM2Y2IyZGEnPjwvZGl2PjxzcGFuIGNsYXNzPSdpdGVtX21lbnVfcmF5b24nPi0gU0FORFdJQ0hFUyAtIFNBTEFERVMgLSBTVEFSVEVSUzwvc3Bhbj4fDQUhLSBTQU5EV0lDSEVTIC0gU0FMQURFUyAtIFNUQVJURVJTHwcFI34vcmF5b25zLmFzcHg/dmFsdWVfcGF0aD03REU4RUM0RTJDZBQrAAIWBh8GBXY8ZGl2IGNsYXNzPSdwdWNlX21lbnVfcmF5b24nIHN0eWxlPSdiYWNrZ3JvdW5kLWNvbG9yOiM5NWM3ZTUnPjwvZGl2PjxzcGFuIGNsYXNzPSdpdGVtX21lbnVfcmF5b24nPi0gV0lORSAtIEJFRVI8L3NwYW4+Hw0FDS0gV0lORSAtIEJFRVIfBwUjf
i9yYXlvbnMuYXNweD92YWx1ZV9wYXRoPTZENDg3NDQzNEFkFCsAAhYGHwYFjAE8ZGl2IGNsYXNzPSdwdWNlX21lbnVfcmF5b24nIHN0eWxlPSdiYWNrZ3JvdW5kLWNvbG9yOiNiYmRiZjAnPjwvZGl2PjxzcGFuIGNsYXNzPSdpdGVtX21lbnVfcmF5b24nPi0gU09GVCBEUklOS1MgLSBBTENPSE9MIC0gQ09DS1RBSUxTPC9zcGFuPh8NBSMtIFNPRlQgRFJJTktTIC0gQUxDT0hPTCAtIENPQ0tUQUlMUx8HBSN+L3JheW9ucy5hc3B4P3ZhbHVlX3BhdGg9Q0RBRTQyMzRCRWQUKwACFgYfBgWHATxkaXYgY2xhc3M9J3B1Y2VfbWVudV9yYXlvbicgc3R5bGU9J2JhY2tncm91bmQtY29sb3I6IzY5N2RiOSc+PC9kaXY+PHNwYW4gY2xhc3M9J2l0ZW1fbWVudV9yYXlvbic+LSBJQ0UgQ1JFQU0gLSBTT1JCRVQgLSBHUkFOSVRBPC9zcGFuPh8NBR4tIElDRSBDUkVBTSAtIFNPUkJFVCAtIEdSQU5JVEEfBwUjfi9yYXlvbnMuYXNweD92YWx1ZV9wYXRoPTNUTlQwNkJYOTJkFCsAAhYGHwYFhwE8ZGl2IGNsYXNzPSdwdWNlX21lbnVfcmF5b24nIHN0eWxlPSdiYWNrZ3JvdW5kLWNvbG9yOiNiMjI4MTQnPjwvZGl2PjxzcGFuIGNsYXNzPSdpdGVtX21lbnVfcmF5b24nPi0gU0VMRiBTRVJWSUNFIC0gQlVGRkVUIC1UQVBBUzwvc3Bhbj4fDQUeLSBTRUxGIFNFUlZJQ0UgLSBCVUZGRVQgLVRBUEFTHwcFI34vcmF5b25zLmFzcHg/dmFsdWVfcGF0aD0zT0tIWDA1NzFXZBQrAAIWBh8GBYYBPGRpdiBjbGFzcz0ncHVjZV9tZW51X3JheW9uJyBzdHlsZT0nYmFja2dyb3VuZC1jb2xvcjojZWQ5YTA1Jz48L2Rpdj48c3BhbiBjbGFzcz0naXRlbV9tZW51X3JheW9uJz4tIFBBU1RSWSAtIEJBS0VSWSAtIENIT0NPTEFURTwvc3Bhbj4fDQUdLSBQQVNUUlkgLSBCQUtFUlkgLSBDSE9DT0xBVEUfBwUjfi9yYXlvbnMuYXNweD92YWx1ZV9wYXRoPU41RjBUNVpWS1pkFCsAAhYGHwYFhQE8ZGl2IGNsYXNzPSdwdWNlX21lbnVfcmF5b24nIHN0eWxlPSdiYWNrZ3JvdW5kLWNvbG9yOiNhMzQ0YTgnPjwvZGl2PjxzcGFuIGNsYXNzPSdpdGVtX21lbnVfcmF5b24nPi0gTUVBVCAtIERFTElDQVRFU1NFTiAtIEZJU0g8L3NwYW4+Hw0FHC0gTUVBVCAtIERFTElDQVRFU1NFTiAtIEZJU0gfBwUjfi9yYXlvbnMuYXNweD92YWx1ZV9wYXRoPUE0MTFCODA3Q0FkFCsAAhYGHwYFhAE8ZGl2IGNsYXNzPSdwdWNlX21lbnVfcmF5b24nIHN0eWxlPSdiYWNrZ3JvdW5kLWNvbG9yOiNmZjAwMGYnPjwvZGl2PjxzcGFuIGNsYXNzPSdpdGVtX21lbnVfcmF5b24nPi0gUElaWkEgLSBQQVNUQSAtIFRBS0UgQVdBWTwvc3Bhbj4fDQUbLSBQSVpaQSAtIFBBU1RBIC0gVEFLRSBBV0FZHwcFI34vcmF5b25zLmFzcHg/dmFsdWVfcGF0aD01STZYNjZSNzYyZBQrAAIWBh8GBZwBPGRpdiBjbGFzcz0ncHVjZV9tZW51X3JheW9uJyBzdHlsZT0nYmFja2dyb3VuZC1jb2xvcjojYTY2YjExJz48L2Rpdj48c3BhbiBjbGFzcz0naXRlbV9tZW51X3JheW9uJz4tIENPRkZFRSBURUEgLSBWSUVOTkVTRSBQQVNUUklFUyAtSlVJQ
0VTIE1JTEsgU0hBS0U8L3NwYW4+Hw0FMy0gQ09GRkVFIFRFQSAtIFZJRU5ORVNFIFBBU1RSSUVTIC1KVUlDRVMgTUlMSyBTSEFLRR8HBSN+L3JheW9ucy5hc3B4P3ZhbHVlX3BhdGg9MUZERkZQNUgzMmQUKwACFgYfBgV7PGRpdiBjbGFzcz0ncHVjZV9tZW51X3JheW9uJyBzdHlsZT0nYmFja2dyb3VuZC1jb2xvcjojYzBjYTBlJz48L2Rpdj48c3BhbiBjbGFzcz0naXRlbV9tZW51X3JheW9uJz4tIEZPT0QgUFJFUEFSQVRJT048L3NwYW4+Hw0FEi0gRk9PRCBQUkVQQVJBVElPTh8HBSN+L3JheW9ucy5hc3B4P3ZhbHVlX3BhdGg9NVFKNzQ0MzJTV2QUKwACFgYfBgV5PGRpdiBjbGFzcz0ncHVjZV9tZW51X3JheW9uJyBzdHlsZT0nYmFja2dyb3VuZC1jb2xvcjojNWQ2MzY3Jz48L2Rpdj48c3BhbiBjbGFzcz0naXRlbV9tZW51X3JheW9uJz4tIE5FVVRSQUwgLSBJTk9YPC9zcGFuPh8NBRAtIE5FVVRSQUwgLSBJTk9YHwcFI34vcmF5b25zLmFzcHg/dmFsdWVfcGF0aD1ISDI3OTg1Q1pUZBQrAAIWBh8GBX08ZGl2IGNsYXNzPSdwdWNlX21lbnVfcmF5b24nIHN0eWxlPSdiYWNrZ3JvdW5kLWNvbG9yOiM0ZWJhYmMnPjwvZGl2PjxzcGFuIGNsYXNzPSdpdGVtX21lbnVfcmF5b24nPi0gQ0xFQU5JTkcgLSBIWUdJRU5FPC9zcGFuPh8NBRQtIENMRUFOSU5HIC0gSFlHSUVORR8HBSN+L3JheW9ucy5hc3B4P3ZhbHVlX3BhdGg9MU8wN09XMDA2M2QUKwACFgYfBgV/PGRpdiBjbGFzcz0ncHVjZV9tZW51X3JheW9uJyBzdHlsZT0nYmFja2dyb3VuZC1jb2xvcjojZmZiMjA1Jz48L2Rpdj48c3BhbiBjbGFzcz0naXRlbV9tZW51X3JheW9uJz4tIFZBQ1VVTSAmIFZFTlRJTEFUSU9OPC9zcGFuPh8NBRYtIFZBQ1VVTSAmIFZFTlRJTEFUSU9OHwcFI34vcmF5b25zLmFzcHg/dmFsdWVfcGF0aD0xSTRDQzcxM0hCZBQrAAIWBh8GBXg8ZGl2IGNsYXNzPSdwdWNlX21lbnVfcmF5b24nIHN0eWxlPSdiYWNrZ3JvdW5kLWNvbG9yOiNiMGIxYmInPjwvZGl2PjxzcGFuIGNsYXNzPSdpdGVtX21lbnVfcmF5b24nPi0gR04gQ09OVEFJTkVSUzwvc3Bhbj4fDQUPLSBHTiBDT05UQUlORVJTHwcFI34vcmF5b25zLmFzcHg/dmFsdWVfcGF0aD0yNDAxUzk1RDNHZBQrAAIWBh8GBY8BPGRpdiBjbGFzcz0ncHVjZV9tZW51X3JheW9uJyBzdHlsZT0nYmFja2dyb3VuZC1jb2xvcjojYjM4MzEwJz48L2Rpdj48c3BhbiBjbGFzcz0naXRlbV9tZW51X3JheW9uJz4tIERJTk5FUiBTRVJWSUNFIC0gRElTUExBWVMgLSBUUk9MTEVZUzwvc3Bhbj4fDQUmLSBESU5ORVIgU0VSVklDRSAtIERJU1BMQVlTIC0gVFJPTExFWVMfBwUjfi9yYXlvbnMuYXNweD92YWx1ZV9wYXRoPVg5VEY5REY0MzdkFCsAAhYGHwYFjwE8ZGl2IGNsYXNzPSdwdWNlX21lbnVfcmF5b24nIHN0eWxlPSdiYWNrZ3JvdW5kLWNvbG9yOiNmZmNjMDMnPjwvZGl2PjxzcGFuIGNsYXNzPSdpdGVtX21lbnVfcmF5b24nPi0gUkVDRVBUSU9OIC0gUk9PTSBTRVJWSUNFIC0gQlJFQUtGQVNUPC9zcGFuPh8NBSYtIFJFQ0VQVElPTiAtI
FJPT00gU0VSVklDRSAtIEJSRUFLRkFTVB8HBSN+L3JheW9ucy5hc3B4P3ZhbHVlX3BhdGg9VVE0M1hMTlRBNWRkZAILDw8WBB8GBRJIaXN0b3JpY2FsIGVzdC9vcmQfDGhkZAIRDxYCHwxoFgJmD2QWAgIDDxBkZBYBZmQCEw9kFgICAQ8PFgQfBgUYSGlzdG9yaXF1ZSBkZXZpcyBjbGllbnRzHwxoZGQCAw9kFhICAw8PFgQfBgUMRmluZCBhbiBJdGVtHwcFHX4vZmFxL3JlY2hlcmNoZXJfYXJ0aWNsZS5hc3B4ZGQCBQ8PFgQfBgUjSG93IHRvIG1ha2UgYSAgZXN0aW1hdGUgLyAgYW4gT3JkZXIfBwUffi9mYXEvZmFpcmVfZGV2aXNfY29tbWFuZGUuYXNweGRkAgcPDxYEHwYFG0ZpbmQgYSAgZXN0aW1hdGUgLyBhbiBvcmRlch8HBSN+L2ZhcS9yZXRyb3V2ZXJfZGV2aXNfY29tbWFuZGUuYXNweGRkAgkPDxYEHwYFHVJlbW92ZSBhbiBpdG1lIG9mIGEgIGVzdGltYXRlHwcFGn4vZmFxL3JldGlyZXJfYXJ0aWNsZS5hc3B4ZGQCCw8PFgQfBgUUVG8gZXJhc2UgYW4gZXN0aW1hdGUfBwUXfi9mYXEvZWZhY2VyX2RldmlzLmFzcHhkZAINDxYCHwIFRH4vYXJ0aWNsZXMuYXNweD9zZWFyY2hfdHlwZT1sZXZlbCZ2YWx1ZV9wYXRoPVA0N0tIQzc0ODQmc2k9YmEmcGFnZT0xFgJmDxYCHwQFFX4vaW1hZ2VzL2VuLWdiL2JhLmpwZ2QCDg8WAh8CBUd+L2FydGljbGVzLmFzcHg/c2VhcmNoX3R5cGU9bGV2ZWwmdmFsdWVfcGF0aD1QNDdLSEM3NDg0JnNpPXByb21vJnBhZ2U9MRYCAgEPFgIfBAUYfi9pbWFnZXMvZW4tZ2IvcHJvbW8uanBnZAIQDxYCHwIFRX4vYXJ0aWNsZXMuYXNweD9zZWFyY2hfdHlwZT1sZXZlbCZ2YWx1ZV9wYXRoPVA0N0tIQzc0ODQmc2k9bmV3JnBhZ2U9MRYCZg8WAh8EBRZ+L2ltYWdlcy9lbi1nYi9uZXcuanBnZAIRDxYCHwIFLX4vYXJ0aWNsZXMuYXNweD9zZWFyY2hfdHlwZT1oaXQmc2k9aGl0JnBhZ2U9MRYCZg8WAh8EBRZ+L2ltYWdlcy9lbi1nYi9oaXQuanBnZAIFDw8WAh8MZ2QWBgIDDxQrAAJkZGQCBQ8UKwACDxYEHgtfIURhdGFCb3VuZGcfAAISZGQWAmYPZBYMAgEPZBYCAgEPZBYIAgEPDxYCHwYFB0lENzAvUE1kZAIDDxYCHwYFLFZFTlRJTEFURUQgUkVGUklHRVJBVE9SIDcwMCBMLiAxIERPT1IgKEdOMi8xZAIFDxYCHwIFHn4vYXJ0aWNsZS5hc3B4P2FfaWQ9MTI1MTI4MzM4MhYCZg8WBB8FBSxWRU5USUxBVEVEIFJFRlJJR0VSQVRPUiA3MDAgTC4gMSBET09SIChHTjIvMR8EBWp+L21lcmNhdG9yX2RhdGEvanBlZy9HRC9DQVRBTE9HVUUgMjAxMC9SRUYgR0FTVFJPTk9STS9NRVJDQVRVUyBQSE9UT1MgMTMtMDctMTIvTk9VVkVBVSBET1NTSUVSL0lENzAtUE0uSlBHZAIHDw8WBh8IBRNsX3ByaXhfY2xpZW50IHByb21vHwYFDDEuNjI5LDAwIOKCrB8JAgJkZAICD2QWAgIBD2QWCAIBDw8WAh8GBQhEVDE3OC9QTWRkAgMPFgIfBgUoVkVOVC4gUkVGUklHLiBUQUJMRSAzIERPT1JTIEdOMS8xIDQwNSBMLmQCBQ8WAh8CBR5+L2FydGljbGUuYXNweD9hX2lkPTEyNTEyODM0MTYWAmYPFgQfBQUoVkVOVC4gUkVGUklHLiBUQUJMRSAzIERPT
1JTIEdOMS8xIDQwNSBMLh8EBVt+L21lcmNhdG9yX2RhdGEvanBlZy9HRC9DQVRBTE9HVUUgMjAxMC9SRUYgR0FTVFJPTk9STS9NRVJDQVRVUyBQSE9UT1MgMTMtMDctMTIvRFQxNzgtUE0uSlBHZAIHDw8WBh8IBRNsX3ByaXhfY2xpZW50IHByb21vHwYFDDEuOTA5LDAwIOKCrB8JAgJkZAIDD2QWAgIBD2QWCAIBDw8WAh8GBQhBUDFOL0w4NmRkAgMPFgIfBgUrUkVGUklHRVIuIDg1MEwuIDEgRC4gNDB4NjAweDQwMC8yMHggNjAweDgwMGQCBQ8WAh8CBR5+L2FydGljbGUuYXNweD9hX2lkPTEyNTEyODM2MDMWAmYPFgQfBQUrUkVGUklHRVIuIDg1MEwuIDEgRC4gNDB4NjAweDQwMC8yMHggNjAweDgwMB8EBU9+L21lcmNhdG9yX2RhdGEvanBlZy9HRC9DQVRBTE9HVUUgMjAxMC9QQVRJU1NFUklFIFBBSU4gQ0hPQ09MQVQvQVAxTi1MNjQtODYuSlBHZAIHDw8WBh8IBRNsX3ByaXhfY2xpZW50IHByb21vHwYFDDIuNjg5LDAwIOKCrB8JAgJkZAIED2QWAgIBD2QWCAIBDw8WAh8GBQdEQzUwMi1OZGQCAw8WAh8GBRxESVNILVdBU0hFUiBCQVNLRVQgNTAweDUwMG1tZAIFDxYCHwIFHn4vYXJ0aWNsZS5hc3B4P2FfaWQ9MTI1MTI4NDY0OBYCZg8WBB8FBRxESVNILVdBU0hFUiBCQVNLRVQgNTAweDUwMG1tHwQFQ34vbWVyY2F0b3JfZGF0YS9qcGVnL0dEL0NBVEFMT0dVRSAyMDEwL0xBVkFHRS9GQVNUIFdBU0gvREM1MDItTi5KUEdkAgcPDxYGHwgFE2xfcHJpeF9jbGllbnQgcHJvbW8fBgUMMS41ODksMDAg4oKsHwkCAmRkAgUPZBYCAgEPZBYIAgEPDxYCHwYFB0VGUC80NFJkZAIDDxYCHwYFIUVMRUNUUklDIE9WRU4gMnggNCBQSVpaQVMgMiBST09NU2QCBQ8WAh8CBR5+L2FydGljbGUuYXNweD9hX2lkPTEyNTEyNzgzMDcWAmYPFgQfBQUhRUxFQ1RSSUMgT1ZFTiAyeCA0IFBJWlpBUyAyIFJPT01THwQFTX4vbWVyY2F0b3JfZGF0YS9qcGVnL0dEL0NBVEFMT0dVRSAyMDEwL1BJWlpBIEVUIFBBU1RBL1JVU1RJQyBMSU5FL0VGUC02NlIuSlBHZAIHDw8WBh8IBRNsX3ByaXhfY2xpZW50IHByb21vHwYFDDEuNjI1LDAwIOKCrB8JAgJkZAIGD2QWAgIBD2QWCAIBDw8WAh8GBQVESzctMmRkAgMPFgIfBgUjSE9PRCBESVNIV0FTSEVSLCAgQkFTS0VUIDUwMHg1MDAgTU1kAgUPFgIfAgUefi9hcnRpY2xlLmFzcHg/YV9pZD0xMjUxMjc1NDA1FgJmDxYEHwUFI0hPT0QgRElTSFdBU0hFUiwgIEJBU0tFVCA1MDB4NTAwIE1NHwQFQX4vbWVyY2F0b3JfZGF0YS9qcGVnL0dEL0NBVEFMT0dVRSAyMDEwL0xBVkFHRS9GQVNUIFdBU0gvREs3LTIuSlBHZAIHDw8WBh8IBRNsX3ByaXhfY2xpZW50IHByb21vHwYFDDIuNTM2LDAwIOKCrB8JAgJkZAIHDxQrAAJkZGQCBw9kFgQCAQ9kFgJmD2QWCgIFDxYCHwxoZAIJDxYCHwxoZAINDxYCHwxoZAIRDw8WBB8GBQxPcGVuIC8gQ2xvc2UfDGhkZAITDw8WAh8MaGRkAgMPPCsACQBkGAMFKWN0bDAwJGNwaF9jZW50ZXIkZHBfYXJ0aWNsZXNfcmF5b25fYm90dG9tDxQrAARkZAIGAhJkBSNjdGwwMCRjcGhfY2VudGVyJGx2X2FydGljbGVzX
3JheW9ucw88KwAKAgc8KwAGAAgCEmQFJmN0bDAwJGNwaF9jZW50ZXIkZHBfYXJ0aWNsZXNfcmF5b25fdG9wDxQrAARkZAIGAhJkzw5eBCgUF6HQH+o5L7mrNloYe3w=', 'ctl00$ToolkitScriptManager1': 'ctl00$cph_center$menu_left1$up_login|ctl00$cph_center$menu_left1$lb_login', 'ctl00$cph_center$menu_left1$tb_login': '******', 'ctl00$cph_center$menu_left1$tb_password': '******', 'ctl00$ddl_search_type': 'Reference', 'ctl00_ToolkitScriptManager1HiddenField': ';;AjaxControlToolkit, Version=3.5.40412.0, Culture=neutral, PublicKeyToken=28f01b0e84b6d53e:en-GB:1547e793-5b7e-48fe-8490-03a375b13a33:de1feab2:f2c8e708:720a52bf:f9cec9bc:589eaa30:698129cf:7a92f56c:4a2c8239;', 'hiddenInputToUpdateATBuffer_CommonToolkitScripts': '1' } if self.spider.login('http://www.diamond-europe.com/rayons.aspx', params) is not None: return True return False def scrapBertos(self, retry=0): # self.downloadFile('http://s900.bertos.it/download.php?file=editorcms/documentazione/schede/scheda_13722600.pdf', 'a.pdf') # self.scrapSubCategory('http://s900.bertos.it/en/', '', None, None) # self.scrapProducts('http://s900.bertos.it/en/pasta_cookers/', '', '', None, None) # return self.notifyProduct.emit( '<font color=green><b>Try to get all language links.</b></font>') self.logger.debug(self.mainUrl) data = self.spider.fetchData(self.mainUrl) if data and len(data) > 0: data = self.regex.reduceNewLine(data) data = self.regex.reduceBlankSpace(data) languages = self.regex.getAllSearchedData( '(?i)<div class="[^"]*"><a href="([^"]*)"\s*?class="boxalingua">([^<]*)</a>', data) if languages and len(languages) > 0: self.logger.debug('Total languages: %s' % str(len(languages))) self.notifyProduct.emit('<b>Total languages found[%s]</b>' % str(len(languages))) for language in languages: self.totalProducts = 0 url = language[0] # if str(language[1]).lower() != 'en': # continue urlChunk = self.spider.fetchData(url) if urlChunk and len(urlChunk) > 0: urlChunk = self.regex.reduceNewLine(urlChunk) urlChunk = self.regex.reduceBlankSpace(urlChunk) url = 
self.regex.getSearchedData( '(?i)<a href="([^"]*)" onmouseover="vedi_po_cat\(2\)\s*?"', urlChunk) csvFile = str( language[1].strip()).lower() + '_' + 'bertos.csv' dupCsvReader = Csv() dupCsvRows = dupCsvReader.readCsvRow(csvFile) csvWriter = Csv(csvFile) if self.csvHeader not in dupCsvRows: dupCsvRows.append(self.csvHeader) csvWriter.writeCsvRow(self.csvHeader) self.notifyProduct.emit( '<font color=green><b>Try to get data for language [%s].</b></font>' % language[1]) self.scrapCategory(url, dupCsvRows, csvWriter) self.notifyProduct.emit( '<font color=red><b>===== Finish scraping data for [%s] =====</b></font><br /><br />' % language[1]) else: if retry < 5: return self.scrapBertos(retry + 1) def scrapCategory(self, mainUrl, dupCsvRows, csvWriter): url = mainUrl self.logger.debug('Main URL: ' + url) self.notifyProduct.emit( '<font color=green><b>Main URL: %s</b></font>' % url) data = self.spider.fetchData(url) if data and len(data) > 0: data = self.regex.reduceNewLine(data) data = self.regex.reduceBlankSpace(data) data = self.regex.reduceNbsp(data) self.notifyProduct.emit('<b>Try to scrap all categories.</b>') categoryChunk = self.regex.getSearchedData( '(?i)<div id="contenuto1">(.*?)</div>\s*?</div>', data) if categoryChunk and len(categoryChunk) > 0: categories = self.regex.getAllSearchedData( '(?i)<a href="([^"]*)"[^>]*?>([^<]*)</a>', categoryChunk) if categories and len(categories) > 0: self.notifyProduct.emit( '<b>Total Categories Found: %s</b>' % str(len(categories))) for category in categories: categoryName = category[1].strip() self.scrapSubCategory( str(category[0]).strip(), categoryName, dupCsvRows, csvWriter) def scrapSubCategory(self, url, categoryName, dupCsvRows, csvWriter): self.logger.debug('Category URL: ' + url) self.notifyProduct.emit('<b>Try to scrap subcategories for: %s</b>' % categoryName) data = self.spider.fetchData(url) if data and len(data) > 0: data = self.regex.reduceNewLine(data) data = self.regex.reduceBlankSpace(data) subCategories = 
self.regex.getAllSearchedData( '(?i)<li\s*?><a href="([^"]*)" title="([^"]*)"', data) if subCategories and len(subCategories) > 0: self.notifyProduct.emit( '<font color=green><b>Total subcategories found %s.</b></font>' % str(len(subCategories))) for subCategory in subCategories: subCategoryName = subCategory[1].strip() self.scrapProducts(subCategory[0].strip(), categoryName, subCategoryName, dupCsvRows, csvWriter) def downloadFile(self, url, downloadPath, retry=0): print url self.notifyProduct.emit('<b>File URL: %s.</b>' % url) try: socket.setdefaulttimeout(10) opener = urllib2.build_opener(urllib2.HTTPRedirectHandler(), urllib2.HTTPHandler(debuglevel=0), urllib2.HTTPSHandler(debuglevel=0)) opener.addheaders = [( 'User-Agent', 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:14.0) Gecko/20100101 Firefox/14.0.1' )] urllib2.install_opener(opener) # resp = opener.open(url, timeout=30) # resp = urllib2.urlopen(url, timeout=30) resp = None try: # resp = urllib.urlopen(url) resp = opener.open(url, timeout=30) except Exception, x: print x if resp is None: return False # if resp.info()['Connection'] == 'close' or resp.getcode() != 200: # if retry < 3: # self.notifyProduct.emit('<font color=red><b>Failed to download file. Retrying...</b></font>') # return self.downloadFile(url, downloadPath, retry + 1) # else: # self.notifyProduct.emit('<font color=red><b>Failed to download file after 3 retry.</b></font>') # return print resp.info() print 'info.......' contentLength = resp.info()['Content-Length'] contentLength = self.regex.getSearchedData('(?i)^(\d+)', contentLength) totalSize = float(contentLength) directory = os.path.dirname(downloadPath) if not os.path.exists(directory): try: os.makedirs(directory) except Exception, x: print x dl_file = open(downloadPath, 'wb') currentSize = 0 CHUNK_SIZE = 32768 totalSizeKB = totalSize / 1024 if totalSize > 0 else totalSize print 'everything ok............' 
while True: data = None try: data = resp.read(CHUNK_SIZE) except Exception, x: print x if not data: break currentSize += len(data) dl_file.write(data) print('============> ' + \ str(round(float(currentSize * 100) / totalSize, 2)) + \ '% of ' + str(totalSize) + ' bytes') notifyDl = '===> Downloaded ' + str( round(float(currentSize * 100) / totalSize, 2)) + '% of ' + str(totalSizeKB) + ' KB.' self.notifyProduct.emit('<b>%s</b>' % notifyDl) if currentSize >= totalSize: dl_file.close() return True
class CsProduct(QThread):
    """Background scraper for the cs-catering-equipment.co.uk catalogue.

    Starting from ``mainUrl`` the thread follows the top-level menu links,
    then three further levels of category links, then each category's
    product listing, and finally every product page.  One CSV row per
    product is written to ``cs_product.csv`` and progress messages (HTML
    snippets) are published through the ``notifyProduct`` signal.
    """

    notifyProduct = pyqtSignal(object)

    CSV_FILE = 'cs_product.csv'
    # Column order must match the row assembled in scrapProductDetails().
    CSV_HEADER = ['URL', 'Product Code', 'Product Name', 'Manufacturer',
                  'List Price', 'Product Price', 'Discount',
                  'Product Short Description', 'Product Long Description',
                  'Product Technical Specifications', 'Warranty', 'Delivery',
                  'Product Image', 'Category 1', 'Category 2', 'Category 3',
                  'Category 4', 'Brand Image']
    # Link pattern shared by the three sub-category levels.
    CATEGORY_LINK_PATTERN = '(?i)<li> <a href="([^"]*)" title="([^"]*)"[^>]*?>[^<]*?</a> </li>'
    IMAGE_NAME_PATTERN = '(?i)/([a-zA-Z0-9-_.]*)$'

    def __init__(self):
        QThread.__init__(self)
        self.logger = LogManager(__name__)
        self.spider = Spider()
        self.regex = Regex()
        # Load the already-written rows before the writer is created; the
        # result is used below as the collection of product URLs to skip.
        # NOTE(review): presumably readCsvRow(file, 0) returns the values of
        # column 0 as a list -- confirm against the Csv helper.
        dupCsvReader = Csv()
        self.dupCsvRows = dupCsvReader.readCsvRow(self.CSV_FILE, 0)
        self.csvWriter = Csv(self.CSV_FILE)
        self.mainUrl = 'http://www.cs-catering-equipment.co.uk/'
        self.utils = Utils()
        # Write the header only when the file does not carry one yet, so a
        # resumed run does not add another header row.
        if self.CSV_HEADER[0] not in self.dupCsvRows:
            self.csvWriter.writeCsvRow(self.CSV_HEADER)
            self.dupCsvRows.append(self.CSV_HEADER[0])
        self.totalProducts = 0

    def run(self):
        """Thread entry point: scrape everything, then announce completion."""
        self.scrapProduct()
        self.notifyProduct.emit('<font color=red><b>Finished Scraping All products.</b></font>')

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------
    def _fetchPage(self, url):
        """Fetch ``url`` and pass it through the Regex newline/blank reducers.

        Returns None when the spider delivered nothing, so callers can bail
        out instead of handing an empty response to the regex helpers.
        """
        data = self.spider.fetchData(url)
        if not data:
            self.logger.debug('No data received from: ' + url)
            return None
        data = self.regex.reduceNewLine(data)
        return self.regex.reduceBlankSpace(data)

    def _fetchSubCategories(self, level, url, categoryName):
        """Announce, fetch and parse one category page.

        Returns the ``(href, title)`` matches of CATEGORY_LINK_PATTERN, or an
        empty list when the page could not be fetched or has no such links.
        """
        self.logger.debug('Category %d URL: %s' % (level, url))
        self.notifyProduct.emit('<b>Try to scrap all categories under Category[%s]</b>' % categoryName)
        self.notifyProduct.emit('<font color=green><b>Category URL: %s</b></font>' % url)
        data = self._fetchPage(url)
        if data is None:
            return []
        return self.regex.getAllSearchedData(self.CATEGORY_LINK_PATTERN, data) or []

    def _searchWithin(self, chunkPattern, valuePattern, data):
        """Find ``chunkPattern`` in ``data``, then ``valuePattern`` inside it.

        Returns '' when the outer chunk is missing.
        """
        chunk = self.regex.getSearchedData(chunkPattern, data)
        if not chunk:
            return ''
        return self.regex.getSearchedData(valuePattern, chunk)

    def _downloadImage(self, imageUrl, folder, label):
        """Download ``imageUrl`` into ``folder`` and return the file name.

        ``label`` ('Product' / 'Brand') only appears in the progress message.
        Returns '' when there is no URL; nothing is downloaded when no file
        name can be derived from the URL.
        """
        if not imageUrl:
            return ''
        imageName = self.regex.getSearchedData(self.IMAGE_NAME_PATTERN, imageUrl)
        if imageName:
            self.notifyProduct.emit('<b>Downloading %s Image [%s]. Please wait...</b>' % (label, imageName))
            self.utils.downloadFile(imageUrl, folder + '/' + imageName)
        return imageName

    # ------------------------------------------------------------------
    # Category traversal
    # ------------------------------------------------------------------
    def scrapProduct(self):
        """Read the main page and descend into every top-level category."""
        self.logger.debug('Main URL: ' + self.mainUrl)
        self.notifyProduct.emit('<font color=green><b>Main URL: %s</b></font>' % self.mainUrl)
        data = self._fetchPage(self.mainUrl)
        if data is None:
            return
        self.notifyProduct.emit('<b>Try to scrap all categories.</b>')
        categories = self.regex.getAllSearchedData(
            '(?i)<a href="([^"]*)" class="level-top" title="([^"]*)"', data)
        if not categories:
            return
        self.notifyProduct.emit('<b>Total Categories Found: %s</b>' % str(len(categories)))
        for category in categories:
            category1Name = unicode(category[1]).strip()
            self.scrapCategory1Data(str(category[0]).strip(), category1Name)

    def scrapCategory1Data(self, url, category1Name):
        """Scrape the level-2 categories listed on a level-1 category page."""
        categories = self._fetchSubCategories(1, url, category1Name)
        if categories:
            self.notifyProduct.emit('<b>Total Categories Found: %s</b>' % str(len(categories)))
        for category in categories:
            self.scrapCategory2Data(category[0], category1Name, category[1])

    def scrapCategory2Data(self, url, category1Name, category2Name):
        """Scrape the level-3 categories listed on a level-2 category page."""
        for category in self._fetchSubCategories(2, url, category2Name):
            # Logged instead of printed: printing scraped unicode can raise
            # UnicodeEncodeError on consoles that are not UTF-8.
            self.logger.debug('category2: ' + category[0])
            self.scrapCategory3Data(category[0], category1Name, category2Name, category[1])

    def scrapCategory3Data(self, url, category1Name, category2Name, category3Name):
        """Scrape the level-4 categories listed on a level-3 category page."""
        for category in self._fetchSubCategories(3, url, category3Name):
            self.logger.debug(unicode([category1Name, category2Name, category3Name, category[1]]))
            self.scrapProductsDetails(category[0], category1Name, category2Name, category3Name, category[1])

    # ------------------------------------------------------------------
    # Product listing and product page
    # ------------------------------------------------------------------
    def scrapProductsDetails(self, url, category1Name, category2Name, category3Name, category4Name):
        """Walk the product listing of a level-4 category.

        The listing is requested with ``?limit=10000&mode=list`` appended to
        ``url``.  Products whose detail URL is already in ``dupCsvRows`` are
        skipped; every other product is passed to scrapProductDetails().
        """
        self.logger.debug('Product Details URL: ' + url)
        self.notifyProduct.emit('<b>Try to scrap all products under Category[%s]</b>' % category4Name)
        self.notifyProduct.emit('<font color=green><b>Category URL: %s</b></font>' % url)
        data = self._fetchPage(url + '?limit=10000&mode=list')
        if data is None:
            return
        products = self.regex.getAllSearchedData('(?i)<div class="listing-item[^"]*?">(.*?)</div>', data)
        if not products:
            return
        self.totalProducts += len(products)
        self.notifyProduct.emit(
            '<font color=green><b>Total Products Found [%s]</b></font>' % unicode(self.totalProducts))
        for product in products:
            productDetailUrl = self.regex.getSearchedData('(?i)<a href="([^"]*)"', product)
            if not productDetailUrl:
                # Listing block without a link: nothing to follow.
                continue
            if productDetailUrl in self.dupCsvRows:
                self.notifyProduct.emit(
                    '<font color=green><b>Already Exists This Product Under Category[%s]. Skip It.</b></font>'
                    % category4Name)
                continue
            self.scrapProductDetails(productDetailUrl, category1Name, category2Name, category3Name,
                                     category4Name)

    def scrapProductDetails(self, url, category1Name, category2Name, category3Name, category4Name):
        """Scrape one product page, download its images and write its CSV row.

        Does nothing when the page cannot be fetched.  After the row is
        written the URL is added to ``dupCsvRows`` so the same product is not
        scraped again later in this run.
        """
        self.logger.debug('Product Detail URL: ' + url)
        self.notifyProduct.emit('<b>Try to scrap product details under Category[%s]</b>' % category4Name)
        self.notifyProduct.emit('<font color=green><b>Product Detail URL: %s</b></font>' % url)
        data = self._fetchPage(url)
        if data is None:
            return

        search = self.regex.getSearchedData
        manufacturer = search(
            '(?i)<span class="manufacturer-box-label">Manufacturer:</span>([^<]*)</p>', data)
        productCode = search(
            '(?i)<span class="manufacturer-box-label">Model No:</span>([^<]*)</p>', data)
        productName = search('(?i)<div class="product-name"> <h1>([^<]*)</h1>', data)
        productTechnicalDesc = search('(?i)<div class="product-short-description">([^<]*)</div>', data)

        # The specs block holds a lead paragraph (short description) and a
        # bullet list (long description, one bullet per line).
        productShortDesc = ''
        productFullDesc = ''
        productDescriptions = search('(?i)<div class="product-specs">(.*?)</div>', data)
        if productDescriptions:
            self.logger.debug('desc: ' + productDescriptions)
            productShortDesc = search('(?i)<p>(.*?)</p>', productDescriptions)
            bullets = self.regex.getAllSearchedData('(?i)<li>([^<]*)</li>', productDescriptions)
            productFullDesc = '\n'.join(bullets) if bullets else ''

        listPrice = self._searchWithin(
            '(?i)<div class="rrp-price regular-price">(.*?)</div>', '(?i)([0-9,.]+)', data)
        savePrice = self._searchWithin(
            '(?i)<div class="regular-price saving-price">(.*?)</div>', '(?i)([0-9%]+)', data)
        price = self._searchWithin(
            r'(?i)<div class="[^"]*" id="product-price-\d+">(.*?)</div>', '(?i)([0-9,.]+)', data)
        delivery = self._searchWithin(
            '(?i)<div class="delivery">(.*?)</div>', '(?i)<p>([^<]*)</p>', data)
        warranty = self._searchWithin(
            '(?i)<div class="warranty">(.*?)</div>', '(?i)<p>([^<]*)</p>', data)

        # Download and save the product image.
        productImageUrl = search(
            '(?i)src="(http://assets.cs-catering-equipment.co.uk/media/catalog/product/cache/1/image/256x/[^"]*)"',
            data)
        productImage = self._downloadImage(productImageUrl, 'product_image', 'Product')

        # Download and save the brand image; the 'logo/' path segment is
        # dropped from the URL before downloading.
        brandImageUrl = search(
            '(?i)<div class="manufacturer-box-left"><a href="[^"]*"[^>]*?><img src="([^"]*)"', data)
        if brandImageUrl:
            brandImageUrl = self.regex.replaceData('(?i)logo/', '', brandImageUrl)
        brandImage = self._downloadImage(brandImageUrl, 'brand_image', 'Brand')

        csvData = [url, productCode, productName, manufacturer, listPrice, price, savePrice,
                   productShortDesc, productFullDesc, productTechnicalDesc, warranty, delivery,
                   productImage, category1Name, category2Name, category3Name, category4Name,
                   brandImage]
        self.csvWriter.writeCsvRow(csvData)
        # Keep the duplicate index in step with what has been written.
        self.dupCsvRows.append(url)
        self.logger.debug(unicode(csvData))
        self.notifyProduct.emit('<b>Product Details: %s</b>' % unicode(csvData))
class WebTable(): def __init__(self): self.logger = LogManager(__name__) self.spider = Spider() self.browser = BrowserUtil() self.regex = Regex() self.utils = Utils() self.csvHeader = ['Category', 'Sub Category 1', 'Sub Category 2', 'Product Code', 'Product Name', 'Product ShortName', 'Product Description', 'List Price', 'Vendor Price', 'Availability', 'Power', 'Size', 'KW', 'Weight(kg)', 'Other Tech', 'Pdf File', 'Image File'] self.totalProducts = 0 def scrapData(self): postParams = {'__ASYNCPOST': 'true', '__EVENTVALIDATION': '/wEWWwKSuN/3AgLi8PP9DgKzpIWvCQKQ3IFsAve1x5EPAu7Dza4GArPM1qoEAvjBhsQDAvjB6qkLAvjB/o4CAvjBwtMJApP48MoOApP4xK8GApP46EYCk/j8qwgCk/jA8AcCk/jU1Q4Ck/i4uQYCk/iMng0Ck/iQ4wQCk/jkyAMC15uNvgYC15uRgw0C15uluggC15uJnwcC15ud5A4C15vhyQUC15v1rg0C15vZ8wQC15ut1wMC15uxvAsC6rKvkwgC6rKz+AcC6rLHkAIC6rKr9AkC6rK/WQLqsoO+CALqspeDBwLqsvvoDgLqss/NBQLqstOSDQK0wsnaCgL4+7LBAQLP5JaqAQKc4P/CDQLl7berDgLurP6CDALvn+2eCwK4pIGBDwKvytzABgLTu7vHBgKFmtaAAwKn0anxCwKZwpi3CgLjlM+OAwLCoMjqAQLWq7m2BALlnqSNBwKbwPKfBgL5j7vvBAKRy8fpCAKI3rXQBwLBhpnRCwLgqNqjBQLEmsPUBgL26MCGDwL0wbKZDgL16ePjAQLhraHjBAKx7Y+rCwKu+uSNDQKDp4fFBwLnmpaQCQKU2LWMCALev//ADgK9osaHBALArtXWDgKhp8iCAwKCs5DBAgKPnOP3DwK0uumDDwKJ4eXWBAKK+5r7AwLj4sWCAQKJgZPYBQL2mPvKBgL/hob0BAKsyvbZDAKSoqqWDwLSwpnTCALN797vDL/8819r5pdL6i1kQizMsBPt83oZ', '__VIEWSTATE': 
'/wEPDwUKMTU5MjIyNTQ2OQ9kFgICAw9kFgQCAQ9kFgJmD2QWAgIBD2QWAmYPZBYCZg9kFgYCBw8QZBAVIwstQWxsIFllYXJzLQQyMDEzBDIwMTIEMjAxMQQyMDEwBDIwMDkEMjAwOAQyMDA3BDIwMDYEMjAwNQQyMDA0BDIwMDMEMjAwMgQyMDAxBDIwMDAEMTk5OQQxOTk4BDE5OTcEMTk5NgQxOTk1BDE5OTQEMTk5MwQxOTkyBDE5OTEEMTk5MAQxOTg5BDE5ODgEMTk4NwQxOTg2BDE5ODUEMTk4NAQxOTgzBDE5ODIEMTk4MQQxOTgwFSMLLUFsbCBZZWFycy0EMjAxMwQyMDEyBDIwMTEEMjAxMAQyMDA5BDIwMDgEMjAwNwQyMDA2BDIwMDUEMjAwNAQyMDAzBDIwMDIEMjAwMQQyMDAwBDE5OTkEMTk5OAQxOTk3BDE5OTYEMTk5NQQxOTk0BDE5OTMEMTk5MgQxOTkxBDE5OTAEMTk4OQQxOTg4BDE5ODcEMTk4NgQxOTg1BDE5ODQEMTk4MwQxOTgyBDE5ODEEMTk4MBQrAyNnZ2dnZ2dnZ2dnZ2dnZ2dnZ2dnZ2dnZ2dnZ2dnZ2dnZ2dnZ2RkAgkPZBYCZg9kFgICAQ8QDxYGHg1EYXRhVGV4dEZpZWxkBQ5wcm9qX2NvZGVfbmFtZR4ORGF0YVZhbHVlRmllbGQFCXByb2pfY29kZR4LXyFEYXRhQm91bmRnZBAVCQ4tQWxsIENhdGVnb3J5LQtDb2FsIE1pbmluZxNJbmR1c3RyaWFsIFByb2plY3RzMUluZnJhc3RydWN0dXJlIGFuZCBNaXNjZWxsYW5lb3VzIFByb2plY3RzICAmICBDUloPTWluaW5nIFByb2plY3RzMk5ldyBDb25zdHJ1Y3Rpb24gUHJvamVjdHMgYW5kICBJbmR1c3RyaWFsICBFc3RhdGVzEU51Y2xlYXIgIFByb2plY3RzJ1JpdmVyIFZhbGxleSBhbmQgSHlkcm9lbGVjdHJpYyBQcm9qZWN0cxBUaGVybWFsIFByb2plY3RzFQkOLUFsbCBDYXRlZ29yeS0EQ01JTgNJTkQDTUlTA01JTgNOQ1ADTlVDA1JJVgNUSEUUKwMJZ2dnZ2dnZ2dnZGQCCw8QDxYGHwAFCnN0YXRlX25hbWUfAQUKc3RhdGVfbmFtZR8CZ2QQFSULLUFsbCBTdGF0ZS0TQW5kYW1hbiBhbmQgTmljb2Jhcg5BbmRocmEgUHJhZGVzaBFBcnVuYWNoYWwgUHJhZGVzaAVBc3NhbQVCaWhhcgpDaGFuZGlnYXJoDENoaGF0dGlzZ2FyaBREYWRhciAmIE5hZ2FyIEhhdmVsaQ1EYW1hbiBhbmQgRGl1BURlbGhpA0dvYQdHdWphcmF0B0hhcnlhbmEQSGltYWNoYWwgUHJhZGVzaBFKYW1tdSBhbmQgS2FzaG1pcglKaGFya2hhbmQJS2FybmF0YWthBktlcmFsYQtMYWtzaGFkd2VlcA5NYWRoeWEgUHJhZGVzaAtNYWhhcmFzaHRyYQdNYW5pcHVyCU1lZ2hhbGF5YQdNaXpvcmFtCE5hZ2FsYW5kBk9ycmlzYQZPdGhlcnMLUG9uZGljaGVycnkGUHVuamFiCVJhamFzdGhhbgZTaWtraW0KVGFtaWwgTmFkdQdUcmlwdXJhDVV0dGFyIFByYWRlc2gLVXR0YXJha2hhbmQLV2VzdCBCZW5nYWwVJQstQWxsIFN0YXRlLRNBbmRhbWFuIGFuZCBOaWNvYmFyDkFuZGhyYSBQcmFkZXNoEUFydW5hY2hhbCBQcmFkZXNoBUFzc2FtBUJpaGFyCkNoYW5kaWdhcmgMQ2hoYXR0aXNnYXJoFERhZGFyICYgTmFnYXIgSGF2ZWxpDURhbWFuIGFuZCBEaXUFRGVsaGkDR29hB0d1amFyYXQHSGFyeWFuYRBIaW1hY2hhbCBQcmFkZXNoEUphbW1
1IGFuZCBLYXNobWlyCUpoYXJraGFuZAlLYXJuYXRha2EGS2VyYWxhC0xha3NoYWR3ZWVwDk1hZGh5YSBQcmFkZXNoC01haGFyYXNodHJhB01hbmlwdXIJTWVnaGFsYXlhB01pem9yYW0ITmFnYWxhbmQGT3JyaXNhBk90aGVycwtQb25kaWNoZXJyeQZQdW5qYWIJUmFqYXN0aGFuBlNpa2tpbQpUYW1pbCBOYWR1B1RyaXB1cmENVXR0YXIgUHJhZGVzaAtVdHRhcmFraGFuZAtXZXN0IEJlbmdhbBQrAyVnZ2dnZ2dnZ2dnZ2dnZ2dnZ2dnZ2dnZ2dnZ2dnZ2dnZ2dnZ2dnZGQCBQ9kFgJmD2QWAgIBD2QWAmYPZBYCZg9kFgICAQ88KwANAGQYAgUeX19Db250cm9sc1JlcXVpcmVQb3N0QmFja0tleV9fFgIFDEltYWdlQnV0dG9uMQUCc3MFCUdyaWRWaWV3MQ9nZJ2a7Ttf3vWdGuuLrnT2LMPjQW5x', 'btn': 'Search', 'ddlcategory': 'MIN', 'ddlstate': 'Gujarat', 'ddlstatus': 'UPEC', 'ddlyear': '2011', 'textbox2': '', 'ww': 'UpdatePanel3'} data = self.spider.fetchData('http://environmentclearance.nic.in/Search.aspx', postParams) print data data = self.spider.fetchData1('http://environmentclearance.nic.in/Search.aspx') print data # soup = BeautifulSoup(data) def scrapSubCat1(self, url): print 'url: ', url data = self.spider.fetchData(url) soup = BeautifulSoup(data) for cat in soup.find_all('td', {"class": re.compile("item_level")}): c = cat.find("a", {"href": re.compile('rayons\.aspx\?value_path=.*?$'), "id": re.compile('ctl00_cph_center_dl_level_ctl\d+_a_level.*?$')}) if c: print c.string.strip() self.scrapSubCat2('http://www.diamond-europe.com/' + c.get("href")) def scrapSubCat2(self, url): print 'url1: ' + url data = self.spider.fetchData(url) soup = BeautifulSoup(data) for cat in soup.find_all('div', {'class': re.compile('bg_ombre')}): self.scrapProducts('http://www.diamond-europe.com/' + cat.find('a').get('href')) def scrapProducts(self, url): print 'url2', url data = self.spider.fetchData(url) soup = BeautifulSoup(data) results = soup.find('table', {'id': 'results'}) if results: for row in results.find_all('tr'): colRef = row.find('td', {'class': 'reference'}) if colRef: prCode = colRef.find('span', {'class': 'imp'}) price1 = colRef.find('span', {'id': re.compile('ctl\d+_cph_center_r_articles_ctl\d+_l_prix_barre$')}) price2 = colRef.find('span', 
{'class', 'promo'}) print prCode.string.strip() print price1.string.strip() print price2.string.strip() coldesc = row.find('td', {'class': re.compile('description.*?$')}) if coldesc: pr = coldesc.find('a') print pr.string.strip() self.scrapProductDetails('http://www.diamond-europe.com/' + pr.get('href')) def scrapProductDetails(self, url): print 'Detail url: ' + url data = self.spider.fetchData(url) soup = BeautifulSoup(data) productDescS = soup.find('span', 'h1_nom_article') print productDescS.string.strip() productDesc = soup.find('div', {'id': 'article_right'}) print productDesc.text.strip() specs = soup.find('ul', {'id': 'spec_tech'}) if specs: print specs.contents """ __ASYNCPOST true __EVENTARGUMENT __EVENTTARGET ctl00$cph_center$menu_left1$lb_login __EVENTVALIDATION /wEWEwKOk7qrBAKG4eyLBALGw+PfBwK7jI7eDQL/2fqXBwLH9rmjDwLG2KLDCAKCvreACALPgYP1DQKqvLeACAKKtP7+DAL07MD3CwLksZZaAuSxmloCicn43Q8Cisn43Q8C/Iag2AMClcHvlQgCyNGw1Ax/PwzywfL/ooD/FU51memYxQ1U+Q== __LASTFOCUS __SCROLLPOSITIONX 0 __SCROLLPOSITIONY 0 __VIEWSTATE 
/wEPDwUINzkzMzQ5OTcPZBYCZg9kFgICAw9kFgICAQ9kFhICAw8WAh4LXyFJdGVtQ291bnQCBhYMZg9kFgICAQ8WBh4FdGl0bGUFB0VuZ2xpc2geBGhyZWYFDC9yYXlvbnMuYXNweB4Hb25jbGljawUcc2V0Q29va2llKCdsYW5ndWUnLCAnZW4tZ2InKRYCZg8WBB4Dc3JjBQ9+L2ltYWdlcy9lbi5wbmceA2FsdAUHRW5nbGlzaGQCAQ9kFgICAQ8WBh8BBQlGcmFuw6dhaXMfAgUZL3JheW9ucy5hc3B4P2xhbmd1ZT1mci1iZR8DBRxzZXRDb29raWUoJ2xhbmd1ZScsICdmci1iZScpFgJmDxYEHwQFD34vaW1hZ2VzL2ZyLnBuZx8FBQlGcmFuw6dhaXNkAgIPZBYCAgEPFgYfAQUHRGV1dHNjaB8CBRkvcmF5b25zLmFzcHg/bGFuZ3VlPWRlLWRlHwMFHHNldENvb2tpZSgnbGFuZ3VlJywgJ2RlLWRlJykWAmYPFgQfBAUPfi9pbWFnZXMvZGUucG5nHwUFB0RldXRzY2hkAgMPZBYCAgEPFgYfAQUKTmVkZXJsYW5kcx8CBRkvcmF5b25zLmFzcHg/bGFuZ3VlPW5sLWJlHwMFHHNldENvb2tpZSgnbGFuZ3VlJywgJ25sLWJlJykWAmYPFgQfBAUPfi9pbWFnZXMvbmwucG5nHwUFCk5lZGVybGFuZHNkAgQPZBYCAgEPFgYfAQUIRXNwYcOxb2wfAgUZL3JheW9ucy5hc3B4P2xhbmd1ZT1lcy1lcx8DBRxzZXRDb29raWUoJ2xhbmd1ZScsICdlcy1lcycpFgJmDxYEHwQFD34vaW1hZ2VzL2VzLnBuZx8FBQhFc3Bhw7FvbGQCBQ9kFgICAQ8WBh8BBQhJdGFsaWFubx8CBRkvcmF5b25zLmFzcHg/bGFuZ3VlPWl0LWl0HwMFHHNldENvb2tpZSgnbGFuZ3VlJywgJ2l0LWl0JykWAmYPFgQfBAUPfi9pbWFnZXMvaXQucG5nHwUFCEl0YWxpYW5vZAIFDw8WBB4EVGV4dAUESG9tZR4LTmF2aWdhdGVVcmwFDH4vaW5kZXguYXNweGRkAgcPDxYEHwYFB0RpYW1vbmQfBwUOfi9kaWFtb25kLmFzcHhkZAIJDw8WBB8GBQhTZXJ2aWNlcx8HBQ9+L3NlcnZpY2VzLmFzcHhkZAILDw8WCB8GBQhQcm9kdWN0cx8HBRR+L3JheW9ucy5hc3B4P3BhZ2U9MR4IQ3NzQ2xhc3MFB2N1cnJlbnQeBF8hU0ICAmRkAg0PDxYEHwYFBE5ld3MfBwULfi9uZXdzLmFzcHhkZAIPDw8WBB8GBQdDb250YWN0HwcFDn4vY29udGFjdC5hc3B4ZGQCEQ9kFgICAQ9kFgICAw9kFgJmD2QWBAIBDxBkZBYBZmQCBw8WBB4KQ29udGV4dEtleQUVUmVmZXJlbmNlfmVuLWdifkZhbHNlHg1Vc2VDb250ZXh0S2V5Z2QCEw9kFggCAQ9kFg4CAQ8PFgIeB1Zpc2libGVoZGQCAw9kFgJmD2QWAgIDDw8WAh8MZ2RkAgUPDxYCHwxoZGQCCRA8KwANAgAPFgIfDGhkDBQrABwFfDA6MCwwOjEsMDoyLDA6MywwOjQsMDo1LDA6NiwwOjcsMDo4LDA6OSwwOjEwLDA6MTEsMDoxMiwwOjEzLDA6MTQsMDoxNSwwOjE2LDA6MTcsMDoxOCwwOjE5LDA6MjAsMDoyMSwwOjIyLDA6MjMsMDoyNCwwOjI1LDA6MjYUKwACFgYfBgVyPGRpdiBjbGFzcz0ncHVjZV9tZW51X3JheW9uJyBzdHlsZT0nYmFja2dyb3VuZC1jb2xvcjojZmY0YzBiJz48L2Rpdj48c3BhbiBjbGFzcz0naXRlbV9tZW51X3JheW9uJz4tIENPT0tJTkc8L3NwYW4+HgdUb29sVGlwBQktIENP
T0tJTkcfBwUjfi9yYXlvbnMuYXNweD92YWx1ZV9wYXRoPVA0N0tIQzc0ODRkFCsAAhYGHwYFigE8ZGl2IGNsYXNzPSdwdWNlX21lbnVfcmF5b24nIHN0eWxlPSdiYWNrZ3JvdW5kLWNvbG9yOiNlMThjNDUnPjwvZGl2PjxzcGFuIGNsYXNzPSdpdGVtX21lbnVfcmF5b24nPi0gSE9UIFNOQUNLUyAtIFBBTklOSSAtIEZBU1QgRk9PRDwvc3Bhbj4fDQUhLSBIT1QgU05BQ0tTIC0gUEFOSU5JIC0gRkFTVCBGT09EHwcFI34vcmF5b25zLmFzcHg/dmFsdWVfcGF0aD04M0RDM0Y2Q0FEZBQrAAIWBh8GBZMBPGRpdiBjbGFzcz0ncHVjZV9tZW51X3JheW9uJyBzdHlsZT0nYmFja2dyb3VuZC1jb2xvcjojZWNhZTc1Jz48L2Rpdj48c3BhbiBjbGFzcz0naXRlbV9tZW51X3JheW9uJz4tIEZSRU5DSCBGUklFUyAtIFJPQVNUSU5HIC0gR1JJTExJTkcgJiBCQlE8L3NwYW4+Hw0FKi0gRlJFTkNIIEZSSUVTIC0gUk9BU1RJTkcgLSBHUklMTElORyAmIEJCUR8HBSN+L3JheW9ucy5hc3B4P3ZhbHVlX3BhdGg9RTY0NDk0MzdDM2QUKwACFgYfBgV4PGRpdiBjbGFzcz0ncHVjZV9tZW51X3JheW9uJyBzdHlsZT0nYmFja2dyb3VuZC1jb2xvcjojZjNjYmEzJz48L2Rpdj48c3BhbiBjbGFzcz0naXRlbV9tZW51X3JheW9uJz4tIEFTSUFOIENPT0tJTkc8L3NwYW4+Hw0FDy0gQVNJQU4gQ09PS0lORx8HBSN+L3JheW9ucy5hc3B4P3ZhbHVlX3BhdGg9OTc4QTE0QzhFNGQUKwACFgYfBgWJATxkaXYgY2xhc3M9J3B1Y2VfbWVudV9yYXlvbicgc3R5bGU9J2JhY2tncm91bmQtY29sb3I6I2ZiZTZkMSc+PC9kaXY+PHNwYW4gY2xhc3M9J2l0ZW1fbWVudV9yYXlvbic+LSBTVEVBTSAtIENPTlZFQ1RJT04gLSBNSUNST1dBVkU8L3NwYW4+Hw0FIC0gU1RFQU0gLSBDT05WRUNUSU9OIC0gTUlDUk9XQVZFHwcFI34vcmF5b25zLmFzcHg/dmFsdWVfcGF0aD02N0ZENkIzNjQ2ZBQrAAIWBh8GBXc8ZGl2IGNsYXNzPSdwdWNlX21lbnVfcmF5b24nIHN0eWxlPSdiYWNrZ3JvdW5kLWNvbG9yOiNmYzM0MjgnPjwvZGl2PjxzcGFuIGNsYXNzPSdpdGVtX21lbnVfcmF5b24nPi0gQ09PSyAmIENISUxMPC9zcGFuPh8NBQ4tIENPT0sgJiBDSElMTB8HBSN+L3JheW9ucy5hc3B4P3ZhbHVlX3BhdGg9TzBRTDhLSDA4VmQUKwACFgYfBgWNATxkaXYgY2xhc3M9J3B1Y2VfbWVudV9yYXlvbicgc3R5bGU9J2JhY2tncm91bmQtY29sb3I6I2UxN2Y1ZCc+PC9kaXY+PHNwYW4gY2xhc3M9J2l0ZW1fbWVudV9yYXlvbic+LSBSRUdFTkVSQVRJT04gLSBWQUNVVU0gLSBCQU5RVUVUSU5HPC9zcGFuPh8NBSQtIFJFR0VORVJBVElPTiAtIFZBQ1VVTSAtIEJBTlFVRVRJTkcfBwUjfi9yYXlvbnMuYXNweD92YWx1ZV9wYXRoPUU4NDM5Q0U0QTBkFCsAAhYGHwYFdzxkaXYgY2xhc3M9J3B1Y2VfbWVudV9yYXlvbicgc3R5bGU9J2JhY2tncm91bmQtY29sb3I6IzAyOTQ3ZSc+PC9kaXY+PHNwYW4gY2xhc3M9J2l0ZW1fbWVudV9yYXlvbic+LSBESVNIIFdBU0hFUlM8L3NwYW4+Hw0FDi0gRElTSCBXQVNIRVJTHwcFI34v
cmF5b25zLmFzcHg/dmFsdWVfcGF0aD03OE1YQk1KRkdLZBQrAAIWBh8GBXI8ZGl2IGNsYXNzPSdwdWNlX21lbnVfcmF5b24nIHN0eWxlPSdiYWNrZ3JvdW5kLWNvbG9yOiNhMDA4NmQnPjwvZGl2PjxzcGFuIGNsYXNzPSdpdGVtX21lbnVfcmF5b24nPi0gTEFVTkRSWTwvc3Bhbj4fDQUJLSBMQVVORFJZHwcFI34vcmF5b25zLmFzcHg/dmFsdWVfcGF0aD1TWjJOU1ZKUTc4ZBQrAAIWBh8GBYMBPGRpdiBjbGFzcz0ncHVjZV9tZW51X3JheW9uJyBzdHlsZT0nYmFja2dyb3VuZC1jb2xvcjojMDU3M2E1Jz48L2Rpdj48c3BhbiBjbGFzcz0naXRlbV9tZW51X3JheW9uJz4tIEdBU1RST05PUk0gUkVGUklHRVJBVElPTjwvc3Bhbj4fDQUaLSBHQVNUUk9OT1JNIFJFRlJJR0VSQVRJT04fBwUjfi9yYXlvbnMuYXNweD92YWx1ZV9wYXRoPUhTVkVROTZYRzRkFCsAAhYGHwYFeDxkaXYgY2xhc3M9J3B1Y2VfbWVudV9yYXlvbicgc3R5bGU9J2JhY2tncm91bmQtY29sb3I6IzAyYTBjNic+PC9kaXY+PHNwYW4gY2xhc3M9J2l0ZW1fbWVudV9yYXlvbic+LSBSRUZSSUdFUkFUSU9OPC9zcGFuPh8NBQ8tIFJFRlJJR0VSQVRJT04fBwUjfi9yYXlvbnMuYXNweD92YWx1ZV9wYXRoPVgxMzY3TTdEOVNkFCsAAhYGHwYFigE8ZGl2IGNsYXNzPSdwdWNlX21lbnVfcmF5b24nIHN0eWxlPSdiYWNrZ3JvdW5kLWNvbG9yOiM2Y2IyZGEnPjwvZGl2PjxzcGFuIGNsYXNzPSdpdGVtX21lbnVfcmF5b24nPi0gU0FORFdJQ0hFUyAtIFNBTEFERVMgLSBTVEFSVEVSUzwvc3Bhbj4fDQUhLSBTQU5EV0lDSEVTIC0gU0FMQURFUyAtIFNUQVJURVJTHwcFI34vcmF5b25zLmFzcHg/dmFsdWVfcGF0aD03REU4RUM0RTJDZBQrAAIWBh8GBXY8ZGl2IGNsYXNzPSdwdWNlX21lbnVfcmF5b24nIHN0eWxlPSdiYWNrZ3JvdW5kLWNvbG9yOiM5NWM3ZTUnPjwvZGl2PjxzcGFuIGNsYXNzPSdpdGVtX21lbnVfcmF5b24nPi0gV0lORSAtIEJFRVI8L3NwYW4+Hw0FDS0gV0lORSAtIEJFRVIfBwUjfi9yYXlvbnMuYXNweD92YWx1ZV9wYXRoPTZENDg3NDQzNEFkFCsAAhYGHwYFjAE8ZGl2IGNsYXNzPSdwdWNlX21lbnVfcmF5b24nIHN0eWxlPSdiYWNrZ3JvdW5kLWNvbG9yOiNiYmRiZjAnPjwvZGl2PjxzcGFuIGNsYXNzPSdpdGVtX21lbnVfcmF5b24nPi0gU09GVCBEUklOS1MgLSBBTENPSE9MIC0gQ09DS1RBSUxTPC9zcGFuPh8NBSMtIFNPRlQgRFJJTktTIC0gQUxDT0hPTCAtIENPQ0tUQUlMUx8HBSN+L3JheW9ucy5hc3B4P3ZhbHVlX3BhdGg9Q0RBRTQyMzRCRWQUKwACFgYfBgWHATxkaXYgY2xhc3M9J3B1Y2VfbWVudV9yYXlvbicgc3R5bGU9J2JhY2tncm91bmQtY29sb3I6IzY5N2RiOSc+PC9kaXY+PHNwYW4gY2xhc3M9J2l0ZW1fbWVudV9yYXlvbic+LSBJQ0UgQ1JFQU0gLSBTT1JCRVQgLSBHUkFOSVRBPC9zcGFuPh8NBR4tIElDRSBDUkVBTSAtIFNPUkJFVCAtIEdSQU5JVEEfBwUjfi9yYXlvbnMuYXNweD92YWx1ZV9wYXRoPTNUTlQwNkJYOTJkFCsAAhYGHwYFhwE8ZGl2IGNsYXNzPSdwdWNl
X21lbnVfcmF5b24nIHN0eWxlPSdiYWNrZ3JvdW5kLWNvbG9yOiNiMjI4MTQnPjwvZGl2PjxzcGFuIGNsYXNzPSdpdGVtX21lbnVfcmF5b24nPi0gU0VMRiBTRVJWSUNFIC0gQlVGRkVUIC1UQVBBUzwvc3Bhbj4fDQUeLSBTRUxGIFNFUlZJQ0UgLSBCVUZGRVQgLVRBUEFTHwcFI34vcmF5b25zLmFzcHg/dmFsdWVfcGF0aD0zT0tIWDA1NzFXZBQrAAIWBh8GBYYBPGRpdiBjbGFzcz0ncHVjZV9tZW51X3JheW9uJyBzdHlsZT0nYmFja2dyb3VuZC1jb2xvcjojZWQ5YTA1Jz48L2Rpdj48c3BhbiBjbGFzcz0naXRlbV9tZW51X3JheW9uJz4tIFBBU1RSWSAtIEJBS0VSWSAtIENIT0NPTEFURTwvc3Bhbj4fDQUdLSBQQVNUUlkgLSBCQUtFUlkgLSBDSE9DT0xBVEUfBwUjfi9yYXlvbnMuYXNweD92YWx1ZV9wYXRoPU41RjBUNVpWS1pkFCsAAhYGHwYFhQE8ZGl2IGNsYXNzPSdwdWNlX21lbnVfcmF5b24nIHN0eWxlPSdiYWNrZ3JvdW5kLWNvbG9yOiNhMzQ0YTgnPjwvZGl2PjxzcGFuIGNsYXNzPSdpdGVtX21lbnVfcmF5b24nPi0gTUVBVCAtIERFTElDQVRFU1NFTiAtIEZJU0g8L3NwYW4+Hw0FHC0gTUVBVCAtIERFTElDQVRFU1NFTiAtIEZJU0gfBwUjfi9yYXlvbnMuYXNweD92YWx1ZV9wYXRoPUE0MTFCODA3Q0FkFCsAAhYGHwYFhAE8ZGl2IGNsYXNzPSdwdWNlX21lbnVfcmF5b24nIHN0eWxlPSdiYWNrZ3JvdW5kLWNvbG9yOiNmZjAwMGYnPjwvZGl2PjxzcGFuIGNsYXNzPSdpdGVtX21lbnVfcmF5b24nPi0gUElaWkEgLSBQQVNUQSAtIFRBS0UgQVdBWTwvc3Bhbj4fDQUbLSBQSVpaQSAtIFBBU1RBIC0gVEFLRSBBV0FZHwcFI34vcmF5b25zLmFzcHg/dmFsdWVfcGF0aD01STZYNjZSNzYyZBQrAAIWBh8GBZwBPGRpdiBjbGFzcz0ncHVjZV9tZW51X3JheW9uJyBzdHlsZT0nYmFja2dyb3VuZC1jb2xvcjojYTY2YjExJz48L2Rpdj48c3BhbiBjbGFzcz0naXRlbV9tZW51X3JheW9uJz4tIENPRkZFRSBURUEgLSBWSUVOTkVTRSBQQVNUUklFUyAtSlVJQ0VTIE1JTEsgU0hBS0U8L3NwYW4+Hw0FMy0gQ09GRkVFIFRFQSAtIFZJRU5ORVNFIFBBU1RSSUVTIC1KVUlDRVMgTUlMSyBTSEFLRR8HBSN+L3JheW9ucy5hc3B4P3ZhbHVlX3BhdGg9MUZERkZQNUgzMmQUKwACFgYfBgV7PGRpdiBjbGFzcz0ncHVjZV9tZW51X3JheW9uJyBzdHlsZT0nYmFja2dyb3VuZC1jb2xvcjojYzBjYTBlJz48L2Rpdj48c3BhbiBjbGFzcz0naXRlbV9tZW51X3JheW9uJz4tIEZPT0QgUFJFUEFSQVRJT048L3NwYW4+Hw0FEi0gRk9PRCBQUkVQQVJBVElPTh8HBSN+L3JheW9ucy5hc3B4P3ZhbHVlX3BhdGg9NVFKNzQ0MzJTV2QUKwACFgYfBgV5PGRpdiBjbGFzcz0ncHVjZV9tZW51X3JheW9uJyBzdHlsZT0nYmFja2dyb3VuZC1jb2xvcjojNWQ2MzY3Jz48L2Rpdj48c3BhbiBjbGFzcz0naXRlbV9tZW51X3JheW9uJz4tIE5FVVRSQUwgLSBJTk9YPC9zcGFuPh8NBRAtIE5FVVRSQUwgLSBJTk9YHwcFI34vcmF5b25zLmFzcHg/dmFsdWVfcGF0aD1ISDI3OTg1Q1pUZBQrAAIWBh8GBX08
ZGl2IGNsYXNzPSdwdWNlX21lbnVfcmF5b24nIHN0eWxlPSdiYWNrZ3JvdW5kLWNvbG9yOiM0ZWJhYmMnPjwvZGl2PjxzcGFuIGNsYXNzPSdpdGVtX21lbnVfcmF5b24nPi0gQ0xFQU5JTkcgLSBIWUdJRU5FPC9zcGFuPh8NBRQtIENMRUFOSU5HIC0gSFlHSUVORR8HBSN+L3JheW9ucy5hc3B4P3ZhbHVlX3BhdGg9MU8wN09XMDA2M2QUKwACFgYfBgV/PGRpdiBjbGFzcz0ncHVjZV9tZW51X3JheW9uJyBzdHlsZT0nYmFja2dyb3VuZC1jb2xvcjojZmZiMjA1Jz48L2Rpdj48c3BhbiBjbGFzcz0naXRlbV9tZW51X3JheW9uJz4tIFZBQ1VVTSAmIFZFTlRJTEFUSU9OPC9zcGFuPh8NBRYtIFZBQ1VVTSAmIFZFTlRJTEFUSU9OHwcFI34vcmF5b25zLmFzcHg/dmFsdWVfcGF0aD0xSTRDQzcxM0hCZBQrAAIWBh8GBXg8ZGl2IGNsYXNzPSdwdWNlX21lbnVfcmF5b24nIHN0eWxlPSdiYWNrZ3JvdW5kLWNvbG9yOiNiMGIxYmInPjwvZGl2PjxzcGFuIGNsYXNzPSdpdGVtX21lbnVfcmF5b24nPi0gR04gQ09OVEFJTkVSUzwvc3Bhbj4fDQUPLSBHTiBDT05UQUlORVJTHwcFI34vcmF5b25zLmFzcHg/dmFsdWVfcGF0aD0yNDAxUzk1RDNHZBQrAAIWBh8GBY8BPGRpdiBjbGFzcz0ncHVjZV9tZW51X3JheW9uJyBzdHlsZT0nYmFja2dyb3VuZC1jb2xvcjojYjM4MzEwJz48L2Rpdj48c3BhbiBjbGFzcz0naXRlbV9tZW51X3JheW9uJz4tIERJTk5FUiBTRVJWSUNFIC0gRElTUExBWVMgLSBUUk9MTEVZUzwvc3Bhbj4fDQUmLSBESU5ORVIgU0VSVklDRSAtIERJU1BMQVlTIC0gVFJPTExFWVMfBwUjfi9yYXlvbnMuYXNweD92YWx1ZV9wYXRoPVg5VEY5REY0MzdkFCsAAhYGHwYFjwE8ZGl2IGNsYXNzPSdwdWNlX21lbnVfcmF5b24nIHN0eWxlPSdiYWNrZ3JvdW5kLWNvbG9yOiNmZmNjMDMnPjwvZGl2PjxzcGFuIGNsYXNzPSdpdGVtX21lbnVfcmF5b24nPi0gUkVDRVBUSU9OIC0gUk9PTSBTRVJWSUNFIC0gQlJFQUtGQVNUPC9zcGFuPh8NBSYtIFJFQ0VQVElPTiAtIFJPT00gU0VSVklDRSAtIEJSRUFLRkFTVB8HBSN+L3JheW9ucy5hc3B4P3ZhbHVlX3BhdGg9VVE0M1hMTlRBNWRkZAILDw8WBB8GBRJIaXN0b3JpY2FsIGVzdC9vcmQfDGhkZAIRDxYCHwxoFgJmD2QWAgIDDxBkZBYBZmQCEw9kFgICAQ8PFgQfBgUYSGlzdG9yaXF1ZSBkZXZpcyBjbGllbnRzHwxoZGQCAw9kFhICAw8PFgQfBgUMRmluZCBhbiBJdGVtHwcFHX4vZmFxL3JlY2hlcmNoZXJfYXJ0aWNsZS5hc3B4ZGQCBQ8PFgQfBgUjSG93IHRvIG1ha2UgYSAgZXN0aW1hdGUgLyAgYW4gT3JkZXIfBwUffi9mYXEvZmFpcmVfZGV2aXNfY29tbWFuZGUuYXNweGRkAgcPDxYEHwYFG0ZpbmQgYSAgZXN0aW1hdGUgLyBhbiBvcmRlch8HBSN+L2ZhcS9yZXRyb3V2ZXJfZGV2aXNfY29tbWFuZGUuYXNweGRkAgkPDxYEHwYFHVJlbW92ZSBhbiBpdG1lIG9mIGEgIGVzdGltYXRlHwcFGn4vZmFxL3JldGlyZXJfYXJ0aWNsZS5hc3B4ZGQCCw8PFgQfBgUUVG8gZXJhc2UgYW4gZXN0aW1hdGUfBwUXfi9mYXEvZWZhY2VyX2Rl
dmlzLmFzcHhkZAINDxYCHwIFRH4vYXJ0aWNsZXMuYXNweD9zZWFyY2hfdHlwZT1sZXZlbCZ2YWx1ZV9wYXRoPVA0N0tIQzc0ODQmc2k9YmEmcGFnZT0xFgJmDxYCHwQFFX4vaW1hZ2VzL2VuLWdiL2JhLmpwZ2QCDg8WAh8CBUd+L2FydGljbGVzLmFzcHg/c2VhcmNoX3R5cGU9bGV2ZWwmdmFsdWVfcGF0aD1QNDdLSEM3NDg0JnNpPXByb21vJnBhZ2U9MRYCAgEPFgIfBAUYfi9pbWFnZXMvZW4tZ2IvcHJvbW8uanBnZAIQDxYCHwIFRX4vYXJ0aWNsZXMuYXNweD9zZWFyY2hfdHlwZT1sZXZlbCZ2YWx1ZV9wYXRoPVA0N0tIQzc0ODQmc2k9bmV3JnBhZ2U9MRYCZg8WAh8EBRZ+L2ltYWdlcy9lbi1nYi9uZXcuanBnZAIRDxYCHwIFLX4vYXJ0aWNsZXMuYXNweD9zZWFyY2hfdHlwZT1oaXQmc2k9aGl0JnBhZ2U9MRYCZg8WAh8EBRZ+L2ltYWdlcy9lbi1nYi9oaXQuanBnZAIFDw8WAh8MZ2QWBgIDDxQrAAJkZGQCBQ8UKwACDxYEHgtfIURhdGFCb3VuZGcfAAISZGQWAmYPZBYMAgEPZBYCAgEPZBYIAgEPDxYCHwYFB0lENzAvUE1kZAIDDxYCHwYFLFZFTlRJTEFURUQgUkVGUklHRVJBVE9SIDcwMCBMLiAxIERPT1IgKEdOMi8xZAIFDxYCHwIFHn4vYXJ0aWNsZS5hc3B4P2FfaWQ9MTI1MTI4MzM4MhYCZg8WBB8FBSxWRU5USUxBVEVEIFJFRlJJR0VSQVRPUiA3MDAgTC4gMSBET09SIChHTjIvMR8EBWp+L21lcmNhdG9yX2RhdGEvanBlZy9HRC9DQVRBTE9HVUUgMjAxMC9SRUYgR0FTVFJPTk9STS9NRVJDQVRVUyBQSE9UT1MgMTMtMDctMTIvTk9VVkVBVSBET1NTSUVSL0lENzAtUE0uSlBHZAIHDw8WBh8IBRNsX3ByaXhfY2xpZW50IHByb21vHwYFDDEuNjI5LDAwIOKCrB8JAgJkZAICD2QWAgIBD2QWCAIBDw8WAh8GBQhEVDE3OC9QTWRkAgMPFgIfBgUoVkVOVC4gUkVGUklHLiBUQUJMRSAzIERPT1JTIEdOMS8xIDQwNSBMLmQCBQ8WAh8CBR5+L2FydGljbGUuYXNweD9hX2lkPTEyNTEyODM0MTYWAmYPFgQfBQUoVkVOVC4gUkVGUklHLiBUQUJMRSAzIERPT1JTIEdOMS8xIDQwNSBMLh8EBVt+L21lcmNhdG9yX2RhdGEvanBlZy9HRC9DQVRBTE9HVUUgMjAxMC9SRUYgR0FTVFJPTk9STS9NRVJDQVRVUyBQSE9UT1MgMTMtMDctMTIvRFQxNzgtUE0uSlBHZAIHDw8WBh8IBRNsX3ByaXhfY2xpZW50IHByb21vHwYFDDEuOTA5LDAwIOKCrB8JAgJkZAIDD2QWAgIBD2QWCAIBDw8WAh8GBQhBUDFOL0w4NmRkAgMPFgIfBgUrUkVGUklHRVIuIDg1MEwuIDEgRC4gNDB4NjAweDQwMC8yMHggNjAweDgwMGQCBQ8WAh8CBR5+L2FydGljbGUuYXNweD9hX2lkPTEyNTEyODM2MDMWAmYPFgQfBQUrUkVGUklHRVIuIDg1MEwuIDEgRC4gNDB4NjAweDQwMC8yMHggNjAweDgwMB8EBU9+L21lcmNhdG9yX2RhdGEvanBlZy9HRC9DQVRBTE9HVUUgMjAxMC9QQVRJU1NFUklFIFBBSU4gQ0hPQ09MQVQvQVAxTi1MNjQtODYuSlBHZAIHDw8WBh8IBRNsX3ByaXhfY2xpZW50IHByb21vHwYFDDIuNjg5LDAwIOKCrB8JAgJkZAIED2QWAgIBD2QWCAIBDw8WAh8GBQdEQzUwMi1OZGQCAw8WAh8G
BRxESVNILVdBU0hFUiBCQVNLRVQgNTAweDUwMG1tZAIFDxYCHwIFHn4vYXJ0aWNsZS5hc3B4P2FfaWQ9MTI1MTI4NDY0OBYCZg8WBB8FBRxESVNILVdBU0hFUiBCQVNLRVQgNTAweDUwMG1tHwQFQ34vbWVyY2F0b3JfZGF0YS9qcGVnL0dEL0NBVEFMT0dVRSAyMDEwL0xBVkFHRS9GQVNUIFdBU0gvREM1MDItTi5KUEdkAgcPDxYGHwgFE2xfcHJpeF9jbGllbnQgcHJvbW8fBgUMMS41ODksMDAg4oKsHwkCAmRkAgUPZBYCAgEPZBYIAgEPDxYCHwYFB0VGUC80NFJkZAIDDxYCHwYFIUVMRUNUUklDIE9WRU4gMnggNCBQSVpaQVMgMiBST09NU2QCBQ8WAh8CBR5+L2FydGljbGUuYXNweD9hX2lkPTEyNTEyNzgzMDcWAmYPFgQfBQUhRUxFQ1RSSUMgT1ZFTiAyeCA0IFBJWlpBUyAyIFJPT01THwQFTX4vbWVyY2F0b3JfZGF0YS9qcGVnL0dEL0NBVEFMT0dVRSAyMDEwL1BJWlpBIEVUIFBBU1RBL1JVU1RJQyBMSU5FL0VGUC02NlIuSlBHZAIHDw8WBh8IBRNsX3ByaXhfY2xpZW50IHByb21vHwYFDDEuNjI1LDAwIOKCrB8JAgJkZAIGD2QWAgIBD2QWCAIBDw8WAh8GBQVESzctMmRkAgMPFgIfBgUjSE9PRCBESVNIV0FTSEVSLCAgQkFTS0VUIDUwMHg1MDAgTU1kAgUPFgIfAgUefi9hcnRpY2xlLmFzcHg/YV9pZD0xMjUxMjc1NDA1FgJmDxYEHwUFI0hPT0QgRElTSFdBU0hFUiwgIEJBU0tFVCA1MDB4NTAwIE1NHwQFQX4vbWVyY2F0b3JfZGF0YS9qcGVnL0dEL0NBVEFMT0dVRSAyMDEwL0xBVkFHRS9GQVNUIFdBU0gvREs3LTIuSlBHZAIHDw8WBh8IBRNsX3ByaXhfY2xpZW50IHByb21vHwYFDDIuNTM2LDAwIOKCrB8JAgJkZAIHDxQrAAJkZGQCBw9kFgQCAQ9kFgJmD2QWCgIFDxYCHwxoZAIJDxYCHwxoZAINDxYCHwxoZAIRDw8WBB8GBQxPcGVuIC8gQ2xvc2UfDGhkZAITDw8WAh8MaGRkAgMPPCsACQBkGAMFKWN0bDAwJGNwaF9jZW50ZXIkZHBfYXJ0aWNsZXNfcmF5b25fYm90dG9tDxQrAARkZAIGAhJkBSNjdGwwMCRjcGhfY2VudGVyJGx2X2FydGljbGVzX3JheW9ucw88KwAKAgc8KwAGAAgCEmQFJmN0bDAwJGNwaF9jZW50ZXIkZHBfYXJ0aWNsZXNfcmF5b25fdG9wDxQrAARkZAIGAhJkzw5eBCgUF6HQH+o5L7mrNloYe3w= ctl00$ToolkitScriptManage... ctl00$cph_center$menu_left1$up_login|ctl00$cph_center$menu_left1$lb_login ctl00$cph_center$hf_value... ctl00$cph_center$menu_lef... ctl00$cph_center$menu_lef... C913327 ctl00$cph_center$menu_lef... sdfsdfsdf ctl00$cph_center$n_vei$cp... ctl00$cph_center$n_vei$hf... ctl00$ddl_search_type Reference ctl00$tb_search ctl00_ToolkitScriptManage... 
;;AjaxControlToolkit, Version=3.5.40412.0, Culture=neutral, PublicKeyToken=28f01b0e84b6d53e:en-GB:1547e793-5b7e-48fe-8490-03a375b13a33:de1feab2:f2c8e708:720a52bf:f9cec9bc:589eaa30:698129cf:7a92f56c:4a2c8239; hiddenInputToUpdateATBuff... 1 """ def loginDiamondEurope(self): params = {'__ASYNCPOST': 'true', '__EVENTARGUMENT': '', '__EVENTTARGET': 'ctl00$cph_center$menu_left1$lb_login', '__EVENTVALIDATION': '/wEWEwKOk7qrBAKG4eyLBALGw+PfBwK7jI7eDQL/2fqXBwLH9rmjDwLG2KLDCAKCvreACALPgYP1DQKqvLeACAKKtP7+DAL07MD3CwLksZZaAuSxmloCicn43Q8Cisn43Q8C/Iag2AMClcHvlQgCyNGw1Ax/PwzywfL/ooD/FU51memYxQ1U+Q==', '__LASTFOCUS': '', '__SCROLLPOSITIONX': '0', '__SCROLLPOSITIONY': '0', '__VIEWSTATE': '/wEPDwUINzkzMzQ5OTcPZBYCZg9kFgICAw9kFgICAQ9kFhICAw8WAh4LXyFJdGVtQ291bnQCBhYMZg9kFgICAQ8WBh4FdGl0bGUFB0VuZ2xpc2geBGhyZWYFDC9yYXlvbnMuYXNweB4Hb25jbGljawUcc2V0Q29va2llKCdsYW5ndWUnLCAnZW4tZ2InKRYCZg8WBB4Dc3JjBQ9+L2ltYWdlcy9lbi5wbmceA2FsdAUHRW5nbGlzaGQCAQ9kFgICAQ8WBh8BBQlGcmFuw6dhaXMfAgUZL3JheW9ucy5hc3B4P2xhbmd1ZT1mci1iZR8DBRxzZXRDb29raWUoJ2xhbmd1ZScsICdmci1iZScpFgJmDxYEHwQFD34vaW1hZ2VzL2ZyLnBuZx8FBQlGcmFuw6dhaXNkAgIPZBYCAgEPFgYfAQUHRGV1dHNjaB8CBRkvcmF5b25zLmFzcHg/bGFuZ3VlPWRlLWRlHwMFHHNldENvb2tpZSgnbGFuZ3VlJywgJ2RlLWRlJykWAmYPFgQfBAUPfi9pbWFnZXMvZGUucG5nHwUFB0RldXRzY2hkAgMPZBYCAgEPFgYfAQUKTmVkZXJsYW5kcx8CBRkvcmF5b25zLmFzcHg/bGFuZ3VlPW5sLWJlHwMFHHNldENvb2tpZSgnbGFuZ3VlJywgJ25sLWJlJykWAmYPFgQfBAUPfi9pbWFnZXMvbmwucG5nHwUFCk5lZGVybGFuZHNkAgQPZBYCAgEPFgYfAQUIRXNwYcOxb2wfAgUZL3JheW9ucy5hc3B4P2xhbmd1ZT1lcy1lcx8DBRxzZXRDb29raWUoJ2xhbmd1ZScsICdlcy1lcycpFgJmDxYEHwQFD34vaW1hZ2VzL2VzLnBuZx8FBQhFc3Bhw7FvbGQCBQ9kFgICAQ8WBh8BBQhJdGFsaWFubx8CBRkvcmF5b25zLmFzcHg/bGFuZ3VlPWl0LWl0HwMFHHNldENvb2tpZSgnbGFuZ3VlJywgJ2l0LWl0JykWAmYPFgQfBAUPfi9pbWFnZXMvaXQucG5nHwUFCEl0YWxpYW5vZAIFDw8WBB4EVGV4dAUESG9tZR4LTmF2aWdhdGVVcmwFDH4vaW5kZXguYXNweGRkAgcPDxYEHwYFB0RpYW1vbmQfBwUOfi9kaWFtb25kLmFzcHhkZAIJDw8WBB8GBQhTZXJ2aWNlcx8HBQ9+L3NlcnZpY2VzLmFzcHhkZAILDw8WCB8GBQhQcm9kdWN0cx8HBRR+L3JheW9ucy5hc3B4P3BhZ2U9MR4IQ3NzQ2xhc3MFB2N1cnJlbnQeBF8hU0
ICAmRkAg0PDxYEHwYFBE5ld3MfBwULfi9uZXdzLmFzcHhkZAIPDw8WBB8GBQdDb250YWN0HwcFDn4vY29udGFjdC5hc3B4ZGQCEQ9kFgICAQ9kFgICAw9kFgJmD2QWBAIBDxBkZBYBZmQCBw8WBB4KQ29udGV4dEtleQUVUmVmZXJlbmNlfmVuLWdifkZhbHNlHg1Vc2VDb250ZXh0S2V5Z2QCEw9kFggCAQ9kFg4CAQ8PFgIeB1Zpc2libGVoZGQCAw9kFgJmD2QWAgIDDw8WAh8MZ2RkAgUPDxYCHwxoZGQCCRA8KwANAgAPFgIfDGhkDBQrABwFfDA6MCwwOjEsMDoyLDA6MywwOjQsMDo1LDA6NiwwOjcsMDo4LDA6OSwwOjEwLDA6MTEsMDoxMiwwOjEzLDA6MTQsMDoxNSwwOjE2LDA6MTcsMDoxOCwwOjE5LDA6MjAsMDoyMSwwOjIyLDA6MjMsMDoyNCwwOjI1LDA6MjYUKwACFgYfBgVyPGRpdiBjbGFzcz0ncHVjZV9tZW51X3JheW9uJyBzdHlsZT0nYmFja2dyb3VuZC1jb2xvcjojZmY0YzBiJz48L2Rpdj48c3BhbiBjbGFzcz0naXRlbV9tZW51X3JheW9uJz4tIENPT0tJTkc8L3NwYW4+HgdUb29sVGlwBQktIENPT0tJTkcfBwUjfi9yYXlvbnMuYXNweD92YWx1ZV9wYXRoPVA0N0tIQzc0ODRkFCsAAhYGHwYFigE8ZGl2IGNsYXNzPSdwdWNlX21lbnVfcmF5b24nIHN0eWxlPSdiYWNrZ3JvdW5kLWNvbG9yOiNlMThjNDUnPjwvZGl2PjxzcGFuIGNsYXNzPSdpdGVtX21lbnVfcmF5b24nPi0gSE9UIFNOQUNLUyAtIFBBTklOSSAtIEZBU1QgRk9PRDwvc3Bhbj4fDQUhLSBIT1QgU05BQ0tTIC0gUEFOSU5JIC0gRkFTVCBGT09EHwcFI34vcmF5b25zLmFzcHg/dmFsdWVfcGF0aD04M0RDM0Y2Q0FEZBQrAAIWBh8GBZMBPGRpdiBjbGFzcz0ncHVjZV9tZW51X3JheW9uJyBzdHlsZT0nYmFja2dyb3VuZC1jb2xvcjojZWNhZTc1Jz48L2Rpdj48c3BhbiBjbGFzcz0naXRlbV9tZW51X3JheW9uJz4tIEZSRU5DSCBGUklFUyAtIFJPQVNUSU5HIC0gR1JJTExJTkcgJiBCQlE8L3NwYW4+Hw0FKi0gRlJFTkNIIEZSSUVTIC0gUk9BU1RJTkcgLSBHUklMTElORyAmIEJCUR8HBSN+L3JheW9ucy5hc3B4P3ZhbHVlX3BhdGg9RTY0NDk0MzdDM2QUKwACFgYfBgV4PGRpdiBjbGFzcz0ncHVjZV9tZW51X3JheW9uJyBzdHlsZT0nYmFja2dyb3VuZC1jb2xvcjojZjNjYmEzJz48L2Rpdj48c3BhbiBjbGFzcz0naXRlbV9tZW51X3JheW9uJz4tIEFTSUFOIENPT0tJTkc8L3NwYW4+Hw0FDy0gQVNJQU4gQ09PS0lORx8HBSN+L3JheW9ucy5hc3B4P3ZhbHVlX3BhdGg9OTc4QTE0QzhFNGQUKwACFgYfBgWJATxkaXYgY2xhc3M9J3B1Y2VfbWVudV9yYXlvbicgc3R5bGU9J2JhY2tncm91bmQtY29sb3I6I2ZiZTZkMSc+PC9kaXY+PHNwYW4gY2xhc3M9J2l0ZW1fbWVudV9yYXlvbic+LSBTVEVBTSAtIENPTlZFQ1RJT04gLSBNSUNST1dBVkU8L3NwYW4+Hw0FIC0gU1RFQU0gLSBDT05WRUNUSU9OIC0gTUlDUk9XQVZFHwcFI34vcmF5b25zLmFzcHg/dmFsdWVfcGF0aD02N0ZENkIzNjQ2ZBQrAAIWBh8GBXc8ZGl2IGNsYXNzPSdwdWNlX21lbnVfcmF5b24nIHN0eWxlPSdiYWNrZ3JvdW5kLWNvbG
9yOiNmYzM0MjgnPjwvZGl2PjxzcGFuIGNsYXNzPSdpdGVtX21lbnVfcmF5b24nPi0gQ09PSyAmIENISUxMPC9zcGFuPh8NBQ4tIENPT0sgJiBDSElMTB8HBSN+L3JheW9ucy5hc3B4P3ZhbHVlX3BhdGg9TzBRTDhLSDA4VmQUKwACFgYfBgWNATxkaXYgY2xhc3M9J3B1Y2VfbWVudV9yYXlvbicgc3R5bGU9J2JhY2tncm91bmQtY29sb3I6I2UxN2Y1ZCc+PC9kaXY+PHNwYW4gY2xhc3M9J2l0ZW1fbWVudV9yYXlvbic+LSBSRUdFTkVSQVRJT04gLSBWQUNVVU0gLSBCQU5RVUVUSU5HPC9zcGFuPh8NBSQtIFJFR0VORVJBVElPTiAtIFZBQ1VVTSAtIEJBTlFVRVRJTkcfBwUjfi9yYXlvbnMuYXNweD92YWx1ZV9wYXRoPUU4NDM5Q0U0QTBkFCsAAhYGHwYFdzxkaXYgY2xhc3M9J3B1Y2VfbWVudV9yYXlvbicgc3R5bGU9J2JhY2tncm91bmQtY29sb3I6IzAyOTQ3ZSc+PC9kaXY+PHNwYW4gY2xhc3M9J2l0ZW1fbWVudV9yYXlvbic+LSBESVNIIFdBU0hFUlM8L3NwYW4+Hw0FDi0gRElTSCBXQVNIRVJTHwcFI34vcmF5b25zLmFzcHg/dmFsdWVfcGF0aD03OE1YQk1KRkdLZBQrAAIWBh8GBXI8ZGl2IGNsYXNzPSdwdWNlX21lbnVfcmF5b24nIHN0eWxlPSdiYWNrZ3JvdW5kLWNvbG9yOiNhMDA4NmQnPjwvZGl2PjxzcGFuIGNsYXNzPSdpdGVtX21lbnVfcmF5b24nPi0gTEFVTkRSWTwvc3Bhbj4fDQUJLSBMQVVORFJZHwcFI34vcmF5b25zLmFzcHg/dmFsdWVfcGF0aD1TWjJOU1ZKUTc4ZBQrAAIWBh8GBYMBPGRpdiBjbGFzcz0ncHVjZV9tZW51X3JheW9uJyBzdHlsZT0nYmFja2dyb3VuZC1jb2xvcjojMDU3M2E1Jz48L2Rpdj48c3BhbiBjbGFzcz0naXRlbV9tZW51X3JheW9uJz4tIEdBU1RST05PUk0gUkVGUklHRVJBVElPTjwvc3Bhbj4fDQUaLSBHQVNUUk9OT1JNIFJFRlJJR0VSQVRJT04fBwUjfi9yYXlvbnMuYXNweD92YWx1ZV9wYXRoPUhTVkVROTZYRzRkFCsAAhYGHwYFeDxkaXYgY2xhc3M9J3B1Y2VfbWVudV9yYXlvbicgc3R5bGU9J2JhY2tncm91bmQtY29sb3I6IzAyYTBjNic+PC9kaXY+PHNwYW4gY2xhc3M9J2l0ZW1fbWVudV9yYXlvbic+LSBSRUZSSUdFUkFUSU9OPC9zcGFuPh8NBQ8tIFJFRlJJR0VSQVRJT04fBwUjfi9yYXlvbnMuYXNweD92YWx1ZV9wYXRoPVgxMzY3TTdEOVNkFCsAAhYGHwYFigE8ZGl2IGNsYXNzPSdwdWNlX21lbnVfcmF5b24nIHN0eWxlPSdiYWNrZ3JvdW5kLWNvbG9yOiM2Y2IyZGEnPjwvZGl2PjxzcGFuIGNsYXNzPSdpdGVtX21lbnVfcmF5b24nPi0gU0FORFdJQ0hFUyAtIFNBTEFERVMgLSBTVEFSVEVSUzwvc3Bhbj4fDQUhLSBTQU5EV0lDSEVTIC0gU0FMQURFUyAtIFNUQVJURVJTHwcFI34vcmF5b25zLmFzcHg/dmFsdWVfcGF0aD03REU4RUM0RTJDZBQrAAIWBh8GBXY8ZGl2IGNsYXNzPSdwdWNlX21lbnVfcmF5b24nIHN0eWxlPSdiYWNrZ3JvdW5kLWNvbG9yOiM5NWM3ZTUnPjwvZGl2PjxzcGFuIGNsYXNzPSdpdGVtX21lbnVfcmF5b24nPi0gV0lORSAtIEJFRVI8L3NwYW4+Hw0FDS0gV0lORSAtIEJFRVIfBwUjfi
9yYXlvbnMuYXNweD92YWx1ZV9wYXRoPTZENDg3NDQzNEFkFCsAAhYGHwYFjAE8ZGl2IGNsYXNzPSdwdWNlX21lbnVfcmF5b24nIHN0eWxlPSdiYWNrZ3JvdW5kLWNvbG9yOiNiYmRiZjAnPjwvZGl2PjxzcGFuIGNsYXNzPSdpdGVtX21lbnVfcmF5b24nPi0gU09GVCBEUklOS1MgLSBBTENPSE9MIC0gQ09DS1RBSUxTPC9zcGFuPh8NBSMtIFNPRlQgRFJJTktTIC0gQUxDT0hPTCAtIENPQ0tUQUlMUx8HBSN+L3JheW9ucy5hc3B4P3ZhbHVlX3BhdGg9Q0RBRTQyMzRCRWQUKwACFgYfBgWHATxkaXYgY2xhc3M9J3B1Y2VfbWVudV9yYXlvbicgc3R5bGU9J2JhY2tncm91bmQtY29sb3I6IzY5N2RiOSc+PC9kaXY+PHNwYW4gY2xhc3M9J2l0ZW1fbWVudV9yYXlvbic+LSBJQ0UgQ1JFQU0gLSBTT1JCRVQgLSBHUkFOSVRBPC9zcGFuPh8NBR4tIElDRSBDUkVBTSAtIFNPUkJFVCAtIEdSQU5JVEEfBwUjfi9yYXlvbnMuYXNweD92YWx1ZV9wYXRoPTNUTlQwNkJYOTJkFCsAAhYGHwYFhwE8ZGl2IGNsYXNzPSdwdWNlX21lbnVfcmF5b24nIHN0eWxlPSdiYWNrZ3JvdW5kLWNvbG9yOiNiMjI4MTQnPjwvZGl2PjxzcGFuIGNsYXNzPSdpdGVtX21lbnVfcmF5b24nPi0gU0VMRiBTRVJWSUNFIC0gQlVGRkVUIC1UQVBBUzwvc3Bhbj4fDQUeLSBTRUxGIFNFUlZJQ0UgLSBCVUZGRVQgLVRBUEFTHwcFI34vcmF5b25zLmFzcHg/dmFsdWVfcGF0aD0zT0tIWDA1NzFXZBQrAAIWBh8GBYYBPGRpdiBjbGFzcz0ncHVjZV9tZW51X3JheW9uJyBzdHlsZT0nYmFja2dyb3VuZC1jb2xvcjojZWQ5YTA1Jz48L2Rpdj48c3BhbiBjbGFzcz0naXRlbV9tZW51X3JheW9uJz4tIFBBU1RSWSAtIEJBS0VSWSAtIENIT0NPTEFURTwvc3Bhbj4fDQUdLSBQQVNUUlkgLSBCQUtFUlkgLSBDSE9DT0xBVEUfBwUjfi9yYXlvbnMuYXNweD92YWx1ZV9wYXRoPU41RjBUNVpWS1pkFCsAAhYGHwYFhQE8ZGl2IGNsYXNzPSdwdWNlX21lbnVfcmF5b24nIHN0eWxlPSdiYWNrZ3JvdW5kLWNvbG9yOiNhMzQ0YTgnPjwvZGl2PjxzcGFuIGNsYXNzPSdpdGVtX21lbnVfcmF5b24nPi0gTUVBVCAtIERFTElDQVRFU1NFTiAtIEZJU0g8L3NwYW4+Hw0FHC0gTUVBVCAtIERFTElDQVRFU1NFTiAtIEZJU0gfBwUjfi9yYXlvbnMuYXNweD92YWx1ZV9wYXRoPUE0MTFCODA3Q0FkFCsAAhYGHwYFhAE8ZGl2IGNsYXNzPSdwdWNlX21lbnVfcmF5b24nIHN0eWxlPSdiYWNrZ3JvdW5kLWNvbG9yOiNmZjAwMGYnPjwvZGl2PjxzcGFuIGNsYXNzPSdpdGVtX21lbnVfcmF5b24nPi0gUElaWkEgLSBQQVNUQSAtIFRBS0UgQVdBWTwvc3Bhbj4fDQUbLSBQSVpaQSAtIFBBU1RBIC0gVEFLRSBBV0FZHwcFI34vcmF5b25zLmFzcHg/dmFsdWVfcGF0aD01STZYNjZSNzYyZBQrAAIWBh8GBZwBPGRpdiBjbGFzcz0ncHVjZV9tZW51X3JheW9uJyBzdHlsZT0nYmFja2dyb3VuZC1jb2xvcjojYTY2YjExJz48L2Rpdj48c3BhbiBjbGFzcz0naXRlbV9tZW51X3JheW9uJz4tIENPRkZFRSBURUEgLSBWSUVOTkVTRSBQQVNUUklFUyAtSlVJQ0
VTIE1JTEsgU0hBS0U8L3NwYW4+Hw0FMy0gQ09GRkVFIFRFQSAtIFZJRU5ORVNFIFBBU1RSSUVTIC1KVUlDRVMgTUlMSyBTSEFLRR8HBSN+L3JheW9ucy5hc3B4P3ZhbHVlX3BhdGg9MUZERkZQNUgzMmQUKwACFgYfBgV7PGRpdiBjbGFzcz0ncHVjZV9tZW51X3JheW9uJyBzdHlsZT0nYmFja2dyb3VuZC1jb2xvcjojYzBjYTBlJz48L2Rpdj48c3BhbiBjbGFzcz0naXRlbV9tZW51X3JheW9uJz4tIEZPT0QgUFJFUEFSQVRJT048L3NwYW4+Hw0FEi0gRk9PRCBQUkVQQVJBVElPTh8HBSN+L3JheW9ucy5hc3B4P3ZhbHVlX3BhdGg9NVFKNzQ0MzJTV2QUKwACFgYfBgV5PGRpdiBjbGFzcz0ncHVjZV9tZW51X3JheW9uJyBzdHlsZT0nYmFja2dyb3VuZC1jb2xvcjojNWQ2MzY3Jz48L2Rpdj48c3BhbiBjbGFzcz0naXRlbV9tZW51X3JheW9uJz4tIE5FVVRSQUwgLSBJTk9YPC9zcGFuPh8NBRAtIE5FVVRSQUwgLSBJTk9YHwcFI34vcmF5b25zLmFzcHg/dmFsdWVfcGF0aD1ISDI3OTg1Q1pUZBQrAAIWBh8GBX08ZGl2IGNsYXNzPSdwdWNlX21lbnVfcmF5b24nIHN0eWxlPSdiYWNrZ3JvdW5kLWNvbG9yOiM0ZWJhYmMnPjwvZGl2PjxzcGFuIGNsYXNzPSdpdGVtX21lbnVfcmF5b24nPi0gQ0xFQU5JTkcgLSBIWUdJRU5FPC9zcGFuPh8NBRQtIENMRUFOSU5HIC0gSFlHSUVORR8HBSN+L3JheW9ucy5hc3B4P3ZhbHVlX3BhdGg9MU8wN09XMDA2M2QUKwACFgYfBgV/PGRpdiBjbGFzcz0ncHVjZV9tZW51X3JheW9uJyBzdHlsZT0nYmFja2dyb3VuZC1jb2xvcjojZmZiMjA1Jz48L2Rpdj48c3BhbiBjbGFzcz0naXRlbV9tZW51X3JheW9uJz4tIFZBQ1VVTSAmIFZFTlRJTEFUSU9OPC9zcGFuPh8NBRYtIFZBQ1VVTSAmIFZFTlRJTEFUSU9OHwcFI34vcmF5b25zLmFzcHg/dmFsdWVfcGF0aD0xSTRDQzcxM0hCZBQrAAIWBh8GBXg8ZGl2IGNsYXNzPSdwdWNlX21lbnVfcmF5b24nIHN0eWxlPSdiYWNrZ3JvdW5kLWNvbG9yOiNiMGIxYmInPjwvZGl2PjxzcGFuIGNsYXNzPSdpdGVtX21lbnVfcmF5b24nPi0gR04gQ09OVEFJTkVSUzwvc3Bhbj4fDQUPLSBHTiBDT05UQUlORVJTHwcFI34vcmF5b25zLmFzcHg/dmFsdWVfcGF0aD0yNDAxUzk1RDNHZBQrAAIWBh8GBY8BPGRpdiBjbGFzcz0ncHVjZV9tZW51X3JheW9uJyBzdHlsZT0nYmFja2dyb3VuZC1jb2xvcjojYjM4MzEwJz48L2Rpdj48c3BhbiBjbGFzcz0naXRlbV9tZW51X3JheW9uJz4tIERJTk5FUiBTRVJWSUNFIC0gRElTUExBWVMgLSBUUk9MTEVZUzwvc3Bhbj4fDQUmLSBESU5ORVIgU0VSVklDRSAtIERJU1BMQVlTIC0gVFJPTExFWVMfBwUjfi9yYXlvbnMuYXNweD92YWx1ZV9wYXRoPVg5VEY5REY0MzdkFCsAAhYGHwYFjwE8ZGl2IGNsYXNzPSdwdWNlX21lbnVfcmF5b24nIHN0eWxlPSdiYWNrZ3JvdW5kLWNvbG9yOiNmZmNjMDMnPjwvZGl2PjxzcGFuIGNsYXNzPSdpdGVtX21lbnVfcmF5b24nPi0gUkVDRVBUSU9OIC0gUk9PTSBTRVJWSUNFIC0gQlJFQUtGQVNUPC9zcGFuPh8NBSYtIFJFQ0VQVElPTiAtIF
JPT00gU0VSVklDRSAtIEJSRUFLRkFTVB8HBSN+L3JheW9ucy5hc3B4P3ZhbHVlX3BhdGg9VVE0M1hMTlRBNWRkZAILDw8WBB8GBRJIaXN0b3JpY2FsIGVzdC9vcmQfDGhkZAIRDxYCHwxoFgJmD2QWAgIDDxBkZBYBZmQCEw9kFgICAQ8PFgQfBgUYSGlzdG9yaXF1ZSBkZXZpcyBjbGllbnRzHwxoZGQCAw9kFhICAw8PFgQfBgUMRmluZCBhbiBJdGVtHwcFHX4vZmFxL3JlY2hlcmNoZXJfYXJ0aWNsZS5hc3B4ZGQCBQ8PFgQfBgUjSG93IHRvIG1ha2UgYSAgZXN0aW1hdGUgLyAgYW4gT3JkZXIfBwUffi9mYXEvZmFpcmVfZGV2aXNfY29tbWFuZGUuYXNweGRkAgcPDxYEHwYFG0ZpbmQgYSAgZXN0aW1hdGUgLyBhbiBvcmRlch8HBSN+L2ZhcS9yZXRyb3V2ZXJfZGV2aXNfY29tbWFuZGUuYXNweGRkAgkPDxYEHwYFHVJlbW92ZSBhbiBpdG1lIG9mIGEgIGVzdGltYXRlHwcFGn4vZmFxL3JldGlyZXJfYXJ0aWNsZS5hc3B4ZGQCCw8PFgQfBgUUVG8gZXJhc2UgYW4gZXN0aW1hdGUfBwUXfi9mYXEvZWZhY2VyX2RldmlzLmFzcHhkZAINDxYCHwIFRH4vYXJ0aWNsZXMuYXNweD9zZWFyY2hfdHlwZT1sZXZlbCZ2YWx1ZV9wYXRoPVA0N0tIQzc0ODQmc2k9YmEmcGFnZT0xFgJmDxYCHwQFFX4vaW1hZ2VzL2VuLWdiL2JhLmpwZ2QCDg8WAh8CBUd+L2FydGljbGVzLmFzcHg/c2VhcmNoX3R5cGU9bGV2ZWwmdmFsdWVfcGF0aD1QNDdLSEM3NDg0JnNpPXByb21vJnBhZ2U9MRYCAgEPFgIfBAUYfi9pbWFnZXMvZW4tZ2IvcHJvbW8uanBnZAIQDxYCHwIFRX4vYXJ0aWNsZXMuYXNweD9zZWFyY2hfdHlwZT1sZXZlbCZ2YWx1ZV9wYXRoPVA0N0tIQzc0ODQmc2k9bmV3JnBhZ2U9MRYCZg8WAh8EBRZ+L2ltYWdlcy9lbi1nYi9uZXcuanBnZAIRDxYCHwIFLX4vYXJ0aWNsZXMuYXNweD9zZWFyY2hfdHlwZT1oaXQmc2k9aGl0JnBhZ2U9MRYCZg8WAh8EBRZ+L2ltYWdlcy9lbi1nYi9oaXQuanBnZAIFDw8WAh8MZ2QWBgIDDxQrAAJkZGQCBQ8UKwACDxYEHgtfIURhdGFCb3VuZGcfAAISZGQWAmYPZBYMAgEPZBYCAgEPZBYIAgEPDxYCHwYFB0lENzAvUE1kZAIDDxYCHwYFLFZFTlRJTEFURUQgUkVGUklHRVJBVE9SIDcwMCBMLiAxIERPT1IgKEdOMi8xZAIFDxYCHwIFHn4vYXJ0aWNsZS5hc3B4P2FfaWQ9MTI1MTI4MzM4MhYCZg8WBB8FBSxWRU5USUxBVEVEIFJFRlJJR0VSQVRPUiA3MDAgTC4gMSBET09SIChHTjIvMR8EBWp+L21lcmNhdG9yX2RhdGEvanBlZy9HRC9DQVRBTE9HVUUgMjAxMC9SRUYgR0FTVFJPTk9STS9NRVJDQVRVUyBQSE9UT1MgMTMtMDctMTIvTk9VVkVBVSBET1NTSUVSL0lENzAtUE0uSlBHZAIHDw8WBh8IBRNsX3ByaXhfY2xpZW50IHByb21vHwYFDDEuNjI5LDAwIOKCrB8JAgJkZAICD2QWAgIBD2QWCAIBDw8WAh8GBQhEVDE3OC9QTWRkAgMPFgIfBgUoVkVOVC4gUkVGUklHLiBUQUJMRSAzIERPT1JTIEdOMS8xIDQwNSBMLmQCBQ8WAh8CBR5+L2FydGljbGUuYXNweD9hX2lkPTEyNTEyODM0MTYWAmYPFgQfBQUoVkVOVC4gUkVGUklHLiBUQUJMRSAzIERPT1
JTIEdOMS8xIDQwNSBMLh8EBVt+L21lcmNhdG9yX2RhdGEvanBlZy9HRC9DQVRBTE9HVUUgMjAxMC9SRUYgR0FTVFJPTk9STS9NRVJDQVRVUyBQSE9UT1MgMTMtMDctMTIvRFQxNzgtUE0uSlBHZAIHDw8WBh8IBRNsX3ByaXhfY2xpZW50IHByb21vHwYFDDEuOTA5LDAwIOKCrB8JAgJkZAIDD2QWAgIBD2QWCAIBDw8WAh8GBQhBUDFOL0w4NmRkAgMPFgIfBgUrUkVGUklHRVIuIDg1MEwuIDEgRC4gNDB4NjAweDQwMC8yMHggNjAweDgwMGQCBQ8WAh8CBR5+L2FydGljbGUuYXNweD9hX2lkPTEyNTEyODM2MDMWAmYPFgQfBQUrUkVGUklHRVIuIDg1MEwuIDEgRC4gNDB4NjAweDQwMC8yMHggNjAweDgwMB8EBU9+L21lcmNhdG9yX2RhdGEvanBlZy9HRC9DQVRBTE9HVUUgMjAxMC9QQVRJU1NFUklFIFBBSU4gQ0hPQ09MQVQvQVAxTi1MNjQtODYuSlBHZAIHDw8WBh8IBRNsX3ByaXhfY2xpZW50IHByb21vHwYFDDIuNjg5LDAwIOKCrB8JAgJkZAIED2QWAgIBD2QWCAIBDw8WAh8GBQdEQzUwMi1OZGQCAw8WAh8GBRxESVNILVdBU0hFUiBCQVNLRVQgNTAweDUwMG1tZAIFDxYCHwIFHn4vYXJ0aWNsZS5hc3B4P2FfaWQ9MTI1MTI4NDY0OBYCZg8WBB8FBRxESVNILVdBU0hFUiBCQVNLRVQgNTAweDUwMG1tHwQFQ34vbWVyY2F0b3JfZGF0YS9qcGVnL0dEL0NBVEFMT0dVRSAyMDEwL0xBVkFHRS9GQVNUIFdBU0gvREM1MDItTi5KUEdkAgcPDxYGHwgFE2xfcHJpeF9jbGllbnQgcHJvbW8fBgUMMS41ODksMDAg4oKsHwkCAmRkAgUPZBYCAgEPZBYIAgEPDxYCHwYFB0VGUC80NFJkZAIDDxYCHwYFIUVMRUNUUklDIE9WRU4gMnggNCBQSVpaQVMgMiBST09NU2QCBQ8WAh8CBR5+L2FydGljbGUuYXNweD9hX2lkPTEyNTEyNzgzMDcWAmYPFgQfBQUhRUxFQ1RSSUMgT1ZFTiAyeCA0IFBJWlpBUyAyIFJPT01THwQFTX4vbWVyY2F0b3JfZGF0YS9qcGVnL0dEL0NBVEFMT0dVRSAyMDEwL1BJWlpBIEVUIFBBU1RBL1JVU1RJQyBMSU5FL0VGUC02NlIuSlBHZAIHDw8WBh8IBRNsX3ByaXhfY2xpZW50IHByb21vHwYFDDEuNjI1LDAwIOKCrB8JAgJkZAIGD2QWAgIBD2QWCAIBDw8WAh8GBQVESzctMmRkAgMPFgIfBgUjSE9PRCBESVNIV0FTSEVSLCAgQkFTS0VUIDUwMHg1MDAgTU1kAgUPFgIfAgUefi9hcnRpY2xlLmFzcHg/YV9pZD0xMjUxMjc1NDA1FgJmDxYEHwUFI0hPT0QgRElTSFdBU0hFUiwgIEJBU0tFVCA1MDB4NTAwIE1NHwQFQX4vbWVyY2F0b3JfZGF0YS9qcGVnL0dEL0NBVEFMT0dVRSAyMDEwL0xBVkFHRS9GQVNUIFdBU0gvREs3LTIuSlBHZAIHDw8WBh8IBRNsX3ByaXhfY2xpZW50IHByb21vHwYFDDIuNTM2LDAwIOKCrB8JAgJkZAIHDxQrAAJkZGQCBw9kFgQCAQ9kFgJmD2QWCgIFDxYCHwxoZAIJDxYCHwxoZAINDxYCHwxoZAIRDw8WBB8GBQxPcGVuIC8gQ2xvc2UfDGhkZAITDw8WAh8MaGRkAgMPPCsACQBkGAMFKWN0bDAwJGNwaF9jZW50ZXIkZHBfYXJ0aWNsZXNfcmF5b25fYm90dG9tDxQrAARkZAIGAhJkBSNjdGwwMCRjcGhfY2VudGVyJGx2X2FydGljbGVzX3
JheW9ucw88KwAKAgc8KwAGAAgCEmQFJmN0bDAwJGNwaF9jZW50ZXIkZHBfYXJ0aWNsZXNfcmF5b25fdG9wDxQrAARkZAIGAhJkzw5eBCgUF6HQH+o5L7mrNloYe3w=', 'ctl00$ToolkitScriptManager1': 'ctl00$cph_center$menu_left1$up_login|ctl00$cph_center$menu_left1$lb_login', 'ctl00$cph_center$menu_left1$tb_login': '******', 'ctl00$cph_center$menu_left1$tb_password': '******', 'ctl00$ddl_search_type': 'Reference', 'ctl00_ToolkitScriptManager1HiddenField': ';;AjaxControlToolkit, Version=3.5.40412.0, Culture=neutral, PublicKeyToken=28f01b0e84b6d53e:en-GB:1547e793-5b7e-48fe-8490-03a375b13a33:de1feab2:f2c8e708:720a52bf:f9cec9bc:589eaa30:698129cf:7a92f56c:4a2c8239;', 'hiddenInputToUpdateATBuffer_CommonToolkitScripts': '1'} if self.spider.login('http://www.diamond-europe.com/rayons.aspx', params) is not None: return True return False def scrapBertos(self, retry=0): # self.downloadFile('http://s900.bertos.it/download.php?file=editorcms/documentazione/schede/scheda_13722600.pdf', 'a.pdf') # self.scrapSubCategory('http://s900.bertos.it/en/', '', None, None) # self.scrapProducts('http://s900.bertos.it/en/pasta_cookers/', '', '', None, None) # return self.notifyProduct.emit('<font color=green><b>Try to get all language links.</b></font>') self.logger.debug(self.mainUrl) data = self.spider.fetchData(self.mainUrl) if data and len(data) > 0: data = self.regex.reduceNewLine(data) data = self.regex.reduceBlankSpace(data) languages = self.regex.getAllSearchedData( '(?i)<div class="[^"]*"><a href="([^"]*)"\s*?class="boxalingua">([^<]*)</a>', data) if languages and len(languages) > 0: self.logger.debug('Total languages: %s' % str(len(languages))) self.notifyProduct.emit('<b>Total languages found[%s]</b>' % str(len(languages))) for language in languages: self.totalProducts = 0 url = language[0] # if str(language[1]).lower() != 'en': # continue urlChunk = self.spider.fetchData(url) if urlChunk and len(urlChunk) > 0: urlChunk = self.regex.reduceNewLine(urlChunk) urlChunk = self.regex.reduceBlankSpace(urlChunk) url = 
self.regex.getSearchedData('(?i)<a href="([^"]*)" onmouseover="vedi_po_cat\(2\)\s*?"', urlChunk) csvFile = str(language[1].strip()).lower() + '_' + 'bertos.csv' dupCsvReader = Csv() dupCsvRows = dupCsvReader.readCsvRow(csvFile) csvWriter = Csv(csvFile) if self.csvHeader not in dupCsvRows: dupCsvRows.append(self.csvHeader) csvWriter.writeCsvRow(self.csvHeader) self.notifyProduct.emit( '<font color=green><b>Try to get data for language [%s].</b></font>' % language[1]) self.scrapCategory(url, dupCsvRows, csvWriter) self.notifyProduct.emit( '<font color=red><b>===== Finish scraping data for [%s] =====</b></font><br /><br />' % language[1]) else: if retry < 5: return self.scrapBertos(retry + 1) def scrapCategory(self, mainUrl, dupCsvRows, csvWriter): url = mainUrl self.logger.debug('Main URL: ' + url) self.notifyProduct.emit('<font color=green><b>Main URL: %s</b></font>' % url) data = self.spider.fetchData(url) if data and len(data) > 0: data = self.regex.reduceNewLine(data) data = self.regex.reduceBlankSpace(data) data = self.regex.reduceNbsp(data) self.notifyProduct.emit('<b>Try to scrap all categories.</b>') categoryChunk = self.regex.getSearchedData('(?i)<div id="contenuto1">(.*?)</div>\s*?</div>', data) if categoryChunk and len(categoryChunk) > 0: categories = self.regex.getAllSearchedData('(?i)<a href="([^"]*)"[^>]*?>([^<]*)</a>', categoryChunk) if categories and len(categories) > 0: self.notifyProduct.emit('<b>Total Categories Found: %s</b>' % str(len(categories))) for category in categories: categoryName = category[1].strip() self.scrapSubCategory(str(category[0]).strip(), categoryName, dupCsvRows, csvWriter) def scrapSubCategory(self, url, categoryName, dupCsvRows, csvWriter): self.logger.debug('Category URL: ' + url) self.notifyProduct.emit('<b>Try to scrap subcategories for: %s</b>' % categoryName) data = self.spider.fetchData(url) if data and len(data) > 0: data = self.regex.reduceNewLine(data) data = self.regex.reduceBlankSpace(data) subCategories = 
self.regex.getAllSearchedData('(?i)<li\s*?><a href="([^"]*)" title="([^"]*)"', data) if subCategories and len(subCategories) > 0: self.notifyProduct.emit( '<font color=green><b>Total subcategories found %s.</b></font>' % str(len(subCategories))) for subCategory in subCategories: subCategoryName = subCategory[1].strip() self.scrapProducts(subCategory[0].strip(), categoryName, subCategoryName, dupCsvRows, csvWriter) def downloadFile(self, url, downloadPath, retry=0): print url self.notifyProduct.emit('<b>File URL: %s.</b>' % url) try: socket.setdefaulttimeout(10) opener = urllib2.build_opener(urllib2.HTTPRedirectHandler(), urllib2.HTTPHandler(debuglevel=0), urllib2.HTTPSHandler(debuglevel=0)) opener.addheaders = [ ('User-Agent', 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:14.0) Gecko/20100101 Firefox/14.0.1')] urllib2.install_opener(opener) # resp = opener.open(url, timeout=30) # resp = urllib2.urlopen(url, timeout=30) resp = None try: # resp = urllib.urlopen(url) resp = opener.open(url, timeout=30) except Exception, x: print x if resp is None: return False # if resp.info()['Connection'] == 'close' or resp.getcode() != 200: # if retry < 3: # self.notifyProduct.emit('<font color=red><b>Failed to download file. Retrying...</b></font>') # return self.downloadFile(url, downloadPath, retry + 1) # else: # self.notifyProduct.emit('<font color=red><b>Failed to download file after 3 retry.</b></font>') # return print resp.info() print 'info.......' contentLength = resp.info()['Content-Length'] contentLength = self.regex.getSearchedData('(?i)^(\d+)', contentLength) totalSize = float(contentLength) directory = os.path.dirname(downloadPath) if not os.path.exists(directory): try: os.makedirs(directory) except Exception, x: print x dl_file = open(downloadPath, 'wb') currentSize = 0 CHUNK_SIZE = 32768 totalSizeKB = totalSize / 1024 if totalSize > 0 else totalSize print 'everything ok............' 
while True: data = None try: data = resp.read(CHUNK_SIZE) except Exception, x: print x if not data: break currentSize += len(data) dl_file.write(data) print('============> ' + \ str(round(float(currentSize * 100) / totalSize, 2)) + \ '% of ' + str(totalSize) + ' bytes') notifyDl = '===> Downloaded ' + str(round(float(currentSize * 100) / totalSize, 2)) + '% of ' + str( totalSizeKB) + ' KB.' self.notifyProduct.emit('<b>%s</b>' % notifyDl) if currentSize >= totalSize: dl_file.close() return True
class CsProduct(QThread):
    """Background thread that scrapes www.cs-catering-equipment.co.uk.

    The site is walked from the main page through three levels of
    sub-category links down to the product listings. Every product page that
    has not been seen before is written as one row to ``cs_product.csv`` and
    its product and brand images are downloaded. Progress is reported as
    HTML snippets through the ``notifyProduct`` signal.
    """

    notifyProduct = pyqtSignal(object)

    # Link markup used by the three nested sub-category levels.
    _SUB_CATEGORY_PATTERN = '(?i)<li> <a href="([^"]*)" title="([^"]*)"[^>]*?>[^<]*?</a> </li>'
    # Last path component of a URL, used as the local file name.
    _FILE_NAME_PATTERN = '(?i)/([a-zA-Z0-9-_.]*)$'
    _CHUNK_SIZE = 32768

    def __init__(self):
        QThread.__init__(self)
        self.logger = LogManager(__name__)
        self.spider = Spider()
        self.regex = Regex()
        dupCsvReader = Csv()
        # Values already present in the CSV; compared against product URLs
        # (dupCsvRows0) and product codes (dupCsvRows) to skip duplicates.
        # NOTE(review): second argument is presumably the column index.
        self.dupCsvRows0 = dupCsvReader.readCsvRow('cs_product.csv', 0)
        self.dupCsvRows = dupCsvReader.readCsvRow('cs_product.csv', 1)
        self.csvWriter = Csv('cs_product.csv')
        self.mainUrl = 'http://www.cs-catering-equipment.co.uk/'
        self.utils = Utils()
        if 'Product Code' not in self.dupCsvRows:
            self.csvWriter.writeCsvRow(
                ['URL', 'Product Code', 'Product Name', 'Manufacturer', 'List Price', 'Product Price', 'Discount',
                 'Product Short Description', 'Product Long Description', 'Product Technical Specifications',
                 'Warranty', 'Delivery', 'Product Image', 'Category 1', 'Category 2', 'Category 3', 'Category 4',
                 'Brand Image'])
        self.totalProducts = len(self.dupCsvRows)

    def run(self):
        """Thread entry point: scrape everything, then announce completion."""
        self.scrapProduct()
        self.notifyProduct.emit('<font color=red><b>Finished Scraping All products.</b></font>')

    def _fetchPage(self, url):
        """Fetch *url* and normalise its whitespace; return None when nothing came back."""
        data = self.spider.fetchData(url)
        if not data:
            return None
        data = self.regex.reduceNewLine(data)
        return self.regex.reduceBlankSpace(data)

    def _subCategories(self, url, parentName):
        """Announce and fetch a category page; return its (url, title) sub-category links."""
        self.notifyProduct.emit('<b>Try to scrap all categories under Category[%s]</b>' % parentName)
        self.notifyProduct.emit('<font color=green><b>Category URL: %s</b></font>' % url)
        data = self._fetchPage(url)
        if not data:
            return []
        return self.regex.getAllSearchedData(self._SUB_CATEGORY_PATTERN, data) or []

    def _searchWithin(self, chunkPattern, valuePattern, data):
        """Return the *valuePattern* match inside the first *chunkPattern* match, or ''."""
        chunk = self.regex.getSearchedData(chunkPattern, data)
        if not chunk:
            return ''
        return self.regex.getSearchedData(valuePattern, chunk)

    def _downloadImage(self, imageUrl, directory, label):
        """Download *imageUrl* into *directory*; return the local file name ('' if none)."""
        if not imageUrl:
            return ''
        fileName = self.regex.getSearchedData(self._FILE_NAME_PATTERN, imageUrl)
        if not fileName:
            return ''
        self.notifyProduct.emit(
            '<font color=green><b>Downloading %s [%s]. Please wait...</b></font>' % (label, fileName))
        # Only claim success when the transfer actually completed.
        if self.downloadFile(imageUrl, directory + fileName):
            self.notifyProduct.emit('<font color=green><b>Downloaded %s [%s].</b></font>' % (label, fileName))
        else:
            self.notifyProduct.emit('<font color=red><b>Failed to download %s [%s].</b></font>' % (label, fileName))
        return fileName

    def scrapProduct(self):
        """Read the top-level categories from the main page and descend into each."""
        self.notifyProduct.emit('<font color=green><b>Main URL: %s</b></font>' % self.mainUrl)
        data = self._fetchPage(self.mainUrl)
        if not data:
            return
        self.notifyProduct.emit('<b>Try to scrap all categories.</b>')
        categories = self.regex.getAllSearchedData('(?i)<a href="([^"]*)" class="level-top" title="([^"]*)"', data)
        if not categories:
            return
        self.notifyProduct.emit('<b>Total Categories Found: %s</b>' % str(len(categories)))
        for category in categories:
            category1Name = unicode(category[1]).strip()
            self.scrapCategory1Data(str(category[0]).strip(), category1Name)

    def scrapCategory1Data(self, url, category1Name):
        """Scrape the second-level categories listed on a first-level category page."""
        categories = self._subCategories(url, category1Name)
        if categories:
            self.notifyProduct.emit('<b>Total Categories Found: %s</b>' % str(len(categories)))
        for category in categories:
            self.scrapCategory2Data(category[0], category1Name, category[1])

    def scrapCategory2Data(self, url, category1Name, category2Name):
        """Scrape the third-level categories listed on a second-level category page."""
        for category in self._subCategories(url, category2Name):
            self.logger.debug('category2: ' + category[0])
            self.scrapCategory3Data(category[0], category1Name, category2Name, category[1])

    def scrapCategory3Data(self, url, category1Name, category2Name, category3Name):
        """Scrape the fourth-level categories and hand each one to the product lister."""
        for category in self._subCategories(url, category3Name):
            self.logger.debug('Categories: %s' % [category1Name, category2Name, category3Name, category[1]])
            self.scrapProductsDetails(category[0], category1Name, category2Name, category3Name, category[1])

    def scrapProductsDetails(self, url, category1Name, category2Name, category3Name, category4Name):
        """List every product of a leaf category and scrape the ones not seen yet.

        The listing is requested with the largest page size the site offers
        (falling back to 25) so that a single request returns all products.
        """
        self.logger.debug('Product Details URL: ' + url)
        self.notifyProduct.emit('<b>Try to scrap all products under Category[%s]</b>' % category4Name)
        self.notifyProduct.emit('<font color=green><b>Category URL: %s</b></font>' % url)

        maxLimit = 25
        maxLimitChunk = self._fetchPage(url + '?mode=list')
        if maxLimitChunk:
            maxLimits = self.regex.getAllSearchedData(r'<option value="[^"]*limit=(\d+)[^"]*"', maxLimitChunk)
            if maxLimits:
                maxLimit = max(map(int, maxLimits))

        data = self._fetchPage(url + '?limit=' + str(maxLimit) + '&mode=list')
        if not data:
            return
        products = self.regex.getAllSearchedData('(?i)<div class="listing-item[^"]*?">(.*?)</div>', data)
        if not products:
            return
        self.notifyProduct.emit('<font color=green><b>Total Products Found [%s] for category[%s]</b></font>' % (
            str(len(products)), category4Name))
        for product in products:
            productDetailUrl = self.regex.getSearchedData('(?i)<a href="([^"]*)"', product)
            if not productDetailUrl:
                # Listing entry without a link: nothing to fetch.
                continue
            if productDetailUrl not in self.dupCsvRows0:
                self.dupCsvRows0.append(productDetailUrl)
                self.scrapProductDetails(productDetailUrl, category1Name, category2Name, category3Name,
                                         category4Name)
            else:
                self.notifyProduct.emit(
                    '<font color=green><b>Already Exists This Product Under Category[%s]. Skip It.</b></font>'
                    % category4Name)
        self.notifyProduct.emit(
            '<font color=green><b>Total Products Scraped [%s].</b></font>' % str(self.totalProducts))

    def scrapProductDetails(self, url, category1Name, category2Name, category3Name, category4Name):
        """Scrape one product page, download its images and append a CSV row.

        Products whose model number is already recorded are skipped.
        """
        self.logger.debug('Product Detail URL: ' + url)
        self.notifyProduct.emit('<b>Try to scrap product details under Category[%s]</b>' % category4Name)
        self.notifyProduct.emit('<font color=green><b>Product Detail URL: %s</b></font>' % url)
        data = self._fetchPage(url)
        if not data:
            return

        manufacturer = self.regex.getSearchedData(
            '(?i)<span class="manufacturer-box-label">Manufacturer:</span>([^<]*)</p>', data)
        productCode = self.regex.getSearchedData(
            '(?i)<span class="manufacturer-box-label">Model No:</span>([^<]*)</p>', data)
        if productCode in self.dupCsvRows:
            self.notifyProduct.emit(
                '<font color=green><b>Already Exists This Product Under Category[%s]. Skip It.</b></font>'
                % category4Name)
            return
        self.totalProducts += 1
        self.dupCsvRows.append(productCode)

        productName = self.regex.getSearchedData('(?i)<div class="product-name"> <h1>([^<]*)</h1>', data)
        productTechnicalDesc = self.regex.getSearchedData(
            '(?i)<div class="product-short-description">([^<]*)</div>', data)

        productShortDesc = ''
        productFullDesc = ''
        productDescriptions = self.regex.getSearchedData('(?i)<div class="product-specs">(.*?)</div>', data)
        if productDescriptions:
            productShortDesc = self.regex.getSearchedData('(?i)<p>(.*?)</p>', productDescriptions)
            bulletPoints = self.regex.getAllSearchedData('(?i)<li>([^<]*)</li>', productDescriptions)
            productFullDesc = '\n'.join(bulletPoints or [])

        listPrice = self._searchWithin(
            '(?i)<div class="rrp-price regular-price">(.*?)</div>', '(?i)([0-9,.]+)', data)
        savePrice = self._searchWithin(
            '(?i)<div class="regular-price saving-price">(.*?)</div>', '(?i)([0-9%]+)', data)
        price = self._searchWithin(
            r'(?i)<div class="[^"]*" id="product-price-\d+">(.*?)</div>', '(?i)([0-9,.]+)', data)
        delivery = self._searchWithin('(?i)<div class="delivery">(.*?)</div>', '(?i)<p>([^<]*)</p>', data)
        warranty = self._searchWithin('(?i)<div class="warranty">(.*?)</div>', '(?i)<p>([^<]*)</p>', data)

        # Download and save the product image.
        productImageUrl = self.regex.getSearchedData(
            '(?i)src="(http://assets.cs-catering-equipment.co.uk/media/catalog/product/cache/1/image/256x/[^"]*)"',
            data)
        productImage = self._downloadImage(productImageUrl, 'product_image/', 'Product Image')

        # Download and save the brand image; the 'logo/' path segment is dropped from its URL first.
        brandImage = ''
        brandImageUrl = self.regex.getSearchedData(
            '(?i)<div class="manufacturer-box-left"><a href="[^"]*"[^>]*?><img src="([^"]*)"', data)
        if brandImageUrl:
            brandImageUrl = self.regex.replaceData('(?i)logo/', '', brandImageUrl)
            brandImage = self._downloadImage(brandImageUrl, 'brand_image/', 'Brand Image')

        csvData = [url, productCode, productName, manufacturer, listPrice, price, savePrice,
                   productShortDesc, productFullDesc, productTechnicalDesc, warranty, delivery,
                   productImage, category1Name, category2Name, category3Name, category4Name, brandImage]
        self.csvWriter.writeCsvRow(csvData)
        self.logger.debug(unicode(csvData))
        self.notifyProduct.emit('<b>Product Details: %s</b>' % unicode(csvData))

    def downloadFile(self, url, downloadPath, retry=0):
        """Download *url* to *downloadPath*, reporting progress via ``notifyProduct``.

        Args:
            url: address of the file to fetch.
            downloadPath: local target path; its directory is created if missing.
            retry: accepted for interface compatibility; currently unused.

        Returns:
            True when the whole file was received (or, if the server sent no
            usable Content-Length, when at least some data was received),
            False otherwise.
        """
        self.notifyProduct.emit('<b>File URL: %s.</b>' % url)
        resp = None
        try:
            socket.setdefaulttimeout(10)
            opener = urllib2.build_opener(urllib2.HTTPRedirectHandler(),
                                          urllib2.HTTPHandler(debuglevel=0),
                                          urllib2.HTTPSHandler(debuglevel=0))
            opener.addheaders = [
                ('User-Agent', 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:14.0) Gecko/20100101 Firefox/14.0.1')]
            urllib2.install_opener(opener)
            resp = opener.open(url, timeout=30)

            # The size is only used for progress reporting and the final
            # completeness check, so a missing header must not abort the download.
            try:
                rawLength = resp.info()['Content-Length']
            except KeyError:
                rawLength = None
            totalSize = 0.0
            if rawLength:
                digits = self.regex.getSearchedData(r'(?i)^(\d+)', rawLength)
                if digits:
                    totalSize = float(digits)
            totalSizeKB = totalSize / 1024

            directory = os.path.dirname(downloadPath)
            if directory and not os.path.exists(directory):
                os.makedirs(directory)

            currentSize = 0
            # 'with' guarantees the file is closed on every exit path.
            with open(downloadPath, 'wb') as dl_file:
                while True:
                    data = resp.read(self._CHUNK_SIZE)
                    if not data:
                        break
                    currentSize += len(data)
                    dl_file.write(data)
                    if totalSize > 0:
                        notifyDl = '===> Downloaded ' + str(round(float(currentSize * 100) / totalSize, 2)) + \
                                   '% of ' + str(totalSizeKB) + ' KB.'
                    else:
                        notifyDl = '===> Downloaded ' + str(round(currentSize / 1024.0, 2)) + ' KB.'
                    self.notifyProduct.emit('<b>%s</b>' % notifyDl)

            if totalSize > 0:
                return currentSize >= totalSize
            return currentSize > 0
        except Exception as x:
            # Best effort: a failed download must not stop the scraping run.
            self.logger.error(x)
            return False
        finally:
            if resp is not None:
                resp.close()
class Nisbets:
    """Scraper that exports the Nisbets brand listings to ``nisbets.csv``.

    Starting from the main page it follows the left-navigation category
    links, then the "Brand" links of each category, and writes one CSV row
    (category, image URL, product code, name, price) per listed product.
    """

    def __init__(self):
        self.logger = LogManager(__name__)
        self.spider = Spider()
        self.regex = Regex()
        self.csvWriter = Csv('nisbets.csv')
        self.mainUrl = 'http://www.nisbets.co.uk'
        csvHeaderList = ['Category', 'Product Image Url', 'Product Code', 'Product Name', 'Price']
        self.csvWriter.writeCsvRow(csvHeaderList)

    def _fetchPage(self, url):
        """Fetch *url* and normalise its whitespace; return None when nothing came back."""
        data = self.spider.fetchData(url)
        if not data:
            return None
        data = self.regex.reduceNewLine(data)
        return self.regex.reduceBlankSpace(data)

    def _linksWithin(self, url, chunkPattern):
        """Return the hrefs found inside the first *chunkPattern* match on the page at *url*."""
        data = self._fetchPage(url)
        if not data:
            return []
        chunk = self.regex.getSearchedData(chunkPattern, data)
        if not chunk:
            return []
        return self.regex.getAllSearchedData('(?i)<a href="([^"]*)"', chunk) or []

    def scrapData(self):
        """Entry point: follow every category link of the main page's left navigation."""
        self.logger.debug('===== URL [' + self.mainUrl + '] =====')
        for href in self._linksWithin(self.mainUrl, '(?i)<div class="cms-left-nav-category">(.*?)</ul>'):
            self.scrapLinkData(self.mainUrl + href)

    def scrapLinkData(self, link):
        """Follow every link of the "Brand" list found on the category page *link*."""
        self.logger.debug('== Link URL [' + link + '] ==')
        brandLinks = self._linksWithin(link, '(?i)<h3>Brand</h3> <ul class="subCat02 clear-fix">(.*?)</ul>')
        for href in brandLinks:
            self.scrapInfo(self.mainUrl + href)

    def scrapInfo(self, link):
        """Write one CSV row for every product listed on the page *link*.

        Fields that cannot be found on a product row are written as empty
        strings instead of aborting the whole page.
        """
        self.logger.debug('= Info URL [' + link + '] =')
        data = self._fetchPage(link)
        if not data:
            return
        category = self.regex.getSearchedData(
            '(?i)<li><h3>Category</h3></li> <li class="remCont"> <span class="block">([^<]*)</span>', data)
        allInfo = self.regex.getAllSearchedData(
            '(?i)<div class="product-list-row clear-after">(.*?)</fieldset>', data)
        for info in allInfo or []:
            imageUrl = ''
            productCode = ''
            grpData = self.regex.getSearchedDataGroups(
                '(?i)<img class="primaryImage" src="([^"]*)" alt="([^"]*)" />', info)
            # NOTE(review): presumably a regex match object that is None when
            # the row has no primary image - guard before calling .group().
            if grpData:
                if grpData.group(1):
                    # Swap the listing thumbnail for the largest image variant.
                    imageUrl = self.mainUrl + self.regex.replaceData('(?i)medium', 'xlarge', grpData.group(1))
                productCode = grpData.group(2)
            name = self.regex.getSearchedData('(?i)<h3 class="product-name"> <a href="[^"]*">([^<]*)</a>', info)
            price = self.regex.getSearchedData(
                u'(?i)<div class="reduced-price"> <span class="bold">([^<]*)</span>', info)
            # Drop the leading currency symbol; tolerate rows without a price.
            price = price.strip()[1:] if price else ''
            csvData = [category, imageUrl, productCode, name, price]
            self.logger.debug('Scraped Data ' + str(csvData))
            self.csvWriter.writeCsvRow(csvData)
class NisbetProduct(QtCore.QThread):
    """Background thread that scrapes the full Nisbets product catalogue.

    The site is walked through four category levels. For every product not
    already present in ``nisbets.csv`` the product image and, when offered,
    the user manual and exploded diagram are downloaded, and one CSV row is
    appended. Progress is reported as HTML snippets through the
    ``scrapProductData`` signal; ``stop()`` requests a cooperative shutdown.
    """

    scrapProductData = QtCore.pyqtSignal(object)
    stopThread = QtCore.pyqtSignal(int)

    def __init__(self):
        QtCore.QThread.__init__(self)
        self.isExiting = False
        self.logger = LogManager(__name__)
        self.spider = Spider()
        self.regex = Regex()
        dupCsvReader = Csv()
        # Values already in the CSV, compared against product URLs to skip duplicates.
        self.dupCsvRows = dupCsvReader.readCsvRow('nisbets.csv', 0)
        self.csvWriter = Csv('nisbets.csv')
        self.mainUrl = 'http://www.nisbets.co.uk'
        # 18 columns, matching the rows written by _scrapListedProduct.
        csvHeaderList = [
            'URL', 'Product Code', 'Product Technical Specifications',
            'Product Name', 'Brand', 'Product Price',
            'Product Short Description', 'Product Long Description',
            'Image File Name', 'User Manual File Name',
            'Exploded View File Name', 'Spares Code', 'Accessories',
            'Product Status', 'Category1', 'Category2', 'Category3',
            'Category4',
        ]
        if 'URL' not in self.dupCsvRows:
            self.csvWriter.writeCsvRow(csvHeaderList)
            self.dupCsvRows.append(csvHeaderList[0])
        self.utils = Utils()

    def run(self):
        """Thread entry point."""
        self.scrapData()

    def stop(self):
        """Ask the scraping methods to return at their next checkpoint."""
        self.isExiting = True

    def _fetchPage(self, url):
        """Fetch *url* and normalise its whitespace; return None when nothing came back."""
        data = self.spider.fetchData(url)
        if not data:
            return None
        data = self.regex.reduceNewLine(data)
        return self.regex.reduceBlankSpace(data)

    def _topCategoryLinks(self, url):
        """Return the (href, title) links of the "topCat" list on the page at *url*."""
        data = self._fetchPage(url)
        if not data:
            return []
        chunk = self.regex.getSearchedData('(?i)<ul class="topCat clear-fix">(.*?)</ul>', data)
        if not chunk:
            return []
        return self.regex.getAllSearchedData('(?i)<a href="([^"]*)">([^<]*)<', chunk) or []

    def _relatedCodes(self, url):
        """Return the product codes listed by an Ajax spares/accessories page, comma separated."""
        chunk = self.spider.fetchData(url)
        if not chunk:
            return ''
        codes = self.regex.getAllSearchedData(
            '(?i)<p class="code"><span class="bold">Code:</span>([^<]*)</p>', chunk)
        return ', '.join(codes) if codes else ''

    def _downloadDocument(self, documentsChunk, linkText, directory):
        """Download the document linked as "Download <linkText>"; return its local file name or ''."""
        href = self.regex.getSearchedData(
            '(?i)<a class="document-icon" href="([^"]*)"[^>]*?>Download %s</a>' % linkText, documentsChunk)
        if not href:
            return ''
        documentUrl = self.mainUrl + self.regex.replaceData(' ', '%20', href)
        self.logger.debug('%s URL: %s' % (linkText, documentUrl))
        self.scrapProductData.emit('<b>%s PDF URL: </b>%s' % (linkText, documentUrl))
        fileName = self.regex.getSearchedData('(?i)/([a-zA-Z0-9-_. ]*)$', href)
        if not fileName:
            return ''
        # Local file names must not contain whitespace.
        fileName = self.regex.replaceData(r'\s+', '_', fileName.strip())
        self.scrapProductData.emit(
            '<font color=green><b>Downloading %s: </b>%s <b>Please Wait...</b>' % (linkText, fileName))
        self.utils.downloadFile(documentUrl, directory + fileName)
        return fileName

    def scrapData(self):
        """Walk the first two category levels found in the main page's menu."""
        if self.isExiting:
            return
        self.scrapProductData.emit(
            '<font color=green><b>Main URL: </b>%s</font>' % self.mainUrl)
        self.logger.debug('===== URL [' + self.mainUrl + '] =====')
        data = self._fetchPage(self.mainUrl)
        if data:
            category1Chunk = self.regex.getAllSearchedData(
                r'(?i)<li id="li-id-\d+">(.*?)</ul> </li>', data)
            for category1Data in category1Chunk or []:
                category1 = self.regex.getSearchedData(
                    '(?i)<a href="[^"]*">([^<]*)</a>', category1Data)
                category2Chunk = self.regex.getAllSearchedData(
                    '(?i)<li><a href="([^"]*)">([^<]*)</a>', category1Data)
                for category2Data in category2Chunk or []:
                    self.scrapCategory2Data(
                        self.mainUrl + category2Data[0], category1, category2Data[1])
        self.scrapProductData.emit(
            '<font color=red><b>Finish Scraping Product data from %s</b></font>' % self.mainUrl)

    def scrapCategory2Data(self, url, category1, category2):
        """Descend from a second-level category page into its third-level categories."""
        if self.isExiting:
            return
        self.scrapProductData.emit('<b>Category 2 URL: </b>%s' % url)
        self.logger.debug('== Category 2 URL [' + url + '] ==')
        for category3Data in self._topCategoryLinks(url):
            self.scrapCategory3Data(
                self.mainUrl + category3Data[0], category1, category2, category3Data[1])

    def scrapCategory3Data(self, url, category1, category2, category3):
        """Descend from a third-level category page into its fourth-level categories."""
        if self.isExiting:
            return
        self.scrapProductData.emit('<b>Category 3 URL: </b>%s' % url)
        self.logger.debug('== Category 3 URL [' + url + '] ==')
        for category4Data in self._topCategoryLinks(url):
            category4Url = self.mainUrl + category4Data[0]
            self.scrapCategory4Data(category4Url, category1, category2, category3, category4Data[1])

    def scrapCategory4Data(self, url, category1, category2, category3, category4):
        """Scrape every product listed on a fourth-level category page.

        Products whose URL is already recorded are skipped individually; the
        remaining products of the page are still processed.
        """
        if self.isExiting:
            return
        self.scrapProductData.emit('<b>Category 4 URL: </b>%s' % url)
        self.logger.debug('== Category 4 URL [' + url + '] ==')
        data = self._fetchPage(url)
        if not data:
            return
        categoryChunk = self.regex.getAllSearchedData(
            '(?i)<div class="product-list-row clear-after">(.*?)</fieldset>', data)
        for categoryData in categoryChunk or []:
            if self.isExiting:
                return
            self._scrapListedProduct(categoryData, category1, category2, category3, category4)

    def _scrapListedProduct(self, categoryData, category1, category2, category3, category4):
        """Scrape one product row of a listing page and append it to the CSV."""
        productInfo = self.regex.getSearchedDataGroups(
            '(?i)<h3 class="product-name"> <a href="([^"]*)"[^>]*?>([^<]*)</a>', categoryData)
        if not productInfo:
            # Row without a product link: nothing to scrape.
            return
        productUrl = self.mainUrl + productInfo.group(1)
        productName = productInfo.group(2)
        if productUrl in self.dupCsvRows:
            self.scrapProductData.emit(
                '<font color=green><b>Already exists this item in csv Skip it</b></font>')
            self.logger.debug('========= Already exists this item Skip it ===========')
            return
        self.dupCsvRows.append(productUrl)

        productImage = ''
        productCode = ''
        productImageInfo = self.regex.getSearchedDataGroups(
            '(?i)<img class="primaryImage" src="([^"]*)" alt="([^"]*)"', categoryData)
        if productImageInfo:
            # Swap the listing thumbnail for the largest image variant.
            image = self.regex.replaceData('(?i)medium', 'xlarge', str(productImageInfo.group(1)))
            productImage = self.regex.getSearchedData('(?i)/([a-zA-Z0-9-_.]*)$', image) or ''
            if productImage:
                self.utils.downloadFile(self.mainUrl + image, 'images/' + productImage)
            productCode = productImageInfo.group(2)

        productTechSpecs = self.regex.getSearchedData(
            '(?i)<p class="description">([^<]*)</p>', categoryData)
        brandName = self.regex.getSearchedData(
            '(?i)<img class="brand-image" src="[^"]*" alt="([^"]*)"', categoryData)
        price = self.regex.getSearchedData(
            '(?i)<div class="reduced-price"> <span class="[^"]*">([^<]*)</span>', categoryData)
        # Drop the leading currency symbol; tolerate rows without a price.
        price = price.strip()[1:].strip() if price else ''
        productStatus = self.regex.getSearchedData(
            '(?i)<div class="availibility"> <img alt="([^"]*)"', categoryData)

        self.scrapProductData.emit(
            '<br /><font color=green><b>Product Details URL: </b>%s</font>' % productUrl)
        (productDesc, productLongDesc, spareCodes,
         accessoryCode, userManual, explodedView) = self._scrapProductPage(productUrl)

        csvData = [
            productUrl, productCode, productTechSpecs, productName, brandName,
            price, productDesc, productLongDesc, productImage,
            userManual, explodedView, spareCodes, accessoryCode,
            productStatus, category1, category2, category3, category4,
        ]
        self.csvWriter.writeCsvRow(csvData)
        self.logger.debug('Scraped data ' + str(csvData))
        self.scrapProductData.emit(
            '<div><b>Scraped Data: </b>%s<br /></div>' % str(csvData))

    def _scrapProductPage(self, productUrl):
        """Collect the detail-page fields of one product.

        Returns a tuple ``(short description, long description, spare codes,
        accessory codes, user manual file name, exploded view file name)``;
        fields that could not be obtained are empty strings.
        """
        productDesc = ''
        productLongDesc = ''
        spareCodes = ''
        accessoryCode = ''
        userManual = ''
        explodedView = ''

        productChunk = self._fetchPage(productUrl)
        if not productChunk:
            return productDesc, productLongDesc, spareCodes, accessoryCode, userManual, explodedView

        productDesc = self.regex.getSearchedData(
            '(?i)<div class="productDesc"> <h1 class="[^"]*"[^>]*?>[^<]*?</h1>.*?<p>([^<]*)</p>', productChunk)
        productLongDesc = self.regex.getSearchedData(
            '(?i)<div class="info-product[^>]*?>(.*?)</div>', productChunk)

        # The Ajax endpoints live next to the product page: strip the last path component.
        otherUrl = self.regex.getSearchedData('(?i)(^.*?/)[a-zA-Z0-9._-]*?$', productUrl)
        if not otherUrl:
            return productDesc, productLongDesc, spareCodes, accessoryCode, userManual, explodedView
        self.logger.debug('== Common Product URL [' + otherUrl + '] ==')

        sparesUrl = otherUrl + "AjaxProductSpares.raction"
        self.logger.debug('== Spares URL [' + sparesUrl + '] ==')
        spareCodes = self._relatedCodes(sparesUrl)

        accessoriesUrl = otherUrl + "AjaxProductAccessories.raction"
        self.logger.debug('== Accessories URL [' + accessoriesUrl + '] ==')
        accessoryCode = self._relatedCodes(accessoriesUrl)

        docUrl = otherUrl + "AjaxProductDocuments.raction"
        self.logger.debug('== Document URL[' + docUrl + '] ==')
        documents = self.spider.fetchData(docUrl)
        if documents:
            userManual = self._downloadDocument(documents, 'User Manual', 'user_manual/')
            explodedView = self._downloadDocument(documents, 'Exploded Diagram', 'exploded_view/')

        return productDesc, productLongDesc, spareCodes, accessoryCode, userManual, explodedView