def __init__(self, input_file, output_file):
    """Remember the I/O file paths and build the scraping helpers."""
    # Helper objects used throughout the scraper.
    self.logger = LogManager(__name__)
    self.spider = Spider()
    self.regex = Regex()
    self.utils = Utils()
    # Paths supplied by the caller.
    self.input_file = input_file
    self.output_file = output_file
def __init__(self, spider, memberList, subject, message):
    """Worker thread initialised with a shared Spider, a member list,
    and the subject/message text it will use."""
    QThread.__init__(self)
    # self.spider = Spider()
    self.spider = spider  # shared Spider session supplied by the caller
    self.regex = Regex()
    self.memberList = memberList
    # Normalise both strings to unicode up front (Python 2 str/unicode mix).
    self.subject = unicode(subject)
    self.message = unicode(message)
def __init__(self):
    """Prepare a QWebView with rich-content settings enabled and a printer;
    convertToPdf runs once the page finishes loading."""
    QObject.__init__(self)
    self.regex = Regex()
    self.title = ''
    self.webView = QWebView()
    # Enable everything needed to render the page fully before printing.
    viewSettings = self.webView.settings()
    for flag in (QWebSettings.AutoLoadImages,
                 QWebSettings.JavascriptEnabled,
                 QWebSettings.PluginsEnabled,
                 QWebSettings.DeveloperExtrasEnabled):
        viewSettings.setAttribute(flag, True)
    self.pdfPrinter = QPrinter()
    # Fire the PDF conversion as soon as loading completes.
    self.webView.loadFinished.connect(self.convertToPdf)
def __init__(self, filename):
    """Topsy scraper setup: helpers, search URL, and the CSV output file
    with its header row."""
    self.logger = LogManager(__name__)
    self.spider = Spider()
    self.regex = Regex()
    self.utils = Utils()
    self.filename = filename
    self.url = 'http://topsy.com/s?'
    # Output file; header written immediately.
    # NOTE(review): ' Date of scrape' keeps its original leading space.
    self.csvWriter = Csv('topsy.csv')
    self.csvWriter.writeCsvRow([
        'Keyword',
        'Tweets in last 30 days',
        'Topsy Sentiment Score',
        ' Date of scrape',
    ])
def __init__(self):
    """Product scraper setup: helpers, the CSV column layout, and a
    running product counter."""
    self.logger = LogManager(__name__)
    self.spider = Spider()
    self.browser = BrowserUtil()
    self.regex = Regex()
    self.utils = Utils()
    # Column order for every product row written to the CSV.
    self.csvHeader = [
        'Category', 'Sub Category 1', 'Sub Category 2',
        'Product Code', 'Product Name', 'Product ShortName',
        'Product Description', 'List Price', 'Vendor Price',
        'Availability', 'Power', 'Size', 'KW', 'Weight(kg)',
        'Other Tech', 'Pdf File', 'Image File',
    ]
    self.totalProducts = 0  # incremented as products are scraped
def __init__(self, filename):
    """Google Finance scraper setup: helpers, base URLs, and the CSV
    output file with its header row."""
    self.logger = LogManager(__name__)
    self.spider = Spider()
    self.regex = Regex()
    self.utils = Utils()
    self.filename = filename
    # Query endpoint and site root used to resolve relative links.
    self.url = 'https://www.google.com/finance?'
    self.main_url = 'https://www.google.com'
    self.csvWriter = Csv('google_finance.csv')
    self.csvWriter.writeCsvRow([
        'Ticker Symbol',
        'Quarter End',
        'Revenue',
        'Total Revenue',
        'Date of Scrape',
    ])
def __init__(self, spider, url, pageRange=None):
    """Scraper thread for `url`.

    `pageRange` may be 'start-end' or a single page number; anything
    else leaves startPage/endPage as None.
    """
    QThread.__init__(self)
    # self.spider = Spider()
    self.spider = spider  # shared Spider session supplied by the caller
    self.regex = Regex()
    self.url = url
    self.startPage = None
    self.endPage = None
    rangeText = str(pageRange).strip()
    if self.regex.isFoundPattern('(?i)(\d+)-(\d+)', rangeText):
        # "start-end" form.
        found = self.regex.getSearchedDataGroups('(?i)(\d+)-(\d+)', rangeText)
        self.startPage = int(found.group(1))
        self.endPage = int(found.group(2))
    elif self.regex.isFoundPattern('(?i)(\d+)', rangeText):
        # Single page: start and end coincide.
        found = self.regex.getSearchedDataGroups('(?i)(\d+)', rangeText)
        self.startPage = int(found.group(1))
        self.endPage = self.startPage
def __init__(self):
    """paodeacucar.com.br scraper thread: helpers, URLs, and CSV output
    with URL-based duplicate detection."""
    QThread.__init__(self)
    self.logger = LogManager(__name__)
    self.spider = Spider()
    self.regex = Regex()
    self.utils = Utils()
    self.mainUrl = 'http://www.paodeacucar.com.br/'
    self.url = 'http://www.paodeacucar.com.br/'
    # dupCsvRows holds the values of column 4 ('URL') of the existing CSV,
    # used to skip already-scraped pages.
    dupCsvReader = Csv()
    self.dupCsvRows = dupCsvReader.readCsvRow('paodeacucar.csv', 4)
    self.csvWriter = Csv('paodeacucar.csv')
    csvDataHeader = ['SKU', 'Category', 'Subcategory', 'Name', 'URL',
                     'URL Image', 'Details', 'Nutrients Table html code',
                     'Price from, 28/abr/14', '28/abr/14']
    if 'URL' not in self.dupCsvRows:
        # BUG FIX: the original appended the whole header LIST into a list
        # of column strings; record only the header's URL-column value,
        # matching the dedup scheme used by the other scrapers.
        self.dupCsvRows.append(csvDataHeader[4])
        self.csvWriter.writeCsvRow(csvDataHeader)
def __init__(self, parent=None):
    """MainForm setup: reset all per-run state for the A and B download
    flows, then build the UI."""
    super(MainForm, self).__init__(parent)
    self.regex = Regex()
    # Click guards for the two flows.
    self.alreadyClickedA = False
    self.alreadyClickedB = False
    # Selected directories/files (None until chosen).
    self.fileDir = None
    self.fileDirB = None
    self.fileName = None
    self.fileNameB = None
    # Progress counters for each flow.
    self.totalUrlA = 0
    self.totalUrlB = 0
    self.currentUrlA = 0
    self.currentUrlB = 0
    self.pdfCounter = 1
    self.pdfCounterB = 1
    self.typeName = 'B'
    self.setupUI()
def __init__(self):
    """Environment-clearance scraper setup: search URL, filter lists,
    and CSV output with whole-row duplicate detection."""
    self.browser = None
    self.url = "http://environmentclearance.nic.in/Search.aspx"
    # Filter values discovered from the search form at runtime.
    self.statuses = []
    self.categories = []
    self.years = []
    self.states = []
    self.csvDataHeader = [
        'Status', 'Category', 'Year', 'State', 'Serial No',
        'Proposal details', 'Location', 'Important Date', 'Category',
        'Company Proponent',
    ]
    self.logger = LogManager(__name__)
    self.regex = Regex()
    # dupCsvRows holds full rows of the existing CSV for dedup checks.
    dupCsvReader = Csv()
    self.dupCsvRows = dupCsvReader.readCsvRow('env_clearance.csv')
    self.csvWriter = Csv('env_clearance.csv')
    if self.csvDataHeader not in self.dupCsvRows:
        self.csvWriter.writeCsvRow(self.csvDataHeader)
        self.dupCsvRows.append(self.csvDataHeader)
def downloadFile(self, url, downloadPath, proxyHandler=None):
    """Download `url` to `downloadPath`, printing chunk-by-chunk progress.

    Creates the destination directory if missing. Returns True when the
    full Content-Length was received, False on any error or short read.
    """
    try:
        regex = Regex()
        opener = urllib2.build_opener(urllib2.HTTPRedirectHandler(),
                                      urllib2.HTTPHandler(debuglevel=0),
                                      urllib2.HTTPSHandler(debuglevel=0))
        opener.addheaders = [
            config.USER_AGENT,
            ('Accept', 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'),
            ('Connection', 'keep-alive')]
        if proxyHandler is not None:
            opener.add_handler(proxyHandler)
        # BUG FIX: the original called urllib2.urlopen(), which ignored the
        # opener entirely -- the custom headers and proxyHandler were never
        # applied. opener.open() actually uses them.
        resp = opener.open(url, timeout=30)
        contentLength = resp.info()['Content-Length']
        contentLength = regex.getSearchedData('(?i)^(\d+)', contentLength)
        totalSize = float(contentLength)
        directory = os.path.dirname(downloadPath)
        if directory and not os.path.exists(directory):
            os.makedirs(directory)
        currentSize = 0
        CHUNK_SIZE = 32768
        dl_file = open(downloadPath, 'wb')
        try:
            while True:
                data = resp.read(CHUNK_SIZE)
                if not data:
                    break
                currentSize += len(data)
                dl_file.write(data)
                print('============> ' +
                      str(round(float(currentSize * 100) / totalSize, 2)) +
                      '% of ' + str(totalSize) + ' bytes')
                if currentSize >= totalSize:
                    return True
        finally:
            # BUG FIX: always close the output file; the original leaked the
            # handle on exceptions and on short downloads.
            dl_file.close()
        return False  # server closed the stream before Content-Length bytes
    except Exception as x:
        print(x)
        return False
def __init__(self):
    """Nisbets scraper thread: helpers, CSV output, and URL-based
    duplicate detection against the existing nisbets.csv."""
    QtCore.QThread.__init__(self)
    self.isExiting = False  # cooperative stop flag checked by the run loop
    self.logger = LogManager(__name__)
    self.spider = Spider()
    self.regex = Regex()
    # dupCsvRows holds the values of column 0 ('URL') of the existing CSV.
    dupCsvReader = Csv()
    self.dupCsvRows = dupCsvReader.readCsvRow('nisbets.csv', 0)
    self.csvWriter = Csv('nisbets.csv')
    self.mainUrl = 'http://www.nisbets.co.uk'
    csvHeaderList = [
        'URL', 'Product Code', 'Product Technical Specifications',
        'Product Name', 'Brand', 'Product Price',
        'Product Short Description', 'Product Long Description',
        'Image File Name', 'User Manual File Name',
        'Exploded View File Name', 'Spares Code', 'Accessories',
        # BUG FIX: the original was missing the comma after
        # 'Product Status', so implicit string concatenation produced a
        # single 'Product StatusCategory1' cell and shifted the category
        # columns left by one.
        'Product Status',
        'Category1', 'Category2', 'Category3', 'Category4',
    ]
    if 'URL' not in self.dupCsvRows:
        self.csvWriter.writeCsvRow(csvHeaderList)
        self.dupCsvRows.append(csvHeaderList[0])
    self.utils = Utils()
def __init__(self):
    """Create the regex helper this object relies on."""
    self.regex = Regex()
def __init__(self, username, password):
    """Worker thread initialised with login credentials and fresh
    scraping helpers."""
    QThread.__init__(self)
    self.spider = Spider()
    self.regex = Regex()
    # Credentials used by the login step later on.
    self.username = username
    self.password = password