Примеры использования WidthParser на Python

Язык программирования: Python

Пространство имен/Пакет: htmlParsers.WidthParser

Класс/Тип: WidthParser

Примеров на hotexamples.com: 2

Python WidthParser — найдено 2 примера. Это лучшие примеры кода на Python для htmlParsers.WidthParser.WidthParser, взятые из проектов с открытым исходным кодом. Вы можете оценить каждый пример, чтобы помочь нам улучшить качество подборки.

Основные методы

Показать Скрыть

feed(2)

Пример #1

Показать файл

Файл: dumpQuadrantData.py Проект: evgeniyarbatov/dev-practice

def main(argv):
	"""Run quadrantCount for every campaign found under the base directory.

	argv is handed to readArgs, which must return (basedir, commonXLSStore).
	Campaign directories are looked up in <basedir>/<company>/CTR. For each
	campaign the page width is parsed from the campaign HTML with WidthParser
	and the page height is read from the `campaigns` collection of the
	`marketing_campaigns` database on the local MongoDB.

	A missing campaign HTML or CTR file raises OSError inside the loop; it is
	reported as 'No CTR found for <company>' and the remaining campaigns of
	that company are skipped. A campaign without a stored height is reported
	and skipped on its own.

	NOTE(review): ctrFile is not defined in this function - presumably a
	module-level constant; confirm.
	NOTE(review): the counts returned by quadrantCount are not used here.
	"""
	(basedir, commonXLSStore) = readArgs(argv)
	client = pymongo.MongoClient("localhost", 27017)
	db = client.marketing_campaigns

	for company in getDirs(basedir):
		excelPath = basedir + '/' + company + '/' + 'CTR'

		try:
			for campaign in getDirs(excelPath):
				baseCapmaignPath = excelPath + '/' + str(campaign)
				# The file checks below use paths relative to the campaign directory.
				# NOTE(review): this changes the process cwd, so a relative basedir
				# would presumably stop resolving on later iterations - confirm.
				os.chdir(baseCapmaignPath)

				# Fall back to the alternative HTML location when the first is absent.
				campaignFilename = '2_Campaign_files/html.html'
				if not os.path.isfile(campaignFilename):
					campaignFilename = '2_Campaign.html'
				if not os.path.isfile(campaignFilename):
					raise OSError

				wParser = WidthParser()
				wParser.feed(file2String(campaignFilename))
				pageWidth = wParser.contentWidth

				if not os.path.isfile(ctrFile):
					raise OSError

				dbRecord = list(db.campaigns.find({ 'path': baseCapmaignPath }))
				if not dbRecord:
					# Without this guard dbRecord[0] raised an IndexError that the
					# OSError handler below does not catch, aborting the whole run.
					print('WARN: no stored page height for ' + baseCapmaignPath)
					continue
				pageHeight = int(dbRecord[0]['height'])

				(quadrantClickCount, quadrantPercentCount) = quadrantCount(ctrFile, pageHeight, pageWidth)

		except OSError:
			print('No CTR found for ' + company)

Пример #2

Показать файл

Файл: dir-iterate.py Проект: evgeniyarbatov/dev-practice

def main(argv):
	"""Write one <company>_CTR.xlsx workbook per company under the base directory.

	argv is handed to readArgs, which must return (basedir, commonXLSStore);
	both are stored as module globals. For each company the campaign
	directories in <basedir>/<company>/CTR are sorted numerically and written
	one per row: row counts of the *.xls files, statistics parsed from the
	campaign HTML, values parsed from the campaign details file, the stored
	page height and the per-quadrant click counts returned by quadrantCount.

	Page heights are read from the `campaigns` collection of the local
	MongoDB. When none is stored for a campaign, the HTML is passed to
	appendJSToHTML and opened with `open -g`; the quadrant columns are then
	left at zero and a 'WARN: 2nd run required' message is printed.

	An OSError raised while processing a company is reported as
	'No CTR found for <company>' and processing moves on to the next company.

	NOTE(review): excelHeader, headerIndex, ctrFile and campgainDetailsFile are
	not defined in this function - presumably module-level names; confirm.
	"""
	global basedir, commonXLSStore
	(basedir, commonXLSStore) = readArgs(argv)

	# Start MongoDB and the node server that saves page dimensions
	os.system('launchctl load ~/Library/LaunchAgents/org.mongodb.mongod.plist')
	os.system('launchctl start org.mongodb.mongod')
	os.system('forever start local-server/app.js')

	# Mongo database stores page heights received from the JS script
	client = pymongo.MongoClient("localhost", 27017)
	db = client.marketing_campaigns

	# (column label prefix, key in the dictionaries returned by quadrantCount)
	quadrantColumns = (('1_NE', 'I'), ('2_NW', 'II'), ('3_SW', 'III'), ('4_SE', 'IV'))

	for company in getDirs(basedir):
		excelPath = basedir + '/' + company + '/' + 'CTR'

		try:
			campaignList = getDirs(excelPath)

			# Sort campaign IDs numerically to make the sheet look nicer
			camaignIDs = sorted(int(k) for k in campaignList)

			# Create Excel file and add common header row
			workbook = xlsxwriter.Workbook(excelPath + '/' + company+'_CTR.xlsx')
			worksheet = workbook.add_worksheet()
			for i, header in enumerate(excelHeader):
				headerIndex[header] = i
				worksheet.write(0, i, header)

			# Iterate campaigns and add info to Excel
			for j, campaignID in enumerate(camaignIDs):
				baseCapmaignPath = excelPath + '/' + str(campaignID)
				# Mind the header row
				excelRow = j + 1

				worksheet.write(excelRow, headerIndex['Campaign code'], campaignID)

				# Initialize XLS values to zeros
				for z in range(headerIndex['Campaign code'] + 1, len(excelHeader)):
					worksheet.write(excelRow, z, 0)

				# Get stats based on XLS files
				# NOTE(review): this changes the process cwd, so a relative basedir
				# would presumably stop resolving on later iterations - confirm.
				os.chdir(baseCapmaignPath)
				for xlsFile in glob.glob('*.xls'):
					# Strip only the trailing extension (raw, anchored pattern)
					statName = re.sub(r'\.xls$', '', xlsFile)

					# Excel filenames usually match Excel column name
					try:
						headerIndex[statName]
					except KeyError:
						statName = findClosestString(statName, excelHeader)
						if statName is None:
							continue

					rowCount = countExcelRows(baseCapmaignPath + '/' + xlsFile)
					worksheet.write(excelRow, headerIndex[statName], rowCount)

				# Get stats based on HTML files
				parser = CampaignHTMLParser()
				wParser = WidthParser()
				pageWidth = pageHeight = 0

				# For some companies there is only 2_Campaign.html
				campaignFilename = '2_Campaign_files/html.html'
				if not os.path.isfile(campaignFilename):
					campaignFilename = '2_Campaign.html'

				if os.path.isfile(campaignFilename):
					parser.feed(file2String(campaignFilename))

					# 0.000976562 approximates 1/1024 (bytes -> KB)
					totalPageSize = parser.imageSizeKb + round(os.path.getsize(campaignFilename) * 0.000976562)
					worksheet.write(excelRow, headerIndex['Page size (KB)'], totalPageSize)

					subjectWordCount = len(parser.emailSubject.split())
					bodyWordCount = len(parser.emailBody.split()) - subjectWordCount

					worksheet.write(excelRow, headerIndex['Subject line'], parser.emailSubject)
					worksheet.write(excelRow, headerIndex['Number of words in subject line'], subjectWordCount)
					worksheet.write(excelRow, headerIndex['Number of links'], parser.linkCount)
					worksheet.write(excelRow, headerIndex['Number of words in body'], bodyWordCount)
					worksheet.write(excelRow, headerIndex['Number of images'], parser.imageCount)

					# Save body text as separate file (return value is not needed here)
					saveBodyText(parser.emailBody)

					# Estimate width of the page
					wParser.feed(file2String(campaignFilename))
					pageWidth = wParser.contentWidth
					worksheet.write(excelRow, headerIndex['Page width (px)'], pageWidth)
				else:
					print('No ' + baseCapmaignPath + '/' + campaignFilename)

				# Get delivery date
				camgainDetailsParser = CampaignHTMLParser()

				if not os.path.isfile(campgainDetailsFile):
					# Exact name not present: try the closest file name in the directory
					dirFiles = glob.glob('*')
					matchedFilename = findClosestString(campgainDetailsFile, dirFiles)

					if matchedFilename is None:
						print('No ' + baseCapmaignPath + '/' + campgainDetailsFile)
					else:
						camgainDetailsParser.feed(file2String(matchedFilename))
				else:
					camgainDetailsParser.feed(file2String(campgainDetailsFile))

				worksheet.write(excelRow, headerIndex['Delivery date'], camgainDetailsParser.delieverDate)
				worksheet.write(excelRow, headerIndex['Unique people opened'], camgainDetailsParser.uniqueOpened)
				worksheet.write(excelRow, headerIndex['Total clicks'], camgainDetailsParser.totalClicks)
				worksheet.write(excelRow, headerIndex['CTR'], camgainDetailsParser.ctr)

				# Get page height. A missing campaign HTML was already reported above,
				# so nothing is printed here (the old message wrongly named the CTR file).
				if os.path.isfile(campaignFilename):
					dbRecord = list(db.campaigns.find({ 'path': baseCapmaignPath }))

					# Calculate page height only if not previously calculated
					if not dbRecord:
						appendJSToHTML(campaignFilename, baseCapmaignPath)
						os.system('open -g ' + campaignFilename)
					else:
						pageHeight = int(dbRecord[0]['height'])
						worksheet.write(excelRow, headerIndex['Page length (px)'], pageHeight)

				# Get CTR counts
				if os.path.isfile(ctrFile):
					if pageHeight and pageWidth:
						(quadrantClickCount, quadrantPercentCount) = quadrantCount(ctrFile, pageHeight, pageWidth)

						for label, quadrant in quadrantColumns:
							worksheet.write(excelRow, headerIndex[label + ' number of clicks'], quadrantClickCount[quadrant])
							worksheet.write(excelRow, headerIndex[label + ' percent of clicks'], quadrantPercentCount[quadrant])
					else:
						print('WARN: 2nd run required pageHeight= %s pageWidth= %s %s' % (pageHeight, pageWidth, baseCapmaignPath))
				else:
					print('No ' + baseCapmaignPath + '/' + ctrFile)

			workbook.close()

			# Debugging: copy XLS files to common place
			os.system('cp "' + excelPath + '/' + company+'_CTR.xlsx" ' + commonXLSStore)

		except OSError:
			print('No CTR found for ' + company)

	# Stop the node server started at the top
	os.system('forever stop local-server/app.js')