def main(argv):
    """Compute per-quadrant click counts for every campaign of every company.

    ``argv`` is forwarded to ``readArgs``, which yields ``(basedir,
    commonXLSStore)``.  Every directory returned by ``getDirs(basedir)`` is
    treated as a company whose campaigns live under ``<company>/CTR``.

    The counts returned by ``quadrantCount`` are computed and then discarded;
    nothing is written or returned by this function.

    A missing campaign HTML file or a missing ``ctrFile`` aborts the rest of
    that company (reported as 'No CTR found for <company>'), as does any other
    ``OSError`` raised while walking its directories.
    """
    (basedir, commonXLSStore) = readArgs(argv)

    client = pymongo.MongoClient("localhost", 27017)
    db = client.marketing_campaigns

    # Each campaign is processed from inside its own directory.  Remember
    # where we started so the working directory can be restored afterwards;
    # otherwise a relative basedir stops resolving after the first chdir.
    startDir = os.getcwd()

    for company in getDirs(basedir):
        excelPath = basedir + '/' + company + '/' + 'CTR'
        try:
            for campaign in getDirs(excelPath):
                baseCapmaignPath = excelPath + '/' + str(campaign)
                os.chdir(baseCapmaignPath)
                try:
                    _campaignQuadrantCounts(db, baseCapmaignPath)
                finally:
                    os.chdir(startDir)
        except OSError:
            print('No CTR found for ' + company)


def _campaignQuadrantCounts(db, baseCapmaignPath):
    """Return ``quadrantCount`` for the campaign in the current directory.

    Must be called with the working directory set to the campaign directory:
    the campaign HTML file and ``ctrFile`` are looked up by relative name.

    Returns the ``(click counts, percent counts)`` pair produced by
    ``quadrantCount``, or ``None`` when the database holds no record for
    ``baseCapmaignPath``.

    Raises ``OSError`` when the campaign HTML file or ``ctrFile`` is missing.
    """
    # Prefer the file inside 2_Campaign_files, fall back to 2_Campaign.html.
    campaignFilename = '2_Campaign_files/html.html'
    if not os.path.isfile(campaignFilename):
        campaignFilename = '2_Campaign.html'
    if not os.path.isfile(campaignFilename):
        raise OSError(campaignFilename)

    wParser = WidthParser()
    wParser.feed(file2String(campaignFilename))
    pageWidth = wParser.contentWidth

    if not os.path.isfile(ctrFile):
        raise OSError(ctrFile)

    # The page height is read from the 'height' field of the record stored
    # under this campaign's path.
    dbRecord = db.campaigns.find_one({'path': baseCapmaignPath})
    if dbRecord is None:
        # Previously an uncaught IndexError; skip the campaign instead.
        print('WARN: no stored page height for ' + baseCapmaignPath)
        return None

    pageHeight = int(dbRecord['height'])
    return quadrantCount(ctrFile, pageHeight, pageWidth)
def main(argv):
    """Write one ``<company>_CTR.xlsx`` workbook per company directory.

    ``argv`` is forwarded to ``readArgs``, which yields ``(basedir,
    commonXLSStore)``; both are published as module globals.  For every
    directory returned by ``getDirs(basedir)`` the campaigns found under
    ``<company>/CTR`` are sorted by numeric ID and written one per row, below
    a header row taken from ``excelHeader``.  The finished workbook is also
    copied to ``commonXLSStore``.

    MongoDB and the ``local-server/app.js`` node server are started up front;
    the node server is stopped again before returning, even on error.

    Any ``OSError`` raised while processing a company abandons that company's
    workbook and is reported as 'No CTR found for <company>'.
    """
    global basedir, commonXLSStore
    # Function-scope import: the module's import block is outside this view.
    import shutil

    (basedir, commonXLSStore) = readArgs(argv)

    # Campaigns are processed from inside their own directory, so a relative
    # basedir would stop resolving after the first chdir.  Anchor it to the
    # start directory; an absolute basedir is left untouched so the paths
    # used as database keys do not change.
    startDir = os.getcwd()
    if not os.path.isabs(basedir):
        basedir = os.path.join(startDir, basedir)

    # Start MongoDB and the node server that saves page dimensions
    os.system('launchctl load ~/Library/LaunchAgents/org.mongodb.mongod.plist')
    os.system('launchctl start org.mongodb.mongod')
    os.system('forever start local-server/app.js')

    try:
        # Mongo database stores page heights received from the JS script
        client = pymongo.MongoClient("localhost", 27017)
        db = client.marketing_campaigns

        for company in getDirs(basedir):
            excelPath = basedir + '/' + company + '/' + 'CTR'
            xlsxPath = excelPath + '/' + company + '_CTR.xlsx'
            try:
                # Sort campaign IDs to make it look nicer
                camaignIDs = sorted(int(k) for k in getDirs(excelPath))

                # Create Excel file and add common header row
                workbook = xlsxwriter.Workbook(xlsxPath)
                worksheet = workbook.add_worksheet()
                for column, title in enumerate(excelHeader):
                    headerIndex[title] = column
                    worksheet.write(0, column, title)

                # Iterate campaigns and add info to Excel
                for position, campaignID in enumerate(camaignIDs):
                    baseCapmaignPath = excelPath + '/' + str(campaignID)
                    os.chdir(baseCapmaignPath)
                    try:
                        # Mind the header row: campaign rows start at 1
                        _writeCampaignRow(db, worksheet, position + 1,
                                          campaignID, baseCapmaignPath)
                    finally:
                        # Leave the campaign directory so later relative
                        # paths resolve from the start directory again.
                        os.chdir(startDir)

                workbook.close()
            except OSError:
                print('No CTR found for ' + company)
                continue

            # Debugging: copy XLS files to common place.  Best effort only,
            # a failed copy must not abort the remaining companies.
            try:
                shutil.copy(xlsxPath, os.path.expanduser(commonXLSStore))
            except (IOError, OSError) as err:
                print('WARN: could not copy ' + xlsxPath + ': ' + str(err))
    finally:
        # Stop the node server that saves page dimensions
        os.system('forever stop local-server/app.js')


def _writeCampaignRow(db, worksheet, excelRow, campaignID, baseCapmaignPath):
    """Fill worksheet row ``excelRow`` with the statistics of one campaign.

    Must be called with the working directory set to the campaign directory.
    """
    codeColumn = headerIndex['Campaign code']
    worksheet.write(excelRow, codeColumn, campaignID)

    # Initialize XLS values to zeros
    for column in range(codeColumn + 1, len(excelHeader)):
        worksheet.write(excelRow, column, 0)

    _writeXlsRowCounts(worksheet, excelRow, baseCapmaignPath)

    # Prefer the file inside 2_Campaign_files, fall back to 2_Campaign.html
    campaignFilename = '2_Campaign_files/html.html'
    if not os.path.isfile(campaignFilename):
        campaignFilename = '2_Campaign.html'
    hasCampaignHtml = os.path.isfile(campaignFilename)

    pageWidth = pageHeight = 0
    if hasCampaignHtml:
        pageWidth = _writeHtmlStats(worksheet, excelRow, campaignFilename)
    else:
        print('No ' + baseCapmaignPath + '/' + campaignFilename)

    _writeDeliveryStats(worksheet, excelRow, baseCapmaignPath)

    # The missing HTML file was already reported above; the old code printed
    # a second message here that wrongly named ctrFile.
    if hasCampaignHtml:
        pageHeight = _lookupPageHeight(db, worksheet, excelRow,
                                       campaignFilename, baseCapmaignPath)

    _writeQuadrantStats(worksheet, excelRow, baseCapmaignPath,
                        pageHeight, pageWidth)


def _writeXlsRowCounts(worksheet, excelRow, baseCapmaignPath):
    """Write the row count of every ``*.xls`` file into its matching column."""
    for xlsFile in glob.glob('*.xls'):
        # Excel filenames usually match Excel column name
        statName = os.path.splitext(xlsFile)[0]
        if statName not in headerIndex:
            statName = findClosestString(statName, excelHeader)
            if statName is None:
                continue
        rowCount = countExcelRows(baseCapmaignPath + '/' + xlsFile)
        worksheet.write(excelRow, headerIndex[statName], rowCount)


def _writeHtmlStats(worksheet, excelRow, campaignFilename):
    """Write the campaign-HTML statistics and return the page width."""
    parser = CampaignHTMLParser()
    parser.feed(file2String(campaignFilename))

    # Image size reported by the parser plus the HTML file's own size in KB
    htmlSizeKb = round(os.path.getsize(campaignFilename) / 1024.0)
    totalPageSize = parser.imageSizeKb + htmlSizeKb
    worksheet.write(excelRow, headerIndex['Page size (KB)'], totalPageSize)

    subjectWordCount = len(parser.emailSubject.split())
    # The subject's word count is subtracted from the body's word count
    bodyWordCount = len(parser.emailBody.split()) - subjectWordCount
    worksheet.write(excelRow, headerIndex['Subject line'], parser.emailSubject)
    worksheet.write(excelRow, headerIndex['Number of words in subject line'],
                    subjectWordCount)
    worksheet.write(excelRow, headerIndex['Number of links'], parser.linkCount)
    worksheet.write(excelRow, headerIndex['Number of words in body'],
                    bodyWordCount)
    worksheet.write(excelRow, headerIndex['Number of images'],
                    parser.imageCount)

    # Save body text as separate file
    saveBodyText(parser.emailBody)

    # Estimate width of the page
    wParser = WidthParser()
    wParser.feed(file2String(campaignFilename))
    pageWidth = wParser.contentWidth
    worksheet.write(excelRow, headerIndex['Page width (px)'], pageWidth)
    return pageWidth


def _writeDeliveryStats(worksheet, excelRow, baseCapmaignPath):
    """Write delivery date, unique opens, total clicks and CTR for the row."""
    detailsParser = CampaignHTMLParser()

    # Fall back to the closest-named file when the expected one is absent
    detailsFile = campgainDetailsFile
    if not os.path.isfile(detailsFile):
        detailsFile = findClosestString(campgainDetailsFile, glob.glob('*'))

    if detailsFile is None:
        print('No ' + baseCapmaignPath + '/' + campgainDetailsFile)
    else:
        detailsParser.feed(file2String(detailsFile))

    # Written even when nothing was fed to the parser, as before
    worksheet.write(excelRow, headerIndex['Delivery date'],
                    detailsParser.delieverDate)
    worksheet.write(excelRow, headerIndex['Unique people opened'],
                    detailsParser.uniqueOpened)
    worksheet.write(excelRow, headerIndex['Total clicks'],
                    detailsParser.totalClicks)
    worksheet.write(excelRow, headerIndex['CTR'], detailsParser.ctr)


def _lookupPageHeight(db, worksheet, excelRow, campaignFilename,
                      baseCapmaignPath):
    """Return the stored page height, or 0 after requesting a measurement."""
    dbRecord = db.campaigns.find_one({'path': baseCapmaignPath})
    if dbRecord is None:
        # Calculate page height only if not previously calculated: the page
        # is opened in the background and the height is picked up next run.
        appendJSToHTML(campaignFilename, baseCapmaignPath)
        os.system('open -g ' + campaignFilename)
        return 0

    pageHeight = int(dbRecord['height'])
    worksheet.write(excelRow, headerIndex['Page length (px)'], pageHeight)
    return pageHeight


def _writeQuadrantStats(worksheet, excelRow, baseCapmaignPath, pageHeight,
                        pageWidth):
    """Write click counts and percentages for the four page quadrants."""
    if not os.path.isfile(ctrFile):
        print('No ' + baseCapmaignPath + '/' + ctrFile)
        return

    if not (pageHeight and pageWidth):
        print('WARN: 2nd run required pageHeight= %s pageWidth= %s %s'
              % (pageHeight, pageWidth, baseCapmaignPath))
        return

    (quadrantClickCount, quadrantPercentCount) = quadrantCount(
        ctrFile, pageHeight, pageWidth)

    # Column-name prefix paired with the key used by quadrantCount's results
    quadrants = (('1_NE', 'I'), ('2_NW', 'II'), ('3_SW', 'III'), ('4_SE', 'IV'))
    for prefix, quadrant in quadrants:
        worksheet.write(excelRow, headerIndex[prefix + ' number of clicks'],
                        quadrantClickCount[quadrant])
        worksheet.write(excelRow, headerIndex[prefix + ' percent of clicks'],
                        quadrantPercentCount[quadrant])