def downloadAvailableEMails():
    """Download new e-mails, archive them to disk and store their job offers.

    The newest ``pDate`` found in ``N.JOBS_DETAIL_TABLENAME`` is used as the
    ``since`` bound for ``EMailDownloader.getEmails``; when the query yields
    no usable value the bound falls back to 2000-01-01 at UTC offset 0.
    The downloaded e-mails are pickled to a timestamped file under
    ``N.MAILARCHIVES``, and everything returned by ``email.parse()`` is
    handed to ``EMailSqlDump.dumpJobOffersToSQL``.

    Returns:
        The number of e-mails downloaded. When it is 0 nothing is archived
        and nothing is written to SQL.
    """
    tableName = N.JOBS_DETAIL_TABLENAME
    startDate = datetime.datetime(
        2000, 1, 1, tzinfo=dateutil.tz.tzoffset(None, 0))

    # The connection is only needed for the max(pDate) lookup, so release
    # the cursor and the connection as soon as the result has been fetched,
    # even if the query fails.
    connection = MySQLdb.connect(host=N.SQL_HOST_NAME,
                                 user=N.SQL_USER_NAME,
                                 passwd=N.SQL_PASSWD,
                                 db=N.SQL_DB)
    try:
        cursor = connection.cursor()
        try:
            # The table name comes from project configuration; identifiers
            # cannot be bound as query parameters, hence the interpolation.
            sqlCMD = "SELECT max(pDate) from %s" % (tableName)
            cursor.execute(sqlCMD)
            results = cursor.fetchall()
        finally:
            cursor.close()
    finally:
        connection.close()

    if len(results) == 1 and results[0][0] is not None:
        startDate = results[0][0]

    emails = list(EMailDownloader.getEmails(since=startDate))
    if not emails:
        return 0

    # NOTE(review): str(datetime.now()) contains ':' characters, which are
    # not valid in Windows file names -- confirm the target platform.
    archivePath = N.MAILARCHIVES + "MailArchive%s.pkl" % (datetime.datetime.now())
    with open(archivePath, 'wb') as archiveFile:
        cPickle.dump(emails, archiveFile)

    jobs = []
    for email in emails:
        jobs.extend(email.parse())
    EMailSqlDump.dumpJobOffersToSQL(jobs)
    return len(emails)
def finish(self, cursor):
    """Write the summary tables and trigger the update e-mail for this run.

    Collects ``self.fetchJobOffers()`` into a list, passes it with
    ``self.timestamp`` to ``EMailSqlDump.dumpJobOfferSummary`` and
    ``EMailSqlDump.dumpBand5OfferSummary``, then calls
    ``EMailUpdates.sendEmailUpdate()``.

    Args:
        cursor: forwarded as ``cursor=`` to both summary writers.

    Returns:
        True when every step completed. False when there were no job
        offers or any step raised an ``Exception``; the failure is logged
        together with its traceback.
    """
    try:
        jobOffers = list(self.fetchJobOffers())
        if not jobOffers:
            # Routed through the handler below so the empty case is logged
            # exactly like any other failure.
            raise Exception("!No job offers!")
        EMailSqlDump.dumpJobOfferSummary(
            jobOffers, self.timestamp,
            tableName=N.JOBS_SUMMARY_TABLENAME, cursor=cursor)
        EMailSqlDump.dumpBand5OfferSummary(
            jobOffers, self.timestamp,
            tableName=N.JOBS_BAND5_SUMMARY_TABLENAME, cursor=cursor)
        EMailUpdates.sendEmailUpdate()
        logging.info("Finish, processed count: %s", self.processed_count)
        return True
    except Exception:
        # Not a bare ``except``: KeyboardInterrupt and SystemExit must
        # propagate instead of being reported as a failed job.
        logging.exception("Failed to finish UpdateJob")
        return False
def process_email(self, email, cursor):
    """Persist the job offers obtained from ``email.parse()``.

    Offers whose ``str(uniqueURLID)`` is a key of the mapping returned by
    ``EMailSqlDump.getPostedJobsAndCounts`` have that count incremented and
    written back through ``EMailSqlDump.updateRepostedCount``; all other
    offers are passed to ``EMailSqlDump.dumpJobOffer`` with
    ``self.timestamp``.

    Returns:
        False when the e-mail yields no job offers (a debug message is
        logged), True otherwise.
    """
    offers = email.parse()
    if not len(offers):
        receivedAt = str(email.dateReceivedStr).strip()
        subject = str(email.subject).strip()
        logging.debug("Could not find jobOffers for email at: " + receivedAt
                      + ", subject: " + subject)
        return False

    detailTable = N.JOBS_DETAIL_TABLENAME
    postedCounts = EMailSqlDump.getPostedJobsAndCounts(
        cursor, detailTable, offers)

    for offer in offers:
        urlKey = str(offer.uniqueURLID)
        if urlKey not in postedCounts:
            EMailSqlDump.dumpJobOffer(offer,
                                      tableName=detailTable,
                                      cursor=cursor,
                                      timestamp=self.timestamp)
            continue
        # Bump the in-memory count first so the value written back already
        # includes this occurrence.
        postedCounts[urlKey] += 1
        EMailSqlDump.updateRepostedCount(
            cursor, detailTable, int(offer.uniqueURLID), postedCounts[urlKey])
    return True
def downloadAvailableEMails():
    """Download new e-mails, archive them to disk and store their job offers.

    The newest ``pDate`` found in ``N.JOBS_DETAIL_TABLENAME`` is used as the
    ``since`` bound for ``EMailDownloader.getEmails``; when the query yields
    no usable value the bound falls back to 2000-01-01 at UTC offset 0.
    The downloaded e-mails are pickled to a timestamped file under
    ``N.MAILARCHIVES``, and everything returned by ``email.parse()`` is
    handed to ``EMailSqlDump.dumpJobOffersToSQL``.

    Returns:
        The number of e-mails downloaded. When it is 0 nothing is archived
        and nothing is written to SQL.
    """
    tableName = N.JOBS_DETAIL_TABLENAME
    startDate = datetime.datetime(
        2000, 1, 1, tzinfo=dateutil.tz.tzoffset(None, 0))

    # The connection is only needed for the max(pDate) lookup, so release
    # the cursor and the connection as soon as the result has been fetched,
    # even if the query fails.
    connection = MySQLdb.connect(host=N.SQL_HOST_NAME,
                                 user=N.SQL_USER_NAME,
                                 passwd=N.SQL_PASSWD,
                                 db=N.SQL_DB)
    try:
        cursor = connection.cursor()
        try:
            # The table name comes from project configuration; identifiers
            # cannot be bound as query parameters, hence the interpolation.
            sqlCMD = "SELECT max(pDate) from %s" % (tableName)
            cursor.execute(sqlCMD)
            results = cursor.fetchall()
        finally:
            cursor.close()
    finally:
        connection.close()

    if len(results) == 1 and results[0][0] is not None:
        startDate = results[0][0]

    emails = list(EMailDownloader.getEmails(since=startDate))
    if not emails:
        return 0

    # NOTE(review): str(datetime.now()) contains ':' characters, which are
    # not valid in Windows file names -- confirm the target platform.
    archivePath = N.MAILARCHIVES + "MailArchive%s.pkl" % (datetime.datetime.now())
    with open(archivePath, 'wb') as archiveFile:
        cPickle.dump(emails, archiveFile)

    jobs = []
    for email in emails:
        jobs.extend(email.parse())
    EMailSqlDump.dumpJobOffersToSQL(jobs)
    return len(emails)
job.URL = jobOfferParts['URL'].strip(' ') job.fillInMissingEntries('\n'.join(self.jobHeader)) #if job.salary_lower == None: # print "Error!" # print '\n'.join(jobOffer) # print "*"*20 jobs.append(job) return jobs if __name__ == '__main__': emails = [] if not os.path.exists(N.MAILARCHIVES + "MailArchive2012-06-08 17-49-05.296393.pkl"): for email in EMailDownloader.getEmails(since=datetime.datetime(2012,6,7,23,24,34)): emails.append(email) print len(emails) cPickle.dump(emails, open(N.MAILARCHIVES + "MailArchive2012-06-08 17-49-05.296393.pkl")) emails = cPickle.load(open(N.MAILARCHIVES + "MailArchive2012-06-08 17-49-05.296393.pkl","rb")) jobs = [] for email in emails: jobsToAdd = email.parse() if len(jobsToAdd) == 0: print email.dateReceivedStr jobs.extend(email.parse()) #for job in jobs: # print job.pDate EMailSqlDump.dumpJobOffersToSQL(jobs)