Пример #1
0
def downloadAvailableEMails():
    """Download new e-mails, archive them and dump their job offers to SQL.

    The start date is the maximum ``pDate`` stored in the table named by
    ``N.JOBS_DETAIL_TABLENAME``.  When the query yields no usable value,
    1 January 2000 (zero UTC offset) is used instead.  Every e-mail
    returned by ``EMailDownloader.getEmails(since=startDate)`` is pickled
    into a timestamped file under ``N.MAILARCHIVES``, parsed, and the
    resulting jobs are handed to ``EMailSqlDump.dumpJobOffersToSQL``.

    Returns:
        The number of e-mails downloaded.  When it is 0, nothing is
        archived and nothing is written to SQL.
    """
    tableName = N.JOBS_DETAIL_TABLENAME
    startDate = datetime.datetime(2000,
                                  1,
                                  1,
                                  tzinfo=dateutil.tz.tzoffset(None, 0))

    connection = MySQLdb.connect(host=N.SQL_HOST_NAME,
                                 user=N.SQL_USER_NAME,
                                 passwd=N.SQL_PASSWD,
                                 db=N.SQL_DB)
    # The connection is only needed for this one lookup; release it (and
    # the cursor) even if the query raises.
    try:
        cursor = connection.cursor()
        try:
            # A table name cannot be bound as a query parameter; the value
            # comes from N.JOBS_DETAIL_TABLENAME, not from user input.
            sqlCMD = "SELECT max(pDate) from %s" % (tableName)
            cursor.execute(sqlCMD)
            results = cursor.fetchall()
        finally:
            cursor.close()
    finally:
        connection.close()

    # max() over an empty table yields a single row holding NULL/None.
    if len(results) == 1 and results[0][0] is not None:
        startDate = results[0][0]

    emails = list(EMailDownloader.getEmails(since=startDate))
    if not emails:
        return 0

    archivePath = N.MAILARCHIVES + "MailArchive%s.pkl" % (
        datetime.datetime.now())
    # Use a context manager so the archive is flushed and closed
    # deterministically instead of relying on garbage collection.
    with open(archivePath, 'wb') as archiveFile:
        cPickle.dump(emails, archiveFile)

    jobs = []
    for email in emails:
        jobs.extend(email.parse())
    EMailSqlDump.dumpJobOffersToSQL(jobs)

    return len(emails)
Пример #2
0
 def __init__(self, timestamp, startDate, process_step_count,
              interval, scheduler=None):
     """Set up the update job's initial state.

     ``interval`` and ``scheduler`` are forwarded unchanged to the base
     class initializer.  ``timestamp`` is stored after conversion with
     ``int()``; ``startDate`` and ``process_step_count`` are stored as
     given.  The object returned by
     ``EMailDownloader.getEmails(since=startDate)`` is kept on the
     instance for later use.
     """
     super(UpdateJob, self).__init__(interval, scheduler)

     # Values taken from the caller.
     self.startDate = startDate
     self.timestamp = int(timestamp)
     self.process_step_count = process_step_count

     # Source of e-mails received since startDate.
     emailSource = EMailDownloader.getEmails(since=startDate)
     self.getEmailsGenerator = emailSource

     # No database handle yet and nothing processed so far.
     self.db, self.processed_count = None, 0
Пример #3
0
def downloadAvailableEMails():
    """Fetch e-mails newer than the latest stored job and store their offers.

    The newest ``pDate`` in the table named by ``N.JOBS_DETAIL_TABLENAME``
    is used as the start date; if the query returns no usable value the
    start date falls back to 1 January 2000 (zero UTC offset).  The
    e-mails returned by ``EMailDownloader.getEmails(since=startDate)`` are
    pickled to a timestamped file under ``N.MAILARCHIVES``, parsed, and the
    parsed jobs are passed to ``EMailSqlDump.dumpJobOffersToSQL``.

    Returns:
        The number of e-mails downloaded.  If there are none, 0 is
        returned before anything is archived or written to SQL.
    """
    tableName = N.JOBS_DETAIL_TABLENAME
    startDate = datetime.datetime(2000, 1, 1,
                                  tzinfo=dateutil.tz.tzoffset(None, 0))

    connection = MySQLdb.connect(host=N.SQL_HOST_NAME,
                                 user=N.SQL_USER_NAME,
                                 passwd=N.SQL_PASSWD,
                                 db=N.SQL_DB)
    # Close the cursor and the connection even when the query fails, so
    # repeated calls do not leak database connections.
    try:
        cursor = connection.cursor()
        try:
            # Table names cannot be passed as bound parameters; the value
            # is read from N.JOBS_DETAIL_TABLENAME, not from user input.
            sqlCMD = "SELECT max(pDate) from %s" % (tableName)
            cursor.execute(sqlCMD)
            results = cursor.fetchall()
        finally:
            cursor.close()
    finally:
        connection.close()

    # max() over an empty table yields a single row holding NULL/None.
    if len(results) == 1 and results[0][0] is not None:
        startDate = results[0][0]

    emails = list(EMailDownloader.getEmails(since=startDate))
    if not emails:
        return 0

    archivePath = N.MAILARCHIVES + "MailArchive%s.pkl" % (
        datetime.datetime.now())
    # The context manager guarantees the archive file is closed.
    with open(archivePath, 'wb') as archiveFile:
        cPickle.dump(emails, archiveFile)

    jobs = []
    for email in emails:
        jobs.extend(email.parse())
    EMailSqlDump.dumpJobOffersToSQL(jobs)

    return len(emails)
Пример #4
0
            job.URL = jobOfferParts['URL'].strip(' ')

            job.fillInMissingEntries('\n'.join(self.jobHeader))
            
            #if job.salary_lower == None:
            #    print "Error!"
            #    print '\n'.join(jobOffer)
            #    print "*"*20
            jobs.append(job)
        return jobs

if __name__ == '__main__':
    # Manual run: build (once) a fixed pickle archive of the e-mails
    # received since 2012-06-07 23:24:34, then parse that archive and dump
    # the resulting job offers to SQL.
    archivePath = N.MAILARCHIVES + "MailArchive2012-06-08 17-49-05.296393.pkl"

    if not os.path.exists(archivePath):
        emails = list(EMailDownloader.getEmails(
            since=datetime.datetime(2012, 6, 7, 23, 24, 34)))
        print(len(emails))
        # The archive must be opened for binary writing: the default read
        # mode raises on a file that does not exist yet.
        with open(archivePath, 'wb') as archiveFile:
            cPickle.dump(emails, archiveFile)

    # NOTE: unpickling executes arbitrary code; only load archives that
    # were written by this script.
    with open(archivePath, 'rb') as archiveFile:
        emails = cPickle.load(archiveFile)

    jobs = []
    for email in emails:
        # Parse each e-mail once and reuse the result.
        jobsToAdd = email.parse()
        if not jobsToAdd:
            # Report e-mails that produced no job offers.
            print(email.dateReceivedStr)
        jobs.extend(jobsToAdd)
    EMailSqlDump.dumpJobOffersToSQL(jobs)