def __init__(self):
    """Connect to the ``tap_sg`` MySQL database and prepare the translate URLs.

    Sets the following attributes on ``self``:
      * ``mysql_conn``    - the ``MySQLConnect`` handle.
      * ``articles``      - result of ``select * from wp_posts limit 2``.
      * ``TRANSLATE_URL`` - Google translate v2 endpoint with the key appended.
      * ``DETECT_URL``    - Google language-detect v2 endpoint with the key
                            appended (the text is added later as ``&q=...``).
    """
    # 1. MySQL connection; positional order used here is
    #    (database, host, user, password).
    connection_args = ("tap_sg", "127.0.0.1", "******", "******")
    self.mysql_conn = MySQLConnect(*connection_args)
    self.articles = self.mysql_conn.query('''select * from wp_posts limit 2''')

    # 2. Google API endpoints used for Thai -> English translation.
    # NOTE(review): the API key is hard-coded in source; consider loading it
    # from configuration instead.
    API_KEY = "AIzaSyBGGfOdtKFhlJ1w2bitjsj194jUKIxoPT0"
    translate_prefix = "https://www.googleapis.com/language/translate/v2?key="
    detect_prefix = "https://www.googleapis.com/language/translate/v2/detect?key="
    self.TRANSLATE_URL = translate_prefix + API_KEY
    self.DETECT_URL = detect_prefix + API_KEY
def preProcessChunk(chunkID):

    print 'Connecting to Mongodb..'
    tableName = 'jobs_status_check'
    monconn_status_check = MongoConnect(tableName,
                                        host='localhost',
                                        database='jam_status')
    monconn_status_check_cur = monconn_status_check.getCursor()

    ######################################
    '''Fetching the Jobs from SQL'''
    ######################################

    #Connect to SQL table and get the jobs data
    host = "172.22.65.157"
    user = "******"
    password = "******"
    database = "SumoPlus"
    unix_socket = "/tmp/mysql.sock"
    port = 3308

    print "Loading Jobs From MySql...."
    mysql_conn = MySQLConnect(database, host, user, password, unix_socket,
                              port)
    #cmd = '''SELECT rj.jobid as Jobid,rj.jobtitle as JobTitle,rj.description as JD,la1.text_value_MAX as SalaryMax,la2.text_value_MIN as SalaryMin,le1.display as ExpMin,le2.display as ExpMax,li.industry_desc as Industry,c.AttValueCustom as keySkills,l.city_desc as location,fn.field_enu as function,fn.sub_field_enu as subfunction from recruiter_job AS rj left join lookup_annualsalary AS la1 on rj.salarymax = la1.salary_id left join  lookup_annualsalary AS la2 on rj.salarymin = la2.salary_id left join lookup_experience AS le1 on rj.minexperience = le1.value left join  lookup_experience AS le2 on rj.maxexperience = le2.value left join recruiter_jobattribute as c on rj.jobid = c.jobid_id left join  lookup_industry AS li on rj.industry=li.industry_id left join lookup_subfunctionalarea_new163 AS fn on fn.sub_field_id = c.AttValue AND c.AttType = 12 left join lookup_city_new512 AS l on  l.city_id = c.AttValue AND c.AttType = 13 WHERE rj.jobstatus in (3,5,6,9) and c.AttType in (3,12,13) and (DATEDIFF( CURDATE(),DATE(rj.publisheddate)) < 4 OR DATEDIFF( CURDATE(),DATE(rj.republisheddate)) < 4)  and rj.jobid%''' + str(numChunks) + '=' + str(chunkID)
    #cmd = '''SELECT rj.jobid as Jobid,rj.jobtitle as JobTitle,rj.description as JD,la1.text_value_MAX as SalaryMax,la2.text_value_MIN as SalaryMin,le1.display as ExpMin,le2.display as ExpMax,li.industry_desc as Industry,c.AttValueCustom as keySkills,l.city_desc as location,fn.field_enu as function,fn.sub_field_enu as subfunction from recruiter_job AS rj left join lookup_annualsalary AS la1 on rj.salarymax = la1.salary_id left join  lookup_annualsalary AS la2 on rj.salarymin = la2.salary_id left join lookup_experience AS le1 on rj.minexperience = le1.value left join  lookup_experience AS le2 on rj.maxexperience = le2.value left join recruiter_jobattribute as c on rj.jobid = c.jobid_id left join  lookup_industry AS li on rj.industry=li.industry_id left join lookup_subfunctionalarea_new163 AS fn on fn.sub_field_id = c.AttValue AND c.AttType = 12 left join lookup_city_new512 AS l on  l.city_id = c.AttValue AND c.AttType = 13 WHERE rj.jobstatus in (3,5,6,9) and c.AttType in (3,12,13) and (DATEDIFF( CURDATE(),DATE(rj.publisheddate)) < 4 OR DATEDIFF( CURDATE(),DATE(rj.republisheddate)) < 4)'''
    #print cmd
    cmd1 = '''drop table if exists SumoPlus.XY'''
    cmd2 = '''create table SumoPlus.XY as 
         SELECT company_account_id,SUM(final_sale_price)as price,enabled,MAX(expiry_date)as expiry_date 
         from SumoPlus.backoffice_accountsales a1 
         where enabled in 
         (select min(enabled) from SumoPlus.backoffice_accountsales where a1.company_account_id=company_account_id)
         group by 1
        '''
    cmd3 = '''ALTER TABLE SumoPlus.XY add index company_account_id (company_account_id)'''
    cmd4 = '''SELECT
         rj.jobid as Jobid,
         rj.jobtitle as JobTitle,
         rj.description as JD,
         rj.isbocreated as back_office_job,
         rj.publisheddate as publisheddate,
         rj.republisheddate as republisheddate,
         rj.companyid_id as Company_id,
         rj.displayname as Company_name,
         la1.text_value_MAX as SalaryMax,
         la2.text_value_MIN as SalaryMin,
         le1.display as ExpMin,
         le2.display as ExpMax,
         li.industry_desc as Industry,
         group_concat(c.AttValueCustom,'') as keySkills,
         group_concat(fn.field_enu,'') as function,
         group_concat(l.city_desc,'') as location,
         group_concat(fn.sub_field_enu,'') as subfunction,
         case account_type
         when 0 THEN "Company"
         when 1 THEN "Consultant"
         when 2 THEN "Others"
         when 3 THEN "Enterprise"
         ELSE "Not Specified"
         END AS account_type,
         IF(XY.enabled = 1 AND XY.price != 0 AND XY.expiry_date > CURDATE(),'Paid','Free') AS 'flag'        
         
         from 
         (select * from recruiter_job 
            where recruiter_job.jobstatus in (3,9) 
            and (DATEDIFF( CURDATE(),DATE(recruiter_job.publisheddate)) < 8 OR DATEDIFF( CURDATE(),DATE(recruiter_job.republisheddate)) < 8) 
         ) AS rj 
         left join lookup_annualsalary AS la1 on rj.salarymax = la1.salary_id 
         left join  lookup_annualsalary AS la2 on rj.salarymin = la2.salary_id 
         left join lookup_experience AS le1 on rj.minexperience = le1.value 
         left join  lookup_experience AS le2 on rj.maxexperience = le2.value 
         left join recruiter_jobattribute as c on rj.jobid = c.jobid_id 
         left join  lookup_industry AS li on rj.industry=li.industry_id 
         left join lookup_subfunctionalarea_new163 AS fn on fn.sub_field_id = c.AttValue AND c.AttType = 12 
         left join lookup_city_new512 AS l on  l.city_id = c.AttValue AND c.AttType = 13 
         left join SumoPlus.XY AS XY on XY.company_account_id = rj.companyid_id
         left join SumoPlus.backoffice_companyaccount AS F on  F.id= rj.companyid_id       
         WHERE 
        
         c.AttType in (3,12,13) 
        
         group by rj.jobid
         '''

    cmd5 = '''drop table if exists SumoPlus.XY
        '''

    print 'chnukID:', chunkID, ': Loading jobs from SQL....', time.ctime()
    mysql_conn.query(cmd1)
    mysql_conn.query(cmd2)
    mysql_conn.query(cmd3)
    jobs = mysql_conn.query(cmd4)
    mysql_conn.query(cmd5)
    print 'chunkID:', chunkID, ': Loading jobs from SQL....completed..', time.ctime(
    )

    print 'chunkid:', chunkID, ' : Number of jobs loaded: ', len(jobs)

    ######################################
    '''Connecting to Mongo 233 Server'''
    ######################################

    print 'Connecting to Mongodb..'
    tableName = 'jobs_processed'
    monconn_jobs_local = MongoConnect(tableName,
                                      host='localhost',
                                      database='JobAlerts')
    monconn_jobs_local_cur = monconn_jobs_local.getCursor()
    print 'Connecting to Mongodb...finished'

    ######################################
    '''Processing the Jobs'''
    ######################################

    i = 0
    for job in jobs:
        #pprint(job)
        #print i
        if i % 1000 == 0:
            print '\tchunkID:', chunkID, ' numRecords:', i, ' completed in ', time.time(
            ) - start_time, ' seconds'

        job_id = job['Jobid']
        job_title = cleanToken(job['JobTitle'])
        job_maxexp = cleanToken(job['ExpMax'])
        job_minexp = cleanToken(job['ExpMin'])
        job_maxsal = cleanToken(job['SalaryMax'])
        job_minsal = cleanToken(job['SalaryMin'])
        job_jd = cleanHTML(cleanToken(job['JD']))
        job_industry = cleanToken(job['Industry'])
        job_location = removeDup(job['location'])
        job_subfunction = removeDup(cleanToken(job['subfunction']))
        job_function = removeDup(cleanToken(job['function']))
        job_skills = removeDup(cleanToken(job['keySkills']))
        job_flag = job['flag']
        job_accounttype = job['account_type']
        job_company_id = job['Company_id']
        job_company_name = cleanToken(job['Company_name'])
        job_published_date = job['publisheddate']
        job_republished_date = job['republisheddate']
        job_back_office = int(job['back_office_job'])

        if job_company_id == 421880:  ################## Altimetrik Jobs removed ##########################
            continue

        job_location = job_location.replace(', ', ',').lower().split(',')

        ##Extract additional fields like bow
        text = 5 * (" " + job_title) + ' ' + 5 * (
            " " + job_skills) + ' ' + 1 * (" " + job_jd) + ' ' + 2 * (
                " " + job_industry) + ' ' + 2 * (
                    " " + job_function) + ' ' + 2 * (" " + job_subfunction)
        text = text.replace('candidates', ' ')
        '''
        try:
            text = 5*(" "+job_title) + ' ' + 3*(" "+job_skills) + ' ' + 1*(" "+job_jd) +' '+2*(" "+job_industry)+' '+2*(" "+job_function)+' '+2*(" "+job_subfunction)
            text = text.replace('candidates', ' ')
            
        except:
            text = 5*(" "+job_title) + ' ' + 3*(" "+job_skills) + ' ' + 1*(" "+job_jd)
            text = text.replace('candidates', ' ')
        '''

        job_bow = mb.getBow(text, getbowdict=0)

        #job_keySkills = ','.join([x for x in jobKeySkills.split(',') if x.strip() != ''])

        #pprint(job_bow)
        document = {'job_id': job_id, 'job_title': job_title,'job_function':job_function, \
             'job_maxexp': job_maxexp, 'job_minexp': job_minexp,\
             'job_location':job_location, 'job_subfunction':job_subfunction,\
             'job_maxsal':job_maxsal,'job_minsal':job_minsal, 'job_skills': job_skills, \
             'job_bow': job_bow, 'job_industry': job_industry, 'job_jd': job_jd, \
             'job_flag':job_flag,'job_accounttype':job_accounttype, \
             'job_company_id':job_company_id,'job_company_name':job_company_name,
             'job_published':job_published_date,'job_republished':job_republished_date,'job_back_office':job_back_office
             }

        monconn_jobs_local.saveToTable(document)

        i += 1

    print "Processing finished....."
    print 'chunkID:', chunkID, ' Total time taken is: ', time.time(
    ) - start_time, ' seconds.'
    end_time = time.time()
    time_taken = end_time - start_time
    os.system(
        ' echo "Jobs Processed ' + str(i) + ' in :' +
        str(end_time - start_time) + ' seconds' +
        ' " | mutt -s "Job Alert Mailer " [email protected] ,[email protected]'
    )
    del (monconn_jobs_local)
    del (mysql_conn)
    monconn_status_check.saveToTable({'_id': 1, 'status': 1})
    del (monconn_status_check)
# Example #3
# 0
def preProcessChunk(chunkID):

    #########################################################################################################
    ############-----------------    SQL Credentials
    #########################################################################################################
    '''
    host="172.22.65.157"
    user="******"
    password="******"
    database="SumoPlus"
    unix_socket="/tmp/mysql.sock"
    port = 3308
    '''
    host = "172.22.66.204"
    user = "******"
    password = "******"
    database = "SumoPlus"
    unix_socket = "/tmp/mysql.sock"
    port = 3306

    #########################################################################################################
    ############-----------------    Creating the SQL Query
    #########################################################################################################
    print "Loading Jobs From MySql...."
    mysql_conn = MySQLConnect(database, host, user, password, unix_socket,
                              port)
    cmd1 = '''drop table if exists SumoPlus.XY'''
    cmd2 = '''create table SumoPlus.XY as 
         SELECT company_account_id,SUM(final_sale_price)as price,enabled,MAX(expiry_date)as expiry_date 
         from SumoPlus.backoffice_accountsales a1 
         where enabled in 
         (select min(enabled) from SumoPlus.backoffice_accountsales where a1.company_account_id=company_account_id)
         group by 1
        '''
    cmd3 = '''ALTER TABLE SumoPlus.XY add index company_account_id (company_account_id)'''
    cmd4 = '''SELECT
         rj.jobid as Jobid,
         rj.jobtitle as JobTitle,
         rj.description as JD,
         rj.isbocreated as back_office_job,
         rj.publisheddate as publisheddate,
         rj.republisheddate as republisheddate,
         rj.companyid_id as Company_id,
         rj.displayname as Company_name,
         la1.text_value_MAX as SalaryMax,
         la2.text_value_MIN as SalaryMin,
         le1.display as ExpMin,
         le2.display as ExpMax,
         li.industry_desc as Industry,
         group_concat(c.AttValueCustom,'') as keySkills,
         group_concat(fn.field_enu,'') as function,
         group_concat(l.city_desc,'') as location,
         group_concat(fn.sub_field_enu,'') as subfunction,
         case account_type
         when 0 THEN "Company"
         when 1 THEN "Consultant"
         when 2 THEN "Others"
         when 3 THEN "Enterprise"
         ELSE "Not Specified"
         END AS account_type,
         IF(XY.enabled = 1 AND XY.price != 0 AND XY.expiry_date > CURDATE(),'Paid','Free') AS 'flag'        
         
         from 
         (select * from recruiter_job 
            where recruiter_job.jobstatus in (3,9) 
            and (DATEDIFF( CURDATE(),DATE(recruiter_job.publisheddate)) < 16 OR DATEDIFF( CURDATE(),DATE(recruiter_job.republisheddate)) < 16) 
         ) AS rj 
         left join lookup_annualsalary AS la1 on rj.salarymax = la1.salary_id 
         left join  lookup_annualsalary AS la2 on rj.salarymin = la2.salary_id 
         left join lookup_experience AS le1 on rj.minexperience = le1.value 
         left join  lookup_experience AS le2 on rj.maxexperience = le2.value 
         left join recruiter_jobattribute as c on rj.jobid = c.jobid_id 
         left join  lookup_industry AS li on rj.industry=li.industry_id 
         left join lookup_subfunctionalarea_new163 AS fn on fn.sub_field_id = c.AttValue AND c.AttType = 12 
         left join lookup_city_new512 AS l on  l.city_id = c.AttValue AND c.AttType = 13 
         left join SumoPlus.XY AS XY on XY.company_account_id = rj.companyid_id
         left join SumoPlus.backoffice_companyaccount AS F on  F.id= rj.companyid_id       
         WHERE 
        
         c.AttType in (3,12,13) 
        
         group by rj.jobid
         '''

    cmd5 = '''drop table if exists SumoPlus.XY
        '''

    #########################################################################################################
    ############-----------------    Executing the SQL Query
    #########################################################################################################
    print 'chnukID:', chunkID, ': Loading jobs from SQL....', time.ctime()
    mysql_conn.query(cmd1)
    mysql_conn.query(cmd2)
    mysql_conn.query(cmd3)
    jobs = mysql_conn.query(cmd4)
    mysql_conn.query(cmd5)
    print 'chunkID:', chunkID, ': Loading jobs from SQL....completed..', time.ctime(
    )
    print 'chunkid:', chunkID, ' : Number of jobs loaded: ', len(jobs)

    #########################################################################################################
    ############-----------------Connecting to Jobs Collections Mongo (172.22.66.233)
    #########################################################################################################
    print 'Connecting to Mongodb..'
    tableName = 'jobs_processed'
    monconn_jobs_local = MongoConnect(tableName,
                                      host='localhost',
                                      database='mailer_weekly')
    monconn_jobs_local_cur = monconn_jobs_local.getCursor()
    print 'Connecting to Mongodb...finished'

    #########################################################################################################
    ############-----------------Processing the Jobs data extracted from SQL
    #########################################################################################################
    i = 0
    for job in jobs:
        if i % 1000 == 0:
            print '\tchunkID:', chunkID, ' numRecords:', i, ' completed in ', time.time(
            ) - start_time, ' seconds'
        job_id = job['Jobid']
        job_title = cleanToken(job['JobTitle'])
        job_maxexp = cleanToken(job['ExpMax'])
        job_minexp = cleanToken(job['ExpMin'])
        job_maxsal = cleanToken(job['SalaryMax'])
        job_minsal = cleanToken(job['SalaryMin'])
        job_jd = cleanHTML(cleanToken(job['JD']))
        job_industry = cleanToken(job['Industry'])
        job_location = removeDup(job['location'])
        job_subfunction = removeDup(cleanToken(job['subfunction']))
        job_function = removeDup(cleanToken(job['function']))
        job_skills = removeDup(cleanToken(job['keySkills']))
        job_flag = job['flag']
        job_accounttype = job['account_type']
        job_company_id = job['Company_id']
        job_company_name = cleanToken(job['Company_name'])
        job_published_date = job['publisheddate']
        job_republished_date = job['republisheddate']
        job_back_office = int(job['back_office_job'])
        job_location = job_location.replace(', ', ',').lower().split(',')
        if job_company_id == 421880:  #######---------- Altimetrik Jobs removed
            continue

        #########################################################################################################
        ############-----------------Creating Bag of Words for Text
        #########################################################################################################
        text = 5 * (" " + job_title) + ' ' + 5 * (
            " " + job_skills) + ' ' + 1 * (" " + job_jd) + ' ' + 2 * (
                " " + job_industry) + ' ' + 2 * (
                    " " + job_function) + ' ' + 2 * (" " + job_subfunction)
        text = text.replace('candidates', ' ')
        job_bow = mb.getBow(text, getbowdict=0)

        #########################################################################################################
        ############-----------------Creating Job document to be saved in Mongo
        #########################################################################################################
        document = {'job_id': job_id, 'job_title': job_title,'job_function':job_function, \
             'job_maxexp': job_maxexp, 'job_minexp': job_minexp,\
             'job_location':job_location, 'job_subfunction':job_subfunction,\
             'job_maxsal':job_maxsal,'job_minsal':job_minsal, 'job_skills': job_skills, \
             'job_bow': job_bow, 'job_industry': job_industry, 'job_jd': job_jd, \
             'job_flag':job_flag,'job_accounttype':job_accounttype, \
             'job_company_id':job_company_id,'job_company_name':job_company_name,
             'job_published':job_published_date,'job_republished':job_republished_date,'job_back_office':job_back_office
             }

        #########################################################################################################
        ############-----------------Saving the document in Job collection Mongo (172.22.66.233)
        #########################################################################################################
        monconn_jobs_local.saveToTable(document)
        i += 1

    print "Processing finished....."
    print 'chunkID:', chunkID, ' Total time taken is: ', time.time(
    ) - start_time, ' seconds.'
    end_time = time.time()
    time_taken = end_time - start_time
    send_email([
        '*****@*****.**',
        '*****@*****.**'
    ], "Revival Mailer Weekly", 'Jobs Processed ' + str(i) + ' in :' +
               str(end_time - start_time) + ' seconds')

    #########################################################################################################
    ############-----------------Changing the status of completion and deleting the mongo connections
    #########################################################################################################
    del (monconn_jobs_local)
    del (mysql_conn)
# Example #4
# 0
def preProcessChunk(chunkID):

    #########################################################################################################
    ############-----------------    SQL Credentials
    #########################################################################################################
    '''
    host="172.22.65.157"
    user="******"
    password="******"
    database="SumoPlus"
    unix_socket="/tmp/mysql.sock"
    port = 3308
    '''
    host = "172.22.66.204"
    user = "******"
    password = "******"
    database = "SumoPlus"
    unix_socket = "/tmp/mysql.sock"
    port = 3306

    #########################################################################################################
    ############-----------------    Creating the SQL Query
    #########################################################################################################
    print "Loading Jobs From MySql...."
    mysql_conn = MySQLConnect(database, host, user, password, unix_socket,
                              port)
    cmd1 = '''drop table if exists SumoPlus.XY'''
    cmd2 = '''create table SumoPlus.XY as 
         SELECT company_account_id,SUM(final_sale_price)as price,enabled,MAX(expiry_date)as expiry_date 
         from SumoPlus.backoffice_accountsales a1 
         where enabled in 
         (select min(enabled) from SumoPlus.backoffice_accountsales where a1.company_account_id=company_account_id)
         group by 1
        '''
    cmd3 = '''ALTER TABLE SumoPlus.XY add index company_account_id (company_account_id)'''
    cmd4 = '''SELECT
         rj.jobid as Jobid,
         rj.jobtitle as JobTitle,
         rj.description as JD,
         rj.companyid_id as Company_id,
         rj.publisheddate as publisheddate,
         rj.displayname as Company_name,
         la1.text_value_MAX as SalaryMax,
         la2.text_value_MIN as SalaryMin,
         le1.display as ExpMin,
         le2.display as ExpMax,
         li.industry_desc as Industry,
         group_concat(c.AttValueCustom,'') as keySkills,
         group_concat(fn.field_enu,'') as function,
         group_concat(l.city_desc,'') as location,
         group_concat(fn.sub_field_enu,'') as subfunction,
         case account_type
         when 0 THEN "Company"
         when 1 THEN "Consultant"
         when 2 THEN "Others"
         when 3 THEN "Enterprise"
         ELSE "Not Specified"
         END AS account_type,
         IF(XY.enabled = 1 AND XY.price != 0 AND XY.expiry_date > CURDATE(),'Paid','Free') AS 'flag'        
         
         from 
         (select * from recruiter_job 
            where recruiter_job.jobstatus in (3,9) 
            and (DATEDIFF( CURDATE(),DATE(recruiter_job.publisheddate)) < 20 OR DATEDIFF( CURDATE(),DATE(recruiter_job.republisheddate)) < 20)  
         ) AS rj 
         left join lookup_annualsalary AS la1 on rj.salarymax = la1.salary_id 
         left join  lookup_annualsalary AS la2 on rj.salarymin = la2.salary_id 
         left join lookup_experience AS le1 on rj.minexperience = le1.value 
         left join  lookup_experience AS le2 on rj.maxexperience = le2.value 
         left join recruiter_jobattribute as c on rj.jobid = c.jobid_id 
         left join  lookup_industry AS li on rj.industry=li.industry_id 
         left join lookup_subfunctionalarea_new163 AS fn on fn.sub_field_id = c.AttValue AND c.AttType = 12 
         left join lookup_city_new512 AS l on  l.city_id = c.AttValue AND c.AttType = 13 
         left join SumoPlus.XY AS XY on XY.company_account_id = rj.companyid_id
         left join SumoPlus.backoffice_companyaccount AS F on  F.id= rj.companyid_id       
         WHERE 
        
         c.AttType in (3,12,13) 
        
         group by rj.jobid
         '''

    cmd5 = '''drop table if exists SumoPlus.XY
        '''

    #########################################################################################################
    ############-----------------    Executing the SQL Query
    #########################################################################################################
    print 'chnukID:', chunkID, ': Loading jobs from SQL....', time.ctime()
    mysql_conn.query(cmd1)
    mysql_conn.query(cmd2)
    mysql_conn.query(cmd3)
    jobs = mysql_conn.query(cmd4)
    mysql_conn.query(cmd5)
    print 'chunkID:', chunkID, ': Loading jobs from SQL....completed..', time.ctime(
    )
    print 'chunkid:', chunkID, ' : Number of jobs loaded: ', len(jobs)

    #########################################################################################################
    ############-----------------    Connecting to Jobs Tech Dump Collections Mongo (172.22.66.233)
    #########################################################################################################
    print 'Connecting to Mongodb..'
    tableName = 'JobDesc_weekly'
    monconn_jobs_local = MongoConnect(tableName,
                                      host='localhost',
                                      database='JobDescDB')
    monconn_jobs_local_cur = monconn_jobs_local.getCursor()
    print 'Connecting to Mongodb...finished'

    #########################################################################################################
    ############-----------------Processing the Jobs data extracted from SQL
    #########################################################################################################
    i = 0
    for job in jobs:
        if i % 1000 == 0:
            print '\tchunkID:', chunkID, ' numRecords:', i, ' completed in ', time.time(
            ) - start_time, ' seconds'
        _id = job['Jobid']
        comp_name = cleanToken_1(job.get('Company_name', None))
        loc = (removeDup(job.get('location', None))).replace(', ',
                                                             ',').split(',')
        min_exp = job.get('ExpMin', None)
        title = cleanToken_1(job.get('JobTitle', None))
        max_exp = job.get('ExpMax', None)
        pub_date = job.get('publisheddate', None)
        id = job['Jobid']
        job_flag = job.get('flag')

        p = 0
        if job_flag == "Paid":
            p = 1
        else:
            p = 0

        desc = None

        #########################################################################################################
        ############-----------------Creating Job document to be saved in Mongo
        #########################################################################################################
        document = {
            '_id': _id,
            'comp_name': comp_name,
            'loc': loc,
            'min_exp': min_exp,
            'title': title,
            'max_exp': max_exp,
            'pub_date': pub_date,
            'id': id,
            'p': p,
            'desc': desc
        }

        #########################################################################################################
        ############-----------------Saving the document in Job collection Mongo (172.22.66.233)
        #########################################################################################################
        monconn_jobs_local.saveToTable(document)
        i += 1

    print "Processing finished....."
    print 'chunkID:', chunkID, ' Total time taken is: ', time.time(
    ) - start_time, ' seconds.'
    end_time = time.time()
    time_taken = end_time - start_time
    send_email([
        '*****@*****.**',
        '*****@*****.**'
    ], "Revival Mailer Weekly", 'TEch Dump Jobs Processed ' + str(i) +
               ' in :' + str(end_time - start_time) + ' seconds')

    #########################################################################################################
    ############-----------------Deleting the mongo connections
    #########################################################################################################
    del (monconn_jobs_local)
    del (mysql_conn)
class getArticlesData(object):
    """Load posts from ``wp_posts`` and prepare their text, translating Thai to English.

    Written for Python 2 (``urllib.urlencode``, ``urllib.urlopen``,
    ``unicode``). Relies on module-level names not defined in this class:
    ``MySQLConnect``, ``demjson``, ``callRemoveHtml``, ``getASCIIString``
    and ``strip_tags``.
    """

    def __init__(self):
        """Connect to the ``tap_sg`` database, load posts and set the API URLs."""
        # 1. Connect to MySQL database table wp_posts
        host = "127.0.0.1"
        user = "******"
        password = "******"
        database = "tap_sg"

        self.mysql_conn = MySQLConnect(database, host, user, password)
        cmd = '''select * from wp_posts limit 2'''
        self.articles = self.mysql_conn.query(cmd)

        # Google API endpoints for Thai to English translation.
        # NOTE(review): the API key is hard-coded in source; consider loading
        # it from configuration instead.
        API_KEY = "AIzaSyBGGfOdtKFhlJ1w2bitjsj194jUKIxoPT0"

        self.TRANSLATE_URL = "https://www.googleapis.com/language/translate/v2?key=" + API_KEY
        self.DETECT_URL = "https://www.googleapis.com/language/translate/v2/detect?key=" + API_KEY  # &q=google+translate+is+fast

    @staticmethod
    def unicode_urlencode(params):
        """URL-encode ``params`` after encoding unicode values as UTF-8.

        Args:
            params: A dict or an iterable of ``(key, value)`` pairs.

        Returns:
            The query string produced by ``urllib.urlencode``.
        """
        if isinstance(params, dict):
            params = params.items()
        encoded_pairs = [
            (k, v.encode('utf-8') if isinstance(v, unicode) else v)
            for k, v in params
        ]
        return urllib.urlencode(encoded_pairs)

    @staticmethod
    def make_request(url):
        """Fetch ``url`` and return the raw response body."""
        return urllib.urlopen(url).read()

    def quick_translate(self, text, target, source):
        """Return ``text`` translated from ``source`` to ``target``.

        Best effort: returns ``""`` when the request fails or the response
        does not have the expected shape.
        """
        try:
            response = self.translate(text, target, source)
            translated = response["data"]["translations"][0]["translatedText"]
            # The API returns apostrophes as an HTML entity.
            return translated.replace('&#39;', "'")
        except Exception:
            # Deliberate best effort: callers treat "" as "no translation".
            return ""

    def translate(self, text, target, source):
        """Call the translate endpoint and return the decoded JSON response.

        Returns ``{}`` when the request or the decoding fails.
        """
        query_params = {"q": text, "source": source, "target": target}
        url = self.TRANSLATE_URL + "&" + self.unicode_urlencode(query_params)
        try:
            return demjson.decode(self.make_request(url))
        except Exception:
            return {}

    def quick_detect(self, text):
        """Return the detected language code of ``text``.

        Best effort: returns ``""`` when the request fails or the response
        does not have the expected shape.
        """
        try:
            return self.detect(text)["data"]["detections"][0][0]["language"]
        except Exception:
            # Deliberate best effort: callers treat "" as "unknown language".
            return ""

    def detect(self, text):
        """Call the detect endpoint and return the decoded JSON response.

        Returns ``{}`` when the request or the decoding fails.
        """
        query_params = {"q": text}
        url = self.DETECT_URL + "&" + self.unicode_urlencode(query_params)
        try:
            return demjson.decode(self.make_request(url))
        except Exception:
            return {}

    def getArticlesList(self):
        """Build the article collections from ``self.articles`` and close the DB connection.

        Titles detected as Thai are translated to English together with
        their content; when a translation comes back empty the original
        text is kept.

        Returns:
            A four-element list:
            ``[articleText, articlesDict, snoToArticleDict,
            articlesTitlesToDetailsDict]`` where ``articleText`` is the list
            of prepared texts and the three dicts map article id, serial
            number and title to the same details dict.
        """
        articlesDict = {}  # article id -> article details
        snoToArticleDict = {}  # serial number -> article details
        articlesTitlesToDetailsDict = {}  # article title -> article details
        articleText = []  # prepared text of every kept article

        i = 0  # serial number of the next kept article

        for article in self.articles:
            article_title = article['post_title']
            article_content = article['post_content']

            if self.quick_detect(article_title) == "th":
                # quick_translate returns "" on failure; fall back to the
                # original text in that case.
                article_title = (
                    self.quick_translate(article_title, "en", "th")
                    or article_title)
                article_content = (
                    self.quick_translate(article_content, "en", "th")
                    or article_content)

            article_title = callRemoveHtml(article_title)
            articleid = article['ID']
            article_url = article['guid']
            article_type = article['post_type']
            article_status = article['post_status']

            # Filtering only articles from database which are live.
            # NOTE(review): 'or' also keeps unpublished posts and published
            # non-post rows -- confirm whether 'and' was intended.
            if (article_type == 'post') or (article_status == 'publish'):
                # Text string with 5/6 weightage of title and 1/6 of
                # article content.
                article_text = article_content + (' ' + article_title) * 5
                article_text = article_text.lower()
                article_text = getASCIIString(strip_tags(article_text))

                articleText.append(article_text)

                articleDetailsDict = {
                    'article_text': article_text,
                    'articleid': articleid,
                    'article_title': article_title,
                    'article_url': article_url,
                    'article_sno': i,
                }

                articlesDict[articleid] = articleDetailsDict
                articlesTitlesToDetailsDict[article_title] = articleDetailsDict
                snoToArticleDict[i] = articleDetailsDict

                i += 1

        # Close the MySQL connection
        self.mysql_conn.close()

        return [articleText, articlesDict, snoToArticleDict, articlesTitlesToDetailsDict]
# Example #6
# 0
def preProcessChunk(chunkId1, chunkId2):

    ######################################
    '''Fetching the Jobs from SQL'''
    ######################################

    #host="172.22.65.157"
    host = "172.22.66.204"
    user = "******"
    password = "******"
    database = "SumoPlus"
    unix_socket = "/tmp/mysql.sock"
    port = 3306

    print "Loading Jobs From MySql...."
    mysql_conn = MySQLConnect(database, host, user, password, unix_socket,
                              port)
    #cmd = '''SELECT rj.jobid as Jobid,rj.jobtitle as JobTitle,rj.description as JD,la1.text_value_MAX as SalaryMax,la2.text_value_MIN as SalaryMin,le1.display as ExpMin,le2.display as ExpMax,li.industry_desc as Industry,c.AttValueCustom as keySkills,l.city_desc as location,fn.field_enu as function,fn.sub_field_enu as subfunction from recruiter_job AS rj left join lookup_annualsalary AS la1 on rj.salarymax = la1.salary_id left join  lookup_annualsalary AS la2 on rj.salarymin = la2.salary_id left join lookup_experience AS le1 on rj.minexperience = le1.value left join  lookup_experience AS le2 on rj.maxexperience = le2.value left join recruiter_jobattribute as c on rj.jobid = c.jobid_id left join  lookup_industry AS li on rj.industry=li.industry_id left join lookup_subfunctionalarea_new163 AS fn on fn.sub_field_id = c.AttValue AND c.AttType = 12 left join lookup_city_new512 AS l on  l.city_id = c.AttValue AND c.AttType = 13 WHERE rj.jobstatus in (3,5,6,9) and c.AttType in (3,12,13) and (DATEDIFF( CURDATE(),DATE(rj.publisheddate)) < 4 OR DATEDIFF( CURDATE(),DATE(rj.republisheddate)) < 4)  and rj.jobid%''' + str(numChunks) + '=' + str(chunkID)
    #cmd = '''SELECT rj.jobid as Jobid,rj.jobtitle as JobTitle,rj.description as JD,la1.text_value_MAX as SalaryMax,la2.text_value_MIN as SalaryMin,le1.display as ExpMin,le2.display as ExpMax,li.industry_desc as Industry,c.AttValueCustom as keySkills,l.city_desc as location,fn.field_enu as function,fn.sub_field_enu as subfunction from recruiter_job AS rj left join lookup_annualsalary AS la1 on rj.salarymax = la1.salary_id left join  lookup_annualsalary AS la2 on rj.salarymin = la2.salary_id left join lookup_experience AS le1 on rj.minexperience = le1.value left join  lookup_experience AS le2 on rj.maxexperience = le2.value left join recruiter_jobattribute as c on rj.jobid = c.jobid_id left join  lookup_industry AS li on rj.industry=li.industry_id left join lookup_subfunctionalarea_new163 AS fn on fn.sub_field_id = c.AttValue AND c.AttType = 12 left join lookup_city_new512 AS l on  l.city_id = c.AttValue AND c.AttType = 13 WHERE rj.jobstatus in (3,5,6,9) and c.AttType in (3,12,13) and (DATEDIFF( CURDATE(),DATE(rj.publisheddate)) < 4 OR DATEDIFF( CURDATE(),DATE(rj.republisheddate)) < 4)'''
    #print cmd
    cmd1 = '''drop table if exists SumoPlus.XY'''
    cmd2 = '''create table SumoPlus.XY as 
         SELECT company_account_id,SUM(final_sale_price)as price,enabled,MAX(expiry_date)as expiry_date 
         from SumoPlus.backoffice_accountsales a1 
         where enabled in 
         (select min(enabled) from SumoPlus.backoffice_accountsales where a1.company_account_id=company_account_id)
         group by 1
        '''
    cmd3 = '''ALTER TABLE SumoPlus.XY add index company_account_id (company_account_id)'''
    cmd4 = '''SELECT
         rj.jobid as Jobid,
         rj.jobtitle as JobTitle,
         rj.description as JD,
         rj.companyid_id as Company_id,
         rj.displayname as Company_name,
         rj.publisheddate as Published_Date,
         rj.republisheddate as RePublished_Date,
         rj.expirydate as Expiry_Date,
         la1.text_value_MAX as SalaryMax,
         la2.text_value_MIN as SalaryMin,
         le1.display as ExpMin,
         le2.display as ExpMax,
         li.industry_desc as Industry,
         group_concat(c.AttValueCustom,'') as keySkills,
         group_concat(fn.field_enu,'') as function,
         group_concat(l.city_desc,'') as location,
         group_concat(fn.sub_field_enu,'') as subfunction,
         lj.Applications as Application_Number,
         case account_type
         when 0 THEN "Company"
         when 1 THEN "Consultant"
         when 2 THEN "Others"
         when 3 THEN "Enterprise"
         ELSE "Not Specified"
         END AS account_type,
         IF(XY.enabled = 1 AND XY.price != 0 AND XY.expiry_date > CURDATE(),'Paid','Free') AS 'flag'        
         
         from 
         (select * from recruiter_job 
            where ( (DATEDIFF( CURDATE(),DATE(recruiter_job.publisheddate)) > %s AND DATEDIFF( CURDATE(),DATE(recruiter_job.publisheddate)) <= %s) OR (DATEDIFF( CURDATE(),DATE(recruiter_job.republisheddate)) > %s AND DATEDIFF( CURDATE(),DATE(recruiter_job.republisheddate)) <= %s))) AS rj 
         left join lookup_annualsalary AS la1 on rj.salarymax = la1.salary_id 
         left join  lookup_annualsalary AS la2 on rj.salarymin = la2.salary_id 
         left join lookup_experience AS le1 on rj.minexperience = le1.value 
         left join  lookup_experience AS le2 on rj.maxexperience = le2.value 
         left join recruiter_jobattribute as c on rj.jobid = c.jobid_id 
         left join  lookup_industry AS li on rj.industry=li.industry_id 
         left join lookup_subfunctionalarea_new163 AS fn on fn.sub_field_id = c.AttValue AND c.AttType = 12 
         left join lookup_city_new512 AS l on  l.city_id = c.AttValue AND c.AttType = 13 
         left join SumoPlus.XY AS XY on XY.company_account_id = rj.companyid_id
         left join SumoPlus.backoffice_companyaccount AS F on  F.id= rj.companyid_id       
         left join ShineReport.LiveJobsApplications AS lj on rj.jobid = lj.JobId
         
         WHERE 
        
         c.AttType in (3,12,13) 
        
         group by rj.jobid
         ''' % (chunkId1, chunkId2, chunkId1, chunkId2)

    cmd5 = '''drop table if exists SumoPlus.XY
        '''

    print 'chnukID:', chunkId1, ': Loading jobs from SQL....', time.ctime()
    mysql_conn.query(cmd1)
    print 'cmd1'
    mysql_conn.query(cmd2)
    print 'cmd2'
    mysql_conn.query(cmd3)
    print 'cmd3'
    jobs = mysql_conn.query(cmd4)
    print 'jobs'
    mysql_conn.query(cmd5)
    print 'chunkID:', chunkId1, ': Loading jobs from SQL....completed..', time.ctime(
    )

    print 'chunkid:', chunkId1, ' : Number of jobs loaded: ', len(jobs)

    ######################################
    '''Connecting to Mongo 233 Server'''
    ######################################

    print 'Connecting to Mongodb..'
    tableName = 'jobs_processed_9months'
    monconn_jobs_local = MongoConnect(tableName,
                                      host='172.22.66.198',
                                      database='SimilarJobs')
    monconn_jobs_local_cur = monconn_jobs_local.getCursor()
    print 'Connecting to Mongodb...finished'

    ######################################
    '''Processing the Jobs'''
    ######################################
    global i
    #i = 0
    for job in jobs:
        #pprint(job)
        #print i
        if i % 1000 == 0:
            print '\tchunkID:', chunkId1, ' numRecords:', i, ' completed in ', time.time(
            ) - start_time, ' seconds'

        job_id = job['Jobid']
        job_title = cleanToken(job['JobTitle'])
        job_maxexp = cleanToken(job['ExpMax'])
        job_minexp = cleanToken(job['ExpMin'])
        job_maxsal = cleanToken(job['SalaryMax'])
        job_minsal = cleanToken(job['SalaryMin'])
        job_jd = cleanHTML(cleanToken(job['JD']))
        job_industry = cleanToken(job['Industry'])
        job_location = removeDup(job['location'])
        job_subfunction = removeDup(cleanToken(job['subfunction']))
        job_function = removeDup(cleanToken(job['function']))
        job_skills = removeDup(cleanToken(job['keySkills']))
        job_flag = job['flag']
        job_accounttype = job['account_type']
        job_company_id = job['Company_id']
        job_company_name = cleanToken(job['Company_name'])
        job_index = i
        job_publishedate = job['Published_Date']
        job_repubslisheddate = job['RePublished_Date']
        job_expirydate = job['Expiry_Date']
        pid = i % 5000
        job_applications = job['Application_Number']
        job_location = job_location.replace(', ', ',').lower().split(',')

        #################################################
        '''Creating Bag of Words from the text fields'''
        #################################################

        text = 5 * (" " + job_title) + ' ' + 3 * (
            " " + job_skills) + ' ' + 1 * (" " + job_jd) + ' ' + 2 * (
                " " + job_industry) + ' ' + 2 * (
                    " " + job_function) + ' ' + 2 * (" " + job_subfunction)
        text = text.replace('candidates', ' ')
        job_bow = mb.getBow(text, getbowdict=0)

        ##################################################
        '''Dumping Job Details in Mongo (172.22.66.253)'''
        ##################################################

        document = {'job_id': job_id, 'job_title': job_title,'job_function':job_function, \
             'job_maxexp': job_maxexp, 'job_minexp': job_minexp,\
             'job_location':job_location, 'job_subfunction':job_subfunction,\
             'job_maxsal':job_maxsal,'job_minsal':job_minsal, 'job_skills': job_skills, \
             'job_bow': job_bow, 'job_industry': job_industry, 'job_jd': job_jd, \
             'job_flag':job_flag,'job_accounttype':job_accounttype, \
             'job_company_id':job_company_id,'job_company_name':job_company_name,'job_index':job_index, \
             'application_number': job_applications,'pid':pid,'job_publishedate':job_publishedate , \
             'job_repubslisheddate':job_repubslisheddate,'job_expirydate':job_expirydate
             }

        monconn_jobs_local.saveToTable(document)

        i += 1

    print "Processing finished....."
    print 'chunkID:', chunkId1, ' Total time taken is: ', time.time(
    ) - start_time, ' seconds.'
    end_time = time.time()
    time_taken = end_time - start_time
    monconn_jobs_local.doIndexing('pid')
    #send_email(['*****@*****.**', '*****@*****.**','*****@*****.**'],"Similar Jobs Mailer 9 Month Jobs",'Jobs Processing 9 Months Completed !!\nJobs Processed '+str(i)+' in :' + str(end_time - start_time) + ' seconds')
    #os.system(' echo "Jobs Processing 9 Months Completed !!\nJobs Processed '+str(i)+' in :' + str(end_time - start_time) + ' seconds' +' " | mutt -s "Similar Jobs Mailer" [email protected], [email protected], [email protected]')
    del (monconn_jobs_local)
    del (mysql_conn)
def preProcessChunk(chunkID):
    
    #########################################################################################################             
    ############-----------------    SQL Credentials
    #########################################################################################################
    
    #Connect to SQL table and get the jobs data
    #host="172.16.66.64"
    #user="******"
    #password="******"
    '''
    host="172.22.65.157"
    user="******"
    password="******"
    database="SumoPlus"
    unix_socket="/tmp/mysql.sock"
    port = 3308
    '''

    host="172.22.66.204"
    user="******"
    password="******"
    database="SumoPlus"
    unix_socket="/tmp/mysql.sock"
    port = 3306



    #########################################################################################################             
    ############-----------------    Creating the SQL Query
    #########################################################################################################
    print "Loading Jobs From MySql...."
    mysql_conn = MySQLConnect(database, host, user, password, unix_socket, port)
    #cmd = '''SELECT rj.jobid as Jobid,rj.jobtitle as JobTitle,rj.description as JD,la1.text_value_MAX as SalaryMax,la2.text_value_MIN as SalaryMin,le1.display as ExpMin,le2.display as ExpMax,li.industry_desc as Industry,c.AttValueCustom as keySkills,l.city_desc as location,fn.field_enu as function,fn.sub_field_enu as subfunction from recruiter_job AS rj left join lookup_annualsalary AS la1 on rj.salarymax = la1.salary_id left join  lookup_annualsalary AS la2 on rj.salarymin = la2.salary_id left join lookup_experience AS le1 on rj.minexperience = le1.value left join  lookup_experience AS le2 on rj.maxexperience = le2.value left join recruiter_jobattribute as c on rj.jobid = c.jobid_id left join  lookup_industry AS li on rj.industry=li.industry_id left join lookup_subfunctionalarea_new163 AS fn on fn.sub_field_id = c.AttValue AND c.AttType = 12 left join lookup_city_new512 AS l on  l.city_id = c.AttValue AND c.AttType = 13 WHERE rj.jobstatus in (3,5,6,9) and c.AttType in (3,12,13) and (DATEDIFF( CURDATE(),DATE(rj.publisheddate)) < 4 OR DATEDIFF( CURDATE(),DATE(rj.republisheddate)) < 4)  and rj.jobid%''' + str(numChunks) + '=' + str(chunkID)
    #cmd = '''SELECT rj.jobid as Jobid,rj.jobtitle as JobTitle,rj.description as JD,la1.text_value_MAX as SalaryMax,la2.text_value_MIN as SalaryMin,le1.display as ExpMin,le2.display as ExpMax,li.industry_desc as Industry,c.AttValueCustom as keySkills,l.city_desc as location,fn.field_enu as function,fn.sub_field_enu as subfunction from recruiter_job AS rj left join lookup_annualsalary AS la1 on rj.salarymax = la1.salary_id left join  lookup_annualsalary AS la2 on rj.salarymin = la2.salary_id left join lookup_experience AS le1 on rj.minexperience = le1.value left join  lookup_experience AS le2 on rj.maxexperience = le2.value left join recruiter_jobattribute as c on rj.jobid = c.jobid_id left join  lookup_industry AS li on rj.industry=li.industry_id left join lookup_subfunctionalarea_new163 AS fn on fn.sub_field_id = c.AttValue AND c.AttType = 12 left join lookup_city_new512 AS l on  l.city_id = c.AttValue AND c.AttType = 13 WHERE rj.jobstatus in (3,5,6,9) and c.AttType in (3,12,13) and (DATEDIFF( CURDATE(),DATE(rj.publisheddate)) < 4 OR DATEDIFF( CURDATE(),DATE(rj.republisheddate)) < 4)'''
    #print cmd
    cmd='''SELECT
        rj.jobid as Jobid,
        rj.jobtitle as JobTitle,
        rj.description as JD,
        la1.text_value_MAX as SalaryMax,
        la2.text_value_MIN as SalaryMin,
        le1.display as ExpMin,
        le2.display as ExpMax,
        li.industry_desc as Industry,
        group_concat(c.AttValueCustom,'') as keySkills,
        group_concat(fn.field_enu,'') as function,
        group_concat(l.city_desc,'') as location,
        group_concat(fn.sub_field_enu,'') as subfunction 
        
        from 
        (select * from recruiter_job 
            where recruiter_job.jobstatus in (3,9) 
            and (DATEDIFF( CURDATE(),DATE(recruiter_job.publisheddate)) < 8 OR DATEDIFF( CURDATE(),DATE(recruiter_job.republisheddate)) < 8)  
        ) AS rj 
        left join lookup_annualsalary AS la1 on rj.salarymax = la1.salary_id 
        left join  lookup_annualsalary AS la2 on rj.salarymin = la2.salary_id 
        left join lookup_experience AS le1 on rj.minexperience = le1.value 
        left join  lookup_experience AS le2 on rj.maxexperience = le2.value 
        left join recruiter_jobattribute as c on rj.jobid = c.jobid_id 
        left join  lookup_industry AS li on rj.industry=li.industry_id 
        left join lookup_subfunctionalarea_new163 AS fn on fn.sub_field_id = c.AttValue AND c.AttType = 12 
        left join lookup_city_new512 AS l on  l.city_id = c.AttValue AND c.AttType = 13 
        
        WHERE 
        
        c.AttType in (3,12,13) 
        
        group by rj.jobid
        
        
        '''


    #########################################################################################################             
    ############-----------------    Executing the SQL Query
    #########################################################################################################
    print 'chnukID:', chunkID, ': Loading jobs from SQL....', time.ctime()
    jobs = mysql_conn.query(cmd)
    print 'chunkID:', chunkID,': Loading jobs from SQL....completed..', time.ctime()
    print 'chunkid:', chunkID, ' : Number of jobs loaded: ', len(jobs)




    #########################################################################################################             
    ############-----------------Connecting to Jobs Collections Mongo (172.22.66.233)
    #########################################################################################################
    print 'Connecting to Mongodb..'
    tableName = 'jobs_processed_midout'
    monconn_jobs_local = MongoConnect(tableName , host = 'localhost', database = 'Midout_Mailers')
    monconn_jobs_local_cur = monconn_jobs_local.getCursor()
    print 'Connecting to Mongodb...finished'
    
    
        
    #########################################################################################################             
    ############-----------------Processing the Jobs data extracted from SQL
    #########################################################################################################
    i = 0
    for job in jobs:
        #pprint(job)
        #print i
        if i%1000 == 0:
            print '\tchunkID:', chunkID, ' numRecords:' , i,  ' completed in ', time.time() - start_time, ' seconds'
        
        job_id = job['Jobid']
        job_title = cleanToken(job['JobTitle'])
        job_maxexp = cleanToken(job['ExpMax'])
        job_minexp = cleanToken(job['ExpMin'])  
        job_maxsal = cleanToken(job['SalaryMax'])
        job_minsal = cleanToken(job['SalaryMin'])  
        job_jd = cleanHTML(cleanToken(job['JD']) )
        job_industry = cleanToken(job['Industry'])
        job_location=removeDup(job['location'])
        job_subfunction=removeDup(job['subfunction'])
        job_function=removeDup(job['function'])
        job_skills=removeDup(cleanToken(job['keySkills']))
        

        
        #########################################################################################################             
        ############-----------------Creating Bag of Words for Text
        #########################################################################################################
        text = 5*(" "+job_title) + ' ' + 5*(" "+job_skills) + ' ' + 1*(" "+job_jd) +' '+2*(" "+job_industry)+' '+2*(" "+job_function)+' '+2*(" "+job_subfunction)
        text = text.replace('candidates', ' ')
        job_bow = mb.getBow(text, getbowdict = 0)
    


        #########################################################################################################             
        ############-----------------Creating Job document to be saved in Mongo
        #########################################################################################################        
        document = {'job_id': job_id, 'job_title': job_title,'job_function':job_function, \
             'job_maxexp': job_maxexp, 'job_minexp': job_minexp,\
             'job_location':job_location, 'job_subfunction':job_subfunction,\
             'job_maxsal':job_maxsal,'job_minsal':job_minsal, 'job_skills': job_skills, \
             'job_bow': job_bow, 'job_industry': job_industry, 'job_jd': job_jd
             }



        #########################################################################################################             
        ############-----------------Saving the document in Job collection Mongo (172.22.66.233)
        #########################################################################################################        
        monconn_jobs_local.saveToTable(document)
    
        i += 1
        

    print "Processing finished....."    
    print 'chunkID:', chunkID, ' Total time taken is: ', time.time() - start_time, ' seconds.'
    end_time = time.time()
    time_taken = end_time - start_time
    send_email(['*****@*****.**', '*****@*****.**'],"Midout Mailers",'Jobs Processed '+str(i)+' in :' + str(end_time - start_time) + ' seconds')
    #os.system(' echo "Jobs Processed '+str(i)+' in :' + str(end_time - start_time) + ' seconds' +' " | mutt -s "Midout Mailers" [email protected] ,[email protected]')
    del(monconn_jobs_local)
    del(mysql_conn)
def getArticlesData():
    """Read every row of ``wp_posts`` and index the articles that pass the filter.

    For each kept article the content and the cleaned title (repeated five
    times to weight it) are concatenated, lower-cased, stripped of tags and
    converted with ``getASCIIString``.

    Returns:
        A four-element list
        ``[articleText, articlesDict, snoToArticleDict, articlesTitlesToDetailsDict]``:

        * ``articleText`` - processed text of each kept article, in row order.
        * ``articlesDict`` - article ID -> details dict.
        * ``snoToArticleDict`` - running serial number (0, 1, ...) -> details dict.
        * ``articlesTitlesToDetailsDict`` - cleaned title -> details dict; a
          later article with the same title replaces the earlier entry.

    The MySQL connection is closed before returning, including when the query
    or the processing raises.
    """

    #1. Connect to MySQL database table wp_posts
    host = "127.0.0.1"
    user = "******"
    password = "******"
    database = "tap_sg_new"

    mysql_conn = MySQLConnect(database, host, user, password)

    #2. Create a list of articles
    articleText = []
    articlesDict = {}  #article details
    snoToArticleDict = {}  #sno -> article details
    articlesTitlesToDetailsDict = {}  #article title -> article details

    try:
        cmd = '''select * from wp_posts'''
        articles = mysql_conn.query(cmd)

        i = 0

        for article in articles:

            article_content = article['post_content']
            articleid = article['ID']
            article_title = callRemoveHtml(article['post_title'])
            article_url = article['guid']
            article_type = article['post_type']
            article_status = article['post_status']
            article_sno = i

            # Filtering only articles from database which are live.
            # NOTE(review): with `or`, any row of type 'post' passes whatever
            # its status, and any 'publish' row passes whatever its type. If
            # only published posts are wanted this should be `and` - confirm
            # before changing, callers may rely on the current selection.
            if (article_type == 'post') or (article_status == 'publish'):

                article_title_with_space = ' ' + article_title

                # Text string with 5/6 weightage of title and 1/6 of article content
                article_text = article_content + article_title_with_space * 5
                article_text = article_text.lower()
                article_text = getASCIIString(strip_tags(article_text))

                articleText.append(article_text)

                articleDetailsDict = {
                    'article_text': article_text,
                    'articleid': articleid,
                    'article_title': article_title,
                    'article_url': article_url,
                    'article_sno': article_sno
                }

                articlesDict[articleid] = articleDetailsDict
                articlesTitlesToDetailsDict[article_title] = articleDetailsDict
                snoToArticleDict[i] = articleDetailsDict

                i += 1
    finally:
        #5. Close the MySQL connection, even if the query or processing failed
        mysql_conn.close()

    return [
        articleText, articlesDict, snoToArticleDict,
        articlesTitlesToDetailsDict
    ]
示例#9
0
def getDataFromSQL():

    ######################################             
    '''Fetching the Jobs from SQL'''
    ######################################

    #host="172.22.65.157"
    host = "172.22.66.204"
    user="******"
    password="******"
    database="SumoPlus"
    unix_socket="/tmp/mysql.sock"
    port = 3306
    
    print "Loading Jobs From MySql...."
    mysql_conn = MySQLConnect(database, host, user, password, unix_socket, port)
    #cmd = '''SELECT rj.jobid as Jobid,rj.jobtitle as JobTitle,rj.description as JD,la1.text_value_MAX as SalaryMax,la2.text_value_MIN as SalaryMin,le1.display as ExpMin,le2.display as ExpMax,li.industry_desc as Industry,c.AttValueCustom as keySkills,l.city_desc as location,fn.field_enu as function,fn.sub_field_enu as subfunction from recruiter_job AS rj left join lookup_annualsalary AS la1 on rj.salarymax = la1.salary_id left join  lookup_annualsalary AS la2 on rj.salarymin = la2.salary_id left join lookup_experience AS le1 on rj.minexperience = le1.value left join  lookup_experience AS le2 on rj.maxexperience = le2.value left join recruiter_jobattribute as c on rj.jobid = c.jobid_id left join  lookup_industry AS li on rj.industry=li.industry_id left join lookup_subfunctionalarea_new163 AS fn on fn.sub_field_id = c.AttValue AND c.AttType = 12 left join lookup_city_new512 AS l on  l.city_id = c.AttValue AND c.AttType = 13 WHERE rj.jobstatus in (3,5,6,9) and c.AttType in (3,12,13) and (DATEDIFF( CURDATE(),DATE(rj.publisheddate)) < 4 OR DATEDIFF( CURDATE(),DATE(rj.republisheddate)) < 4)  and rj.jobid%''' + str(numChunks) + '=' + str(chunkID)
    #cmd = '''SELECT rj.jobid as Jobid,rj.jobtitle as JobTitle,rj.description as JD,la1.text_value_MAX as SalaryMax,la2.text_value_MIN as SalaryMin,le1.display as ExpMin,le2.display as ExpMax,li.industry_desc as Industry,c.AttValueCustom as keySkills,l.city_desc as location,fn.field_enu as function,fn.sub_field_enu as subfunction from recruiter_job AS rj left join lookup_annualsalary AS la1 on rj.salarymax = la1.salary_id left join  lookup_annualsalary AS la2 on rj.salarymin = la2.salary_id left join lookup_experience AS le1 on rj.minexperience = le1.value left join  lookup_experience AS le2 on rj.maxexperience = le2.value left join recruiter_jobattribute as c on rj.jobid = c.jobid_id left join  lookup_industry AS li on rj.industry=li.industry_id left join lookup_subfunctionalarea_new163 AS fn on fn.sub_field_id = c.AttValue AND c.AttType = 12 left join lookup_city_new512 AS l on  l.city_id = c.AttValue AND c.AttType = 13 WHERE rj.jobstatus in (3,5,6,9) and c.AttType in (3,12,13) and (DATEDIFF( CURDATE(),DATE(rj.publisheddate)) < 4 OR DATEDIFF( CURDATE(),DATE(rj.republisheddate)) < 4)'''
    #print cmd
    cmd1='''drop table if exists SumoPlus.XY'''
    cmd2='''create table SumoPlus.XY as 
         SELECT company_account_id,SUM(final_sale_price)as price,enabled,MAX(expiry_date)as expiry_date 
         from SumoPlus.backoffice_accountsales a1 
         where enabled in 
         (select min(enabled) from SumoPlus.backoffice_accountsales where a1.company_account_id=company_account_id)
         group by 1
        '''
    cmd3='''ALTER TABLE SumoPlus.XY add index company_account_id (company_account_id)'''
    cmd4='''SELECT
         rj.jobid as Jobid,
         rj.jobtitle as JobTitle,
         rj.description as JD,
         rj.companyid_id as Company_id,
         rj.displayname as Company_name,
         la1.text_value_MAX as SalaryMax,
         la2.text_value_MIN as SalaryMin,
         le1.display as ExpMin,
         le2.display as ExpMax,
         li.industry_desc as Industry,
         group_concat(c.AttValueCustom,'') as keySkills,
         group_concat(fn.field_enu,'') as function,
         group_concat(l.city_desc,'') as location,
         group_concat(fn.sub_field_enu,'') as subfunction,
         K.Applications as Applications,
         K.MatchedApplications as MatchedApplications,
         case account_type
         when 0 THEN "Company"
         when 1 THEN "Consultant"
         when 2 THEN "Others"
         when 3 THEN "Enterprise"
         ELSE "Not Specified"
         END AS account_type,
         IF(XY.enabled = 1 AND XY.price != 0 AND XY.expiry_date > CURDATE(),'Paid','Free') AS 'flag'        
         
         from 
         (select * from recruiter_job 
            where recruiter_job.jobstatus in (3,9) 
            and ((DATEDIFF( CURDATE(),DATE(recruiter_job.publisheddate)) < 51 AND (DATEDIFF( CURDATE(),DATE(recruiter_job.publisheddate)) > 6)) OR (DATEDIFF( CURDATE(),DATE(recruiter_job.republisheddate)) < 51 AND (DATEDIFF( CURDATE(),DATE(recruiter_job.republisheddate)) > 6)))  
         ) AS rj 
         left join lookup_annualsalary AS la1 on rj.salarymax = la1.salary_id 
         left join  lookup_annualsalary AS la2 on rj.salarymin = la2.salary_id 
         left join lookup_experience AS le1 on rj.minexperience = le1.value 
         left join  lookup_experience AS le2 on rj.maxexperience = le2.value 
         left join recruiter_jobattribute as c on rj.jobid = c.jobid_id 
         left join  lookup_industry AS li on rj.industry=li.industry_id 
         left join lookup_subfunctionalarea_new163 AS fn on fn.sub_field_id = c.AttValue AND c.AttType = 12 
         left join lookup_city_new512 AS l on  l.city_id = c.AttValue AND c.AttType = 13 
         left join SumoPlus.XY AS XY on XY.company_account_id = rj.companyid_id
         left join SumoPlus.backoffice_companyaccount AS F on  F.id= rj.companyid_id       
         left join ShineReport.LiveJobsApplications as K on K.JobId = rj.jobid     
         WHERE 
        
         c.AttType in (3,12,13) 
        
         group by rj.jobid
         '''
        
    cmd5= '''drop table if exists SumoPlus.XY '''

    #print 'chnukID:', chunkID, ': Loading jobs from SQL....', time.ctime()
    mysql_conn.query(cmd1)
    mysql_conn.query(cmd2)
    mysql_conn.query(cmd3)
    jobs = mysql_conn.query(cmd4)
    mysql_conn.query(cmd5)