Exemplo n.º 1
0
        monconn_user.dropTable()
        monconn_user.close()

        #############################
        'Starting Index Creation'
        #############################
        ApplicationIndexing()

        #############################
        'Creating Index on Collection'
        #############################
        tablename = "apply_data"
        monconn_user = MongoConnect(tablename,
                                    host='172.22.66.198',
                                    database='SimilarJobs')
        monconn_user.doIndexing('user_index')
        monconn_user.doIndexing('pid')
        monconn_user.close()
        #os.system(' echo "Application Indexing Completed !!!'' " | mutt -s "Similar Jobs Mailer" [email protected], [email protected], [email protected]')
        send_email([
            '*****@*****.**',
            '*****@*****.**'
        ], "Similar Jobs Mailer applies preprocessing",
                   'Application Indexing Completed !!!')
        #send_email(['*****@*****.**'],"Similar Jobs Mailer applies preprocessing",'Application Indexing Completed !!!')
    except Exception as e:
        print e
        #os.system(' echo "Similar Job Mailer Failed : Application Indexing Failed!!!!!\nCall Himanshu (+91-7738982847) asap.'  ' " | mutt -s "Urgent!!!" [email protected],[email protected]')
        #send_email(['*****@*****.**'],"Urgent!!!",'Similar Job Mailer Failed : Application Indexing Failed!!!!!\n Call Kanika (+91-9560649296) or Akash (+91-8527716555) asap.')
        send_email(
            [
     if pprocessing == 0:            
         numChunks = 80
         computeAlertsChunk(0)
 
     if pprocessing == 1:                                                    #######---------- Initiating Multiprocessing
         numChunks = 80                                                      #######---------- Define the number of chunks to break data into and number of concurrent threads
         numConcurrentThreads = 6 
         print "numConcurrentThreads:",numConcurrentThreads
         chunkStart = 0
         chunkEnd = chunkStart + numChunks -1
         poolArgList=range(chunkStart, chunkEnd+1)
         print 'chunkStart:', chunkStart, 'chunkEnd:', chunkEnd
         pool=multiprocessing.Pool(numConcurrentThreads)
         pool.map(computeAlertsChunk, poolArgList)
         pool.close()
         pool.join()
 
 
 
     #########################################################################################################             
     ############-----------------  Creating the index on 'p' field in 'candidates_processed_5' collection
     ######################################################################################################### 
     print "Creating the index"
     tablename = 'candidates_processed_5'
     monconn_recommendations = MongoConnect(tablename, host='localhost', database='JobAlerts')   
     monconn_recommendations.doIndexing(tablename,'p') 
     monconn_recommendations.close()
     send_email(['*****@*****.**', '*****@*****.**'],"Job Alert Mailer","Candidate DB Updation Completed")
 except Exception as e:
     print e
     send_email(['*****@*****.**', '*****@*****.**','*****@*****.**'],"Job Alert Mailer","Candidate DB Updation Failed . Call Akash (+91-8527716555) or Kanika (+91-9560649296) asap.")
Exemplo n.º 3
0
def preProcessChunk(chunkId1, chunkId2):

    ######################################
    '''Fetching the Jobs from SQL'''
    ######################################

    #host="172.22.65.157"
    host = "172.22.66.204"
    user = "******"
    password = "******"
    database = "SumoPlus"
    unix_socket = "/tmp/mysql.sock"
    port = 3306

    print "Loading Jobs From MySql...."
    mysql_conn = MySQLConnect(database, host, user, password, unix_socket,
                              port)
    #cmd = '''SELECT rj.jobid as Jobid,rj.jobtitle as JobTitle,rj.description as JD,la1.text_value_MAX as SalaryMax,la2.text_value_MIN as SalaryMin,le1.display as ExpMin,le2.display as ExpMax,li.industry_desc as Industry,c.AttValueCustom as keySkills,l.city_desc as location,fn.field_enu as function,fn.sub_field_enu as subfunction from recruiter_job AS rj left join lookup_annualsalary AS la1 on rj.salarymax = la1.salary_id left join  lookup_annualsalary AS la2 on rj.salarymin = la2.salary_id left join lookup_experience AS le1 on rj.minexperience = le1.value left join  lookup_experience AS le2 on rj.maxexperience = le2.value left join recruiter_jobattribute as c on rj.jobid = c.jobid_id left join  lookup_industry AS li on rj.industry=li.industry_id left join lookup_subfunctionalarea_new163 AS fn on fn.sub_field_id = c.AttValue AND c.AttType = 12 left join lookup_city_new512 AS l on  l.city_id = c.AttValue AND c.AttType = 13 WHERE rj.jobstatus in (3,5,6,9) and c.AttType in (3,12,13) and (DATEDIFF( CURDATE(),DATE(rj.publisheddate)) < 4 OR DATEDIFF( CURDATE(),DATE(rj.republisheddate)) < 4)  and rj.jobid%''' + str(numChunks) + '=' + str(chunkID)
    #cmd = '''SELECT rj.jobid as Jobid,rj.jobtitle as JobTitle,rj.description as JD,la1.text_value_MAX as SalaryMax,la2.text_value_MIN as SalaryMin,le1.display as ExpMin,le2.display as ExpMax,li.industry_desc as Industry,c.AttValueCustom as keySkills,l.city_desc as location,fn.field_enu as function,fn.sub_field_enu as subfunction from recruiter_job AS rj left join lookup_annualsalary AS la1 on rj.salarymax = la1.salary_id left join  lookup_annualsalary AS la2 on rj.salarymin = la2.salary_id left join lookup_experience AS le1 on rj.minexperience = le1.value left join  lookup_experience AS le2 on rj.maxexperience = le2.value left join recruiter_jobattribute as c on rj.jobid = c.jobid_id left join  lookup_industry AS li on rj.industry=li.industry_id left join lookup_subfunctionalarea_new163 AS fn on fn.sub_field_id = c.AttValue AND c.AttType = 12 left join lookup_city_new512 AS l on  l.city_id = c.AttValue AND c.AttType = 13 WHERE rj.jobstatus in (3,5,6,9) and c.AttType in (3,12,13) and (DATEDIFF( CURDATE(),DATE(rj.publisheddate)) < 4 OR DATEDIFF( CURDATE(),DATE(rj.republisheddate)) < 4)'''
    #print cmd
    cmd1 = '''drop table if exists SumoPlus.XY'''
    cmd2 = '''create table SumoPlus.XY as 
         SELECT company_account_id,SUM(final_sale_price)as price,enabled,MAX(expiry_date)as expiry_date 
         from SumoPlus.backoffice_accountsales a1 
         where enabled in 
         (select min(enabled) from SumoPlus.backoffice_accountsales where a1.company_account_id=company_account_id)
         group by 1
        '''
    cmd3 = '''ALTER TABLE SumoPlus.XY add index company_account_id (company_account_id)'''
    cmd4 = '''SELECT
         rj.jobid as Jobid,
         rj.jobtitle as JobTitle,
         rj.description as JD,
         rj.companyid_id as Company_id,
         rj.displayname as Company_name,
         rj.publisheddate as Published_Date,
         rj.republisheddate as RePublished_Date,
         rj.expirydate as Expiry_Date,
         la1.text_value_MAX as SalaryMax,
         la2.text_value_MIN as SalaryMin,
         le1.display as ExpMin,
         le2.display as ExpMax,
         li.industry_desc as Industry,
         group_concat(c.AttValueCustom,'') as keySkills,
         group_concat(fn.field_enu,'') as function,
         group_concat(l.city_desc,'') as location,
         group_concat(fn.sub_field_enu,'') as subfunction,
         lj.Applications as Application_Number,
         case account_type
         when 0 THEN "Company"
         when 1 THEN "Consultant"
         when 2 THEN "Others"
         when 3 THEN "Enterprise"
         ELSE "Not Specified"
         END AS account_type,
         IF(XY.enabled = 1 AND XY.price != 0 AND XY.expiry_date > CURDATE(),'Paid','Free') AS 'flag'        
         
         from 
         (select * from recruiter_job 
            where ( (DATEDIFF( CURDATE(),DATE(recruiter_job.publisheddate)) > %s AND DATEDIFF( CURDATE(),DATE(recruiter_job.publisheddate)) <= %s) OR (DATEDIFF( CURDATE(),DATE(recruiter_job.republisheddate)) > %s AND DATEDIFF( CURDATE(),DATE(recruiter_job.republisheddate)) <= %s))) AS rj 
         left join lookup_annualsalary AS la1 on rj.salarymax = la1.salary_id 
         left join  lookup_annualsalary AS la2 on rj.salarymin = la2.salary_id 
         left join lookup_experience AS le1 on rj.minexperience = le1.value 
         left join  lookup_experience AS le2 on rj.maxexperience = le2.value 
         left join recruiter_jobattribute as c on rj.jobid = c.jobid_id 
         left join  lookup_industry AS li on rj.industry=li.industry_id 
         left join lookup_subfunctionalarea_new163 AS fn on fn.sub_field_id = c.AttValue AND c.AttType = 12 
         left join lookup_city_new512 AS l on  l.city_id = c.AttValue AND c.AttType = 13 
         left join SumoPlus.XY AS XY on XY.company_account_id = rj.companyid_id
         left join SumoPlus.backoffice_companyaccount AS F on  F.id= rj.companyid_id       
         left join ShineReport.LiveJobsApplications AS lj on rj.jobid = lj.JobId
         
         WHERE 
        
         c.AttType in (3,12,13) 
        
         group by rj.jobid
         ''' % (chunkId1, chunkId2, chunkId1, chunkId2)

    cmd5 = '''drop table if exists SumoPlus.XY
        '''

    print 'chnukID:', chunkId1, ': Loading jobs from SQL....', time.ctime()
    mysql_conn.query(cmd1)
    print 'cmd1'
    mysql_conn.query(cmd2)
    print 'cmd2'
    mysql_conn.query(cmd3)
    print 'cmd3'
    jobs = mysql_conn.query(cmd4)
    print 'jobs'
    mysql_conn.query(cmd5)
    print 'chunkID:', chunkId1, ': Loading jobs from SQL....completed..', time.ctime(
    )

    print 'chunkid:', chunkId1, ' : Number of jobs loaded: ', len(jobs)

    ######################################
    '''Connecting to Mongo 233 Server'''
    ######################################

    print 'Connecting to Mongodb..'
    tableName = 'jobs_processed_9months'
    monconn_jobs_local = MongoConnect(tableName,
                                      host='172.22.66.198',
                                      database='SimilarJobs')
    monconn_jobs_local_cur = monconn_jobs_local.getCursor()
    print 'Connecting to Mongodb...finished'

    ######################################
    '''Processing the Jobs'''
    ######################################
    global i
    #i = 0
    for job in jobs:
        #pprint(job)
        #print i
        if i % 1000 == 0:
            print '\tchunkID:', chunkId1, ' numRecords:', i, ' completed in ', time.time(
            ) - start_time, ' seconds'

        job_id = job['Jobid']
        job_title = cleanToken(job['JobTitle'])
        job_maxexp = cleanToken(job['ExpMax'])
        job_minexp = cleanToken(job['ExpMin'])
        job_maxsal = cleanToken(job['SalaryMax'])
        job_minsal = cleanToken(job['SalaryMin'])
        job_jd = cleanHTML(cleanToken(job['JD']))
        job_industry = cleanToken(job['Industry'])
        job_location = removeDup(job['location'])
        job_subfunction = removeDup(cleanToken(job['subfunction']))
        job_function = removeDup(cleanToken(job['function']))
        job_skills = removeDup(cleanToken(job['keySkills']))
        job_flag = job['flag']
        job_accounttype = job['account_type']
        job_company_id = job['Company_id']
        job_company_name = cleanToken(job['Company_name'])
        job_index = i
        job_publishedate = job['Published_Date']
        job_repubslisheddate = job['RePublished_Date']
        job_expirydate = job['Expiry_Date']
        pid = i % 5000
        job_applications = job['Application_Number']
        job_location = job_location.replace(', ', ',').lower().split(',')

        #################################################
        '''Creating Bag of Words from the text fields'''
        #################################################

        text = 5 * (" " + job_title) + ' ' + 3 * (
            " " + job_skills) + ' ' + 1 * (" " + job_jd) + ' ' + 2 * (
                " " + job_industry) + ' ' + 2 * (
                    " " + job_function) + ' ' + 2 * (" " + job_subfunction)
        text = text.replace('candidates', ' ')
        job_bow = mb.getBow(text, getbowdict=0)

        ##################################################
        '''Dumping Job Details in Mongo (172.22.66.253)'''
        ##################################################

        document = {'job_id': job_id, 'job_title': job_title,'job_function':job_function, \
             'job_maxexp': job_maxexp, 'job_minexp': job_minexp,\
             'job_location':job_location, 'job_subfunction':job_subfunction,\
             'job_maxsal':job_maxsal,'job_minsal':job_minsal, 'job_skills': job_skills, \
             'job_bow': job_bow, 'job_industry': job_industry, 'job_jd': job_jd, \
             'job_flag':job_flag,'job_accounttype':job_accounttype, \
             'job_company_id':job_company_id,'job_company_name':job_company_name,'job_index':job_index, \
             'application_number': job_applications,'pid':pid,'job_publishedate':job_publishedate , \
             'job_repubslisheddate':job_repubslisheddate,'job_expirydate':job_expirydate
             }

        monconn_jobs_local.saveToTable(document)

        i += 1

    print "Processing finished....."
    print 'chunkID:', chunkId1, ' Total time taken is: ', time.time(
    ) - start_time, ' seconds.'
    end_time = time.time()
    time_taken = end_time - start_time
    monconn_jobs_local.doIndexing('pid')
    #send_email(['*****@*****.**', '*****@*****.**','*****@*****.**'],"Similar Jobs Mailer 9 Month Jobs",'Jobs Processing 9 Months Completed !!\nJobs Processed '+str(i)+' in :' + str(end_time - start_time) + ' seconds')
    #os.system(' echo "Jobs Processing 9 Months Completed !!\nJobs Processed '+str(i)+' in :' + str(end_time - start_time) + ' seconds' +' " | mutt -s "Similar Jobs Mailer" [email protected], [email protected], [email protected]')
    del (monconn_jobs_local)
    del (mysql_conn)