示例#1
0
def translate(company_name):
    """Translate the non-English job postings of one company into English.

    Repeatedly fetches a document whose ``translated`` field is the string
    ``"false"`` from the ``company_name`` collection. When the language
    detected for its ``job_summary`` is not ``'en'``, the ``city``,
    ``snippet`` and ``job_summary`` fields are translated and written back.
    The document is then flagged ``translated: "true"`` so that the next
    lookup returns a different document.

    A ``job_summary`` that cannot be translated is reported on stdout and
    left unchanged in the database; the document is still flagged as
    processed so a single bad posting cannot stall the loop.

    Args:
        company_name: Name of the MongoDB collection holding the postings.
    """
    conn = getMongoConnection()
    collection = conn[config.Mongo_DB_NAME][company_name]
    pending = {"translated": "false"}

    # One find_one() per pass both checks for remaining work and fetches it,
    # instead of a count() query followed by a second lookup.
    job = collection.find_one(pending)
    while job is not None:
        # NOTE(review): documents are addressed by "url"; this presumes the
        # url is unique within the collection -- confirm against the writer.
        by_url = {"url": job["url"]}

        try:
            lan = detect(unicode(job['job_summary'], "utf-8"))
        except Exception:
            # Decoding failed (presumably the value is already unicode or is
            # not valid UTF-8); let the detector see the raw value instead.
            lan = detect(job['job_summary'])
        print(lan)
        print(job)

        if str(lan) != 'en':
            for key in ("city", "snippet"):
                if key in job:
                    job[key] = translator.translate(job[key], lang_to="en")
                    collection.update(by_url,
                                      {"$set": {key: job[key]}},
                                      upsert=False,
                                      multi=False)

            # First attempt: decode the summary as UTF-8 and translate it.
            # Second attempt: drop every non-ASCII character and retry.
            # flag stays 0 only when both attempts fail.
            flag = 0
            try:
                job["job_summary"] = translator.translate(
                    unicode(job["job_summary"], "utf-8"), lang_to="en")
                flag = 1
            except Exception:
                try:
                    stripped = job["job_summary"].encode("ascii", "ignore")
                    job["job_summary"] = translator.translate(
                        stripped, lang_to="en")
                    flag = 1
                except Exception:
                    print("couldn't translate %s" % job['url'])
            print(flag)
            if flag == 1:
                collection.update(by_url,
                                  {"$set": {"job_summary": job["job_summary"]}},
                                  upsert=False,
                                  multi=False)

        # Flag the posting as processed whether or not it needed translating,
        # so the next find_one() moves on to another document.
        collection.update(by_url,
                          {"$set": {"translated": "true"}},
                          upsert=False,
                          multi=False)
        job = collection.find_one(pending)
示例#2
0
def translate(company_name):
    """Translate the non-English job postings of one company into English.

    Repeatedly fetches a document whose ``translated`` field is ``True``
    from the ``company_name`` collection. When the language detected for its
    ``snippet`` is not ``'en'``, the ``city``, ``snippet`` and
    ``job_summary`` fields are translated and written back. The document's
    ``translated`` field is then set to ``False`` so that the next lookup
    returns a different document.

    A ``job_summary`` that cannot be translated is reported on stdout and
    left unchanged in the database; the document's flag is still flipped so
    a single bad posting cannot stall the loop.

    NOTE(review): selecting ``translated: True`` and writing ``False`` once
    done is the opposite of what the field name suggests -- confirm the
    intended polarity with whatever code inserts these documents.

    Args:
        company_name: Name of the MongoDB collection holding the postings.
    """
    conn = getMongoConnection()
    db = conn[config.Mongo_DB_NAME]
    print(db)
    print("something yar")

    collection = db[company_name]
    pending = {"translated": True}

    # One find_one() per pass both checks for remaining work and fetches it,
    # instead of a count() query followed by a second lookup.
    job = collection.find_one(pending)
    while job is not None:
        # NOTE(review): documents are addressed by "url"; this presumes the
        # url is unique within the collection -- confirm against the writer.
        by_url = {"url": job["url"]}

        lan = detect(job['snippet'])
        if str(lan) != 'en':
            for key in ("city", "snippet"):
                if key in job:
                    job[key] = translator.translate(job[key], lang_to="en")
                    collection.update(by_url,
                                      {"$set": {key: job[key]}},
                                      upsert=False,
                                      multi=False)

            if "job_summary" in job:
                # First attempt: decode the summary as UTF-8 and translate
                # it. Second attempt: translate the raw stored value.
                # flag stays 0 only when both attempts fail.
                flag = 0
                try:
                    job["job_summary"] = translator.translate(
                        unicode(job["job_summary"], "utf-8"), lang_to="en")
                    flag = 1
                except Exception:
                    try:
                        job["job_summary"] = translator.translate(
                            job["job_summary"], lang_to="en")
                        flag = 1
                    except Exception:
                        print("couldn't translate %s" % job['url'])
                print(flag)
                if flag == 1:
                    collection.update(
                        by_url,
                        {"$set": {"job_summary": job["job_summary"]}},
                        upsert=False,
                        multi=False)

        # Flip the flag whether or not the posting needed translating, so
        # the next find_one() moves on to another document.
        collection.update(by_url,
                          {"$set": {"translated": False}},
                          upsert=False,
                          multi=False)
        job = collection.find_one(pending)
示例#3
0
    def tag_jobs(self, company_name):
        """Fill in the ``BU`` field of every untagged job of a company.

        Works through the ``company_name`` collection one document at a
        time for as long as any document has an empty-string ``BU``. A job
        whose title ``self.to_ignore`` accepts is tagged ``"invalid"``;
        every other job gets the value ``self.calculate_tag`` returns for
        its ``job_summary``. The result is stored on the document matched
        by the job's ``url``.

        Args:
            company_name: Name of the MongoDB collection to process; also
                forwarded to ``calculate_tag``.
        """
        db = connection.getMongoConnection()[config.Mongo_DB_NAME]
        untagged = {"BU": ""}

        while db[company_name].find(untagged).count() > 0:
            job = db[company_name].find_one(untagged)

            # Pick the tag first so the write below exists only once.
            if self.to_ignore(job['jobtitle']):
                tag = "invalid"
            else:
                tag = self.calculate_tag(company_name, job["job_summary"])

            db[company_name].update(
                {"url": job['url']},
                {"$set": {"BU": tag}},
                upsert=False,
                multi=False)
示例#4
0
 def __init__(self):
     """Open a MongoDB connection and select the configured database.

     Stores the connection as ``self.conn`` and the database named by
     ``config.Mongo_DB_NAME`` as ``self.db``.
     """
     mongo = connection.getMongoConnection()
     self.conn = mongo
     self.db = mongo[config.Mongo_DB_NAME]
 def __init__(self, company_name, key):
     """Store the search settings for one company and connect to MongoDB.

     Args:
         company_name: Company name embedded in the ``company:(...)``
             search query.
         key: Value stored unchanged as ``self.key``; presumably the API
             key for the endpoint below -- confirm against the caller.
     """
     query_template = "company:({0})"
     self.query = query_template.format(company_name)
     self.endpoint = 'http://api.indeed.com/ads/apisearch'
     self.key = key

     # Keep the connection itself as well as the selected database handle.
     mongo = connection.getMongoConnection()
     self.conn = mongo
     self.db = mongo[Mongo_DB_NAME]