def reply_to_tweets():
    print('retrieving and replying to tweets...', flush=True)
    # last seen ids used for testing: 1112619279025725441, 1124521036538568704
    url = "http://188.166.216.148:4545/"
    response = requests.request("GET", url)
    last_seen_id = response.text
    mentions = api.mentions_timeline(last_seen_id, tweet_mode='extended')
    print('last seen id : ', last_seen_id)
    # print(mentions)
    # Iterate in reverse: the mentions timeline is returned newest first,
    # so reversing processes the mentions in chronological order.
    for mention in reversed(mentions):
        print(str(mention.id) + ' - ' + mention.full_text + ' - ' +
              str(mention.in_reply_to_screen_name), flush=True)
        last_seen_id = str(mention.id)
        store_last_seen_id(last_seen_id)
        tweet = mention.full_text
        tlow = tweet.lower()
        # print(tlow)
        city = re.findall(r"#(\w+)", tlow)
        translate = Translate()
        ans = translate.kamusDaerah(city)
        print(ans)
        # if '#hai' in mention.full_text.lower():
        if ans != "no" and mention.in_reply_to_screen_name != 'cuaca_kita':
            print('found a tweet that needs a reply!', flush=True)
def Vacancy_info(url):
    print(url)
    page = requests.get(url)
    # /html/body/table[2]/tbody/tr/td[2]/div/table/tbody/tr[2]/td[2]/table/tbody/tr/td/div/div[7]

    # Description
    try:
        description = Selector(response=page).xpath(
            '/html/body/table[2]/tr/td[2]/div/table/tr[2]/td[2]/table/tr/td/div[6]'
        ).get()
        description = remove_tags(description)
        description = description.strip()
        # description = re.sub(r"\s+", " ", description)
    except:
        description = ""
    if description is None:
        description = ""

    try:
        lang = detect(description)
    except Exception:  # detect() raises on empty input
        lang = ""
    if lang == "ru":
        description_ru = description
        description_en = Translate(description)
        description_ka = ""
    elif lang == "et":  # these scripts treat an "et" result as Georgian text
        description_ru = ""
        try:
            description_en = Translate(description)
        except:
            description_en = ""
        description_ka = description
    else:
        description_ru = ""
        description_en = description
        description_ka = ""

    # Email
    try:
        email = re.findall(r'[\w\.-]+@[\w\.-]+', description)
        email = email[0]
    except:
        email = ""

    data = {
        "description_ka": description_ka,
        "description_ru": description_ru,
        "description_en": description_en,
        "email": email
    }
    print("Info Scraped Successfully")
    return data

# //*[@id="CenterBody1"]/div/table/tbody/tr[2]/td[2]/table/tbody/tr/td/div/div[7]
# Vacancy_info('https://jobs24.ge/?act=obj&id=173982&PHPSESSID=tf04s8ucsd5trehbc1qouk90f25tnqma')
def Vacancy_info(url):
    url = url.replace("/en/", "/ge/")
    print(url)
    page = requests.get(url)

    # Description
    try:
        description = Selector(response=page).xpath(
            '//*[@id="job"]/table/tr[1]/td/table[2]').get()
        description = remove_tags(description)
        description = description.strip()
        description = description.replace('*', "")
        description = re.sub(r"\s+", " ", description)
        print(description)
    except:
        description = ""

    try:
        lang = detect(description)
    except Exception:  # detect() raises on empty input
        lang = ""
    if lang == "ru":
        description_ru = description
        description_en = Translate(description)
        description_ka = ""
    elif lang == "et":
        description_ru = ""
        try:
            description_en = Translate(description)
        except:
            description_en = ""
        description_ka = description
    else:
        description_ru = ""
        description_en = description
        description_ka = ""

    # Email
    try:
        email = re.findall(r'[\w\.-]+@[\w\.-]+', description)
        email = email[0]
    except:
        email = ""

    data = {
        "description_ka": description_ka,
        "description_ru": description_ru,
        "description_en": description_en,
        "email": email
    }
    return data

# Vacancy_info("https://jobs.ge/en/?view=jobs&id=268715")
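# The detect()/Translate() branching above reappears in nearly every scraper in
# this collection. A minimal sketch of a shared helper (hypothetical name
# split_by_language; assumes langdetect's detect() and the same Translate()
# wrapper used above) that the per-site functions could call instead:
def split_by_language(description):
    """Return (description_en, description_ru, description_local)."""
    description_en = description_ru = description_local = ""
    try:
        lang = detect(description)
    except Exception:
        lang = ""
    if lang == "ru":
        description_ru = description
        try:
            description_en = Translate(description)
        except Exception:
            description_en = ""
    elif lang == "et":  # these scripts treat an "et" result as local-language text
        description_local = description
        try:
            description_en = Translate(description)
        except Exception:
            description_en = ""
    else:
        description_en = description
    return description_en, description_ru, description_local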
def reply_to_tweets():
    print('retrieving and replying to tweets...', flush=True)
    # last seen id used for testing: 1112619279025725441
    url = "http://188.166.216.148:4545/"
    response = requests.request("GET", url)
    last_seen_id = response.text
    mentions = api.mentions_timeline(last_seen_id, tweet_mode='extended')
    print('last seen id : ', last_seen_id)
    # Iterate in reverse: the mentions timeline is returned newest first,
    # so reversing processes the mentions in chronological order.
    for mention in reversed(mentions):
        print(str(mention.id) + ' - ' + mention.full_text, flush=True)
        last_seen_id = str(mention.id)
        store_last_seen_id(last_seen_id)
        tweet = mention.full_text
        tlow = tweet.lower()
        city = re.findall(r"#(\w+)", tlow)
        translate = Translate()
        ans = translate.kamusDaerah(city)
        print(ans)
        # if '#hai' in mention.full_text.lower():
        if ans != "no":
            print('found a tweet that needs a reply!', flush=True)
            text_gen = Text_generator()
            data = text_gen.getCData(ans)
            sentence = text_gen.generator(data)
            print('responding to tweet...', flush=True)
            api.update_status(
                'Hai! @' + mention.user.screen_name + " " + sentence,
                mention.id)
        elif city != []:
            # Reply (in Indonesian): "Unfortunately Cuki was only built for
            # those who want the weather forecast in Indonesia. Try another one!"
            api.update_status(
                'Hai! @' + mention.user.screen_name +
                " sayang sekali Cuki hanya dibuat untuk kamu yang ingin tau prakiraan cuaca di Indonesia saja\n\nAyo coba yang lain",
                mention.id)
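# Both reply_to_tweets() variants fetch the last seen id with a GET to
# http://188.166.216.148:4545/ and then call store_last_seen_id() to persist the
# newest mention id. The helper itself is not shown; a minimal sketch, assuming
# the same endpoint accepts the id in a POST body (the server contract is an
# assumption):
def store_last_seen_id(last_seen_id):
    url = "http://188.166.216.148:4545/"
    try:
        requests.post(url, data={"last_seen_id": last_seen_id}, timeout=10)
    except requests.RequestException as exc:
        print("failed to store last seen id:", exc, flush=True)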
def __init__(self, proxies={}, entropy_filter=True, lang='en',
             entropy_top=3, query_top=100, fraction=5):
    self.__ngd = NGD(proxies)
    # self.__ngd.set_context('site:imsdb.com')
    self.__cache = {}
    self.__min_ent = 0.0
    self.__entropy_filter = entropy_filter
    self.__lang = lang
    self.__entropy_top = entropy_top
    self.__fraction = fraction
    self.__query_top = query_top
    self.__translator = Translate()
    self.__lock = Lock()
    self.__voc_translator = None
    random.seed(666)
def Vacancy(link):
    url = link
    page = requests.get(url)

    # Description
    try:
        description = Selector(response=page).xpath(
            '/html/body/div[3]/div[1]/div/div/div[1]/div[1]').get()
        description = remove_tags(description)
        if detect(description) == "et":
            try:
                description_en = Translate(description)
            except:
                description_en = ""
            description_az = description
        else:
            description_en = description
            description_az = ""
    except:
        description_en = ""
        description_az = ""

    # Email
    try:
        driver.get(url)
        email = driver.find_element_by_xpath(
            '/html/body/div[3]/div[1]/div/div/div[1]/div[1]').text
        email = re.findall(r'[\w\.-]+@[\w\.-]+', email)
    except:
        email = []

    data = {
        "description_az": description_az,
        "description_en": description_en,
        "email": email
    }
    # print(data)
    return data

# Vacancy("https://azinka.az/jobs/3710/")
def Vacancy(link):
    url = link
    headers = {
        "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.75 Safari/537.36",
        "Accept-Language": "en-US,en;q=0.9,ru;q=0.8"
    }
    page = requests.get(url, headers=headers)

    # Company
    try:
        company = Selector(response=page).xpath(
            '//*[@id="loyal"]/div[1]/div[2]/div[2]/div[1]/h4/text()').get()
    except:
        company = ""

    # Position
    try:
        position = Selector(response=page).xpath(
            '//*[@id="loyal"]/div[2]/div/div[1]/h4/text()').get()
    except:
        position = ""

    # Logo
    try:
        logo = Selector(response=page).xpath(
            '//*[@id="loyal"]/div[1]/div[2]/div[2]/div[1]/img/@src').get()
    except:
        logo = ""

    # Job type
    try:
        job_type = Selector(response=page).xpath(
            '/html/body/div[3]/div/div[1]/div[2]/div[1]/div[2]/div[1]/div[1]//text()[2]'
        ).get()
        job_type = job_type.strip()
    except:
        job_type = ""

    # Contact person
    try:
        person = Selector(response=page).xpath(
            '//*[@id="loyal"]/div[1]/div[2]/div[2]/div[2]/div/text()[2]').get()
        person = person.strip()
    except:
        person = ""

    # Email
    try:
        email = Selector(response=page).xpath(
            '//*[@id="loyal"]/div[1]/div[2]/div[2]/div[2]/div/text()[3]').get()
        email = email.strip()
        email = [email]
    except:
        email = []

    # Phone: numbers default to Armenia's country code 374 unless a "+" prefix says otherwise
    try:
        phone = Selector(response=page).xpath(
            '//*[@id="loyal"]/div[1]/div[2]/div[2]/div[2]/div/text()[4]').get()
        phone = phone.strip()
        if "," in phone:
            phones = phone.split(",")
            phone = []
            for each in phones:
                each = each.strip()
                if "+" in each and " " in each:
                    number = each.split(" ", 1)[1].replace('-', "").replace(" ", "")
                    country_code = each.split(" ", 1)[0].replace('+', "")
                    phone.append({"country_code": country_code, "number": number})
                elif "+" in each and " " not in each:
                    if "+374" in each:
                        country_code = "374"
                        number = each.replace("+374", "")
                        phone.append({"country_code": country_code, "number": number})
                    elif "+1" in each:
                        country_code = "1"
                        number = each.replace("+1", "")
                        phone.append({"country_code": country_code, "number": number})
                    else:
                        country_code = "374"
                        number = each
                        phone.append({"country_code": country_code, "number": number})
                elif "+" not in each:
                    number = each.replace('-', "").replace(" ", "")
                    country_code = "374"
                    phone.append({"country_code": country_code, "number": number})
        else:
            if "+" in phone and " " in phone:
                number = phone.split(" ", 1)[1].replace('-', "").replace(" ", "")
                country_code = phone.split(" ", 1)[0].replace('+', "")
                phone = [{"country_code": country_code, "number": number}]
            elif "+" in phone and " " not in phone:
                if "+374" in phone:
                    country_code = "374"
                    number = phone.replace("+374", "")
                    phone = [{"country_code": country_code, "number": number}]
                elif "+1" in phone:
                    country_code = "1"
                    number = phone.replace("+1", "")
                    phone = [{"country_code": country_code, "number": number}]
                else:
                    country_code = "374"
                    number = phone
                    phone = [{"country_code": country_code, "number": number}]
            elif "+" not in phone:
                number = phone.replace('-', "").replace(" ", "")
                country_code = "374"
                phone = [{"country_code": country_code, "number": number}]
    except Exception:
        phone = []

    # Website
    try:
        website = Selector(response=page).xpath(
            '//*[@id="loyal"]/div[1]/div[2]/div[2]/div[2]/div/text()[5]').get()
        website = website.strip()
        if "not" in website:
            website = []
        else:
            website = [website]
    except:
        website = []

    # Published (YYYY-MM-DD)
    try:
        published = Selector(response=page).xpath(
            '//*[@id="loyal"]/div[1]/div[2]/div[1]/div[2]/text()[2]').get()
        published = published.strip()
        publish_day = int(published.split("-")[2])
        publish_month = int(published.split("-")[1])
        publish_year = int(published.split("-")[0])
    except:
        publish_day = 0
        publish_month = 0
        publish_year = 0

    # Ends (deadline, YYYY-MM-DD)
    try:
        ends = Selector(response=page).xpath(
            '//*[@id="loyal"]/div[1]/div[2]/div[1]/div[2]/text()[5]').get()
        ends = ends.strip()
        deadline_day = int(ends.split("-")[2])
        deadline_month = int(ends.split("-")[1])
        deadline_year = int(ends.split("-")[0])
    except:
        deadline_day = 0
        deadline_month = 0
        deadline_year = 0

    # Career level
    try:
        career_level = Selector(response=page).xpath(
            '//*[@id="loyal"]/div[1]/div[2]/div[1]/div[2]/span[1]/text()').get()
        if career_level is None:
            career_level = ""
    except:
        career_level = ""

    # Education
    try:
        education = Selector(response=page).xpath(
            '//*[@id="loyal"]/div[1]/div[2]/div[1]/div[2]/span[2]/text()').get()
        if education is None:
            education = ""
    except:
        education = ""

    # Experience
    try:
        experience = Selector(response=page).xpath(
            '//*[@id="loyal"]/div[1]/div[2]/div[1]/div[2]/span[3]/text()').get()
        if experience is None:
            experience = ""
    except:
        experience = ""

    # Salary
    try:
        salary = Selector(response=page).xpath(
            '//*[@id="loyal"]/div[1]/div[2]/div[1]/div[2]/strong/text()').get()
        if "-" in salary:
            salary = salary.split("-")
            min_salary = int(salary[0].strip().replace(".", ""))
            max_salary = int(salary[1].strip().replace(".", ""))
        elif "-" not in salary and salary != "N/A":
            # the original called salary.replace(".") with one argument (a TypeError)
            min_salary = int(salary.replace(".", ""))
            max_salary = int(salary.replace(".", ""))
        else:
            min_salary = 0
            max_salary = 0
    except:
        min_salary = 0
        max_salary = 0

    # Vacancy description
    try:
        v_description = Selector(response=page).xpath(
            '//*[@id="loyal"]/div[2]/div/div[1]').get()
        v_description = remove_tags(v_description).strip()
        v_description = v_description.replace('\xa0', " ")
    except:
        v_description = ""
    try:
        if detect(v_description) == "et":
            try:
                v_description_en = Translate(v_description)
            except:
                v_description_en = " "
            v_description_am = v_description
        else:
            v_description_en = v_description
            v_description_am = ""
    except:
        v_description_am = ""
        v_description_en = ""

    # Company description
    try:
        c_description = Selector(response=page).xpath(
            '//*[@id="loyal"]/div[1]/div[2]/div[2]/div[1]/p/text()').get()
        c_description = c_description.strip()
    except:
        c_description = ""
    try:
        if detect(c_description) == "et":
            try:
                c_description_en = Translate(c_description)
            except:
                c_description_en = " "
            c_description_am = c_description
        else:
            c_description_en = c_description
            c_description_am = ""
    except:
        c_description_am = ""
        c_description_en = ""

    data = {
        "company": company,
        "position": position,
        "logo": logo,
        "person": person,
        "job_type": job_type,
        "email": email,
        "phone": phone,
        "website": website,
        "publish_day": publish_day,
        "publish_month": publish_month,
        "publish_year": publish_year,
        "deadline_day": deadline_day,
        "deadline_month": deadline_month,
        "deadline_year": deadline_year,
        "career_level": career_level,
        "education": education,
        "experience": experience,
        "min_salary": min_salary,
        "max_salary": max_salary,
        "v_description_am": v_description_am,
        "v_description_en": v_description_en,
        "c_description_am": c_description_am,
        "c_description_en": c_description_en,
    }
    print(data)
    return data

# Vacancy("https://rezume.am/job/2184")
#!/usr/bin/env python3
import os

from translator import Translate

messageWelcome = "Pig Latin"
messageGoodbye = "Goodbye! :)"

# Clear the display (one of the two calls fails silently depending on the OS)
os.system('cls')
os.system('clear')

# Print welcome message
print(messageWelcome + ": " + Translate(messageWelcome))
print("\nTo finish translating, enter 'quit'.\n")

done = False
while not done:
    text = input("Text to translate: ")
    done = text.lower() in ['quit', 'itquay']  # user quitting?
    print(Translate(text) + '\n')

print(Translate(messageGoodbye))
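# The translator module imported above is not included here. A minimal sketch
# of a Pig Latin Translate() that the script could run against (hypothetical
# implementation; the real module's rules may differ, e.g. for "qu" clusters):
def Translate(text):
    words = []
    for word in text.split():
        if word[0].lower() in "aeiou":
            words.append(word + "way")  # vowel-initial words just get "way"
        else:
            # move the leading consonant cluster to the end and add "ay"
            for i, ch in enumerate(word):
                if ch.lower() in "aeiou":
                    words.append(word[i:] + word[:i] + "ay")
                    break
            else:
                words.append(word + "ay")  # no vowels at all
    return " ".join(words)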
import requests
import json
# import numpy as np
import time

from translator import Translate
from check import Check

translate = Translate()
check = Check()


class RequestAPI:
    def get_by_city(self, c):
        self.url = "http://api.openweathermap.org/data/2.5/forecast"
        self.querystring = {"appid": "2651c986fc0256f04e92c5a71d08e870"}
        self.querystring['q'] = c
        response = requests.request("GET", self.url, params=self.querystring)
        # print("Collecting weather data for city", c)
        print("\r" + "Successfully collected weather data for city " + c + ".....")
        return json.loads(response.text)

    def get_by_coordinate(self, lat, lon):
        self.url = "http://api.openweathermap.org/data/2.5/forecast"
        self.querystring = {"appid": "2651c986fc0256f04e92c5a71d08e870"}
        self.querystring['lat'] = lat
        self.querystring['lon'] = lon
        response = requests.request("GET", self.url, params=self.querystring)
        return json.loads(response.text)

    def get_sys(self, c):
        self.url = "http://api.openweathermap.org/data/2.5/weather"
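# A quick usage sketch for RequestAPI (assumes the API key embedded above is
# still valid; the OpenWeatherMap forecast endpoint returns 3-hourly entries
# under the "list" key):
api = RequestAPI()
forecast = api.get_by_city("Jakarta")
for entry in forecast.get("list", [])[:3]:
    print(entry["dt_txt"], entry["weather"][0]["description"])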
def get_this_month_in_unicode(self):
    this_month_in_cyrillic = self.get_this_month_in_cyrillic()
    return Translate(
        cyrillic=this_month_in_cyrillic).translate_cyrillic_to_unicode()
def Vacancy(link): print("request sent for Vacancy succesfully") url = link print(url) # headers = {"Accept-Language": "en-US,en;q=0.5"} page = requests.get(url) #headers=headers) # Published try: published = Selector(response=page).xpath( '/html/body/div[2]/div/div[2]/div/div/div[1]/div[2]/div[1]/div[2]/div/div/ul/li[2]/span/text()[2]' ).get() published = published.strip().split(" ") publish_day = int(published[0].split("/")[0]) publish_month = int(published[0].split("/")[1]) publish_year = int(published[0].split("/")[2]) except Exception as e: publish_day = 0 publish_month = 0 publish_year = 0 if yesterday_day != publish_day or yesterday_month != publish_month: print("Not published yesterday") return # Location # try: location = Selector(response=page).xpath( '/html/body/div[2]/div/div[2]/div/div/div[1]/div[2]/div[1]/div[2]/div/div/ul/li[1]/text()' ).get() location = location.strip() location_id = [] location = {"city": f"{location}", "id": f"{Geonames(location)}"} location_id.append(location) except: location_id = [{'city': 'Yerevan', 'id': '616052'}] # Posted by try: posted_by = Selector(response=page).xpath( '/html/body/div[2]/div/div[2]/div/div/div[1]/div[2]/div[1]/div[2]/p[1]/text()' ).get() posted_by = posted_by.strip() except: posted_by = "" # Email try: email = Selector(response=page).xpath( '/html/body/div[2]/div/div[2]/div/div/div[1]/div[2]/div[1]/div[2]/p[2]/text()' ).get() email = email.strip() if email == "": email = [] else: email = [email] except: email = [] # Workspace try: workspace = Selector(response=page).xpath( '/html/body/div[2]/div/div[2]/div/div/div[1]/div[2]/ul/li[2]/div[2]/div[2]/p/text()' ).get() workspace = workspace.strip() except: workspace = "" # Job_type try: job_type = Selector(response=page).xpath( '/html/body/div[2]/div/div[2]/div/div/div[1]/div[2]/ul/li[3]/div[2]/div[2]/p/text()' ).get() job_type = job_type.strip() except: job_type = "" # Salary try: salary = Selector(response=page).xpath( '/html/body/div[2]/div/div[2]/div/div/div[1]/div[2]/ul/li[4]/div[2]/div[2]/p/text()' ).get() salary = salary.strip().replace("Until ", "") if "-" in salary: salary = salary.split("-") min_salary = int(salary[0].strip()) max_salary = int(salary[1].strip()) elif "-" not in salary and salary != '': min_salary = int(salary) max_salary = int(salary) else: min_salary = 0 max_salary = 0 except: min_salary = 0 max_salary = 0 # Education try: education = Selector(response=page).xpath( '/html/body/div[2]/div/div[2]/div/div/div[1]/div[2]/ul/li[5]/div[2]/div[2]/p/text()' ).get() education = education.strip() except: education = "" # Experience try: experience = Selector(response=page).xpath( '/html/body/div[2]/div/div[2]/div/div/div[1]/div[2]/ul/li[6]/div[2]/div[2]/p/text()' ).get() experience = experience.strip() except: experience = "" # Gender try: gender = Selector(response=page).xpath( '/html/body/div[2]/div/div[2]/div/div/div[1]/div[2]/ul/li[7]/div[2]/div[2]/p/i/@class' ).get() if "female" in gender: gender = "female" elif "male" in gender: gender = "male" else: gender = '' except: gender = "" # Age try: age = Selector(response=page).xpath( '/html/body/div[2]/div/div[2]/div/div/div[1]/div[2]/ul/li[8]/div[2]/div[2]/p/text()' ).get() age = age.strip() except: age = "" print(1) # Description try: description = Selector(response=page).xpath( '/html/body/div[2]/div/div[2]/div/div/div[1]/div[2]/ul/li[10]/div[2]/div/p/text()' ).get() description = description.strip() except: description = "" description_en = "" description_am = "" try: if detect(description) == "et": try: 
description_en = Translate(description) except: description_en = "" description_am = description else: description_en = description description_am = "" except: description_en = "" description_am = "" # Phone try: phone = Selector(response=page).css( '#sidebar-border > div.detailed-info-block.form-inline.clearfix > div.clearfix > div > div.user-details' ).extract() phones = [] for phone in phone: phone = remove_tags(phone).strip() area_code = "374" number = phone.replace(" ", "") number = number.replace("-", "") number = number.replace("(", "") number = number.replace(")", "") phones.append({'country_code': area_code, "number": number}) except: phone = [] # Username try: username = Selector(response=page).xpath( '//*[@id="sidebar-border"]/div[1]/div[1]/div/div[1]/div[2]/div[1]/div[2]/h6/a/text()' ).get() username = username.strip() except: username = "" data = { "publish_day": publish_day, "publish_month": publish_month, "publish_year": publish_year, "location_id": location_id, "posted_by": posted_by, "email": email, "workspace": workspace, "job_type": job_type, "min_salary": min_salary, "max_salary": max_salary, "education": education, "experience": experience, "gender": gender, "age": age, "description_am": description_am, "description_en": description_en, "phone": phones, "username": username } print(data) return data # Vacancy("https://full.am/en/job/public/view/1163") # https://full.am/en/job/public/view/12067 # https://full.am/en/job/public/view/1163
def Vacancy(link): print("request sent for Vacancy succesfully") url = link print(url) page = requests.get(url) #headers=headers) # Description try: description = Selector( response=page).xpath('/html/body/section[2]/div[3]').get() description = remove_tags(description) except: description = "" try: if detect(description) == "et": try: description_en = Translate(description) except: description_en = "" description_am = description else: description_en = description description_am = "" except: description_en = "" description_am = "" # Company Link try: c_link = Selector(response=page).xpath( '/html/body/section[2]/section/div[1]/div[2]/a/@href').get() c_link = "http://hr.am" + c_link except: c_link = "" # Email try: driver.get(c_link) email = driver.find_element_by_xpath( '/html/body/section[2]/div[10]/div[1]/a').get_attribute("href") email = email.replace('mailto:', "") email = [email] except: email = [] if email == []: try: email = email = re.findall(r'[\w\.-]+@[\w\.-]+', description) except: email = [] # Phone try: phone = re.search(r"\d{9}", v_description_en).group() phone = [{"country_code": "374", "number": phone}] except: phone = [] data = { "description_en": description_en, "description_am": description_am, "c_link": c_link, "email": email, "phone": phone } print(data) return data # Vacancy('http://hr.am/vacancy/view/vid/73244/t/')
def __init__(self):
    # Python keywords mapped to their Persian equivalents
    self.engToPerDic = {
        "False": "غلط",
        "None": "هیچی",
        "True": "درست",
        "and": "و",
        "as": "بعنوان",
        "assert": "تاکید",
        "break": "خارج",
        "class": "کلاس",
        "continue": "ادامه",
        "def": "تعریف",
        "del": "حذف",
        "elif": "شرط_دیگر",
        "else": "درغیراینصورت",
        "except": "بجز",
        "finally": "دراخر",
        "for": "برای",
        "from": "از",
        "global": "جهانی",
        "if": "اگر",
        "import": "واردکن",
        "in": "داخل",
        "is": "هست",
        "lambda": "لاندا",
        "nonlocal": "غیرمحلی",
        "not": "نفی",
        "or": "یا",
        "pass": "بگذر",  # restored from the reverse map; the original value was masked as "******"
        "raise": "زیادکن",
        "return": "برگردان",
        "try": "ازمون",
        "while": "تازمانیکه",
        "with": "با",
        "yield": "واگذارکن",
        "self": "خود",
        "print": "چاپ"
    }
    # Reverse map: Persian keywords back to Python
    self.perToEndDic = {
        "غلط": "False",
        "هیچی": "None",
        "درست": "True",
        "و": "and",
        "بعنوان": "as",
        "تاکید": "assert",
        "خارج": "break",
        "کلاس": "class",
        "ادامه": "continue",
        "تعریف": "def",
        "حذف": "del",
        "شرط_دیگر": "elif",
        "درغیراینصورت": "else",
        "بجز": "except",
        "دراخر": "finally",
        "برای": "for",
        "از": "from",
        "جهانی": "global",
        "اگر": "if",
        "واردکن": "import",
        "داخل": "in",
        "هست": "is",
        "لاندا": "lambda",
        "غیرمحلی": "nonlocal",
        "نفی": "not",
        "یا": "or",
        "بگذر": "pass",
        "زیادکن": "raise",
        "برگردان": "return",
        "ازمون": "try",
        "تازمانیکه": "while",
        "با": "with",
        "واگذارکن": "yield",
        "خود": "self",
        "چاپ": "print"
    }
    self.translator = Translate()
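# A short usage sketch for the keyword maps above (hypothetical method name
# to_persian; a real transpiler would need a proper tokenizer so that string
# literals and identifiers are left untouched):
def to_persian(self, source_line):
    # naive word-by-word substitution using the English-to-Persian map
    return " ".join(self.engToPerDic.get(tok, tok) for tok in source_line.split())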
def Vacancy(link):
    url = link
    page = requests.get(url)

    # Age
    try:
        age = Selector(response=page).xpath(
            '/html/body/div[3]/div[1]/div[5]/div/div[1]/ul/li[2]/div[2]/text()'
        ).get()
    except:
        age = ""

    # Education
    try:
        education = Selector(response=page).xpath(
            '/html/body/div[3]/div[1]/div[5]/div/div[1]/ul/li[3]/div[2]/text()'
        ).get()
    except:
        education = ""

    # Experience
    try:
        experience = Selector(response=page).xpath(
            '/html/body/div[3]/div[1]/div[5]/div/div[1]/ul/li[4]/div[2]/text()'
        ).get()
    except:
        experience = ""

    # Published ("Month DD, YYYY"; month names resolved via the months dict)
    try:
        published = Selector(response=page).xpath(
            '/html/body/div[3]/div[1]/div[5]/div/div[1]/ul/li[5]/div[2]/text()'
        ).get()
        published = published.split(",")
        publish_day = int(published[0].split(" ")[1])
        publish_month = int(months[published[0].split(" ")[0]])
        publish_year = int(published[1].strip())
    except Exception:
        publish_day = 0  # the original stored the exception object here
        publish_month = 0
        publish_year = 0

    # Ends
    try:
        ends = Selector(response=page).xpath(
            '/html/body/div[3]/div[1]/div[5]/div/div[1]/ul/li[6]/div[2]/text()'
        ).get()
        ends = ends.split(",")
        deadline_day = int(ends[0].split(" ")[1])
        deadline_month = int(months[ends[0].split(" ")[0]])
        deadline_year = int(ends[1].strip())
    except Exception:
        deadline_day = 0
        deadline_month = 0
        deadline_year = 0

    # Phone (one or two numbers in "(xx) xxx-xx-xx" form, country code 994)
    try:
        phone = Selector(response=page).xpath(
            '/html/body/div[3]/div[1]/div[5]/div/div[2]/ul/li[1]/div[2]').get()
        phone = remove_tags(phone)
        phone = phone.split("(")
        if len(phone) == 2:
            phone = phone[1].replace(")", "")
            phone = phone.replace("-", "").replace(" ", "")
            phones = [{"country_code": "994", "number": phone}]
        elif len(phone) == 3:
            phones = []
            number1 = phone[1].replace(")", "")
            number1 = number1.replace("-", "").replace(" ", "")
            phones.append({"country_code": "994", "number": number1})
            number2 = phone[2].replace(")", "")
            number2 = number2.replace("-", "").replace(" ", "")
            phones.append({"country_code": "994", "number": number2})
        else:
            phones = []
    except Exception:
        phones = []

    # Email
    try:
        driver.get(url)
        email = driver.find_element_by_xpath(
            '/html/body/div[3]/div[1]/div[5]/div/div[2]/ul/li[2]/div[2]/a'
        ).text
        email = [email]
    except:
        email = []

    # Description
    try:
        description = Selector(
            response=page).xpath('/html/body/div[3]/div[1]/div[6]').get()
        description = remove_tags(description)
    except:
        description = ""
    try:
        if detect(description) == "et":
            try:
                description_en = Translate(description)
            except:
                description_en = ""
            description_az = description
        else:
            description_en = description
            description_az = ""
    except:
        description_en = ""
        description_az = ""

    data = {
        "age": age,
        "education": education,
        "experience": experience,
        "publish_day": publish_day,
        "publish_month": publish_month,
        "publish_year": publish_year,
        "deadline_day": deadline_day,
        "deadline_month": deadline_month,
        "deadline_year": deadline_year,
        "phone": phones,
        "email": email,
        "description_az": description_az,
        "description_en": description_en,
    }
    # print(data)
    return data

# Vacancy('https://boss.az/vacancies/161045')
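# The phone handling above strips "(", ")", "-", and spaces and attaches
# Azerbaijan's country code 994 to each number. The same normalization as a
# compact helper (hypothetical name; mirrors the branches in Vacancy above):
def normalize_az_numbers(raw):
    parts = raw.split("(")[1:]  # each "(" starts one phone number
    return [{"country_code": "994",
             "number": p.replace(")", "").replace("-", "").replace(" ", "")}
            for p in parts]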
def Vacancy(link, cookies):
    print("request sent for Vacancy successfully")
    url = link
    print(url)
    cookies = {"Cookie": cookies}
    headers = {"User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.75 Safari/537.36"}
    page = requests.get(url, cookies=cookies)

    # Stack
    try:
        stack = Selector(response=page).xpath(
            '//*[@id="page"]/div/div/div/div/div/div[1]/span[2]/text()').get()
    except:
        stack = ""

    # Education
    try:
        education = Selector(response=page).xpath(
            '//*[@id="page"]/div/div/div/div/div/div[3]/span[contains(.,"Education:")]'
        ).get()
        education = education.split("</strong>")[1]
        education = education.split("</span>")[0].strip()
    except:
        education = ""

    # Languages
    try:
        languages = Selector(response=page).xpath(
            '//*[@id="page"]/div/div/div/div/div/div[3]/span[contains(.,"Languages:")]'
        ).get()
        languages = languages.split("</strong>")[1]
        languages = languages.split("</span>")[0].strip()
    except:
        languages = ""

    # Email
    try:
        email = Selector(response=page).xpath(
            '//*[@id="page"]/main/div/div/div[2]/div/aside[2]/div/div/span/a/text()'
        ).get()
    except:
        email = ""
    if email is None:
        email = ""

    # Logo
    try:
        logo = Selector(response=page).xpath(
            '//*[@id="page"]/main/div/div/div[2]/div/aside[3]/div/div/figure/img/@src'
        ).get()
    except:
        logo = ""
    if logo is None:
        logo = ""

    # Description
    try:
        description = Selector(response=page).xpath(
            '//*[@id="page"]/main/div/div/div[1]/div[1]/article/div').get()
        description = remove_tags(description)
        description = description.strip()
        description = description.replace('*', "")
        description = re.sub(r"\s+", " ", description)
    except:
        description = ""

    try:
        lang = detect(description)
    except Exception:  # detect() raises on empty input
        lang = ""
    if lang == "ru":
        description_ru = description
        description_en = Translate(description)
        description_ka = ""
    elif lang == "et":
        description_ru = ""
        try:
            description_en = Translate(description)
        except:
            description_en = ""
        description_ka = description
    else:
        description_ru = ""
        description_en = description
        description_ka = ""

    # contains(text(),"STODOLINK")
    data = {
        "stack": stack,
        "education": education,
        "languages": languages,
        "email": email,
        "logo": logo,
        "description": description,
        "description_ka": description_ka,
        "description_ru": description_ru,
        "description_en": description_en
    }
    print("Vacancy scraped successfully")
    # print(data)
    return data

# Vacancy('https://www.cv.ge/announcement/127911/office-housekeeper',
#         '_ga=GA1.2.2101960191.1593693483; _gid=GA1.2.453973920.1593693483; WSID=dnh4xi0r4g1qhrtdiygzn241; __RequestVerificationToken=-ZO3RUnIkifRk6Z-oYnkY1BO7sljzPZhydaRlB23lP0PyUlYkuV0iw3TrkEAsMFrOxCONP1xxAIZh8qzX2tzB_D5DSiXD8G3RyUdZn-wyGE1; LastVisit=2020-07-02T16:38:08.2821857+04:00; _gat=1')
def Vacancy(link): print("request sent for Vacancy succesfully") url = link print(url) # headers = {"Accept-Language": "en-US,en;q=0.5"} page = requests.get(url) #headers=headers) # C_Link try: c_link = Selector(response=page).xpath( "/html/body/div[2]/div[3]/div[3]/div[2]/div/div/a/@href").get() c_link = "https://staff.am" + c_link except: c_link = "" # Industry try: industry = Selector(response=page).xpath( '/html/body/div[2]/div[3]/div[3]/div[4]/div[1]/div/div/div[1]/div[1]/p[1]/span[2]/text()' ).get() except: industry = "" # Views try: views = Selector(response=page).xpath( '/html/body/div[2]/div[3]/div[3]/div[4]/div[1]/div/div/div[1]/div[1]/p[2]/span/text()' ).get() except: views = "" # Followers try: followers = Selector( response=page).xpath('//*[@id="followers_count"]/text()').get() except: followers = "" # Employment_Term try: employment_term = Selector(response=page).xpath( '//*[@id="job-post"]/div[1]/div[3]/p[1]').get() employment_term = employment_term.split("</span> ") employment_term = remove_tags(employment_term[1]).strip() except: employment_term = "" # Category try: category = Selector(response=page).xpath( '//*[@id="job-post"]/div[1]/div[3]/p[2]').get() category = category.split("</span> ") category = remove_tags(category[1]).strip() except: category = "" # Job_Type try: job_type = Selector(response=page).xpath( '//*[@id="job-post"]/div[1]/div[4]/p[1]').get() job_type = job_type.split("</span> ") job_type = remove_tags(job_type[1]).strip() except: job_type = "" # Deadline try: ends = Selector(response=page).xpath( '//*[@id="job-post"]/div[1]/div[2]/p/text()').get() ends = ends.replace("\n", " ") ends = ends.replace(" Deadline: ", "") ends = ends.split(" ") deadline_day = int(ends[0]) deadline_month = int(months_en[ends[1]]) deadline_year = int(ends[2]) except: deadline_day = 0 deadline_month = 0 deadline_year = 0 # Description try: description = Selector( response=page).xpath('//*[@id="job-post"]/div[2]').get() description = remove_tags(description) except: description = "" description_en = "" description_am = "" if detect(description) == "et": try: description_en = Translate(description) except: description_en = "" description_am = description else: description_en = description description_am = "" # Website try: website = Selector(response=page).xpath( '//*[@id="company-contact-details"]/div[1]/p[contains(., "Website")]/a/@href' ).get() website = [website] except: website = [] # Phone Number try: phone = Selector(response=page).xpath( '//*[@id="company-contact-details"]/div[1]/p[contains(., "Phone")]' ).get() phone = phone.split("</span>")[1].split("</p>")[0].strip() if "," in phone: phones = [] phone = phone.split(", ") phone1 = phone[0].replace(") ", "") number1 = phone1.replace("-", "") number1 = number1.replace("(", "") phone1 = {"country_code": "374", "number": number1} phone2 = phone[1].replace(") ", "") number2 = phone2.replace("-", "") number2 = number2.replace("(", "") phone2 = {"country_code": "374", "number": number2} phones.append(phone1) phones.append(phone2) else: number = phone.replace(") ", "") number = number.replace("(", "") number = number.replace("-", "") phones = [{"country_code": "374", "number": number}] except: phones = [] # Address try: address = Selector(response=page).xpath( '//*[@id="company-contact-details"]/div[1]/p[contains(., "Address")]' ).get() address = remove_tags(address) address = address.replace("Address: ", "").strip() # Garegin Hovsepyan 20, Yerevan, Armenia except: address = "" # About Company try: c_description = 
Selector(response=page).xpath( '//*[@id="company-details"]/div[1]/div[2]').get() c_description = remove_tags(c_description).strip() except: c_description = "" # Canditate Level try: candidate_level = Selector(response=page).xpath( '//*[@id="job-post"]/div[2]/h3[contains(., "candidate level")]/span/text()' ).get() if candidate_level == "Not defined": candidate_level = "" except: candidate_level = "" # Email try: driver.get(link) email = driver.find_element_by_class_name('desc_email').text email = [email] except: email = [] print("Vacancy Scraped Successfully") ccc = Company_Info(c_link) # //*[@id="followers_count"] data = { "c_link": c_link, "industry": industry, "views": views, "followers": followers, "employment_term": employment_term, "category": category, "job_type": job_type, "deadline_day": deadline_day, "deadline_month": deadline_month, "deadline_year": deadline_year, "description_en": description_en, "description_am": description_am, "website": website, "phone": phones, "address": address, "company_description": c_description, "candidate_level": candidate_level, "email": email, "type_of_company": ccc["type_of_company"], "N_of_employees": ccc["N_of_employees"], "foundation_date": ccc["foundation_date"] } print("Data is ready to be added to a DB") return data # Vacancy('https://staff.am/en/digital-marketing-specialist-219') # https://staff.am/en/apariki-zargacman-bazni-asxatakic # https://staff.am/en/digital-marketing-specialist-219 # //*[@id="job-post"]/div[1]/div[2]/p # //*[@id="job-post"]/div[1]/div[2]/p
def get_products():
    for prod in links_db.find({"parsed": False}):
        req = requests.get(prod["link"])
        _print.value(prod["link"])
        city = select_one(req, "#df_field_mdebareoba .value", True)
        bread_crumbs = select_many(req, "#bread_crumbs .point1 li a::text")
        geonames_id = geo_names(city)
        deal_type = get_deal_type(bread_crumbs[2], prod['link'])
        property_type = get_property_type(bread_crumbs[1], prod['link'])
        status = get_status(
            select_one(req, "#df_field_built_status .value", True), prod["link"])
        street = select_one(req, "#df_field_mdebareoba_level1 .value", True)
        address = select_one(req, "#df_field_mdebareoba_level2 .value", True)
        bedrooms = select_one(req, "#df_field_bedrooms .value", True)
        bathrooms = select_one(req, "#df_field_bathrooms .value", True)
        total_area = string_to_int(
            select_one(req, "#df_field_square_feet .value", True))[0]
        floor = select_one(req, "#df_field_floor .value", True)
        floors = select_one(req, "#df_field_number_of_floors .value", True)
        try:
            _view = int(select_one(req, "#area_listing .count::text"))
        except:
            _view = 0
        outdoor_features = get_outdoor_features(req)
        indoor_features = get_indoor_features(req)
        climate_control = get_climate_control(req)
        details = [{
            "title": select_one(req, "#area_listing > h1"),
            "house_rules": "",
            "description": Translate(
                select_one(req, "#df_field_additional_information .value", True))
        }]
        price = {
            "price_type": "total_price",
            "min_price": 0,
            "max_price": 0,
            "fix_price": converted_price(
                select_one(req, "#lm_loan_amount::attr(value)"), prod["link"]),
            "currency": "USD"
        }
        phones = [{
            "country_code": 995,
            "number": converted_price(
                select_one(req, "#df_field_phone .value a::text"), prod["link"])
        }]
        files = get_images(req)
        try:
            real_estate_db.insert_one({
                "location": {
                    "country": {"id": "GE"},
                    "city": {
                        "id": geonames_id,
                        "name": city,
                        "subdivision": ""
                    },
                    "street": street,
                    "address": address,
                },
                "created_at": datetime.datetime.utcnow(),
                "deal_type": deal_type,
                "type_of_property": [property_type],
                "status": status,
                "bedrooms": bedrooms,
                "bathrooms": bathrooms,
                "total_area": total_area,
                "metric": "feet_square",
                "floor": floor,
                "floors": floors,
                "car_spaces": 0,
                "is_agent": True,
                "outdoor_features": outdoor_features,
                "indoor_features": indoor_features,
                "climate_control": climate_control,
                "detail": details,
                "price": price,
                "phones": phones,
                "files": files,
                "source": "Home.ge",
                "view": _view
            })
            links_db.update_one({"link": prod["link"]},
                                {"$set": {"parsed": True}})
        except:
            log_error(req.url, "პროდუქტის", True)  # "პროდუქტის" is Georgian for "product's"
# Logo
try:
    logo = driver.find_element_by_xpath(
        '//*[@id="uinfo"]/div[1]/a/img').get_attribute("src")
except:
    logo = ""

# Description
try:
    description = driver.find_element_by_xpath(
        '//*[@id="pcontent"]/div/div[3]').text
except:
    description = ""
description_en = ""
description_am = ""
try:
    if detect(description) == "et":
        try:
            description_en = Translate(description)
        except:
            description_en = ""
        description_am = description
    else:
        description_en = description
        description_am = ""
except:  # detect() raises on empty input
    description_en = ""
    description_am = ""

# Phone (clicking the link reveals the number)
try:
    driver.find_element_by_xpath('//*[@id="uinfo"]/div[2]/div[2]/a').click()
    phone = driver.find_element_by_xpath(
        '//*[@id="callPhoneInfo"]/div[3]/div').text
    if "\n" not in phone:
        phone = phone.replace(" ", "")
def all_reply(message):
    bot.send_message(message.chat.id, Translate(message.text))
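# bot.send_message(chat_id, text) matches pyTelegramBotAPI (telebot). There, a
# catch-all handler like all_reply is registered against the bot and polling is
# started; a sketch, assuming bot = telebot.TeleBot(token) and the all_reply
# defined above:
bot.register_message_handler(all_reply, func=lambda message: True)
bot.infinity_polling()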
proxies = {
    'http': 'http://10.10.1.10:3128',
    'https': 'http://10.10.1.10:1080',
}

for i in range(58214, 2000000):
    url = f"https://www.e-register.am/en/companies/{i}"
    page = requests.get(url)
    name_am = Selector(
        response=page).xpath('//*[@id="page"]/div[1]/text()').get()
    if name_am is None:
        print("No company on:", i)
        continue
    name_en = Translate(name_am)
    # The registration cell packs "number / date" into one field
    registration_number = Selector(response=page).xpath(
        '//*[@id="page"]/table[3]/tr[2]/td[2]/text()').get()
    foundation_date = registration_number.split("/")[1].strip()
    registration_number = registration_number.split("/")[0].strip()
    tax_id = Selector(response=page).xpath(
        '//*[@id="page"]/table[3]/tr[3]/td[2]/text()').get()
    z_code = Selector(response=page).xpath(
        '//*[@id="page"]/table[3]/tr[4]/td[2]/text()').get()
    data = {
def Vacancy(link, location_id, cookies):
    print("request sent for Vacancy successfully")
    url = link
    print(url)
    cookies = {"Cookie": cookies}
    headers = {"User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.75 Safari/537.36"}
    page = requests.get(url, cookies=cookies)

    # Data
    soup = BeautifulSoup(page.text, 'html.parser')
    details = soup.find('div', attrs={"class": "anncmt-details"})

    # ---------------------------------- Requirements ----------------------------------

    # Position
    try:
        position = soup.find('div', attrs={"class": "anncmt-title"}).text
        position = position.strip()
    except:
        position = ""
        print("There is no vacancy anymore")
    print("Position: ", position)

    # Company
    try:
        company = soup.find('div', attrs={"class": "anncmt-customer"}).text
        company = company.strip()
    except:
        company = ""
        print("There is no vacancy anymore")
    print("Company: ", company)

    # Dates
    try:
        published = str(details).split("<strong>Dates:</strong>")
        published = published[1].split("-")
        ends = published[1].split("</td>")
        ends = ends[0].strip()
        ends = ends.split()
        ends = ends[0] + "/" + months[f"{ends[1]}"]
        published = published[0].strip()
        published = published.split()
        # Converting the verbal month into its numeric form
        published = published[0] + "/" + months[f"{published[1]}"]
    except:
        published = ""
        ends = ""
    print("Published: ", published)
    print("Ends: ", ends)

    # Location
    # try:
    #     location = str(details).split("<strong>Location:</strong>")
    #     location = location[1].split("</td>")
    #     location = location[0].strip()
    #     location = location.replace("<span>", "").replace("</span>", "")
    #     if "," in location:
    #         location_id = []
    #         locations = location.split(',')
    #         for location1 in locations:
    #             location1 = location1.strip()
    #             try:
    #                 location_id.append({"Location": f"{location1}", "ID": f"{Geonames(location1)}"})
    #             except:
    #                 location_id.append({"Location": f"{location1}", "ID": ""})
    #     else:
    #         location_id = [{"Location": f"{location}", "ID": f"{Geonames(location)}"}]
    # except:
    #     location_id = [{"Location": "", "ID": ""}]
    # print("Location: ", location_id)

    # Job type
    try:
        jtype = str(details).split("<strong> Employment form:</strong>")
        jtype = jtype[1].split("</td>")
        jtype = jtype[0].strip()
    except:
        jtype = ""
    print("Job_Type: ", jtype)

    # Salary + bonuses ("+" marks a bonus, "-" marks a range)
    try:
        salary = str(details).split("<strong> Salary:</strong>")
        salary = salary[1].split("</td>")
        salary = salary[0].strip()
        if "+" in salary and "-" not in salary:
            max_salary = int(salary.split("+")[0].rstrip())
            min_salary = max_salary
            bonuses = "Yes"
        elif "+" in salary and "-" in salary:
            salary = salary.split("+")[0].rstrip()
            max_salary = int(salary.split("-")[1])
            min_salary = int(salary.split("-")[0])
            bonuses = "Yes"
        elif "+" not in salary and "-" in salary:
            max_salary = int(salary.split("-")[1])
            min_salary = int(salary.split("-")[0])
            bonuses = "No"
        else:
            min_salary = max_salary = int(salary)
            bonuses = "No"
    except:
        min_salary = 0
        max_salary = 0
        bonuses = "No"
    print("Min_Salary: ", min_salary)
    print("Max_Salary: ", max_salary)
    print("Bonuses: ", bonuses)

    # Experience
    try:
        experience = str(details).split("<strong> Experience:</strong>")
        experience = experience[1].split("</td>")
        experience = experience[0].strip()
    except:
        experience = ""
    print("Experience: ", experience)

    # Education
    try:
        education = str(details).split("<strong> Education:</strong>")
        education = education[1].split("</td>")
        education = education[0].strip()
    except:
        education = ""
    print("Education: ", education)

    # Languages
    try:
        languages = str(details).split("<strong> Languages:</strong>")
        languages = languages[1].split("</span>")
        languages = languages[0].replace("<span>", "").lstrip()
    except:
        languages = ""
    print("Languages: ", languages)

    # Driver's license
    try:
        dLicense = str(details).split("<strong> Driving licence:</strong>")
        dLicense = dLicense[1].split("</td>")
        dLicense = dLicense[0].strip()
        dLicense = dLicense.replace("<span>", "").replace("</span>", "")
    except:
        dLicense = ""
    print("D_License: ", dLicense)

    # ---------------------------------- Info ----------------------------------

    # E-mail ("ელ. ფოსტა" is Georgian for "e-mail")
    # try:
    #     email = str(details).split("<strong> ელ. ფოსტა:</strong>")
    #     email = email[1].split("</td>")
    #     email = email[0].lstrip()
    #     raw_email = email.rstrip()
    #     email = re.findall(r'[\w\.-]+@[\w\.-]+', raw_email)[0]
    # except:
    #     email = ""
    # print("Email: ", email)

    # Phone number
    try:
        pNumber = str(details).split("<strong> Phone:</strong>")
        pNumber = pNumber[1].split("</td>")
        pNumber = pNumber[0].strip()
    except:
        pNumber = ""
    print("Phone_Number: ", pNumber)

    # Address
    try:
        address = str(details).split("<strong> Address:</strong>")
        address = address[1].split("</td>")
        address = address[0].strip()
    except:
        address = ""
    print("Address: ", address)

    # Description
    try:
        description = soup.find('div', attrs={"class": "firm-descr"}).text
        description = description.strip()
    except:
        description = ""
    print("Description: ", description)

    # The language is detected from the position title
    try:
        lang = detect(position)
    except Exception:
        lang = ""
    if lang == "ru":
        description_ru = description
        description_en = Translate(description)
        description_ka = ""
    elif lang == "et":
        description_ru = ""
        description_en = Translate(description)
        description_ka = description
    else:
        description_ru = ""
        description_en = description
        description_ka = ""
    print("-" * 80)

    # Email
    try:
        email = re.findall(r'[\w\.-]+@[\w\.-]+', description)[0]
    except:
        try:
            email = str(details).split("<strong> Email:</strong>")
            email = email[1].split("</td>")
            email = email[0].lstrip()
            raw_email = email.rstrip()
            email = re.findall(r'[\w\.-]+@[\w\.-]+', raw_email)[0]
        except:
            email = ""
    print("Email: ", email)

    # Web link
    try:
        web_link = re.search(r"(?P<url>https?://[^\s]+)", description).group("url")
    except:
        web_link = ""
    print("Web_link: ", web_link)

    data = {
        "Job_Type": jtype,
        "Min_Salary": min_salary,
        "Max_Salary": max_salary,
        "Bonuses": bonuses,
        "Experience": experience,
        "Education": education,
        "Languages": languages,
        "Driver_License": dLicense,
        "Location": location_id,
        "Address": address,
        "Email": email,
        "Phone_Number": pNumber,
        "Web_Link": web_link,
        "Description_en": description_en,
        "Description_ru": description_ru,
        "Description_ka": description_ka
    }
    print("returned successfully")
    return data
class ChatBot:
    def __init__(self, proxies={}, entropy_filter=True, lang='en',
                 entropy_top=3, query_top=100, fraction=5):
        self.__ngd = NGD(proxies)
        # self.__ngd.set_context('site:imsdb.com')
        self.__cache = {}
        self.__min_ent = 0.0
        self.__entropy_filter = entropy_filter
        self.__lang = lang
        self.__entropy_top = entropy_top
        self.__fraction = fraction
        self.__query_top = query_top
        self.__translator = Translate()
        self.__lock = Lock()
        self.__voc_translator = None
        random.seed(666)

    def set_voc_translator(self, voc_trans=None):
        self.__voc_translator = voc_trans

    def entropy_min(self, e_min):
        self.__min_ent = e_min

    def reply_to(self, chat_line):
        self.__lock.acquire()
        try:
            chat_line = normalize_token(chat_line)
            if self.__lang != 'en':
                chat_line = self.__translator.translate(chat_line, self.__lang, 'en')
            snippets, answers = [], []
            # Query movie-script snippets, shortening the line until something matches.
            while len(answers) == 0:
                snippets = self.__ngd.snippets_query(
                    '"%s" site:imsdb.com' % chat_line, self.__query_top)
                answers = self.__extract_answers(snippets, chat_line)
                if len(answers) == 0:
                    chat_line = chat_line[:-1]
                    if len(chat_line) == 0:
                        break
                    continue
            probabilities = self.__build_probs(answers)
            new_ans = []
            for i in range(min(len(answers), self.__fraction)):
                new_ans.append(self.__choose_random_answer(probabilities))
            answers = list(set(new_ans))
            new_answers = []
            for ans in answers:
                if self.__entropy_filter:
                    val = self.__ngd.distance(('"%s"' % chat_line, '"%s"' % ans))
                    if val:
                        print('search engine distance (choosing response): %s %f'
                              % (ans, val))
                        time.sleep(0.25)
                        new_answers.append((ans, val))
            if self.__entropy_filter:
                new_answers.sort(key=lambda x: x[1])  # was a Python 2 cmp (second_compare)
                # new_answers.reverse()
                new_answers = [x[0] for x in new_answers[:self.__entropy_top]]
                answers = [x for x in answers if x in new_answers]
            ans = None
            if len(answers) > 0:
                ans = answers[random.randint(0, len(answers) - 1)]
            if not ans:
                ans = 'ah'
            # use vocabulary translator, if available
            if self.__voc_translator:
                ans = self.__voc_translator(ans)
            if ans and self.__lang != 'en':
                ans = self.__translator.translate(ans, 'en', self.__lang).lower()
            if not ans:
                ans = 'ah'
            return ans
        finally:
            self.__lock.release()  # release the lock, no matter what

    def __extract_answer(self, snippet, chat_line):
        snippet = normalize_token(snippet)
        snippet = re.sub(r'\([^\)]+\) ', '', snippet)
        snippet = re.sub(r'\[[^\)]+\] ', '', snippet)
        # Script lines look like "CHARACTER sentence." -- match the all-caps
        # speaker name followed by one sentence.
        iterator = re.finditer('[A-Z][A-Z]+ [^\.!?]+[\.!?]', snippet)
        lines = []
        for match in iterator:
            line = match.group()
            line_s = line.split(' ')
            line = ' '.join(line_s[1:]).lower()
            line = html2text(line)
            line = line.replace('_', '').replace('\n', '')
            # line = re.sub('\([^\)]+\) ', '', line)
            if ('-' not in line and ':' not in line and '**' not in line
                    and '(' not in line and ')' not in line and '"' not in line):
                if len(line) > 0 and line[-1] == '.':
                    line = line[:-1]
                lines.append(line)
        if len(lines) == 0:
            return ''
        prev = lines[0].lower()
        ret = []
        for i in range(1, len(lines)):
            if chat_line.lower() in prev:
                ret.append(lines[i].lower())
            prev = lines[i].lower()
        return ret

    def __extract_answers(self, snippets, chat_line):
        ret = []
        for snippet in snippets:
            anss = self.__extract_answer(snippet, chat_line)
            for ans in anss:
                if ans != '':
                    ret.append(ans.strip())
        return ret

    def __build_probs(self, answers):
        d = {}
        for ans in answers:
            d[ans] = d.get(ans, 0) + 1
        ret = []
        for ans, cnt in d.items():
            ret.append((ans, float(cnt) / len(answers)))
        return ret

    def __choose_random_answer(self, probs):
        rand_float = random.random()
        total = 0.0
        ret = None
        for ans, prob in probs:
            total += prob
            if total >= rand_float:
                ret = ans
                break
        return ret

    def start(self):
        msg = ''
        while msg != 'bye':
            msg = input('You: ')
            ans = self.reply_to(msg.strip())  # reply_to returns a single string
            print('Bot:', ans)
        print('end of chat.')

    def save_cache(self):
        self.__ngd.save_cache()
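# Usage sketch for ChatBot (assumes the NGD search wrapper and Translate are
# available; a non-'en' lang makes reply_to translate queries and answers):
chatbot = ChatBot(entropy_filter=True, lang='en')
chatbot.start()        # interactive loop; type 'bye' to exit
chatbot.save_cache()   # persist the NGD query cache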
def get_products():
    from translator import Translate
    for prod in links_db.find({"parsed": False, "source": "ss.ge"}):
        req = requests.get(prod["link"])
        _print.value(prod["link"])
        bread_crumbs = select_many(req, ".detailed_page_navlist ul li a::text")
        city = bread_crumbs[3]
        geonames_id = geo_names(city)
        deal_type = get_deal_type(bread_crumbs[2].strip(), prod['link'])
        property_type = get_property_type(bread_crumbs[1].strip(), prod['link'])
        status = get_status(
            select_one(req, "#fieldValueStatusId2::text"), prod["link"])
        street = select_one(req, ".StreeTaddressList.realestatestr::text").strip()
        address = street
        bedrooms = int(select_many(req, ".ParamsHdBlk text::text")[2])
        bathrooms = ""
        total_area = string_to_int(
            select_many(req, ".ParamsHdBlk text::text")[0])[0]
        floor = string_to_int(select_many(req, ".ParamsHdBlk text::text")[3])
        floors = string_to_int(
            select_one(req, ".ParamsHdBlk text text span::text"))
        try:
            _view = int(select_one(req, ".article_views span::text"))
        except:
            _view = 0
        outdoor_features = get_outdoor_features(req)
        indoor_features = get_indoor_features(req)
        climate_control = get_climate_control(req)
        details = [{
            "title": select_one(req, "#area_listing > h1"),
            "house_rules": "",
            "description": Translate(
                select_one(req, "#df_field_additional_information .value", True))
        }]
        price = {
            "price_type": "total_price",
            "min_price": 0,
            "max_price": 0,
            "fix_price": converted_price(
                select_one(req, "#lm_loan_amount::attr(value)"), prod["link"]),
            "currency": "USD"
        }
        phones = [{
            "country_code": 995,
            "number": converted_price(
                select_one(req, "#df_field_phone .value a::text"), prod["link"])
        }]
        files = get_images(req)
        pprint({
            "location": {
                "country": {"id": "GE"},
                "city": {
                    "id": geonames_id,
                    "name": city,
                    "subdivision": ""
                },
                "street": street,
                "address": address,
            },
            "created_at": datetime.datetime.utcnow(),
            "deal_type": deal_type,
            "type_of_property": [property_type],
            "status": status,
            "bedrooms": bedrooms,
            "bathrooms": bathrooms,
            "total_area": total_area,
            "metric": "feet_square",
            "floor": floor,
            "floors": floors,
            "car_spaces": 0,
            "is_agent": True,
            "outdoor_features": outdoor_features,
            "indoor_features": indoor_features,
            "climate_control": climate_control,
            "detail": details,
            "price": price,
            "phones": phones,
            "files": files,
            "source": "ss.ge",
            "view": _view
        })
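# select_one, select_many, and string_to_int are shared helpers used by both
# get_products() variants but not defined in this collection. A sketch of
# string_to_int under the assumption that it pulls the integers out of strings
# like "120 m2" (hypothetical; the real helper may differ):
def string_to_int(text):
    if not text:
        return [0]
    numbers = [int(n) for n in re.findall(r"\d+", text)]
    return numbers or [0]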
def Vacancy_info(url):
    print(url)
    page = requests.get(url)

    # Description
    try:
        description = Selector(response=page).xpath(
            '/html/body/div[2]/div/div[1]/div[2]/div[4]').get()
        description = remove_tags(description)
        description = description.strip()
        description = re.sub(r"\s+", " ", description)
        print(description)
    except:
        description = ""

    try:
        lang = detect(description)
    except Exception:  # detect() raises on empty input
        lang = ""
    if lang == "ru":
        description_ru = description
        description_en = Translate(description)
        description_ka = ""
    elif lang == "et":
        description_ru = ""
        try:
            description_en = Translate(description)
        except:
            description_en = ""
        description_ka = description
    else:
        description_ru = ""
        description_en = description
        description_ka = ""

    # Email
    try:
        email = Selector(response=page).xpath(
            '/html/body/div[2]/div/div[1]/div[2]/div[2]/div[2]/div/div/a/@href'
        ).get()
        email = email.replace("mailto:", "")
    except:
        email = ""

    # Location (fall back to Tbilisi, GeoNames id 611717)
    try:
        location = Selector(response=page).xpath(
            '/html/body/div[2]/div/div[1]/div[2]/div[3]/div[2]/div[1]/div[2]/span/text()'
        ).get()
        location_id = []
        try:
            location_id.append({
                "city": f"{location}",
                "id": f"{Geonames(location)}"
            })
        except:
            location_id.append({"city": f"{location}", "id": "611717"})
    except:
        location_id = [{"city": "Tbilisi", "id": "611717"}]

    # Category
    try:
        category = Selector(response=page).xpath(
            '/html/body/div[2]/div/div[1]/div[2]/div[3]/div[2]/div[2]/div[2]/span[1]/text()'
        ).get()
    except:
        category = ""

    # Stack
    try:
        stack = Selector(response=page).xpath(
            '/html/body/div[2]/div/div[1]/div[2]/div[3]/div[2]/div[4]/div[2]/text()'
        ).get()
        if "სრული განაკვეთი" in stack:  # Georgian for "full time"
            stack = "Full-Stack"
    except:
        stack = ""

    data = {
        "description_en": description_en,
        "description_ka": description_ka,
        "description_ru": description_ru,
        "email": email,
        "location": location_id,
        "category": category,
        "stack": stack
    }
    print("Vacancy Scraped Successfully")
    return data
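# Geonames(city) is called throughout these scrapers but never defined here. A
# sketch against the public GeoNames search API (assumes a registered GeoNames
# username; the real helper may resolve ids from a local table instead):
def Geonames(city):
    resp = requests.get(
        "http://api.geonames.org/searchJSON",
        params={"q": city, "maxRows": 1, "username": "demo"},  # placeholder username
        timeout=10,
    )
    results = resp.json().get("geonames", [])
    return str(results[0]["geonameId"]) if results else ""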
def Vacancy(link): print("request sent for Vacancy succesfully") url = link print(url) # headers = {"Accept-Language": "en-US,en;q=0.5"} page = requests.get(url) #headers=headers) # Location try: location = Selector(response=page).xpath( '/html/body/main/section/div/div[1]/div[3]/ul/li[3]/a/text()').get( ) location = location.strip() location = location.split(",")[0] location = [{"city": location, "id": Geonames(location)}] except: location = [{"city": "Yerevan", "id": "616052"}] # Website try: website = Selector(response=page).xpath( '/html/body/main/section/div/div[1]/div[3]/ul/li[4]/a/@href').get( ) if website is None: website = [] else: website = [website] except: website = [] # Job Type try: job_type = Selector(response=page).xpath( '/html/body/main/section/div/div[2]/div/ul/li[3]/text()').get() job_type = job_type.strip() except: job_type # Published try: published = Selector(response=page).xpath( '/html/body/main/section/div/div[2]/div/ul/li[7]/text()').get() published = published.strip() except: published = "" # Salary try: salary = Selector(response=page).xpath( '/html/body/main/section/div/div[2]/div/ul/li[2]/text()').get() salary = salary.strip() salary = salary.replace("֏", "") salary = salary.replace(",", "") salary = salary.replace(" ", "") salary = int(salary) except: salary = 0 # Gender try: gender = Selector(response=page).xpath( '/html/body/main/section/div/div[2]/div/ul/li[4]/text()[2]').get() gender = gender.strip() except: gender = "" # Description try: description = Selector(response=page).xpath( '/html/body/main/section/div/div[2]/div/p').get() description = remove_tags(description).strip() except: description = "" try: if detect(description) == "et": try: description_en = Translate(description) except: description_en = "" description_am = description else: description_en = description description_am = "" except: description_en = "" description_am = "" # Email try: driver.get(link) email = driver.find_element_by_xpath( '/html/body/main/section/div/div[2]/div/p').text email = re.findall(r'[\w\.-]+@[\w\.-]+', email) except Exception as e: email = [] data = { "location": location, "website": website, "job_type": job_type, "publish_day": published, "salary": salary, "gender": gender, "description_am": description_am, "description_en": description_en, "email": email } # print(data) return data # Vacancy("https://www.worknet.am/en/job/%D5%A2%D5%A1%D5%B6%D5%BE%D5%B8%D6%80-%D5%BA%D5%A1%D5%B0%D5%A5%D5%BD%D5%BF%D5%AB-%D5%A1%D5%B7%D5%AD%D5%A1%D5%BF%D5%A1%D5%AF%D5%AB%D6%81-4656")
def Vacancy(link): print("request sent for Vacancy succesfully") url = link # headers = {"Accept-Language": "en-US,en;q=0.5"} page = requests.get(url) #headers=headers) # Location try: location = Selector(response=page).xpath( '/html/body/div[2]/table/tr[contains(., "Location:")]').get() location = location.split("<td>")[1].split("</td>")[0].replace( "&nbsp", " ") location = location.split(",")[0] location = [{'city': location, 'id': Geonames(location)}] except: location = [{'city': 'Yerevan', 'id': '616052'}] # Company url try: c_url = Selector(response=page).xpath( '/html/body/div[2]/table/tr[contains(., "Company:")]').get() c_url = c_url.split('href="')[1].split('">')[0] except: c_url = "" # Vacancy Description try: description = Selector(response=page).xpath('/html/body/div[4]').get() description = remove_tags(description) description = description.strip() description = description.replace('&nbsp', " ") except: description = "" try: if detect(description) == "et": try: description_en = Translate(description) except: description_en = "" description_am = description else: description_en = description description_am = "" except: description_en = "" description_am = "" # Email try: email = Selector(response=page).xpath('//*[@id="job"]/a/@href').get() email = email.replace('mailto:', "") email = [email] except: email = [] data = { "location": location, "c_link": c_url, "description_am": description_am, "description_en": description_en, "email": email } # print(data) return data
def Vacancy_Info(link):
    url = link
    page = requests.get(url)

    # Industry
    try:
        industry = Selector(response=page).xpath(
            '/html/body/section/div/div/div/div[2]/div[1]/div/div/div[1]/div[contains(., "Industry:")]'
        ).get()
        industry = industry.split('"font-weight-bold">')[1].split('</span>')[0]
    except:
        industry = ""

    # Salary
    try:
        salary = Selector(response=page).xpath(
            '/html/body/section/div/div/div/div[2]/div[1]/div/div/div[1]/div[contains(., "Salary:")]'
        ).get()
        salary = salary.split('"font-weight-bold">')[1].split('</span>')[0]
        salary = salary.replace(",", "")
        salary = int(salary)
    except:
        salary = 0

    # Employment type
    try:
        employment_type = Selector(response=page).xpath(
            '/html/body/section/div/div/div/div[2]/div[1]/div/div/div[1]/div[contains(., "Employment type:")]'
        ).get()
        employment_type = employment_type.split(
            '"font-weight-bold">')[1].split('</span>')[0]
    except:
        employment_type = ""

    # Ends (dd/mm/yyyy)
    try:
        ends = Selector(response=page).xpath(
            '/html/body/section/div/div/div/div[2]/div[1]/div/div/div[1]/div[contains(., "Deadline:")]'
        ).get()
        ends = ends.split('"font-weight-bold">')[1].split('</span>')[0]
        ends = ends.split('/')
        deadline_day = int(ends[0])
        deadline_month = int(ends[1])
        deadline_year = int(ends[2])
    except:
        deadline_day = 0
        deadline_month = 0
        deadline_year = 0

    # Description
    try:
        description = Selector(response=page).xpath(
            '/html/body/section/div/div/div/div[2]/div[1]/div/div/div[2]').get()
        description = remove_tags(description)
        description = description.strip()
    except:
        description = ""
    try:
        if detect(description) == "et":
            try:
                description_en = Translate(description)
            except:
                description_en = ""
            description_am = description
        else:
            description_en = description
            description_am = ""
    except:  # detect() raises on empty input
        description_en = ""
        description_am = ""

    # Email
    try:
        email = Selector(
            response=page).xpath('//*[@id="applyEmail"]/text()').get()
    except:
        email = []

    data = {
        "industry": industry,
        "salary": salary,
        "employment_type": employment_type,
        "deadline_day": deadline_day,
        "deadline_month": deadline_month,
        "deadline_year": deadline_year,
        "description_en": description_en,
        "description_am": description_am,
        "email": email
    }
    # print(data)
    return data
def Vacancy(link): print("request sent for Vacancy succesfully") url = link print(url) # headers = {"Accept-Language": "en-US,en;q=0.5"} page = requests.get(url) #headers=headers) # Company try: company = Selector(response=page).xpath( '//*[@id="ctl00_bdyPlaceHolde_jfpanelViewJob_jfJobPreview_lnkCompany"]/text()' ).get() except: company = "" # Website try: website = Selector(response=page).xpath( '//*[@id="ctl00_bdyPlaceHolde_jfpanelViewJob_jfJobPreview_lnkCompany"]/@href' ).get() website = [website] except: website = [] # Position try: position = Selector(response=page).xpath( '//*[@id="ctl00_bdyPlaceHolde_jfpanelViewJob_jfJobPreview_lblJobPostTitle"]/text()' ).get() except: position = "" # logo try: logo = Selector(response=page).xpath( '//*[@id="ctl00_bdyPlaceHolde_jfpanelViewJob_jfJobPreview_imgCompanyLogoLink"]/@src' ).get() logo = "http://jobfinder.am/" + logo except: logo = '' # Job_type try: job_type = Selector(response=page).xpath( '//*[@id="ctl00_bdyPlaceHolde_jfpanelViewJob_jfJobPreview_lblPositionType"]/text()' ).get() except: job_type = "" # Category try: category = Selector(response=page).xpath( '//*[@id="ctl00_bdyPlaceHolde_jfpanelViewJob_jfJobPreview_lblCategory"]/text()' ).get() except: category = "" # Experience try: experience = Selector(response=page).xpath( '//*[@id="ctl00_bdyPlaceHolde_jfpanelViewJob_jfJobPreview_lblExperience"]/text()' ).get() except: experience = "" # Education try: education = Selector(response=page).xpath( '//*[@id="ctl00_bdyPlaceHolde_jfpanelViewJob_jfJobPreview_lblEducation"]/text()' ).get() except: education = "" # Location try: location = Selector(response=page).xpath( '//*[@id="ctl00_bdyPlaceHolde_jfpanelViewJob_jfJobPreview_lblLocation"]/text()' ).get() except: location = "" # Published try: published = Selector(response=page).xpath( '//*[@id="ctl00_bdyPlaceHolde_jfpanelViewJob_jfJobPreview_lblDate"]/text()' ).get() published = published.split(" ") published = published[0].split("-") publish_day = int(published[0]) publish_month = int(published[1]) publish_year = int("20" + published[2]) except: publish_day = 0 publish_month = 0 publish_year = 0 if yesterday_day != publish_day or yesterday_month != publish_month: print("Not published yesterday") return # Ends try: ends = Selector(response=page).xpath( '//*[@id="ctl00_bdyPlaceHolde_jfpanelViewJob_jfJobPreview_lblDate"]/text()' ).get() ends = ends.split(" ") ends = ends[0].split("-") deadline_day = int(ends[0]) deadline_month = int(ends[1]) deadline_year = int("20" + ends[2]) except: deadline_day = 0 deadline_month = 0 deadline_year = 0 # Salary try: salary = Selector(response=page).xpath( '//*[@id="ctl00_bdyPlaceHolde_jfpanelViewJob_jfJobPreview_lblSalary"]/text()' ).get() salary = int(salary) except: salary = 0 # Age try: age = Selector(response=page).xpath( '//*[@id="ctl00_bdyPlaceHolde_jfpanelViewJob_jfJobPreview_lblAge"]/text()' ).get() if "--------" in age: age = "" except: age = "" # Gender try: gender = Selector(response=page).xpath( '//*[@id="ctl00_bdyPlaceHolde_jfpanelViewJob_jfJobPreview_lblGender"]/text()' ).get() if "--------" in gender: gender = "" except: gender = "" # Job Description try: j_description = Selector(response=page).xpath( '//*[@id="ctl00_bdyPlaceHolde_jfpanelViewJob_jfJobPreview_lblJobDescription"]/text()' ).get() except: j_description = "" # Job Responsibilities try: j_responsibilities = Selector(response=page).xpath( '//*[@id="ctl00_bdyPlaceHolde_jfpanelViewJob_jfJobPreview_lblJobResponsibilities"]/text()' ).get() except: j_responsibilities = "" # Required 
Qualifications try: r_qualifications = Selector(response=page).xpath( '//*[@id="ctl00_bdyPlaceHolde_jfpanelViewJob_jfJobPreview_lblRequiredQualifications"]' ).get() r_qualifications = remove_tags(r_qualifications) except: r_qualifications = "" # Application Procedure try: a_procedure = Selector(response=page).xpath( '//*[@id="ctl00_bdyPlaceHolde_jfpanelViewJob_jfJobPreview_lblApplicationProcedure"]' ).get() a_procedure = remove_tags(a_procedure) except: a_procedure = remove_tags(a_procedure) v_description = j_description + "\n" + j_responsibilities + "\n" + r_qualifications + "\n" + a_procedure try: if detect(v_description) == "et": try: v_description_en = Translate(v_description) except: v_description_en = "" v_description_am = v_description else: v_description_en = v_description v_description_am = "" except: v_description_en = "" v_description_am = "" # About Company try: c_description = Selector(response=page).xpath( '//*[@id="ctl00_bdyPlaceHolde_jfpanelViewJob_jfJobPreview_lblAboutCompany"]' ).get() c_description = remove_tags(c_description) except: c_description = "" try: if detect(c_description) == "et": try: c_description_en = Translate(c_description) except: c_description_en = "" c_description_am = c_description else: c_description_en = c_description c_description_am = "" except: c_description_en = "" c_description_am = "" # Email try: email = Selector(response=page).xpath( '//*[@id="ctl00_bdyPlaceHolde_jfpanelViewJob_jfJobPreview_lblApplicationProcedure"]/a/text()' ).get() email = email.strip() email = [email] except: email = [] # Phone try: phone = re.search(r"\d{9}", v_description_en).group() phone = [{"country_code": "374", "number": phone}] except: phone = [] data = { "company": company, "position": position, "website": website, "logo": logo, "job_type": job_type, "category": category, "experience": experience, "education": education, "location": location, "publish_day": publish_day, "publish_month": publish_month, "publish_year": publish_year, "deadline_day": deadline_day, "deadline_month": deadline_month, "deadline_year": deadline_year, "salary": salary, "age": age, "gender": gender, "v_description_am": v_description_am, "v_description_en": v_description_en, "c_description_am": c_description_am, "c_description_en": c_description_en, "email": email, "phone": phone, } # print(data) return data # Vacancy('http://jobfinder.am/ViewJob.aspx?JobPostingID=49217')
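# yesterday_day and yesterday_month, used by the publish-date filter in
# Vacancy() above, are not defined in this file; a minimal sketch of how they
# would be computed at module level:
from datetime import date, timedelta

_yesterday = date.today() - timedelta(days=1)
yesterday_day = _yesterday.day
yesterday_month = _yesterday.month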
def Company_Info(link):
    url = link
    page = requests.get(url)

    # Address
    try:
        address = Selector(response=page).xpath(
            '/html/body/div[3]/div[1]/div/div/div/div[3]/div/div[1]/p/span[2]/text()').get()
    except:
        address = ""

    # Phone
    try:
        phone = Selector(response=page).xpath(
            '/html/body/div[3]/div[1]/div/div/div/div[3]/div/div[3]/p/a/text()').get()
        number = phone.replace("+", "")
        number = number.replace("374", "")
        number = number.replace("tel: ", "")
        phone = [{"country_code": "374", "number": number}]
    except:
        phone = []

    # Website (the site sometimes puts a phone number in the website slot)
    try:
        website = Selector(response=page).xpath(
            '/html/body/div[3]/div[1]/div/div/div/div[3]/div/div[2]/p/a/@href').get()
        if website is None:
            website = []
        elif "+" in website or "374" in website:
            phone = website
            number = phone.replace("+", "")
            number = number.replace("374", "")
            number = number.replace("tel: ", "")
            phone = [{"country_code": "374", "number": number}]
            website = []
        else:
            website = [website]
    except:
        website = []

    # Description
    try:
        description = Selector(response=page).xpath(
            '/html/body/div[3]/div[2]/div/div/div/div[2]/div/div/p/text()').get()
        if description is None:
            description = ""
    except:
        description = ""

    try:
        if detect(description) == "et":
            try:
                description_en = Translate(description)
            except:
                description_en = ""
            description_am = description
        else:
            description_en = description
            description_am = ""
    except:
        # detect() raises on empty text
        description_en = ""
        description_am = ""

    data = {
        "address": address,
        "phone": phone,
        "website": website,
        "description_am": description_am,
        "description_en": description_en,
    }
    # print(data)
    return data

# Company_Info('https://job.am/en/company/18390/san-holding-spe')
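# Company_Info() strips "+", "374", and "tel: " with chained replace() calls,
# which also mangles local numbers that happen to contain "374". A regex-based
# sketch that only strips a leading country code; normalize_am_phone is a
# hypothetical helper, not part of the original code.
import re

def normalize_am_phone(raw):
    digits = re.sub(r"\D", "", raw or "")  # keep digits only
    if digits.startswith("374"):           # strip the country code prefix
        digits = digits[3:]
    elif digits.startswith("0"):           # strip a trunk zero
        digits = digits[1:]
    return [{"country_code": "374", "number": digits}] if digits else []

# Usage: phone = normalize_am_phone("tel: +374 91 234567")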