Пример #1
0
def _strip_feature_label(text):
    """
    Remove the field label and the trailing u'发布' marker from one feature string.

    ``str.lstrip``/``str.rstrip`` take a *set of characters*, not a prefix or
    suffix, so chaining them also eats legitimate leading/trailing characters
    of the value (e.g. a benefit text that starts with u'发').  This helper
    removes only an exact ``<label> :`` prefix and an exact u'发布' suffix.

    :param text: one string taken from the ``job_request`` element.
    :return: the cleaned value.
    """
    text = text.strip()
    for label in (u'职位诱惑', u'发布时间'):
        if text.startswith(label):
            rest = text[len(label):].lstrip()
            # Only treat it as a label when a colon really follows it.
            if rest[:1] in (u':', u':'):
                text = rest[1:].lstrip()
            break

    suffix = u'发布'
    if text.endswith(suffix):
        text = text[:-len(suffix)]
    return text


def read_job_from_html(skill, html_file):
    """
    Read job and company info from a downloaded html file.

    :param skill: value stored as ``job.skill_tag``.
    :param html_file: path of the html file that contains the job info;
        sometimes the contents are empty.
    :return: ``None`` when the page has no ``dl.job_detail`` element,
        otherwise a ``(job, company)`` tuple.  Callers must check for ``None``
        before unpacking.
    :raises AssertionError: when the ``job_request`` element does not yield
        exactly 7 feature strings.
    """
    soup = make_soup(read_all(html_file))
    detail = soup.find('dl', 'job_detail')

    # In some rare cases, e.g. the job is closed already, the job info is missing.
    if not detail:
        return None

    job = Job()
    job.job_id = int(soup.find('input', {'id': 'jobid'})['value'])
    job.skill_tag = skill

    log('*** JOB ***')
    title = detail.find('h1')
    title_text = title['title']
    dept = title.div.text
    log(title_text)
    log(dept)
    job.title = title_text
    job.dept = dept

    log('')
    request = detail.find('dd', 'job_request')
    main_features = []
    for raw in request.stripped_strings:
        feature = _strip_feature_label(raw)
        log(feature)
        main_features.append(feature)

    # Explicit raise instead of ``assert`` so the check survives ``python -O``;
    # the exception type is kept for existing callers.
    if len(main_features) != 7:
        raise AssertionError('expected 7 job features in %s, found %d'
                             % (html_file, len(main_features)))

    # The features are positional.
    job.salary = main_features[0]
    job.city = main_features[1]
    job.experience = main_features[2]
    job.education = main_features[3]
    job.full_time = main_features[4] == u'全职'
    job.benefits = main_features[5]
    job.published_date = get_published_date(main_features[6], created_on(html_file))

    log('')
    paragraphs = detail.find('dd', 'job_bt').find_all('p')
    # Keep the markup of every paragraph of the description.
    job.desc = ''.join(unicode(p) for p in paragraphs)
    log(job.desc)

    log('\n*** COMPANY ***\n')
    company = Company()

    comp = soup.find('dl', 'job_company')
    link = comp.dt.a
    url = link['href']
    # The company id is the first run of digits in the company link.
    m = re.search(r'(?P<comp_id>\d+)', url)
    log(url)
    company.comp_id = int(m.group('comp_id'))
    job.comp_id = company.comp_id

    logo = link.img['src']
    log(logo)
    name = link.div.h2.text.split()[0]
    log(name)
    company.logo = logo
    company.name = name

    log('')
    comp_features = comp.dd
    features = [s
                for li in comp_features.ul.find_all('li')
                for s in li.stripped_strings]
    log(''.join(features))
    if len(features) == 6:
        # Odd positions hold the values; even positions are skipped.
        company.domain = features[1]
        company.size = features[3]
        company.url = features[5]
    else:
        print(u'features ex: ' + html_file)

    log('')
    stage_tags = comp_features.h4.find_next_sibling('ul').find_all('li')
    stage = [s for li in stage_tags for s in li.stripped_strings]
    log('\t'.join(stage))
    if len(stage) % 2 == 0:
        # The strings alternate between a key and its value.
        for key, value in zip(stage[0::2], stage[1::2]):
            if key == u'目前阶段':
                company.cur_stage = value
            elif key == u'投资机构':
                company.investor = value
    else:
        print(u'stages ex: ' + html_file)

    log('')
    # address
    if comp_features.div:
        address = comp_features.div.text
        log(address)
        company.address = address

    return job, company
Пример #2
0
def create_sample_company():
    """Create a sample company with colleagues, Idea Boxes and Ideas.

    Steps, in order: save the company, create the founder and make them an
    admin, copy the sample logo, create two more named colleagues plus a
    batch of random ones, then create sample Idea Boxes and Ideas.

    Returns:
        A redirect to the ``register_company`` endpoint when saving the
        company or its admin fails; otherwise ``None``.
    """
    company_name = "Eric BLABLA KGB"

    # Instantiate Company:
    company = Company(name=company_name)
    company.set_founder_password("aaa")
    company.set_joining_password("bbb")

    # Update database and query the ID of the new company:
    try:
        db.session.add(company)
        db.session.commit()
    except Exception:
        # Still best-effort, but no longer swallows KeyboardInterrupt/SystemExit.
        db.session.rollback()
        flash(
            "Any error occured when created the sample company registration. Please try again.",
            "error")
        return redirect(url_for("register_company"))

    registered_company = Company.query.filter_by(name=company_name).first()

    def add_colleague(avatar, **fields):
        """Build a confirmed colleague with the sample password and store it."""
        colleague = Colleagues(confirmed=1, **fields)
        colleague.set_password("aaa")
        create_sample_colleague({
            "company_id": registered_company.id,
            "colleague": colleague,
            "sample_avatar": avatar,
        })

    def read_lines(path):
        """Return the non-blank lines of *path* without surrounding whitespace."""
        # ``with`` closes the file; stripping keeps the trailing newline out
        # of the stored e-mails, names and positions.
        with open(path) as handle:
            return [line.strip() for line in handle if line.strip()]

    # Instantiate Jhon Do:
    add_colleague("john_do.jpg",
                  user_name="jhon_do",
                  email="*****@*****.**",
                  first_name="Jhon",
                  last_name="Do",
                  position="Founder")

    # Set the founder as Admin with full privileges:
    registered_colleague = Colleagues.query.filter_by(
        email="*****@*****.**").first()
    # Instantiate Admins:
    admin = instatiate_admin(True)
    admin.colleague_id = registered_colleague.id
    try:
        db.session.add(admin)
        db.session.commit()
    except Exception:
        db.session.rollback()
        flash(
            "Any error occured when created sample admin registration. Please try again.",
            "error")
        return redirect(url_for("register_company"))

    # Copy logo:
    location = "static/sample_logo/blabla.png"
    destination = f"static/logo/{registered_colleague.company_id}.png"
    shutil.copy2(location, destination)

    # Update database:
    company.logo = "png"
    try:
        db.session.commit()
    except Exception:
        db.session.rollback()
        print("An error occured when copied logo.")
    else:
        print("Company logo copied.")

    # Instantiate Jane Do:
    add_colleague("jane_do.png",
                  user_name="jane_do",
                  email="*****@*****.**",
                  first_name="Jane",
                  last_name="Do",
                  position="Co-Founder")

    # Instantiate Do Do:
    add_colleague("dodo.svg",
                  user_name="dodo",
                  email="*****@*****.**",
                  first_name="Do",
                  last_name="Do",
                  position="dodo")

    # Instantiate x more colleagues:
    x_more = 20

    usernames = read_lines("fake_dataset/username.txt")
    emails = read_lines("fake_dataset/fake_email.txt")
    first_names = read_lines("fake_dataset/first_name.txt")
    last_names = read_lines("fake_dataset/last_name.txt")
    positions = read_lines("fake_dataset/position.txt")

    for _ in range(x_more):
        add_colleague(None,
                      user_name=get_random_item(usernames).strip(),
                      email=get_random_item(emails),
                      first_name=get_random_item(first_names),
                      last_name=get_random_item(last_names).lower().title(),
                      position=get_random_item(positions))

    # Create sample Idea Box:
    admin = Admins.query.filter(
        Admins.colleague_id == registered_colleague.id).first()
    for x in range(2):
        new_box = Boxes(name=lorem.sentence().replace(".", ""),
                        description=lorem.paragraph(),
                        close_at=str_to_date(
                            add_day(str_to_date(today()),
                                    x).strftime('%Y-%m-%d')),
                        admin_id=admin.id)

        try:
            print("Trying to add new Idea Box to the database...")
            db.session.add(new_box)
            db.session.commit()
        except SQLAlchemyError as e:
            # Roll back first so a failure while reporting cannot leave the
            # session in a broken state.
            db.session.rollback()
            # Not every SQLAlchemyError has ``orig``; fall back to the error itself.
            error = str(getattr(e, "orig", e))
            print("**************************************")
            print(error)
            print("New Idea Box not created!")
            print("new_box.name: ", new_box.name)
            print("new_box.description: ", new_box.description)
            print("new_box.close_at: ", new_box.close_at)
            print("new_box.admin_id: ", new_box.admin_id)

    # Create sample Idea:
    colleagues = Colleagues.query.filter(
        Colleagues.company_id == registered_company.id).all()
    # Only the box id is needed, so query Boxes alone; adding Admins and
    # Colleagues without a join condition would yield a cartesian product.
    boxes = db.session.query(Boxes).filter(
        Boxes.admin_id == admin.id).all()

    if boxes:
        for _ in range(7):
            colleague = get_random_item(colleagues)
            sign = [
                "incognito", colleague.user_name, colleague.first_name,
                colleague.fullname()
            ]
            idea = Ideas(idea=lorem.paragraph(),
                         sign=get_random_item(sign),
                         box_id=get_random_item(boxes).id,
                         colleague_id=colleague.id)
            db.session.add(idea)

        try:
            db.session.commit()
        except Exception:
            db.session.rollback()
            print("Sample Ideas not created!")
    else:
        # Both Idea Box inserts can fail (handled above); nothing to attach ideas to.
        print("No Idea Box available, sample Ideas not created!")

    print("The sample company registered successfully!")