def extract_traininfo(text):
    """Parse one training-experience block into a training record.

    Starts from the template returned by
    ``resume_struct.get_training_struct()`` and scans ``text`` line by line:

    * a line matching ``train_reg`` supplies the time range and the course
      name (named group ``train``); a name made of exactly two tab-separated
      parts is split into ``authority`` (first part) and ``name`` (second);
    * a line starting with the training-institution, training-location or
      training-description label fills ``authority``, ``city`` or
      ``description`` with the stripped remainder of the line.

    Returns the populated record.
    """
    record = resume_struct.get_training_struct()
    # (record key, pattern) pairs, tried in this order on every line.
    labelled_fields = (
        ("authority", u"^培训机构:(.+)"),
        ("city", u"^培训地点:(.+)"),
        ("description", u"^培训描述:(.+)"),
    )
    for row in text.split('\n'):
        header = re.search(train_reg, row)
        if header:
            stamp = match_timestamp.match_timestamp_by_reg(train_reg, row)
            begin, finish, ongoing = StringUtils.transform_timestamp(stamp)
            record["start_time"] = begin
            record["end_time"] = finish
            record["so_far"] = ongoing
            record["name"] = header.group("train").strip()
            pieces = record["name"].split("\t")
            if len(pieces) == 2:
                record["authority"], record["name"] = pieces
        for key, pattern in labelled_fields:
            found = re.search(pattern, row)
            if found:
                record[key] = found.group(1).strip()
    return record
def extract_projectinfo(text):
    """Parse one project-experience block into a project record.

    Starts from the template returned by
    ``resume_struct.get_project_struct()``, stores the raw block under
    ``ori_text`` and scans ``text`` line by line:

    * a line matching ``project_reg`` supplies the project name (named group
      ``project``) and the time range;
    * a line containing the project-description label starts collecting
      into ``describe``; a line containing the responsibility-description
      label starts collecting into ``responsibilities``. The label is
      removed and the rest of the line is stripped before it is collected.

    Collected lines are joined with a newline; an empty line adds nothing.

    Returns the populated record.
    """
    project = resume_struct.get_project_struct()
    project["ori_text"] = text
    in_desc, in_resp = False, False
    for line in text.split("\n"):
        m_proj = re.search(project_reg, line)
        if m_proj:
            timestamp = match_timestamp.match_timestamp_by_reg(
                project_reg, line)
            project["name"] = m_proj.group("project").strip()
            (project["start_time"], project["end_time"],
             project["so_far"]) = StringUtils.transform_timestamp(timestamp)

        if re.search(u"项目描述(:|:)", line):
            line = re.sub(u"项目描述(:|:)", "", line).strip()
            in_desc, in_resp = True, False
        # Tested on the possibly already trimmed line, so a line carrying
        # both labels ends up in the responsibilities section.
        if re.search(u"责任描述(:|:)", line):
            line = re.sub(u"责任描述(:|:)", "", line).strip()
            in_desc, in_resp = False, True

        if in_desc:
            # Separator only between two non-empty pieces.
            if project["describe"] and line:
                project["describe"] += '\n' + line
            else:
                project["describe"] += line
        if in_resp:
            if project["responsibilities"] and line:
                project["responsibilities"] += '\n' + line
            else:
                project["responsibilities"] += line
    return project
def extract_certinfo(text):
    """Parse one certificate block into a certificate record.

    Starts from the template returned by
    ``resume_struct.get_certificate_struct()``. Every line matching
    ``certi_reg`` overwrites ``name`` (named group ``name``, stripped) and
    ``start_time`` (first element of the transformed timestamp); the other
    two timestamp elements are discarded.

    Returns the populated record.
    """
    record = resume_struct.get_certificate_struct()
    for row in text.split('\n'):
        hit = re.search(certi_reg, row)
        if not hit:
            continue
        stamp = match_timestamp.match_timestamp_by_reg(certi_reg, row)
        record["name"] = hit.group("name").strip()
        # Still unpack three values so a malformed result fails loudly.
        issued, _unused_end, _unused_so_far = \
            StringUtils.transform_timestamp(stamp)
        record["start_time"] = issued
    return record
def extract_eduinfo(expblock):
    """Parse one education block whose header line carries every field.

    Starts from the template returned by
    ``resume_struct.get_education_struct()`` and stores the raw block under
    ``ori_text``. Each line matching ``edu_reg`` supplies the school name
    (group ``school``), the time range, the degree (group ``degree`` passed
    to ``match_education.match_degree`` with ``99`` as second argument) and
    the discipline name (group ``discipline``).

    Returns the populated record.
    """
    record = resume_struct.get_education_struct()
    record["ori_text"] = expblock
    for row in expblock.split("\n"):
        hit = re.search(edu_reg, row)
        if not hit:
            continue
        stamp = match_timestamp.match_timestamp_by_reg(edu_reg, row)
        record["school_name"] = hit.group("school").strip()
        begin, finish, ongoing = StringUtils.transform_timestamp(stamp)
        record["start_time"] = begin
        record["end_time"] = finish
        record["so_far"] = ongoing
        record["degree"] = match_education.match_degree(
            hit.group('degree'), 99)
        record["discipline_name"] = hit.group('discipline').strip()
    return record
def extract_projectinfo(text):
    """Parse one project block whose name follows the time line.

    Starts from the template returned by
    ``resume_struct.get_project_struct()``, stores the raw block under
    ``ori_text`` and scans ``text`` line by line:

    * a line matching ``project_reg`` supplies the time range; the line
      right after it is taken verbatim as the project name;
    * the project-position and company labels fill ``position_name`` and
      ``corporation_name``;
    * the project-summary, project-responsibility and project-achievement
      labels switch the free-text section being collected (``describe``,
      ``responsibilities``, ``achivement``); the label is removed and the
      rest of the line stripped before it is collected.

    Collected lines are joined with a newline; an empty line adds nothing.

    Returns the populated record.
    """
    project = resume_struct.get_project_struct()
    project["ori_text"] = text
    name_expected = False
    # Record key of the free-text section being filled, or None before the
    # first section label has been seen.
    section = None
    for row in text.split('\n'):
        if re.search(project_reg, row):
            stamp = match_timestamp.match_timestamp_by_reg(project_reg, row)
            (project["start_time"], project["end_time"],
             project["so_far"]) = StringUtils.transform_timestamp(stamp)
            name_expected = True
            continue
        if name_expected:
            project["name"] = row
            name_expected = False
            continue

        position = re.search(u"项目职务(:|:)\s*(?P<posi>.+)", row)
        if position:
            project["position_name"] = position.group("posi")
        company = re.search(u"所在公司(:|:)(?P<corp>.+)", row)
        if company:
            project["corporation_name"] = company.group("corp")

        # Labels are tested one after another on the progressively trimmed
        # line, so the last label present decides the section.
        if re.search(u"项目简介(:|:)", row):
            row = re.sub(u"项目简介(:|:)", "", row).strip()
            section = "describe"
        if re.search(u"项目职责(:|:)", row):
            row = re.sub(u"项目职责(:|:)", "", row).strip()
            section = "responsibilities"
        if re.search(u"项目业绩(:|:)", row):
            row = re.sub(u"项目业绩(:|:)", "", row).strip()
            section = "achivement"

        if section is not None:
            # Separator only between two non-empty pieces.
            if project[section] and row:
                project[section] += '\n' + row
            else:
                project[section] += row
    return project
def extract_eduinfo(expblock):
    """Parse one education block made of a header line plus free text.

    Starts from the template returned by
    ``resume_struct.get_education_struct()`` and stores the raw block under
    ``ori_text``. A line matching ``edu_reg`` supplies the school name
    (group ``school``), discipline name (group ``disc``), degree (group
    ``degree`` passed to ``match_education.match_degree`` with ``99`` as
    second argument) and the time range. Every other line is stripped and
    appended to ``discipline_desc``, newline-separated.

    After the scan, a leading major-description label is removed from
    ``discipline_desc`` and the result is stripped.

    Returns the populated record.
    """
    record = resume_struct.get_education_struct()
    record["ori_text"] = expblock
    for row in expblock.split("\n"):
        hit = re.search(edu_reg, row)
        if not hit:
            # Non-header lines are all description text.
            if record["discipline_desc"]:
                record["discipline_desc"] += "\n" + row.strip()
            else:
                record["discipline_desc"] += row.strip()
            continue
        record["school_name"] = hit.group("school").strip()
        record["discipline_name"] = hit.group("disc").strip()
        degree_text = hit.group("degree").strip()
        record["degree"] = match_education.match_degree(degree_text, 99)
        stamp = match_timestamp.match_timestamp_by_reg(edu_reg, row)
        begin, finish, ongoing = StringUtils.transform_timestamp(stamp)
        record["start_time"] = begin
        record["end_time"] = finish
        record["so_far"] = ongoing
    cleaned = re.sub(u"^专业描述(:|:)", "", record["discipline_desc"])
    record["discipline_desc"] = cleaned.strip()
    return record
def extract_workinfo(text):
    """Parse one employment block laid out as time / company / position.

    Starts from the template returned by
    ``resume_struct.get_emplyment_struct()``, stores the raw block under
    ``ori_text`` and scans ``text`` line by line:

    * a line matching ``work_reg`` supplies the time range; the next line is
      taken verbatim as ``corporation_name`` and the one after it as
      ``position_name``;
    * the location label fills ``city``;
    * from the responsibilities-and-achievements label onwards every line
      is collected into ``responsibilities`` (on the label line, everything
      up to and including the label is dropped). Lines are joined with a
      newline; an empty line adds nothing.

    After the scan, a leading job-description label is removed from
    ``responsibilities`` and the result is stripped.

    Returns the populated record.
    """
    work = resume_struct.get_emplyment_struct()
    work["ori_text"] = text
    # Record key that the next line should fill verbatim, or None.
    awaiting = None
    collecting = False
    for row in text.split('\n'):
        if re.search(work_reg, row):
            stamp = match_timestamp.match_timestamp_by_reg(work_reg, row)
            (work["start_time"], work["end_time"],
             work["so_far"]) = StringUtils.transform_timestamp(stamp)
            awaiting = "corporation_name"
            continue
        if awaiting == "corporation_name":
            work["corporation_name"] = row
            awaiting = "position_name"
            continue
        if awaiting == "position_name":
            work["position_name"] = row
            awaiting = None
            continue

        location = re.search(u"所在地区(:|:)(?P<loc>.+)", row)
        if location:
            work["city"] = location.group("loc")

        if re.search(u"职责业绩(:|:)", row):
            row = re.sub(u".*职责业绩(:|:)", "", row)
            collecting = True
        if collecting:
            # Separator only between two non-empty pieces.
            if work["responsibilities"] and row:
                work["responsibilities"] += "\n" + row
            else:
                work["responsibilities"] += row

    trimmed = re.sub(u"^工作描述(:|:)", "", work["responsibilities"])
    work["responsibilities"] = trimmed.strip()
    return work
def extract_workinfo(text):
    """Parse one employment block laid out as company / industry / position.

    Starts from the template returned by
    ``resume_struct.get_emplyment_struct()``, stores the raw block under
    ``ori_text`` and walks the lines with a small state machine:

    * ``"not found company"``: wait for a line matching ``work_reg``; it
      supplies the cleaned company name (group ``company``) and time range;
    * ``"company name"``: the next line's first ``|``-separated field is
      the industry;
    * ``"industry"``: the next line, split on whitespace, supplies
      department and position when it has at least two tokens;
    * ``"position"``: every line is appended to ``responsibilities``.

    A line containing the job-description label forces the ``"position"``
    state regardless of the current one. After the scan, a leading
    job-description label is removed from ``responsibilities`` and the
    result is stripped.

    Returns the populated record.
    """
    work = resume_struct.get_emplyment_struct()
    work["ori_text"] = text
    state = "not found company"
    for row in text.split('\n'):
        if re.search(u"工作描述(:|:)", row):
            state = "position"

        if state == "position":
            if work["responsibilities"]:
                work["responsibilities"] += '\n' + row
            else:
                work["responsibilities"] += row
        elif state == "industry":
            tokens = re.split("\s+", row)
            if len(tokens) > 1:
                work["architecture_name"] = tokens[0]
                work["position_name"] = tokens[1]
            # NOTE(review): the state advances even when the line had fewer
            # than two tokens -- confirm this matches the intended layout.
            state = "position"
        elif state == "company name":
            fields = row.split("|")
            if fields:
                work["industry_name"] = fields[0].strip()
            state = "industry"
        elif state == "not found company":
            header = re.search(work_reg, row)
            if header:
                stamp = match_timestamp.match_timestamp_by_reg(work_reg, row)
                work["corporation_name"] = clean_company_name(
                    header.group("company").strip())
                (work["start_time"], work["end_time"],
                 work["so_far"]) = StringUtils.transform_timestamp(stamp)
                state = "company name"

    trimmed = re.sub(u"^工作描述(:|:)", "", work["responsibilities"])
    work["responsibilities"] = trimmed.strip()
    return work
def extract_workinfo(text):
    """Parse one employment block that uses labelled position/industry lines.

    Starts from the template returned by
    ``resume_struct.get_emplyment_struct()``, stores the raw block under
    ``ori_text`` and scans ``text`` line by line:

    * a line matching ``work_reg`` supplies the cleaned company name (group
      ``company``) and the time range;
    * a line with both the position-name and department labels fills
      ``position_name`` (with the part-time marker removed) and
      ``architecture_name``;
    * a line with the industry label fills ``industry_name``; the line is
      then consumed;
    * after the industry line, while no position is known, the next line is
      read as ``department<TAB>position`` or as a bare position and then
      consumed;
    * every other line after the industry line is appended to
      ``responsibilities``.

    After the scan, a leading job-description label is removed from
    ``responsibilities`` and the result is stripped.

    Returns the populated record.
    """
    work = resume_struct.get_emplyment_struct()
    work["ori_text"] = text
    industry_seen = False
    for row in text.split('\n'):
        header = re.search(work_reg, row)
        if header:
            stamp = match_timestamp.match_timestamp_by_reg(work_reg, row)
            work["corporation_name"] = header.group("company").strip()
            work["corporation_name"] = clean_company_name(
                work["corporation_name"])
            begin, finish, ongoing = StringUtils.transform_timestamp(stamp)
            work["start_time"] = begin
            work["end_time"] = finish
            work["so_far"] = ongoing

        labelled = re.search(
            u"职位名称(:|:)(?P<pos>.+)部门(:|:)(?P<arc>.+)", row)
        if labelled:
            position = labelled.group("pos").replace(u"(兼职)", "")
            work["position_name"] = position.strip()
            work["architecture_name"] = labelled.group("arc")

        industry = re.search(
            u"(行业|所属行业)(:|:)\s*(?P<ind>.+?)(\s|$)", row)
        if industry:
            work["industry_name"] = industry.group("ind").strip()
            industry_seen = True
            continue

        if industry_seen and not work["position_name"]:
            cells = row.split("\t")
            if len(cells) == 2:
                work["architecture_name"] = cells[0].strip()
                work["position_name"] = cells[1].strip()
            elif len(cells) == 1:
                work["position_name"] = cells[0].strip()
            # The line is consumed even when it had more than two cells.
            continue

        if industry_seen:
            if work["responsibilities"]:
                work["responsibilities"] += "\n" + row
            else:
                work["responsibilities"] += row

    trimmed = re.sub(u"^工作描述(:|:)", "", work["responsibilities"])
    work["responsibilities"] = trimmed.strip()
    return work
def extract_eduinfo(expblock):
    """Parse one education block laid out as school / degree line / text.

    Starts from the template returned by
    ``resume_struct.get_education_struct()``, stores the raw block under
    ``ori_text`` and walks the lines with a small state machine:

    * ``"not found school"``: wait for a line matching ``edu_reg``; it
      supplies the school name (group ``school`` with the overseas-experience
      marker removed) and the time range;
    * ``"school"``: the next line is split on ``|``. With two or more fields
      the first is the degree and the second the discipline. With a single
      field it is stored as the degree when ``match_degree`` recognises it,
      otherwise as the discipline name;
    * ``"degree"``: every further line is stripped and appended to
      ``discipline_desc``, newline-separated.

    After the scan, a leading major-description label is removed from
    ``discipline_desc`` and the result is stripped.

    Returns the populated record.
    """
    edu = resume_struct.get_education_struct()
    edu["ori_text"] = expblock
    state = "not found school"
    for line in expblock.split("\n"):
        if state == "not found school":
            m = re.search(edu_reg, line)
            if m:
                timestamp = match_timestamp.match_timestamp_by_reg(
                    edu_reg, line)
                edu["school_name"] = re.sub(
                    u"海外经历", "", m.group("school")).strip()
                (edu["start_time"], edu["end_time"],
                 edu["so_far"]) = StringUtils.transform_timestamp(timestamp)
                state = "school"
        elif state == "school":
            items = line.split("|")
            if len(items) >= 2:
                edu["degree"] = match_education.match_degree(items[0], 99)
                edu["degree_ori"] = items[0].strip()
                edu["discipline_name"] = items[1].strip()
            else:
                # Single field: pass the field text itself (not the list of
                # fields) to match_degree, as every other call does.
                degree = match_education.match_degree(items[0])
                if degree:
                    edu["degree"] = degree
                    edu["degree_ori"] = items[0].strip()
                else:
                    edu["discipline_name"] = items[0].strip()
            state = "degree"
        elif state == "degree":
            if edu["discipline_desc"]:
                edu["discipline_desc"] += '\n' + line.strip()
            else:
                edu["discipline_desc"] += line.strip()

    edu["discipline_desc"] = re.sub(
        u"^专业描述(:|:)", "", edu["discipline_desc"]).strip()
    return edu
def extract_eduinfo(expblock):
    """Parse one education block whose school name follows the time line.

    Starts from the template returned by
    ``resume_struct.get_education_struct()``, stores the raw block under
    ``ori_text`` and scans ``expblock`` line by line:

    * a line matching ``edu_reg`` supplies the time range; the line right
      after it is stripped and stored as ``school_name``;
    * the major label fills ``discipline_name``;
    * the education-level label fills ``degree`` through
      ``match_education.match_degree``.

    The school-name line is still checked for the two labels.

    Returns the populated record.
    """
    record = resume_struct.get_education_struct()
    record["ori_text"] = expblock
    school_expected = False
    for row in expblock.split("\n"):
        if re.search(edu_reg, row):
            stamp = match_timestamp.match_timestamp_by_reg(edu_reg, row)
            begin, finish, ongoing = StringUtils.transform_timestamp(stamp)
            record["start_time"] = begin
            record["end_time"] = finish
            record["so_far"] = ongoing
            school_expected = True
            continue

        if school_expected:
            record["school_name"] = row.strip()
            school_expected = False

        major = re.search(u"专业(:|:)(?P<dis>.+)", row)
        if major:
            record["discipline_name"] = major.group("dis").strip()

        level = re.search(u"学历(:|:)(?P<deg>.+)", row)
        if level:
            level_text = level.group("deg").strip()
            record["degree"] = match_education.match_degree(level_text)
    return record
def extract_projectinfo(text):
    """Parse one project block whose header line carries name and time.

    Starts from the template returned by
    ``resume_struct.get_project_struct()`` and stores the raw block under
    ``ori_text``. Lines are skipped until one matches ``project_reg``; that
    line supplies the project name (group ``project``, with a trailing
    "linked" marker removed) and the time range. On the following lines:

    * the project-description label starts collecting into ``describe``;
    * the responsibility-description label starts collecting into
      ``responsibilities``.

    The label is removed and the rest of the line stripped before it is
    collected. Collected lines, including empty ones, are joined with a
    newline.

    Returns the populated record.
    """
    project = resume_struct.get_project_struct()
    project["ori_text"] = text
    header_seen = False
    # Record key of the free-text section being filled, or None before the
    # first section label has been seen.
    section = None
    for row in text.split('\n'):
        if not header_seen:
            header = re.search(project_reg, row)
            if header:
                stamp = match_timestamp.match_timestamp_by_reg(
                    project_reg, row)
                project["name"] = header.group("project").strip()
                project["name"] = re.sub(
                    u"已关联$", "", project["name"]).strip()
                begin, finish, ongoing = \
                    StringUtils.transform_timestamp(stamp)
                project["start_time"] = begin
                project["end_time"] = finish
                project["so_far"] = ongoing
                header_seen = True
            # The header line itself is never collected as section text.
            continue

        if re.search(u"项目描述(:|:)", row):
            row = re.sub(u"项目描述(:|:)", "", row).strip()
            section = "describe"
        if re.search(u"责任描述(:|:)", row):
            row = re.sub(u"责任描述(:|:)", "", row).strip()
            section = "responsibilities"

        if section is not None:
            if project[section]:
                project[section] += '\n' + row
            else:
                project[section] += row
    return project