Python ResumeParser.get_extracted_data示例，pyresparser.ResumeParser.get_extracted_data Python示例

示例#1

0

显示文件

 def __extract_from_file(self, file):
     """Parse a local resume file and return its extracted data.

     Returns a one-element list holding the parser's result when ``file``
     exists on disk; otherwise returns an explanatory error string.
     """
     # Guard clause: nothing to parse when the path is missing.
     if not os.path.exists(file):
         return 'File not found. Please provide a valid file name.'

     print_cyan('Extracting data from: {}'.format(file))
     extracted = ResumeParser(file).get_extracted_data()
     return [extracted]

示例#2

0

显示文件

def resume_result_wrapper(args):
    """Run the resume parser on ``args`` and return the extracted data.

    When ``args`` has exactly two items, the first is passed to
    ``ResumeParser`` as the resume and the second as its extra positional
    argument; otherwise ``args`` itself is passed as the resume.
    """
    if len(args) != 2:
        # Single-argument form: ``args`` is the resume itself.
        print_cyan('Extracting data from: {}'.format(args))
        return ResumeParser(args).get_extracted_data()

    # Two-item form: (resume, second parser argument).
    print_cyan('Extracting data from: {}'.format(args[0]))
    return ResumeParser(args[0], args[1]).get_extracted_data()

示例#3

0

显示文件

 def __extract_from_remote_file(self, remote_file):
     """Download a resume from ``remote_file`` (a URL) and parse it.

     The payload is fetched with a ``Mozilla/5.0`` User-Agent header,
     wrapped in an in-memory buffer and handed to ``ResumeParser``.

     Returns a one-element list holding the parser's extracted data.
     Errors raised by ``urlopen`` are not caught here.
     """
     print_cyan('Extracting data from: {}'.format(remote_file))
     req = Request(remote_file, headers={'User-Agent': 'Mozilla/5.0'})
     # Close the HTTP response as soon as the body has been read instead of
     # leaving the connection to be reclaimed by the garbage collector.
     with urlopen(req) as response:
         webpage = response.read()
     _file = io.BytesIO(webpage)
     # Name the buffer after the last URL path segment; presumably the
     # parser uses it to detect the file type -- TODO confirm.
     _file.name = remote_file.split('/')[-1]
     resume_parser = ResumeParser(_file)
     return [resume_parser.get_extracted_data()]

示例#4

0

显示文件

文件： command_line.py 项目： hafees500/resparser

 def __extract_from_file(self, file, skills_file=None, custom_regex=None):
     """Parse a local resume file, exiting the process if it is missing.

     ``skills_file`` and ``custom_regex`` are forwarded unchanged to
     ``ResumeParser``. Returns a one-element list with the extracted data.
     When ``file`` does not exist, prints a message and calls
     ``sys.exit(1)``.
     """
     if not os.path.exists(file):
         print('File not found. Please provide a valid file name')
         sys.exit(1)

     print_cyan('Extracting data from: {}'.format(file))
     extracted = ResumeParser(file, skills_file, custom_regex).get_extracted_data()
     return [extracted]

示例#5

0

显示文件

 def __extract_from_remote_file(self, remote_file):
     """Download a resume from ``remote_file`` (a URL) and parse it.

     Returns a one-element list holding the parser's extracted data, or an
     explanatory error string when the download raises
     ``urllib.error.HTTPError``.
     """
     try:
         print_cyan('Extracting data from: {}'.format(remote_file))
         req = Request(remote_file, headers={'User-Agent': 'Mozilla/5.0'})
         # Close the HTTP response as soon as the body has been read.
         with urlopen(req) as response:
             webpage = response.read()
         _file = io.BytesIO(webpage)
         # Name the buffer after the last URL path segment; presumably the
         # parser uses it to detect the file type -- TODO confirm.
         _file.name = remote_file.split('/')[-1]
         resume_parser = ResumeParser(_file)
         return [resume_parser.get_extracted_data()]
     except urllib.error.HTTPError:
         return 'File not found. Please provide correct URL for resume file.'

示例#6

0

显示文件

文件： test_name.py 项目： zseda/pyresparser

def get_remote_data():
    """Download a fixed sample resume PDF and return its extracted data.

    Returns a one-element list holding the parser's extracted data, or an
    explanatory error string when the download raises
    ``urllib.error.HTTPError``.
    """
    try:
        remote_file = 'https://www.omkarpathak.in/downloads/OmkarResume.pdf'
        print('Extracting data from: {}'.format(remote_file))
        req = Request(remote_file, headers={'User-Agent': 'Mozilla/5.0'})
        # Close the HTTP response as soon as the body has been read.
        with urlopen(req) as response:
            webpage = response.read()
        _file = io.BytesIO(webpage)
        # Name the buffer after the last URL path segment; presumably the
        # parser uses it to detect the file type -- TODO confirm.
        _file.name = remote_file.split('/')[-1]
        resume_parser = ResumeParser(_file)
        return [resume_parser.get_extracted_data()]
    except urllib.error.HTTPError:
        return 'File not found. Please provide correct URL for resume file.'

示例#7

0

显示文件

文件： app.py 项目： elliott-king/hostile-board-parser

def extract_resume(url, skills_file=None, custom_regex=None):
    """Download the resume at ``url`` and return its extracted data.

    ``skills_file`` and ``custom_regex`` are forwarded unchanged to
    ``ResumeParser``.

    Returns:
        A one-element list holding the parser's extracted data.

    Raises:
        NameError: when the download raises ``urllib.error.HTTPError``;
            the original HTTP error is attached as the cause.
    """
    try:
        req = urllib.request.Request(url,
                                     headers={'User-Agent': 'Mozilla/5.0'})
        # Close the HTTP response as soon as the body has been read.
        with urllib.request.urlopen(req) as response:
            webpage = response.read()
        _file = io.BytesIO(webpage)
        _file.name = 'test.pdf'  # the name is only really needed to identify the extension
        resume_parser = ResumeParser(_file, skills_file, custom_regex)
        return [resume_parser.get_extracted_data()]
    except urllib.error.HTTPError as err:
        s = 'File not found. Please provide correct URL for resume file.'
        print(s)
        # Keep the exception type callers already catch, but chain the HTTP
        # error explicitly so the status code is not lost in the traceback.
        raise NameError(s) from err

示例#8

0

显示文件

文件： views.py 项目： mkulsha/resume_parser

def _join_or_none(values, separator=', '):
    """Join ``values`` with ``separator``; return None when ``values`` is None."""
    return separator.join(values) if values is not None else None


def homepage(request):
    """Handle the resume upload page.

    On POST: deletes every stored ``Resume``, validates the upload form,
    then for each uploaded file saves it, runs ``ResumeParser`` on the
    stored file and copies the extracted fields onto the model. If saving
    raises ``IntegrityError`` a warning is flashed and the user is
    redirected to ``'homepage'``. On success ``base.html`` is rendered
    with all resumes. An invalid form is re-rendered as-is.

    On any other method: renders ``base.html`` with an empty upload form.
    """
    if request.method == 'POST':
        # NOTE(review): this wipes all stored resumes on every upload --
        # confirm that is intended.
        Resume.objects.all().delete()
        file_form = UploadResumeModelForm(request.POST, request.FILES)
        files = request.FILES.getlist('resume')
        if file_form.is_valid():
            for file in files:
                try:
                    # saving the file
                    resume = Resume(resume=file)
                    resume.save()

                    # extracting resume entities
                    parser = ResumeParser(
                        os.path.join(settings.MEDIA_ROOT, resume.resume.name))
                    data = parser.get_extracted_data()
                    resume.name = data.get('name')
                    resume.email = data.get('email')
                    resume.mobile_number = data.get('mobile_number')
                    resume.education = _join_or_none(data.get('degree'))
                    resume.company_names = data.get('company_names')
                    resume.college_name = data.get('college_name')
                    resume.designation = data.get('designation')
                    resume.total_experience = data.get('total_experience')
                    resume.skills = _join_or_none(data.get('skills'))
                    resume.experience = _join_or_none(data.get('experience'))
                    resume.save()
                except IntegrityError:
                    # The file name belongs in the message text; the third
                    # positional argument of messages.warning is extra_tags.
                    messages.warning(
                        request, 'Duplicate resume found: ' + file.name)
                    return redirect('homepage')
            resumes = Resume.objects.all()
            messages.success(request, 'Resumes uploaded!')
            context = {
                'resumes': resumes,
            }
            return render(request, 'base.html', context)
        # Invalid submission: fall through and re-render the bound form so
        # the final render does not hit an unbound ``form`` variable.
        form = file_form
    else:
        form = UploadResumeModelForm()
    return render(request, 'base.html', {'form': form})

示例#9

0

显示文件

def homepage(request):
    """Handle the resume upload page and collect layout statistics.

    On POST: validates the upload form, then for each uploaded file
    rejects anything whose extension is not pdf/doc/docx, saves it, runs
    ``ResumeParser`` for name and email, and stores font/image/table/text
    statistics gathered by the project's PDF or DOCX helpers. Other parser
    fields (education, skills, experience, ...) are not persisted by this
    view. If saving raises ``IntegrityError`` a warning is flashed and the
    user is redirected to ``'homepage'``. An invalid form is re-rendered
    as-is.

    On any other method: renders ``base.html`` with an empty upload form.
    """
    # Local import so this block does not depend on the file's import list.
    import subprocess

    if request.method == 'POST':
        file_form = UploadResumeModelForm(request.POST, request.FILES)
        files = request.FILES.getlist('resume')
        if file_form.is_valid():
            for file in files:
                try:
                    # saving the file
                    resume = Resume(resume=file)

                    fileExtension = str(file).split(".")[-1]
                    if fileExtension not in ("pdf", "doc", "docx"):
                        messages.warning(
                            request,
                            'Please provide .pdf or .doc or .docx resume')
                        return redirect('homepage')
                    resume.save()

                    filePath = os.path.join(settings.MEDIA_ROOT,
                                            resume.resume.name)
                    # extracting resume entities
                    parser = ResumeParser(filePath)
                    data = parser.get_extracted_data()
                    resume.name = data.get('name')
                    resume.doctype = fileExtension
                    resume.email = data.get('email')
                    if resume.email is None:
                        resume.email = ''

                    if resume.doctype == "pdf":
                        if sys.platform == 'linux':
                            interpreter = "python3.8"
                        elif sys.platform == 'win32':
                            interpreter = "python"
                        else:
                            messages.warning(
                                request,
                                'Compatible with Linux and Windows only')
                            return redirect('homepage')
                        # Pass arguments as a list (no shell): the path comes
                        # from an uploaded file name, so building a shell
                        # string from it would allow command injection.
                        subprocess.run([
                            interpreter,
                            os.path.join(str(settings.BASE_DIR),
                                         "mypdf2txt.py"),
                            "-o", "r1.xml",
                            filePath,
                        ])
                        # r1.xml is the output path given to the converter
                        # above; presumably xmlParser reads that file.
                        fonts, imgCount = xmlParser('r1.xml')
                        resume.fonts = fonts
                        resume.imgCount = imgCount
                        resume.linkedin, resume.mobile_number = get_text_info(
                            filePath)
                        resume.textCount = textCount(filePath)
                        resume.tableCount = pdfTableCount(filePath)
                    else:
                        resume.fonts, resume.tableCount, resume.imgCount = docxXmlParser(
                            filePath)
                        resume.linkedin, resume.mobile_number = getText(
                            filePath)
                    resume.save()
                except IntegrityError as e:
                    print(e)
                    # The file name belongs in the message text; the third
                    # positional argument of messages.warning is extra_tags.
                    messages.warning(
                        request, 'Duplicate resume found: ' + file.name)
                    return redirect('homepage')
            resumes = Resume.objects.all()
            messages.success(request, 'Resumes uploaded!')
            context = {
                'resumes': resumes,
            }
            return render(request, 'base.html', context)
        # Invalid submission: fall through and re-render the bound form so
        # the final render does not hit an unbound ``form`` variable.
        form = file_form
    else:
        form = UploadResumeModelForm()
    return render(request, 'base.html', {'form': form})

示例#10

0

显示文件

文件： views.py 项目： pxian/careercare

def _newline_join_or_none(values):
    """Join ``values`` with ``'\\n '``; return None when ``values`` is None."""
    return '\n '.join(values) if values is not None else None


def uploadresume(request):
    """Handle resume uploads for the signed-in user.

    On POST with a valid form: for each uploaded file, saves a ``Resume``
    owned by ``request.user``, runs ``ResumeParser`` on the stored file,
    copies the extracted fields onto the model (warning when email or
    mobile number is missing), extracts the document text for docx/pdf
    files, stores a summary, and finally redirects to
    ``resumeanalyzer:resumelist``. ``ValidationError`` and
    ``IntegrityError`` for a single file are reported as warnings and do
    not stop the remaining files.

    On POST with an invalid form, or on any other method: renders
    ``uploadresume.html`` with a fresh form.
    """
    if request.method == "POST":
        form = ResumeForm(request.POST, request.FILES)
        if form.is_valid():
            files = request.FILES.getlist("resume")
            for file in files:
                try:
                    resume = Resume(resume=file)
                    resume.user = request.user
                    resume.save()
                    parser = ResumeParser(
                        os.path.join(settings.MEDIA_ROOT, resume.resume.name))
                    data = parser.get_extracted_data()
                    resume.name = data.get("name")

                    resume.email = data.get("email")
                    if resume.email is None:
                        messages.warning(
                            request, "Email is not found in " + file.name +
                            ", please review resume and add in the required information."
                        )

                    resume.mobile_number = data.get("mobile_number")
                    if resume.mobile_number is None:
                        messages.warning(
                            request,
                            "Mobile number is not found in " + file.name +
                            ", please review resume and add in the required information."
                        )

                    resume.experience = _newline_join_or_none(
                        data.get("experience"))
                    resume.education = _newline_join_or_none(
                        data.get("education"))
                    resume.skills = _newline_join_or_none(data.get("skills"))
                    resume.designation = _newline_join_or_none(
                        data.get("designation"))
                    resume.company_name = _newline_join_or_none(
                        data.get("company_names"))

                    text = file.open()
                    if resume.extension() == "docx":
                        text = str(docx2txt.process(text))
                    elif resume.extension() == "pdf":
                        with pdfplumber.open(text) as pdf:
                            # extract_text() may return None for a page with
                            # no extractable text; treat that as empty so the
                            # concatenation cannot raise TypeError.
                            text = "".join(
                                page.extract_text() or ""
                                for page in pdf.pages)
                    resume.summary = summarize(str(text), ratio=0.01)
                    resume.save()
                    messages.add_message(request, messages.INFO,
                                         "Resume(s) uploaded successfully.")
                except ValidationError:
                    messages.warning(request,
                                     "Duplicate resume found: " + file.name)
                except IntegrityError:
                    messages.warning(
                        request,
                        "Encountered a problem with the file, please try again."
                    )
            return redirect("resumeanalyzer:resumelist")
        else:
            form = ResumeForm()
            messages.warning(request, "Please select file.")
    else:
        form = ResumeForm()
    return render(request, "uploadresume.html", {"form": form})

示例#11

0

显示文件

def resume_result_wrapper(resume):
    """Announce ``resume``, parse it and return the extracted data."""
    print_cyan('Extracting data from: {}'.format(resume))
    return ResumeParser(resume).get_extracted_data()

示例#12

0

显示文件

文件： run.py 项目： jacksonx9/pyresparser

def get_init_data():
    """Run ``ResumeParser`` on a fixed block of sample text.

    The sample is a job-posting text passed directly to ``ResumeParser``.
    Returns a one-element list holding the parser's extracted data.
    """
    # One adjacent literal per "\n"-terminated paragraph; the concatenated
    # value is a single string.
    resume_text = (
        "We are Signify, the new company name of Philips Lighting.\n"
        "We’re the world leader in lighting for professionals, consumers and for the Internet of Things. Our passion for sustainability makes us one of the top 10 greenest companies in the world.\n"
        "See #SignifyLife through the eyes of our employees! \n"
        "Working as an Applied AI/ML Scientist at Signify is dynamic. You‘ll be responsible to innovate new breakthrough solutions based on AI/ML. We’re greater together through peer learning in our globally diverse team, with different backgrounds and nationalities.\n"
        "We’re looking for an Applied AI/ML Scientist who’ll be responsible for developing and applying new concepts of machine learning to data. You’ll be part of the Signify Research Team located at the heart of Cambridge, MA; a center of excellence on Artificial Intelligence and Machine learning Innovation for Signify. The team works together with the Signify Global Research, businesses and functions on AI/ML. Its responsibilities are at the heart of driving AI/M innovations for systems, new IoT propositions, and innovative apps. If you are someone who always comes up with great ideas and ready to use your creative mind to solve practical problems, this role may be for you. You will learn and apply new cutting-edge techniques that will be rewarding for your career. With a great freedom to innovate, you will play an active role and grow yourself on AI and ML.\n"
        "What you’ll do\n"
        "Apply your AI/ML knowledge to develop innovative solutions and build first-of-a-kind proof-of-concept demonstrators using data coming from IoT devices, connected lighting systems and other data sources.\n"
        "Actively scout, keep track of, evaluate, and leverage disruptive technologies, and emerging industrial, academic, technological and socioeconomic trends.\n"
        "Transfer technology and share insights and best practices across innovation teams.\n"
        "Generate intellectual property for the company.\n"
        "We look at you to partner with us in building breakthrough AI/ML enabled solutions and apply modern AI techniques to create value from the data. These AI enabled solutions go beyond lighting; we are exploring new applications that leverage the ubiquitous connected nodes of lighting in buildings, homes, cities, retail, industrial facilities, and other external data sources.\n"
        "What you’ll need\n"
        "PhD degree in Computer Science, Electrical Engineering or related fields, MS with strong relevant school or work experience can also be accepted.\n"
        "In-depth technical knowledge of AI, deep learning, and machine learning algorithms with proven experience, including strong knowledge of the mathematical underpinnings behind these various methods; Computer vision/Deep Learning architectures\n"
        "Strong fundamental knowledge of statistical data processing, regression techniques, neural networks, decision trees, clustering, pattern recognition, probability theory, stochastic systems, Bayesian inference, statistical techniques and dimensionality reduction, including mastering the mathematical underpinnings of these methods\n"
        "Applying deep learning models in large-scale AI systems\n"
        "Knowledge of Natural Language Processing is a strong plus.\n"
        "Strong Programming experience with Java, Python, TensorFlow or other related tools\n"
        "Strong experience with opensource technologies to accelerate innovation\n"
        "Strong interpersonal, communication, and presentation skills as well as ability to work in a global team\n"
        "Skills and experience in some of the following areas would be a plus:\n"
        "Experience with distributed software systems, computer networking and big data analytics infrastructure\n"
        "Proficiency in software API design and access control to enable new services and applications\n"
        "Experience in data engineering and visualization\n"
        "Understanding of IoT frameworks and edge computing\n"
        "Prior agile/Scrum experience\n"
        "What you’ll get in return…\n"
        "You will have a unique opportunity to make a difference by innovating new IoT applications, data and analytics enabled systems and services for the Philips Lighting business. You will have a great opportunity to create innovations and technical breakthroughs with major potential business impact, drive them to business successes, protected by relevant IP. You will conduct end-to-end innovation with the business, markets and end-customers by leveraging global teams, and the US entrepreneurial and innovative ecosystem. You will have ample opportunities to partner with world class universities. You will be encouraged to publish and secure your ideas with patents\n"
        "What we promise\n"
        "We’re committed to the continuous development of our employees, using our learning to shape the future of light and create a sustainable future. Join the undisputed leader in the lighting industry and be part of our diverse global team. #WeAreSignify #SignifyLife"
    )
    resume_parser = ResumeParser(resume_text)
    return [resume_parser.get_extracted_data()]

示例#13

0

显示文件

文件： parser.py 项目： mananarora200/Resume-Parser

def parseDocument(filePath):
    """Parse the resume at ``filePath`` and dump the result as JSON.

    The extracted data is written to ``output/<name>.json``, where
    ``<name>`` is the ``'name'`` entry of the parser's result. The
    ``output`` directory is not created here.
    """
    parse = ResumeParser(filePath)
    data = parse.get_extracted_data()
    # The context manager flushes and closes the file even if json.dump
    # raises, instead of leaving the handle open.
    with open(f"output/{data['name']}.json", "w") as fileObject:
        json.dump(data, fileObject)