Example #1
0
def main():
    """Scan demo/data_ for pdf/docx resumes and print each rule-based parse."""
    # BUG FIX: the original passed the literal string '__file__' to
    # os.path.dirname, which always returns '' and made the sys.path entry
    # relative to the CWD. Use the __file__ variable instead.
    sys.path.append(os.path.join(os.path.dirname(__file__), '..'))

    from keras_en_parser_and_analyzer.library.rule_based_parser import ResumeParser
    from keras_en_parser_and_analyzer.library.utility.io_utils import read_pdf_and_docx

    current_dir = os.path.dirname(__file__)
    # Use '!=' for string comparison: 'is not' tests identity and triggers a
    # SyntaxWarning on modern CPython.
    current_dir = current_dir if current_dir != '' else '.'
    data_dir_path = current_dir + '/demo/data_' # directory to scan for any pdf and docx files

    collected = read_pdf_and_docx(data_dir_path, command_logging=True)
    for file_path, file_content in collected.items():

        print('parsing file: ', file_path)

        parser = ResumeParser()
        parser.parse(file_content)
        print(parser.raw) # print out the raw contents extracted from pdf or docx files

        if parser.unknown is False:
            print(parser.summary())

        print('++++++++++++++++++++++++++++++++++++++++++')

    print('count: ', len(collected))
def main():
    """Scan demo/data for resumes and launch the GUI annotator on each file."""
    current_dir = os.path.dirname(__file__)
    # Use '!=' for string comparison: 'is not' tests identity and triggers a
    # SyntaxWarning on modern CPython.
    current_dir = current_dir if current_dir != '' else '.'

    data_dir_path = current_dir + '/demo/data'  # directory to scan for any pdf files
    training_data_dir_path = current_dir + '/demo/data/training_data'
    # Call the annotator directly; the original wrapped the call in a set
    # literal ({...}) inside the lambda, allocating a throwaway set per file.
    collected = read_pdf_and_docx(
        data_dir_path,
        command_logging=True,
        callback=lambda index, file_path, file_content:
            gui_annotate(training_data_dir_path, index, file_path, file_content))
    print('count: ', len(collected))
Example #3
0
def main():
    """Scan data/resume_samples and parse each resume with the DL-based parser."""
    sys.path.append(os.path.join(os.path.dirname(__file__), '..'))

    from keras_en_parser_and_analyzer.library.dl_based_parser import ResumeParser
    from keras_en_parser_and_analyzer.library.utility.io_utils import read_pdf_and_docx

    current_dir = os.path.dirname(__file__)
    # Use '!=' for string comparison: 'is not' tests identity and triggers a
    # SyntaxWarning on modern CPython.
    current_dir = current_dir if current_dir != '' else '.'
    data_dir_path = current_dir + '/data/resume_samples' # directory to scan for any pdf and docx files

    def parse_resume(file_path, file_content):
        """Parse one resume and print its summary plus the experience entries."""
        print('parsing file: ', file_path)

        parser = ResumeParser()
        parser.load_model(current_dir + '/models')
        parser.parse(file_content)

        if parser.unknown is False:
            summary, expertise, experience, knowledge, project = parser.summary()
            # Encode to UTF-8 bytes so printing cannot fail on non-UTF consoles.
            print(summary.encode('utf-8'))
            vocabulary = [entry.encode("utf-8") for entry in experience]
            print(vocabulary)
        print('++++++++++++++++++++++++++++++++++++++++++')

    # Call parse_resume directly; the original wrapped the call in a set
    # literal ({...}) inside the lambda, allocating a throwaway set per file.
    collected = read_pdf_and_docx(
        data_dir_path,
        command_logging=True,
        callback=lambda index, file_path, file_content:
            parse_resume(file_path, file_content))
    print('count: ', len(collected))
Example #4
0
def main():
    """Parse every pdf/docx resume found under data/resume_samples and print results."""
    data_dir_path = './data/resume_samples' # directory to scan for any pdf and docx files
    collected = read_pdf_and_docx(data_dir_path)

    for file_path, file_content in collected.items():
        print('parsing file: ', file_path)

        resume_parser = ResumeParser()
        resume_parser.parse(file_content)
        # Raw text extracted from the pdf or docx file.
        print(resume_parser.raw)

        if resume_parser.unknown is False:
            print(resume_parser.summary())

        print('++++++++++++++++++++++++++++++++++++++++++')

    print('count: ', len(collected))