def extract_from_date(from_date_string, to_date_string):
    """Return a "d,m,y" string describing the start of a from/to date pair.

    The start date is parsed with ``format_date`` followed by
    ``utils.formatted_date``.  Two fallbacks depend on the parsed year:

    * year == 100000: the end date is parsed and returned instead.
    * falsy year: if ``utils.date_difference`` reports no issue and the end
      date has a usable year, the start date is estimated as the end date
      minus the reported difference.

    When neither fallback produces a value the components of the originally
    parsed start date are returned unchanged, so callers still need to check
    the year component themselves.

    NOTE(review): 100000 looks like a sentinel year produced by
    ``utils.formatted_date`` -- confirm its exact meaning there.
    NOTE(review): the estimate subtracts the difference from a day count, so
    it treats ``utils.date_difference`` as returning days -- confirm.

    :param from_date_string: raw start date, as stored on the resume
    :param to_date_string: raw end date, as stored on the resume
    :return: the string "<day>,<month>,<year>"
    """
    d, m, y = utils.formatted_date(format_date(from_date_string))
    if y == 100000:
        d, m, y = utils.formatted_date(format_date(to_date_string))
    elif not y:
        issue_flag, date_diff = utils.date_difference(from_date_string,
                                                      to_date_string)
        if not issue_flag:
            ld, lm, ly = utils.formatted_date(format_date(to_date_string))
            if ly and ly != 100000:
                # Flatten the end date to a day count (30-day months,
                # 365-day years) and step back by the difference.
                start_days = ld + lm * 30 + ly * 365 - date_diff
                # Decode with the same 365/30 scheme used to encode.  The
                # previous "% 12" / "/ 12" split did not invert the encoding
                # and could yield month values far above 12.
                # NOTE(review): a 30-day month inside a 365-day year is only
                # approximate; late-December inputs can spill into the next
                # year.
                years, day_of_year = divmod(start_days, 365)
                months, days = divmod(day_of_year, 30)
                d, m, y = int(days), int(months), int(years)
    return "{},{},{}".format(d, m, y)
# Example #2
# 0
def compute_median_career_time(db_utils,
                               work_meta_info,
                               min_conf=0,
                               soc_level=configurator.commons.SOC_LEVEL):
    """Collect, per title, the durations people took to reach that title.

    Every non-duplicate resume is scanned.  Work-experience slots are visited
    from index 14 down to index 0 of ``work_meta_info``; the first slot with a
    usable start date becomes the career start, and for each usable slot the
    value returned by ``utils.date_difference(career_start, slot_start)`` is
    recorded.  Despite the function name, no median is computed here.

    :param db_utils: object exposing ``fetch_data(collection, mode, query)``
    :param work_meta_info: per-slot dicts holding the resume field names under
        the keys 'from', 'to', 'title' and 'soc'
    :param min_conf: threshold forwarded to ``is_valid_work_experience``
    :param soc_level: number of leading characters of the SOC value to keep
    :return: dict mapping ``(title, soc_prefix)`` to a list of durations
    """
    first_slot = 1
    last_slot = 15
    # Visit slots from the highest index down to the lowest.
    # NOTE(review): presumably higher indexes are earlier jobs -- confirm.
    slot_order = range(last_slot - 1, first_slot - 2, -1)
    durations_by_title = {}
    processed = 0

    # Cursor over all resumes that are not flagged as duplicates.
    for resume in db_utils.fetch_data("resume", "cursor",
                                      {"Duplicate": False}):
        processed += 1
        if processed % 10000 == 0:
            print("Processed {} resumes".format(processed))

        career_start_date = None
        for slot in slot_order:
            meta = work_meta_info[slot]
            # Only valid work experiences contribute a duration.
            if not is_valid_work_experience(resume, meta, min_conf):
                continue

            current_date = extract_from_date(resume[meta['from']],
                                             resume[meta['to']])
            year = [int(part) for part in current_date.split(',')][2]
            if year == 100000 or not year:
                # No usable start date for this slot.
                continue

            # The first usable date found marks the start of the career.
            if not career_start_date:
                career_start_date = current_date

            title_key = (resume[meta['title']],
                         resume[meta['soc']][:soc_level])
            # The bucket is created even if the difference below is rejected.
            bucket = durations_by_title.setdefault(title_key, [])
            issue_flag, elapsed = utils.date_difference(career_start_date,
                                                        current_date)
            if not issue_flag:
                bucket.append(elapsed)
    return durations_by_title
# Example #3
# 0
def create_major_titles_map(work_meta_info, end_title_limit, start_title_limit,
                            majors, degrees):
    """Build a major -> degree -> title histogram from matching resumes.

    Resumes whose latest education major is in ``majors`` and whose latest
    degree is in ``degrees`` are fetched.  For each resume with a major, a
    normalised degree and a usable education date, every valid work
    experience that passes the date filter below is counted under its title,
    keyed by the integer part of the experience accumulated so far on that
    resume (the running sum of ``utils.date_difference`` results).

    ``end_title_limit`` and ``start_title_limit`` are accepted but not used in
    this function body.

    NOTE(review): 100000 looks like a sentinel year coming from
    ``utils.formatted_date`` -- confirm its meaning there.

    :param work_meta_info: iterable of dicts holding the resume field names
        under the keys 'from', 'to' and 'title'
    :param majors: values allowed for ``latest_ed_major``
    :param degrees: values allowed for ``latest_ed_degree``
    :return: ``{major: {degree: {"valid_resumes": int,
        "title_dict": {title: {experience_bucket: count}}}}}``
    """
    major_field = "latest_ed_major"
    degree_field = "latest_ed_degree"
    latest_ed_from = "latest_ed_from"
    latest_ed_to = "latest_ed_to"

    query = {
        'latest_ed_major': {'$in': majors},
        'latest_ed_degree': {'$in': degrees},
    }
    resumes = dbutils.fetch_data('resume', 'cursor', query)

    major_title_map = {}
    resume_count = 0

    for resume in resumes:
        degree = norm_degree(resume[degree_field])

        # Education date: prefer the start date, fall back to the end date.
        d, m, y = utils.formatted_date(format_date(resume[latest_ed_from]))
        if y == 100000 or not y:
            d, m, y = utils.formatted_date(format_date(resume[latest_ed_to]))
        education_date_ok = not (y == 100000 or not y)

        major = resume[major_field]
        if major and degree and education_date_ok:
            degree_map = major_title_map.setdefault(major, {})
            if degree not in degree_map:
                degree_map[degree] = {"valid_resumes": 0, "title_dict": {}}
            stats = degree_map[degree]
            stats["valid_resumes"] += 1
            title_dict = stats["title_dict"]

            total_exp = 0
            for work_info_obj in work_meta_info:
                if not is_valid_work_info(resume, work_info_obj):
                    continue

                # Flatten the work dates to day counts (30-day months,
                # 360-day years); the end date is tried first and the start
                # date is only parsed when the end date is unusable.
                wtd, wtm, wty = utils.formatted_date(
                    format_date(resume[work_info_obj['to']]))
                to_days = 0
                from_days = 0
                wfy = 0
                work_date_ok = not (wty == 100000 or not wty)
                if work_date_ok:
                    to_days = wtd + wtm * 30 + wty * 12 * 30
                else:
                    wfd, wfm, wfy = utils.formatted_date(
                        format_date(resume[work_info_obj['from']]))
                    work_date_ok = not (wfy == 100000 or not wfy)
                    if work_date_ok:
                        from_days = wfd + wfm * 30 + wfy * 12 * 30

                # Keep the experience when either work year is 100000, or
                # when its usable date does not precede the education date.
                accepted = wfy == 100000 or wty == 100000
                if not accepted and work_date_ok:
                    education_days = d + m * 30 + y * 12 * 30
                    accepted = (to_days > education_days or
                                (not to_days and
                                 from_days >= education_days))
                if not accepted:
                    continue

                issue_flag, span = utils.date_difference(
                    resume[work_info_obj["from"]],
                    resume[work_info_obj["to"]])
                if issue_flag:
                    continue

                title = resume[work_info_obj["title"]]
                bucket_counts = title_dict.setdefault(title, {})
                # Bucket by the experience gathered before this position.
                exp = int(total_exp)
                bucket_counts[exp] = bucket_counts.get(exp, 0) + 1
                total_exp += span

        resume_count += 1
        if resume_count % 10000 == 0:
            print("{} Resumes Processed".format(resume_count))
    print("{} Resumes Processed".format(resume_count))
    return major_title_map
def _has_confident_title(resume, work_ex_meta_info, min_conf):
    """Return a truthy value when the resume has a usable title in this slot.

    A slot is usable when its title and match fields are present and truthy
    and its confidence field is present and at least ``min_conf``.
    """
    return (work_ex_meta_info["title"] in resume and
            resume[work_ex_meta_info["title"]] and
            work_ex_meta_info["match"] in resume and
            resume[work_ex_meta_info["match"]] and
            work_ex_meta_info["confidence"] in resume and
            resume[work_ex_meta_info["confidence"]] >= min_conf)


def _edge_stats(edges, edge):
    """Return the stats dict for ``edge``, creating an empty one if needed."""
    if edge not in edges:
        edges[edge] = {'count': 0, 'time_intervals': []}
    return edges[edge]


def create_edges_from_resume(resume, work_meta_info, min_conf, edges, nodes,
                             major):
    """Add the title transitions found in one resume to ``edges``/``nodes``.

    Work-experience slots are walked from index 14 down to 1; each slot is
    paired with the slot at ``index - 1``.  When both slots carry a confident
    title and the titles differ, a direct edge ``(from_title, to_title)`` is
    counted.  The first counted edge must start on or after the education
    date; later ones are accepted unconditionally.  Afterwards every pair of
    titles that are two or more steps apart on the collected path receives a
    fractional "skip" edge weighted ``1 / distance``.

    Nothing happens when the education date has no usable year.  ``major`` is
    accepted but not used in this function body.

    :param resume: mapping of resume field names to values
    :param work_meta_info: indexable sequence of per-slot dicts with the keys
        'title', 'match', 'confidence', 'from' and 'to'
    :param min_conf: minimum confidence a slot needs to be considered
    :param edges: dict mutated in place; maps ``(from_title, to_title)`` to
        ``{'count': number, 'time_intervals': list}``
    :param nodes: set-like collection mutated in place with the titles seen
    :return: None
    """
    d, m, y = [
        int(x)
        for x in extract_from_date(resume["latest_ed_from"],
                                   resume["latest_ed_to"]).split(',')
    ]
    if y == 100000 or not y:
        # No usable education date, so the resume contributes nothing.
        return
    education_days = d + m * 30 + y * 365

    valid_index = False
    path = []
    title_time_map = {}
    for index in reversed(range(1, 15)):
        current_work_ex_meta_info = work_meta_info[index]
        next_work_ex_meta_info = work_meta_info[index - 1]
        if not (_has_confident_title(resume, current_work_ex_meta_info,
                                     min_conf) and
                _has_confident_title(resume, next_work_ex_meta_info,
                                     min_conf) and
                resume[current_work_ex_meta_info["title"]] !=
                resume[next_work_ex_meta_info["title"]]):
            continue

        # There is a candidate edge from the slot at index to the one at
        # (index - 1).
        current_date = extract_from_date(
            resume[current_work_ex_meta_info['from']],
            resume[current_work_ex_meta_info['to']])
        cd, cm, cy = [int(x) for x in current_date.split(',')]
        if not (valid_index or
                education_days <= cd + cm * 30 + cy * 365):
            # Still before the education date and no edge accepted yet.
            continue

        from_title = resume[current_work_ex_meta_info["title"]]
        to_title = resume[next_work_ex_meta_info["title"]]
        if not valid_index:
            valid_index = True
            path.append(from_title)
            title_time_map[from_title] = current_date
        path.append(to_title)

        stats = _edge_stats(edges, (from_title, to_title))
        nodes.add(from_title)
        nodes.add(to_title)
        stats['count'] += 1

        next_date = extract_from_date(
            resume[next_work_ex_meta_info['from']],
            resume[next_work_ex_meta_info['to']])
        title_time_map[to_title] = next_date
        issue_flag, date_diff = utils.date_difference(current_date, next_date)
        if not issue_flag:
            stats['time_intervals'].append(date_diff)

    # Skip edges: connect titles that are two or more steps apart.
    for i, node1 in enumerate(path):
        current_date = title_time_map.get(node1, '')
        for j, node2 in enumerate(path[i + 2:]):
            stats = _edge_stats(edges, (node1, node2))
            # 1.0 forces true division; under Python 2 "1 / (j + 2)" is
            # integer division and always added 0.
            stats['count'] += 1.0 / (j + 2)
            next_date = title_time_map.get(node2, '')
            if current_date and next_date:
                issue_flag, date_diff = utils.date_difference(
                    current_date, next_date)
                if not issue_flag:
                    stats['time_intervals'].append(date_diff)
# Example #5
# 0
    def run(self):
        """Compare interest paid under two term lengths and export the result.

        Asks for an output file, reads the mortgage inputs and the rate-change
        table from the UI, computes the interest paid for the base and second
        scenarios with the calculator matching the selected interest type
        ('ANN', 'IOM' or 'PIO'), and passes the figures to ``write_to_file``.

        Invalid input is reported in a message box and nothing is written.
        Returns None in every case.
        """
        desktop = os.path.expanduser('~/Desktop/output.csv')
        filename = qtw.QFileDialog.getSaveFileName(self, 'Save File', desktop)
        if not filename[0]:
            # No path was chosen, so there is nothing to compute or write.
            return

        interest_type = self.ui.interest_type_box.currentText()

        try:
            mortgage_value = int(self.ui.mortgage_input.text())
            # Entered percentage divided by 100 * 12
            # (presumably annual percent -> monthly fraction).
            interest_rate = float(self.ui.initial_interest_input.text()) / (100 * 12)
            term_length_base = int(self.ui.base_full_term_input.text())
            term_length_second = int(self.ui.second_scen_full_term_input.text())
            if interest_type == 'PIO':
                IO_term_length_base = int(self.ui.base_IIOT_input.text())
                IO_term_length_second = int(self.ui.second_scen_IIOT_input.text())
        except ValueError:
            qtw.QMessageBox.critical(
                self, 'Warning', 'Please ensure all information is put in correctly'
            )
            return

        # rate_change_list holds (months at this rate, rate) pairs for the
        # calculators; new_rate_list holds the (date, rate) rows for the file.
        rate_change_list = []
        new_rate_list = [(self.ui.date_input.date().toString('dd/MM/yyyy'), 100 * 12 * interest_rate)]
        previous_date = self.ui.date_input.date().toPyDate()
        rate_value = interest_rate
        for i in range(self.ui.rate_changes_table.rowCount()):
            change_date = self.ui.rate_changes_table.item(i, 0)
            change_rate = self.ui.rate_changes_table.item(i, 1)
            if change_date is None or change_rate is None:
                # Rows with an empty cell are skipped.
                continue

            # Malformed cells are reported like any other bad input instead
            # of raising an uncaught ValueError.
            try:
                change_date_value = datetime.strptime(change_date.text(), '%d/%m/%Y')
                change_rate_value = float(change_rate.text()) / (100 * 12)
            except ValueError:
                qtw.QMessageBox.critical(
                    self, 'Warning', 'Please ensure all information is put in correctly'
                )
                return

            n_months = date_difference(change_date_value, previous_date)
            if not n_months > 0:
                qtw.QMessageBox.critical(
                    self, 'Warning', 'Please ensure dates are in the correct order.'
                )
                return

            # The rate in force before this change applied for n_months.
            rate_change_list.append((n_months, rate_value))
            new_rate_list.append((change_date.text(), change_rate.text()))
            rate_value = change_rate_value
            previous_date = change_date_value

        # The last rate gets a duration of 0 months.
        rate_change_list.append((0, rate_value))

        if interest_type == 'ANN':
            interest_paid_base = annuity_total(mortgage_value, interest_rate, term_length_base, rate_change_list) - mortgage_value
            interest_paid_second = annuity_total(mortgage_value, interest_rate, term_length_second, rate_change_list) - mortgage_value
        elif interest_type == 'IOM':
            interest_paid_base = interest_only_total(mortgage_value, interest_rate, term_length_base, rate_change_list) - mortgage_value
            interest_paid_second = interest_only_total(mortgage_value, interest_rate, term_length_second, rate_change_list) - mortgage_value
        elif interest_type == 'PIO':
            interest_paid_base = partial_interest_only_total(mortgage_value, interest_rate, term_length_base, IO_term_length_base, rate_change_list) - mortgage_value
            interest_paid_second = partial_interest_only_total(mortgage_value, interest_rate, term_length_second, IO_term_length_second, rate_change_list) - mortgage_value
        else:
            # Without this branch the totals below would be unbound.
            qtw.QMessageBox.critical(
                self, 'Warning', 'Unsupported interest type: {}'.format(interest_type)
            )
            return
        base_minus_second = interest_paid_base - interest_paid_second

        labels = [
            'Mortgage Value',
            'Initial Interest Rate',
            'Base Term Length',
            'Second Scenario Term Length',
            'Interest Paid Base',
            'Interest Paid Second',
            'Difference'
        ]
        data = [
            mortgage_value,
            100 * 12 * interest_rate,
            term_length_base,
            term_length_second,
            interest_paid_base,
            interest_paid_second,
            base_minus_second,
        ]

        write_to_file(filename[0], labels, data, new_rate_list)