示例#1
0
def main_perdict(config_file):
    """Load a pickled model and write its predictions, one slice at a time.

    The JSON file at ``config_file`` is read for these keys:

    * ``model_filename`` -- path of the pickled model.
    * ``perdictLocalJson`` -- passed to ``loadLocalCaseversions``.
    * ``slice_size`` -- passed to ``getCombinationSlice``.
    * ``sample_step`` -- optional ``step`` for ``getCombinationSlice``
      (defaults to 1).
    * ``perdiction_filename`` -- stem of the output file names.

    For slice number ``n`` (counting from 1) two files are written:
    ``<perdiction_filename>_<n>.raw.json`` and
    ``<perdiction_filename>_<n>.csv``.
    """
    with open(config_file, 'r') as f:
        config = json.load(f)

    # pickle.load needs a binary file object; text mode ('r') breaks on
    # Python 3 and can corrupt the stream on platforms that translate newlines.
    # NOTE(review): unpickling can execute arbitrary code -- the model file
    # must come from a trusted source.
    with open(config['model_filename'], 'rb') as f:
        model = pickle.load(f)

    logging.info("Loaded model " + config['model_filename'])

    logging.info("Extracting features")
    predictCaseversions = loadLocalCaseversions(config['perdictLocalJson'])
    comb_it = genAllCombinations(predictCaseversions)

    labels = ['dup', 'merge', 'none']  # TODO: Move to config
    # TODO: this does not align with the fit function
    le = LabelEncoder()
    le.fit(labels)

    slices = getCombinationSlice(config['slice_size'],
                                 comb_it,
                                 step=config.get('sample_step', 1))
    # Slices are numbered from 1; the number becomes part of the file names.
    for slice_num, combinations in enumerate(slices, 1):
        vectorized_features = extractFeatures(predictCaseversions,
                                              combinations)
        logging.info("Making perdictions")
        # This can be interrupted by Ctrl+C
        perdictions = perdict(vectorized_features, model)

        logging.info("preparing data for saving to file")
        # Map the encoded predictions back to their label strings and turn
        # the result into a plain list so json.dump can serialize it.
        answer = {
            'ids': combinations,
            'perdictions': le.inverse_transform(perdictions).tolist(),
        }

        logging.info("saving to file")
        # Both output files of a slice share this stem.
        stem = "{perdiction_filename}_{slice_num}".format(
            perdiction_filename=config['perdiction_filename'],
            slice_num=slice_num)

        rawJson = stem + ".raw.json"
        with open(rawJson, 'w') as f:
            json.dump(answer, f, indent=2)
        logging.info(rawJson + " created")

        outputCsv = output.formatResultCsv(answer)

        csv_filename = stem + ".csv"
        with open(csv_filename, 'w') as f:
            f.writelines(outputCsv)
        logging.info(csv_filename + " created")
示例#2
0
def main_perdict(config_file):
    """Load a pickled model and write its predictions, one slice at a time.

    The JSON file at ``config_file`` is read for these keys:

    * ``model_filename`` -- path of the pickled model.
    * ``perdictLocalJson`` -- passed to ``loadLocalCaseversions``.
    * ``slice_size`` -- passed to ``getCombinationSlice``.
    * ``sample_step`` -- optional ``step`` for ``getCombinationSlice``
      (defaults to 1).
    * ``perdiction_filename`` -- stem of the output file names.

    For slice number ``n`` (counting from 1) two files are written:
    ``<perdiction_filename>_<n>.raw.json`` and
    ``<perdiction_filename>_<n>.csv``.
    """
    with open(config_file, 'r') as f:
        config = json.load(f)

    # pickle.load needs a binary file object; text mode ('r') breaks on
    # Python 3 and can corrupt the stream on platforms that translate newlines.
    # NOTE(review): unpickling can execute arbitrary code -- the model file
    # must come from a trusted source.
    with open(config['model_filename'], 'rb') as f:
        model = pickle.load(f)

    logging.info("Loaded model " + config['model_filename'])

    logging.info("Extracting features")
    predictCaseversions = loadLocalCaseversions(config['perdictLocalJson'])
    comb_it = genAllCombinations(predictCaseversions)

    labels = ['dup', 'merge', 'none']  # TODO: Move to config
    # TODO: this does not align with the fit function
    le = LabelEncoder()
    le.fit(labels)

    slices = getCombinationSlice(config['slice_size'], comb_it,
                                 step=config.get('sample_step', 1))
    # Slices are numbered from 1; the number becomes part of the file names.
    for slice_num, combinations in enumerate(slices, 1):
        vectorized_features = extractFeatures(predictCaseversions, combinations)
        logging.info("Making perdictions")
        # This can be interrupted by Ctrl+C
        perdictions = perdict(vectorized_features, model)

        logging.info("preparing data for saving to file")
        # Map the encoded predictions back to their label strings and turn
        # the result into a plain list so json.dump can serialize it.
        answer = {
            'ids': combinations,
            'perdictions': le.inverse_transform(perdictions).tolist(),
        }

        logging.info("saving to file")
        # Both output files of a slice share this stem.
        stem = "{perdiction_filename}_{slice_num}".format(
            perdiction_filename=config['perdiction_filename'],
            slice_num=slice_num,
        )

        rawJson = stem + ".raw.json"
        with open(rawJson, 'w') as f:
            json.dump(answer, f, indent=2)
        logging.info(rawJson + " created")

        outputCsv = output.formatResultCsv(answer)

        csv_filename = stem + ".csv"
        with open(csv_filename, 'w') as f:
            f.writelines(outputCsv)
        logging.info(csv_filename + " created")
def test_formatResultCsv():
    """Check the CSV lines formatResultCsv builds for three predicted pairs.

    The fixture holds one pair per label ("merge", "dup", "none"); the
    expected output is a header line followed by one line per pair, each
    terminated by a newline.
    """
    id_pairs = [
        (210204, 210202),
        (210205, 210202),
        (210204, 210208),
    ]
    inputResult = {
        "perdictions": ["merge", "dup", "none"],
        "ids": [{"lhs_id": lhs, "rhs_id": rhs} for lhs, rhs in id_pairs],
    }

    header = "Dup?,Merge?,Reason,Merged in Moztrap?,Case ID 1,Case ID 2,Case 1 ,Case 2,Diff,CaseVersion ID 1,CaseVersion ID 2\n"
    # Only the Dup?/Merge? flags, the diff link and the two case-version ids
    # are expected to be filled in; the columns in between stay empty.
    rows = [
        "No,Yes,,,,,,,http://shinglyu.github.io/moztrap-new-ui/diff.html?lhs=210204&rhs=210202,210204,210202\n",
        "Yes,No,,,,,,,http://shinglyu.github.io/moztrap-new-ui/diff.html?lhs=210205&rhs=210202,210205,210202\n",
        "No,No,,,,,,,http://shinglyu.github.io/moztrap-new-ui/diff.html?lhs=210204&rhs=210208,210204,210208\n",
    ]
    expected = [header] + rows

    actual = output.formatResultCsv(inputResult)
    assert expected == actual
def test_formatResultCsv():
    """formatResultCsv should return a header line plus one line per pair.

    Three pairs are fed in, labelled "merge", "dup" and "none" in that
    order, and the returned list is compared against the exact CSV text.
    """
    predicted_labels = ["merge", "dup", "none"]
    compared_ids = [
        {"lhs_id": 210204, "rhs_id": 210202},
        {"lhs_id": 210205, "rhs_id": 210202},
        {"lhs_id": 210204, "rhs_id": 210208},
    ]
    inputResult = {"perdictions": predicted_labels, "ids": compared_ids}

    # The Reason, "Merged in Moztrap?", Case ID and Case link columns are
    # expected to be left blank; every line ends with a newline.
    expected = [
        "Dup?,Merge?,Reason,Merged in Moztrap?,Case ID 1,Case ID 2,Case 1 ,Case 2,Diff,CaseVersion ID 1,CaseVersion ID 2\n",
        "No,Yes,,,,,,,http://shinglyu.github.io/moztrap-new-ui/diff.html?lhs=210204&rhs=210202,210204,210202\n",
        "Yes,No,,,,,,,http://shinglyu.github.io/moztrap-new-ui/diff.html?lhs=210205&rhs=210202,210205,210202\n",
        "No,No,,,,,,,http://shinglyu.github.io/moztrap-new-ui/diff.html?lhs=210204&rhs=210208,210204,210208\n",
    ]

    formatted = output.formatResultCsv(inputResult)
    assert expected == formatted