def main_perdict(config_file):
    """Run the saved model over caseversion combinations and save the results.

    The JSON file at ``config_file`` supplies these keys:

    * ``model_filename``: path of the pickled model to load.
    * ``perdictLocalJson``: passed to ``loadLocalCaseversions``.
    * ``slice_size``: passed to ``getCombinationSlice``.
    * ``sample_step`` (optional, default 1): passed as ``step``.
    * ``perdiction_filename``: prefix of the output file names.

    For every slice yielded by ``getCombinationSlice`` (numbered from 1) two
    files are written: ``<prefix>_<n>.raw.json`` holding the ids and decoded
    labels, and ``<prefix>_<n>.csv`` holding ``output.formatResultCsv`` lines.

    Args:
        config_file: Path to the JSON configuration file.
    """
    with open(config_file, 'r') as f:
        config = json.load(f)

    # A pickle is a byte stream, so the file must be opened in binary mode;
    # text mode breaks on Python 3 and wherever newlines are translated.
    # NOTE(security): pickle.load can execute arbitrary code. Only point
    # 'model_filename' at files from a trusted source.
    with open(config['model_filename'], 'rb') as f:
        model = pickle.load(f)
    logging.info("Loaded model %s", config['model_filename'])

    logging.info("Extracting features")
    predictCaseversions = loadLocalCaseversions(config['perdictLocalJson'])
    comb_it = genAllCombinations(predictCaseversions)

    labels = ['dup', 'merge', 'none']  # TODO: Move to config
    # TODO: this does not align with the fit function
    le = LabelEncoder()
    le.fit(labels)

    slices = getCombinationSlice(config['slice_size'], comb_it,
                                 step=config.get('sample_step', 1))
    # Output files are numbered starting from 1, one pair per slice.
    for slice_num, combinations in enumerate(slices, start=1):
        vectorized_features = extractFeatures(predictCaseversions,
                                              combinations)

        logging.info("Making perdictions")
        perdictions = perdict(vectorized_features, model)
        # This can be interrupted by Ctrl+C

        logging.info("preparing data for saving to file")
        # Map the encoded predictions back to their label strings and turn
        # the result into a plain list so json.dump can serialize it.
        answer = {
            'ids': combinations,
            'perdictions': le.inverse_transform(perdictions).tolist(),
        }

        logging.info("saving to file")
        rawJson = "{perdiction_filename}_{slice_num}.raw.json".format(
            perdiction_filename=config['perdiction_filename'],
            slice_num=str(slice_num))
        with open(rawJson, 'w') as f:
            json.dump(answer, f, indent=2)
        logging.info("%s created", rawJson)

        outputCsv = output.formatResultCsv(answer)
        csv_filename = "{perdiction_filename}_{slice_num}.csv".format(
            perdiction_filename=config['perdiction_filename'],
            slice_num=str(slice_num))
        with open(csv_filename, 'w') as f:
            f.writelines(outputCsv)
        logging.info("%s created", csv_filename)
def main_perdict(config_file):
    """Predict labels for caseversion combinations and write them to disk.

    Steps, all driven by the JSON config at ``config_file``:

    1. Unpickle the model stored at ``config['model_filename']``.
    2. Load caseversions via ``loadLocalCaseversions`` using
       ``config['perdictLocalJson']`` and generate their combinations.
    3. For each slice from ``getCombinationSlice`` (size
       ``config['slice_size']``, step ``config.get('sample_step', 1)``),
       extract features, predict, decode the labels, and write
       ``<perdiction_filename>_<n>.raw.json`` and
       ``<perdiction_filename>_<n>.csv``, where ``n`` counts slices from 1.

    Args:
        config_file: Path to the JSON configuration file.
    """
    with open(config_file, 'r') as config_fh:
        config = json.load(config_fh)

    model_path = config['model_filename']
    # Binary mode is required for pickle data (text mode fails on Python 3
    # and can corrupt the stream where newline translation applies).
    # NOTE(security): unpickling runs arbitrary code from the file; the
    # model file must come from a trusted source.
    with open(model_path, 'rb') as model_fh:
        model = pickle.load(model_fh)
    logging.info("Loaded model %s", model_path)

    logging.info("Extracting features")
    predictCaseversions = loadLocalCaseversions(config['perdictLocalJson'])
    comb_it = genAllCombinations(predictCaseversions)

    # TODO: Move the label list to config.
    # TODO: this does not align with the fit function
    le = LabelEncoder()
    le.fit(['dup', 'merge', 'none'])

    combination_slices = getCombinationSlice(
        config['slice_size'], comb_it, step=config.get('sample_step', 1))

    # enumerate(start=1) supplies the 1-based suffix used in the file names.
    for slice_num, combinations in enumerate(combination_slices, start=1):
        vectorized_features = extractFeatures(predictCaseversions,
                                              combinations)

        logging.info("Making perdictions")
        # This can be interrupted by Ctrl+C
        perdictions = perdict(vectorized_features, model)

        logging.info("preparing data for saving to file")
        # Decode predictions to label strings; .tolist() makes the decoded
        # array serializable by json.dump.
        decoded = le.inverse_transform(perdictions).tolist()
        answer = {'ids': combinations, 'perdictions': decoded}

        logging.info("saving to file")
        name_parts = {
            'perdiction_filename': config['perdiction_filename'],
            'slice_num': str(slice_num),
        }

        rawJson = "{perdiction_filename}_{slice_num}.raw.json".format(
            **name_parts)
        with open(rawJson, 'w') as raw_fh:
            json.dump(answer, raw_fh, indent=2)
        logging.info("%s created", rawJson)

        csv_filename = "{perdiction_filename}_{slice_num}.csv".format(
            **name_parts)
        outputCsv = output.formatResultCsv(answer)
        with open(csv_filename, 'w') as csv_fh:
            csv_fh.writelines(outputCsv)
        logging.info("%s created", csv_filename)
def test_formatResultCsv():
    """Check the CSV lines output.formatResultCsv builds from a result dict.

    Three predictions ("merge", "dup", "none") with their lhs/rhs id pairs
    must yield a header line followed by one line per pair. Each line has
    Yes/No in the Dup? and Merge? columns, six empty columns, a diff URL
    built from the two ids, and then the two ids themselves.
    """
    # (prediction, lhs_id, rhs_id, expected "Dup?", expected "Merge?")
    cases = [
        ("merge", 210204, 210202, "No", "Yes"),
        ("dup", 210205, 210202, "Yes", "No"),
        ("none", 210204, 210208, "No", "No"),
    ]

    inputResult = {
        "perdictions": [prediction for prediction, _, _, _, _ in cases],
        "ids": [
            {"lhs_id": lhs, "rhs_id": rhs} for _, lhs, rhs, _, _ in cases
        ],
    }

    header = (
        "Dup?,Merge?,Reason,Merged in Moztrap?,Case ID 1,Case ID 2,"
        "Case 1 ,Case 2,Diff,CaseVersion ID 1,CaseVersion ID 2\n"
    )
    # NOTE: a commented-out draft of this expectation had the Case ID and
    # Case URL columns populated; the expectation in force leaves those
    # columns empty, hence the run of commas after the Merge? column.
    row_template = (
        "{dup},{merge},,,,,,,"
        "http://shinglyu.github.io/moztrap-new-ui/diff.html"
        "?lhs={lhs}&rhs={rhs},{lhs},{rhs}\n"
    )
    expected = [header] + [
        row_template.format(dup=dup, merge=merge, lhs=lhs, rhs=rhs)
        for _, lhs, rhs, dup, merge in cases
    ]

    assert expected == output.formatResultCsv(inputResult)
def test_formatResultCsv():
    """Verify output.formatResultCsv against a fixed three-row expectation.

    The input pairs the predictions "merge", "dup" and "none" with three
    lhs/rhs id pairs; the expected output is a header line plus one CSV line
    per pair, each terminated by a newline.
    """
    predictions = ["merge", "dup", "none"]
    id_pairs = [
        {"lhs_id": 210204, "rhs_id": 210202},
        {"lhs_id": 210205, "rhs_id": 210202},
        {"lhs_id": 210204, "rhs_id": 210208},
    ]
    inputResult = {"perdictions": predictions, "ids": id_pairs}

    header_line = "Dup?,Merge?,Reason,Merged in Moztrap?,Case ID 1,Case ID 2,Case 1 ,Case 2,Diff,CaseVersion ID 1,CaseVersion ID 2\n"
    merge_line = "No,Yes,,,,,,,http://shinglyu.github.io/moztrap-new-ui/diff.html?lhs=210204&rhs=210202,210204,210202\n"
    dup_line = "Yes,No,,,,,,,http://shinglyu.github.io/moztrap-new-ui/diff.html?lhs=210205&rhs=210202,210205,210202\n"
    none_line = "No,No,,,,,,,http://shinglyu.github.io/moztrap-new-ui/diff.html?lhs=210204&rhs=210208,210204,210208\n"

    # NOTE: an older, commented-out version of this expectation filled in the
    # Case ID and Case URL columns and had no trailing newlines; the lines
    # above, with those columns empty, are the expectation in force.
    expected = [header_line, merge_line, dup_line, none_line]

    actual = output.formatResultCsv(inputResult)
    assert expected == actual