def explain(self, doc, truncate_len=512, all_targets=False):
    """
    Highlights text to explain prediction

    Args:
        doc (str): text of document
        truncate_len(int): truncate document to this many words
        all_targets(bool):  If True, show visualization for each target.

    Returns:
        The value returned by ``TextExplainer.show_prediction``, or None
        (after emitting a warning) when eli5 cannot be imported.

    Raises:
        TypeError: if ``doc`` is not a str.
    """
    try:
        import eli5  # noqa: F401  (imported to verify the package is present)
        from eli5.lime import TextExplainer
    except ImportError:
        msg = 'ktrain requires a forked version of eli5 to support tf.keras. '+\
              'Install with: pip3 install git+https://github.com/amaiya/eli5@tfkeras_0_10_1'
        warnings.warn(msg)
        return

    # Validate the input before running the model on it.
    if not isinstance(doc, str):
        raise TypeError('text must of type str')

    # When all targets are requested, eli5 receives targets=None.
    prediction = [self.predict(doc)] if not all_targets else None

    if self.preproc.is_nospace_lang():
        doc = self.preproc.process_chinese([doc])
        doc = doc[0]
    doc = ' '.join(doc.split()[:truncate_len])

    te = TextExplainer(random_state=42)
    te.fit(doc, self.predict_proba)
    return te.show_prediction(target_names=self.preproc.get_classes(), targets=prediction)
def explain(self, doc, truncate_len=512, all_targets=False, n_samples=2500):
    """
    Highlights text to explain prediction

    Args:
        doc (str): text of document
        truncate_len(int): truncate document to this many words
        all_targets(bool):  If True, show visualization for each target.
        n_samples(int): number of samples to generate and train on.
                        Larger values give better results, but will take more time.
                        Lower this value if explain is taking too long.

    Returns:
        The value returned by ``TextExplainer.show_prediction``, or None
        (after emitting a warning) when ``doc`` is a sentence pair, when
        ``self.c`` is falsy, or when the eli5 fork is missing or its
        ``KTRAIN_ELI5_TAG`` does not match the expected one.

    Raises:
        TypeError: if ``doc`` is not a str.
    """
    _, is_pair = detect_text_format(doc)
    if is_pair:
        warnings.warn(
            "currently_unsupported: explain does not currently support sentence pair classification"
        )
        return
    if not self.c:
        warnings.warn(
            "currently_unsupported: explain does not support text regression"
        )
        return

    try:
        import eli5
        from eli5.lime import TextExplainer
    except ImportError:
        msg = (
            "ktrain requires a forked version of eli5 to support tf.keras. "
            + "Install with: pip install https://github.com/amaiya/eli5/archive/refs/heads/tfkeras_0_10_1.zip"
        )
        warnings.warn(msg)
        return

    # The fork advertises its revision through KTRAIN_ELI5_TAG; reject any
    # eli5 build that lacks the attribute or carries a different tag.
    if (not hasattr(eli5, "KTRAIN_ELI5_TAG")
            or eli5.KTRAIN_ELI5_TAG != KTRAIN_ELI5_TAG):
        msg = (
            "ktrain requires a forked version of eli5 to support tf.keras. It is either missing or not up-to-date. "
            + "Uninstall the current version and install/re-install the fork with: pip install https://github.com/amaiya/eli5/archive/refs/heads/tfkeras_0_10_1.zip"
        )
        warnings.warn(msg)
        return

    if not isinstance(doc, str):
        raise TypeError("text must of type str")

    # When all targets are requested, eli5 receives targets=None.
    prediction = [self.predict(doc)] if not all_targets else None

    if self.preproc.is_nospace_lang():
        doc = self.preproc.process_chinese([doc])
        doc = doc[0]
    doc = " ".join(doc.split()[:truncate_len])

    te = TextExplainer(random_state=42, n_samples=n_samples)
    te.fit(doc, self.predict_proba)
    return te.show_prediction(target_names=self.preproc.get_classes(), targets=prediction)
def eli5visual(pData, pDesc, Idx, pAccountName, pVec, nTopKeywrd, pRootDir):
    """
    Build and save an HTML LIME explanation for each requested row.

    For every entry of ``Idx`` the row's intent is read from
    ``pData['Intent']``, the matching model is loaded with ``loadmodel``,
    a ``TextExplainer`` is fitted on ``pData[pDesc]`` for that row, and the
    formatted HTML is passed to ``savehtml``.

    ``nTopKeywrd`` is accepted but not used by this function.

    Returns:
        0 on success, -1 if any exception was raised (the error and the
        traceback are printed).
    """
    try:
        for position in range(len(Idx)):
            row_id = Idx[position]
            # NOTE(review): `<=` also admits row_id == len(pData); confirm
            # whether the ids are 0-based positions or 1-based labels.
            if not (row_id <= len(pData)):
                print("Please select valid Id")
                continue

            row_key = int(row_id)
            pIntent = pData['Intent'][row_key]
            _, pModels = loadmodel(pRootDir, pAccountName, pIntent)
            pPipeModel = make_pipeline(pVec, pModels)

            explainer = TextExplainer(random_state=42)
            pTe = explainer.fit(pData[pDesc][row_key], pPipeModel.predict_proba)
            pExplanation = pTe.explain_prediction()
            pHtml = format_as_html(
                pExplanation,
                force_weights=False,
                include_styles=False,
                horizontal_layout=True,
                show_feature_values=False,
            )
            savehtml(pRootDir, pHtml, row_id, pIntent)
    except Exception as e:
        print(
            '*** ERROR[003]: Error in visualiation file of eil5visual function: ',
            sys.exc_info()[0], str(e))
        print(traceback.format_exc())
        return (-1)
    return (0)
def main():
    """
    Explain every document of the spreadsheet with LIME and save the result.

    Reads ``data/mr_vs_fr_30.xlsx``, shuffles it, lemmatizes the ``text``
    column with ``morphText``, calls ``get_pipe`` once on a stratified
    70/30 split and once on the full data, then fits a decision-tree based
    ``TextExplainer`` per row. The features with positive weight are joined
    with ``', '`` into a new ``words`` column and the frame is written to
    ``mr_vs_fr_words_30.xlsx``.
    """
    df = pd.read_excel('data/mr_vs_fr_30.xlsx')
    df = df.sample(frac=1, random_state=seed)
    df['text_lemmatized'] = df['text'].apply(morphText)

    X_train, X_test, y_train, y_test = train_test_split(
        df['text_lemmatized'], df['label'],
        test_size=0.3, random_state=42, stratify=df['label'])

    # First call passes the hold-out split (flag_test=True); its return
    # value is intentionally ignored, as in the original code.
    get_pipe(X_train, y_train, True, X_test, y_test)
    # Second call uses the whole data set and provides the explained model.
    pipe = get_pipe(df['text_lemmatized'], df['label'], False)

    words = []
    for _, row in df.iterrows():
        te5 = TextExplainer(clf=DecisionTreeClassifier(max_depth=5),
                            random_state=seed)
        te5.fit(row['text_lemmatized'], pipe.predict_proba)
        df_eli5_w = eli5.format_as_dataframe(te5.explain_weights())

        print('class {}'.format('male' if row['label'] == 0 else 'woman'))
        print('predict:')
        print(df_eli5_w)
        print(100*'*')

        positive_features = df_eli5_w[df_eli5_w['weight'] > 0]['feature'].tolist()
        # An empty feature list joins to '', so no special case is needed.
        words.append(', '.join(positive_features))

    df['words'] = words
    df.to_excel('mr_vs_fr_words_30.xlsx', index=False)
def test_lime_explain_probabilistic(newsgroups_train):
    """
    Fit a hashing + MultinomialNB pipeline on the fixture data and check
    that TextExplainer approximates it well on the first document and that
    the text explanation mentions 'file'.
    """
    docs, y, target_names = newsgroups_train
    try:
        vectorizer = HashingVectorizer(alternate_sign=False)
    except TypeError:
        # sklearn < 0.19
        vectorizer = HashingVectorizer(non_negative=True)

    classifier = MultinomialNB()
    features = vectorizer.fit_transform(docs)
    classifier.fit(features, y)
    print(classifier.score(features, y))

    pipeline = make_pipeline(vectorizer, classifier)
    first_doc = docs[0]

    explainer = TextExplainer(random_state=42)
    explainer.fit(first_doc, pipeline.predict_proba)
    print(explainer.metrics_)
    assert explainer.metrics_['score'] > 0.7
    assert explainer.metrics_['mean_KL_divergence'] < 0.1

    explanation = explainer.explain_prediction(top=20, target_names=target_names)
    rendered = format_as_text(explanation)
    print(rendered)
    assert 'file' in rendered
def _lime_analyze(self, query, indicies, max_len, max_replace, top_targets=None):
    """
    Produce one LIME explanation of ``query`` per entry of ``indicies``.

    The query is preprocessed with ``self.preprocess`` and joined with
    spaces; tokens are masked with ``UNK`` by a position-dependent sampler.
    For each index a predict function is obtained from
    ``ExplainerGenerator.get_predict_function``.

    Returns:
        list of the objects returned by ``TextExplainer.explain_prediction``.
    """
    model = self.model
    vocab = self.vocab.word_to_idx
    label = self.label.word_to_idx
    tokens = self.preprocess(query)

    generator = ExplainerGenerator(model, vocab, max_len)
    masking_sampler = MaskingTextSampler(
        replacement=UNK,
        max_replace=max_replace,
        token_pattern=None,
        bow=False,
    )

    def explain_for(index):
        predict_fn = generator.get_predict_function(index)
        explainer = TextExplainer(
            sampler=masking_sampler,
            position_dependent=True,
            random_state=RANDOM_SEED,
        )
        explainer.fit(' '.join(tokens), predict_fn)
        # The first three label entries are skipped
        # (presumably special tokens - TODO confirm).
        return explainer.explain_prediction(
            target_names=list(label)[3:],
            top_targets=top_targets,
        )

    return [explain_for(index) for index in indicies]
def test_text_explainer_rbf_sigma():
    """The summed sample similarity grows with ``rbf_sigma``: 0.1 < default < 1.0."""
    text = 'foo bar baz egg spam'
    predict_proba = substring_presence_predict_proba('bar')

    default_explainer = TextExplainer().fit(text, predict_proba)
    narrow_explainer = TextExplainer(rbf_sigma=0.1).fit(text, predict_proba)
    wide_explainer = TextExplainer(rbf_sigma=1.0).fit(text, predict_proba)

    assert default_explainer.similarity_.sum() < wide_explainer.similarity_.sum()
    assert default_explainer.similarity_.sum() > narrow_explainer.similarity_.sum()
def test_text_explainer_token_pattern():
    """A token pattern that keeps hyphens makes 'foo-bar' the top positive feature."""
    text = "foo-bar baz egg-spam"
    predict_proba = substring_presence_predict_proba('bar')

    # a different token_pattern
    explainer = TextExplainer(token_pattern=r'(?u)\b[-\w]+\b')
    explainer.fit(text, predict_proba)
    print(explainer.metrics_)
    assert explainer.metrics_['score'] > 0.95
    assert explainer.metrics_['mean_KL_divergence'] < 0.1

    explanation = explainer.explain_prediction()
    format_as_all(explanation, explainer.clf_)
    top_positive = explanation.targets[0].feature_weights.pos[0]
    assert top_positive.feature == 'foo-bar'
def predict(model_id):
    """
    Predict and explain the rows of the JSON request body with a stored model.

    For text models (``is_text_type(model_id)``) each posted item's
    ``'text'`` is explained with a LIME ``TextExplainer``; otherwise the
    rows are one-hot encoded, aligned to ``model_columns`` and explained
    with ``eli5.explain_prediction``.

    Returns:
        A JSON response with ``predictions`` (stringified) and
        ``explanations`` (one ``{'pos': [...], 'neg': [...]}`` dict per row);
        a JSON response with ``error`` and ``trace`` if anything raises;
        the string ``'no model here'`` when no model file exists.
    """
    # NOTE(review): existence is checked on "model/<int id>.pkl" while the
    # model is loaded from model_file_name(model_id) - confirm both refer
    # to the same file.
    if not os.path.exists("model/" + str(int(model_id)) + ".pkl"):
        print('train first')
        return 'no model here'

    try:
        # Prefer the in-memory model when one is registered for this id.
        if str(model_id) in clfs:
            clf = clfs[str(model_id)]
        else:
            clf = joblib.load(model_file_name(model_id))

        explainers = []
        if is_text_type(model_id):
            pipe = make_pipeline(vectorizer, clf)
            # NOTE(review): the whole JSON payload is passed to predict,
            # while the explanations use post['text'] - confirm intended.
            prediction = pipe.predict(request.json)
            for post in request.json:
                te = TextExplainer(random_state=42, n_samples=500)
                te.fit(post['text'], pipe.predict_proba)
                made = te.explain_prediction(target_names=['pos', 'neg'])
                explainers.append(
                    _feature_weights_to_lists(made.targets[0].feature_weights))
        else:
            rows = request.json
            query = pd.get_dummies(pd.DataFrame(rows))
            # Align the dummies with the columns the model was trained on.
            query = query.reindex(columns=model_columns, fill_value=0)
            prediction = clf.predict(query)
            for _, row in query.iterrows():
                explanation = eli5.explain_prediction(
                    clf, row).targets[0].feature_weights
                explainers.append(_feature_weights_to_lists(explanation))

        # Predictions are converted to str so they are JSON serialisable.
        return jsonify({
            "predictions": list(map(str, prediction)),
            "explanations": explainers
        })
    except Exception as e:
        return jsonify({'error': str(e), 'trace': traceback.format_exc()})


def _feature_weights_to_lists(feature_weights):
    """Turn eli5 feature weights into ``{'pos': [[name, weight], ...], 'neg': [...]}``."""
    return {
        'pos': [[fw.feature, fw.weight] for fw in feature_weights.pos],
        'neg': [[fw.feature, fw.weight] for fw in feature_weights.neg],
    }
def highlight_text(text):
    """
    Return ``predict(text)`` extended with a LIME highlight of ``text``.

    The ``"highlight"`` entry holds the value returned by
    ``TextExplainer.show_prediction``, or None when building the
    explanation fails (best effort: the prediction is still returned).
    """
    predict_dict = predict(text)
    try:
        te = TextExplainer(random_state=42, n_samples=1000)
        te.fit(text, nn_model.predict_proba)
        predict_dict["highlight"] = te.show_prediction(
            target_names=list(CLASSES.values()),
            top_targets=3,
            top=200,
        )
    except Exception:
        # Deliberately tolerant, but no longer swallows
        # KeyboardInterrupt / SystemExit like a bare except would.
        predict_dict["highlight"] = None
    return predict_dict
def explain(self, doc, truncate_len=512, all_targets=False, n_samples=2500):
    """
    Highlights text to explain prediction

    Args:
        doc (str): text of document
        truncate_len(int): truncate document to this many words
        all_targets(bool):  If True, show visualization for each target.
        n_samples(int): number of samples to generate and train on.
                        Larger values give better results, but will take more time.
                        Lower this value if explain is taking too long.

    Returns:
        The value returned by ``TextExplainer.show_prediction``, or None
        (after emitting a warning) when ``doc`` is a sentence pair, when
        ``self.c`` is falsy, or when eli5 cannot be imported.

    Raises:
        TypeError: if ``doc`` is not a str.
    """
    _, is_pair = detect_text_format(doc)
    if is_pair:
        warnings.warn(
            'currently_unsupported: explain does not currently support sentence pair classification'
        )
        return
    if not self.c:
        warnings.warn(
            'currently_unsupported: explain does not support text regression'
        )
        return

    try:
        import eli5  # noqa: F401  (imported to verify the package is present)
        from eli5.lime import TextExplainer
    except ImportError:
        msg = 'ktrain requires a forked version of eli5 to support tf.keras. '+\
              'Install with: pip3 install git+https://github.com/amaiya/eli5@tfkeras_0_10_1'
        warnings.warn(msg)
        return

    # Validate the input before running the model on it.
    if not isinstance(doc, str):
        raise TypeError('text must of type str')

    # When all targets are requested, eli5 receives targets=None.
    prediction = [self.predict(doc)] if not all_targets else None

    if self.preproc.is_nospace_lang():
        doc = self.preproc.process_chinese([doc])
        doc = doc[0]
    doc = ' '.join(doc.split()[:truncate_len])

    te = TextExplainer(random_state=42, n_samples=n_samples)
    te.fit(doc, self.predict_proba)
    return te.show_prediction(target_names=self.preproc.get_classes(), targets=prediction)
def limeTextClassification(dataset, data, pr=None):
    # example retrieved from https://eli5.readthedocs.io/en/latest/tutorials/black-box-text-classifiers.html#textexplainer
    # A Predictor default built in the signature would be created once at
    # definition time and then mutated below (pr.dataset = ...), leaking
    # state between calls; build a fresh one per call instead.
    if pr is None:
        pr = Predictor(callingFunction="TextClassifier")
    pr.dataset = dataset
    resultColumnName = pr.resultColumn
    # Distinct class labels (as strings), sorted.
    dataClasses = sorted(dict.fromkeys(data[resultColumnName].astype(str)))
    te = TextExplainer(random_state=42)
def test_lime_flat_neighbourhood(newsgroups_train):
    """
    With ``expand_factor=None`` and a black box that only ever predicts
    three of the four labels, every rendering of the explanation must
    mention both 'file' and 'comp.graphics'.
    """
    docs, y, target_names = newsgroups_train
    first_doc = docs[0]

    @_apply_to_list
    def predict_proba(doc):
        """ This function predicts non-zero probabilities only for 3 labels """
        if 'file' in doc:
            return [0, 1.0, 0, 0]
        return [0.9, 0, 0.1, 0]

    explainer = TextExplainer(expand_factor=None, random_state=42)
    explainer.fit(first_doc, predict_proba)
    print(explainer.metrics_)
    print(explainer.clf_.classes_, target_names)

    explanation = explainer.explain_prediction(top=20, target_names=target_names)
    for rendered in format_as_all(explanation, explainer.clf_):
        assert 'file' in rendered
        assert "comp.graphics" in rendered
def st_lime_explanation(
    text: str,
    predict_func: Callable[[List[str]], np.ndarray],
    unique_labels: List[str],
    n_samples: int,
    position_dependent: bool = True,
):
    """
    Fit a LIME TextExplainer on ``text`` and render it with streamlit.

    Shows the explainer metrics as JSON, then one sub-header and one
    dataframe per explanation target, ordered by descending probability.
    Each dataframe adds ``contribution`` (weight * value) and
    ``abs_contribution`` and is sorted by the latter.
    """
    # TODO just use ELI5's built-in visualization when streamlit supports it:
    # https://github.com/streamlit/streamlit/issues/779
    with st.spinner("Generating LIME explanations..."):
        explainer = TextExplainer(
            random_state=1,
            n_samples=n_samples,
            position_dependent=position_dependent,
        )
        explainer.fit(text, predict_func)

    st.json(explainer.metrics_)
    explanation = explainer.explain_prediction()
    weights_df = eli5.format_as_dataframe(explanation)

    ranked_targets = sorted(explanation.targets, key=lambda tgt: -tgt.proba)
    # NOTE(review): target_ndx is the rank after sorting by probability, yet
    # it is used both to filter the "target" column and to index
    # unique_labels - confirm rank and class index are meant to coincide.
    for target_ndx, target in enumerate(ranked_targets):
        rows_for_target = weights_df["target"] == target_ndx
        table = weights_df[rows_for_target].copy()
        table["contribution"] = table["weight"] * table["value"]
        table["abs_contribution"] = abs(table["contribution"])

        table = table.drop("target", axis=1)
        table = table.sort_values(by="abs_contribution", ascending=False)
        table = table.reset_index(drop=True)

        st.subheader(
            f"Target: {unique_labels[target_ndx]} (probability {target.proba:.4f}, score {target.score:.4f})"
        )
        st.dataframe(table)
def limeTextClassification( dataset, data, pr=Predictor(callingFunction="TextClassifier") ): # example retrieved from https://eli5.readthedocs.io/en/latest/tutorials/black-box-text-classifiers.html#textexplainer pr = Predictor(dataset=dataset, callingFunction="TextClassifier") resultColumnName = pr.resultColumn dataClasses = list(dict.fromkeys(data[resultColumnName].astype(str))) dataClasses.sort() pr = Predictor(dataset=dataset, callingFunction="TextClassifier") te = TextExplainer(random_state=42) te.fit(dataset["text"], pr.predict_proba) te.fit(dataset["text"], pr.predict_proba) te.show_prediction(target_names=pr._classes_000.tolist()) return te, pr._classes_000.tolist() '''
def test_text_explainer_custom_classifier():
    """A depth-2 decision tree can serve as the white-box model and splits on 'bar'."""
    text = "foo-bar baz egg-spam"
    predict_proba = substring_presence_predict_proba('bar')

    # use decision tree to explain the prediction
    tree_explainer = TextExplainer(clf=DecisionTreeClassifier(max_depth=2))
    tree_explainer.fit(text, predict_proba)
    print(tree_explainer.metrics_)
    assert tree_explainer.metrics_['score'] > 0.99
    assert tree_explainer.metrics_['mean_KL_divergence'] < 0.01

    prediction_expl = tree_explainer.explain_prediction()
    format_as_all(prediction_expl, tree_explainer.clf_)

    # with explain_weights we can get a nice tree representation
    weights_expl = tree_explainer.explain_weights()
    print(weights_expl.decision_tree.tree)
    assert weights_expl.decision_tree.tree.feature_name == "bar"
    format_as_all(weights_expl, tree_explainer.clf_)
def test_text_explainer_char_based(token_pattern):
    """With ``char_based=True`` the top positive feature is the substring 'lo'."""
    text = "Hello, world!"
    predict_proba = substring_presence_predict_proba('lo')

    explainer = TextExplainer(char_based=True, token_pattern=token_pattern)
    explainer.fit(text, predict_proba)
    print(explainer.metrics_)
    assert explainer.metrics_['score'] > 0.95
    assert explainer.metrics_['mean_KL_divergence'] < 0.1

    prediction_expl = explainer.explain_prediction()
    format_as_all(prediction_expl, explainer.clf_)
    check_targets_scores(prediction_expl)
    assert prediction_expl.targets[0].feature_weights.pos[0].feature == 'lo'

    # another way to look at results (not that useful for char ngrams)
    weights_expl = explainer.explain_weights()
    assert weights_expl.targets[0].feature_weights.pos[0].feature == 'lo'
def test_text_explainer_show_methods():
    """``show_prediction`` and ``show_weights`` return IPython HTML that mentions 'lo'."""
    pytest.importorskip('IPython')
    from IPython.display import HTML

    text = "Hello, world!"

    @_apply_to_list
    def predict_proba(doc):
        if 'lo' in doc:
            return [0.0, 1.0]
        return [1.0, 0.0]

    explainer = TextExplainer()
    explainer.fit(text, predict_proba)

    prediction_html = explainer.show_prediction()
    assert isinstance(prediction_html, HTML)
    assert 'lo' in prediction_html.data

    weights_html = explainer.show_weights()
    assert isinstance(weights_html, HTML)
    assert 'lo' in weights_html.data
train = fetch_subset('train') test = fetch_subset('test') vec = TfidfVectorizer(min_df=3, stop_words='english', ngram_range=(1, 2)) svd = TruncatedSVD(n_components=100, n_iter=7, random_state=42) lsa = make_pipeline(vec, svd) clf = SVC(C=150, gamma=2e-2, probability=True) pipe = make_pipeline(lsa, clf) pipe.fit(twenty_train.data, twenty_train.target) pipe.score(twenty_test.data, twenty_test.target) doc = twenty_test.data[0] print_prediction(doc) te = TextExplainer(random_state=42) te.fit(doc, pipe.predict_proba) #print(te.explain_prediction(target_names=twenty_train.target_names)) #print(eli5.format_as_image(te.explain_weights(target_names=twenty_train.target_names))) show_html = lambda html: display(HTML(html)) show_html_expl = lambda expl, **kwargs: show_html( format_as_html(expl, include_styles=False, **kwargs)) show_html(format_html_styles()) weights = eli5.show_weights(clf, vec=vec, target_names=train['target_names'], horizontal_layout=False) pred = show_html_expl(explain_prediction(clf,
__author__ = 'xead'
# coding: utf-8
from sentiment_classifier import SentimentClassifier
try:
    from sklearn.externals import joblib
except ImportError:
    # Newer scikit-learn releases no longer ship sklearn.externals.joblib;
    # fall back to the standalone joblib package that scikit-learn depends on.
    import joblib
from eli5.lime import TextExplainer

#clf = SentimentClassifier()
#pred = clf.get_prediction_message("Хороший телефон")

# Explain the stored pipeline's prediction for one sample review with LIME.
text = 'Хороший был у меня телефон 5 лет назад'
pipe = joblib.load("./pipe6.pkl")
te = TextExplainer(random_state=42)
te.fit(text, pipe.predict_proba)
res = te.show_prediction(target_names=['negative', 'positive'], top=25)
print(res)
# + Data # + Model # + Target Names # + Function # In[499]: from eli5.lime import TextExplainer # In[500]: pipe.predict_proba # In[501]: exp = TextExplainer(random_state=42) # In[502]: X_test.values[0] # In[515]: a = pipelog.predict([input()]) if a == 1: print("hate statement") elif a == 0: print("Not hate bro!") # In[374]:
# -*- coding: utf-8 -*-
"""
__title__ = 'eli5'
__author__ = 'JieYuan'
__mtime__ = '2018/8/21'
"""
from eli5.lime import TextExplainer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline

X = [
    "The dimension of the input documents is reduced to 100, and then a kernel SVM is used to classify the documents.",
    "This is what the pipeline returns for a document - it is pretty sure the first message in test data belongs to sci.med:"
]
y = [0, 1]

piplie = make_pipeline(TfidfVectorizer(), LogisticRegression())
# The pipeline has to be fitted before predict_proba can be used by LIME.
piplie.fit(X, y)

te = TextExplainer(random_state=42)
te.fit(X[0], piplie.predict_proba)
te.show_prediction()
te.show_weights()
# The former trailing expression `eli5.show_prediction` was dropped: the name
# `eli5` is never imported here, so evaluating it raised NameError.
for word in words: index = 0 for word_block in word: if len(average_word_vector) == index: average_word_vector.append(0) average_word_vector[index] += float(word_block) index += 1 index = 0 for word_block in average_word_vector: average_word_vector[index] /= float(len(words)) index += 1 xout.append(average_word_vector) return np.array(xout) vectorizer = V() for classifier in classifiers: print(classifier) gnb = classifier() pipe = make_pipeline(vectorizer, gnb) pipe.fit(x[:testcutoff], y[:testcutoff]) y_predicted = pipe.predict_proba(x[testcutoff:]) #print(classification_report(y[testcutoff:], y_predicted, target_names=['known weird', 'less weird'])) te = TextExplainer(random_state=101, n_samples=500) te.fit('Green new deal is the best bro, bring it on', pipe.predict_proba) te.show_prediction(target_names=['known weird', 'less weird'])
x_train, x_test, y_train, y_test = train_test_split(X,Y, test_size = 0.1, random_state = 40, stratify=Y)
text_model.fit(x_train, y_train)
text_model.score(x_test, y_test)

from IPython.display import display, HTML
import eli5
from eli5.lime import TextExplainer

# Explain a slice of the held-out samples with LIME.
for idx in x_test.index[190:210]:
    te = TextExplainer(random_state=42)
    te.fit(cleaner(x_test[idx]), text_model.predict_proba)
    # NOTE(review): idx is an index label of x_test but is used positionally
    # with .iloc below - confirm df_corpus_final has a default RangeIndex.
    corpus_row = df_corpus_final.iloc[idx]
    print("Real Class:", ["Non Toxic" if corpus_row['class'] == 0 else "Toxic"])
    print("Text uncleaned tweet:", corpus_row['tweet'])
    print("ELI5 Predicted Class:")
    # display() renders the explanation itself and returns None, so the
    # previous HTML(...) wrapper around it had no effect.
    display(te.show_prediction(target_names=['Non Toxic', 'Toxic']))

import pickle
# Close the file deterministically instead of leaking the handle.
with open('toxic.pickle', 'wb') as model_file:
    pickle.dump(text_model, model_file)
def get_result(self, text):
    """
    Explain ``self.pipe``'s prediction for ``text`` with LIME.

    Returns the value of ``TextExplainer.show_prediction`` with the
    targets named 'negative' / 'positive' and at most 25 features.
    """
    explainer = TextExplainer(random_state=42)
    explainer.fit(text, self.pipe.predict_proba)
    return explainer.show_prediction(
        target_names=['negative', 'positive'],
        top=25,
    )
# Used in pickle pipeline on TF-IDF
def dummy(token):
    """Return ``token`` unchanged."""
    return token


# Load pre-trained ML model
# NEEDS TO BE CREATED WITH BOTH FILES IN FOLDER pickle_model_for_webapp
# NOTE(review): unpickling runs arbitrary code - only load a trusted model.pkl.
with open('model.pkl', 'rb') as model_file:
    model = pickle.load(model_file)

# Create object of class preprocessing to clean data
reading = clean_data.preprocessing.preprocessing(convert_lower=True, use_spell_corrector=True, only_verbs_nouns=False)

# clf: define ML classifier
# vec: define vectorizer
# n_samples: sets the number of random examples to generate from given instance of text (default value 5000)
# use LIME method to train a white box classifier to make the same prediction as the black box one (pipeline)
te = TextExplainer(vec=TfidfVectorizer(ngram_range=(1, 2), preprocessor=dummy, token_pattern='(?u)\\b\\w+\\b'),
                   n_samples=5000, char_based=False, random_state=42)


def one_word_get_prediction_class_name(prediction):
    '''
    Pipeline with XGBoost - translate the prediction class number into words
    :param prediction: the predicted number/class
    :return: the predicted class in natural language
    '''
    # The order of classes in predict_proba: ['hate speech', 'neither', 'offensive language']
    # NOTE(review): no return statement is visible for `output` although the
    # docstring promises one - confirm the value is returned.
    if prediction == 0:
        output = "hate speech"
    elif prediction == 1:
        output = "neither"
    else:
        output = "offensive language"
def test_text_explainer_position_dependent():
    """
    A black box that depends on token position is explained poorly by a
    plain bag of words, and well by ``position_dependent=True`` or by a
    bigram vectorizer; combining ``position_dependent=True`` with a custom
    vectorizer raises ValueError.
    """
    text = "foo bar baz egg spam bar baz egg spam ham"

    @_apply_to_list
    def predict_proba(doc):
        words = doc.split()
        # 'bar' is only important in the beginning of the document,
        # not in the end
        if len(words) >= 2 and words[1] == 'bar':
            return [0, 1]
        return [1, 0]

    # bag of words model is not powerful enough to explain predict_proba above
    bow_explainer = TextExplainer(random_state=42, vec=CountVectorizer())
    bow_explainer.fit(text, predict_proba)
    print(bow_explainer.metrics_)
    assert bow_explainer.metrics_['score'] < 0.9
    assert bow_explainer.metrics_['mean_KL_divergence'] > 0.3

    # position_dependent=True can make it work
    positional_explainer = TextExplainer(position_dependent=True, random_state=42)
    positional_explainer.fit(text, predict_proba)
    print(positional_explainer.metrics_)
    assert positional_explainer.metrics_['score'] > 0.95
    assert positional_explainer.metrics_['mean_KL_divergence'] < 0.3
    positional_expl = positional_explainer.explain_prediction()
    format_as_all(positional_expl, positional_explainer.clf_)

    # it is also possible to almost make it work using a custom vectorizer
    bigram_vec = CountVectorizer(ngram_range=(1, 2))
    bigram_explainer = TextExplainer(vec=bigram_vec, random_state=42)
    bigram_explainer.fit(text, predict_proba)
    print(bigram_explainer.metrics_)
    assert bigram_explainer.metrics_['score'] > 0.95
    assert bigram_explainer.metrics_['mean_KL_divergence'] < 0.3
    bigram_expl = bigram_explainer.explain_prediction()
    format_as_all(bigram_expl, bigram_explainer.clf_)

    # custom vectorizers are not supported when position_dependent is True
    with pytest.raises(ValueError):
        TextExplainer(position_dependent=True, vec=HashingVectorizer())
a_set = Sentence(p_str) stacked_embeddings.embed(a_set) to_ret = a_set.get_embedding().cpu().detach().numpy( ).reshape(1, -1) except: print(type(X)) print(X) return to_ret pipe = joblib.load('saved_card_classification.pkl') if keras: pipe.named_steps['model'].model = load_model('keras_model.h5') te = TextExplainer(random_state=42, n_samples=3000, position_dependent=False) def explain_pred(sentence): te.fit(sentence, pipe.predict_proba) #txt = format_as_text(te.explain_prediction(target_names=["green", "neutral", "red"])) t_pred = te.explain_prediction(top=20, target_names=[ "ANB", "CAP", "ECON", "EDU", "ENV", "EX", "FED", "HEG",
# Notebook export: each "# In[n]:" marker starts a cell.
import jieba
# Segment the sample sentence into words with jieba (cut_all=False).
seg_list = jieba.cut("看了快一半了才发现是mini的广告", cut_all=False)
list(seg_list)

# ### Example 1

# In[10]:

# Call the black-box function on the pre-segmented, space-joined sentence.
get_proba(["看 了 快 一半 了 才 发现 是 mini 的 广告"])

# In[11]:

from eli5.lime import TextExplainer
te = TextExplainer(random_state=42, n_samples=5000)
# The explainer is fitted on the space-joined jieba tokens of the sentence.
te.fit(" ".join(jieba.cut("看了快一半了才发现是mini的广告", cut_all=False)), get_proba)
te.show_prediction(target_names=["neg", "pos"])

# In[12]:

te.metrics_

# In[13]:

te.samples_[:10]

# #### Character-based Whitebox

# In[14]:
# opcodes_dir = '/home/hwangdz/coreutils/coreutils-8.28/install_m32/bin/md5funcs_ops' opcodes_dir = '/home/hwangdz/git/rl-select-div/only-similarity/explanation/%s_ops_info' % bin_name output_dir = 'explanation/%s_html' % bin_name if not os.path.isdir(output_dir): os.mkdir(output_dir) for file_name in os.listdir(opcodes_dir): # if file_name != 'dump.s': # continue if file_name == 'op_distribution': continue file_path = os.path.join(opcodes_dir, file_name) with open(file_path, 'r') as f: op_codes = f.read() if len(op_codes) < 20: continue num_ops = len(op_codes.split()) op_codes = op_codes.replace('\n', ' ') opcode_explainer = TextExplainer(random_state=59, sampler=ops_sampler, n_samples=5000) #repeat_times = (len(op_codes.split()) / 100) ** 2 repeat_times = 1 for _ in range(repeat_times): opcode_explainer.fit(op_codes, ss.predict_proba) explanation = opcode_explainer.explain_prediction()._repr_html_() with open('explanation/%s_html/explanation-%s.html' % (bin_name, file_name), 'w') as ef: ef.write(explanation) ef.write('num of opcodes: %d\n' % num_ops) ef.write('</br>\n') ef.write(op_codes)