def test_generate_averages():
    """Check ``averages`` for one- and two-file corpora under two formulas."""

    def rescore(model, formula):
        # Install the formula, then refresh both the scores and the averages.
        model.test_option = TestOptions(formula=formula)
        model.save_formula()
        model.generate_scores()
        model.generate_averages()

    # One file, one dictionary, constant formula.
    single = ContentAnalysisModel()
    single.add_file(file_name="file1", label='file1', content='test')
    single.add_dictionary(file_name="dict1.txt", label="dict1",
                          content="test")
    single.count()
    rescore(single, "0")
    assert single.averages == [1.0, 0.0, 1.0, 0.0]

    # Two files, one dictionary, constant formula.
    pair = ContentAnalysisModel()
    pair.add_file(file_name="file1", label='file1', content='test')
    pair.add_file(file_name="file2", label='file2', content='other file')
    pair.add_dictionary(file_name="dict1.txt", label="dict1", content="test")
    pair.count()
    rescore(pair, "0")
    assert pair.averages == [0.5, 0.0, 1.5, 0.0]

    # Same corpus, recounted and scored with a dictionary-based formula.
    pair.count()
    rescore(pair, "4*[dict1]**2")
    assert pair.averages == [0.5, 2.0, 1.5, 2.0]
def test_check_formula():
    """Check the error text ``check_formula`` reports for bad formulas."""
    model = ContentAnalysisModel(TestOptions(formula="()sin(1)"))
    assert model.check_formula() == ""

    header = "Formula errors:<br>"

    model.test_option = TestOptions(formula="(")
    model.save_formula()
    assert model.check_formula() == header + "Mismatched parenthesis<br>"

    # Each of these functions is called with no argument and must be reported.
    for function_name in ("sin", "cos", "tan", "log"):
        model.test_option = TestOptions(formula=function_name + "()")
        model.save_formula()
        expected = (header
                    + function_name
                    + " takes exactly one argument (0 given)"
                    + "<br>")
        assert model.check_formula() == expected
def test_dictionary_colors():
    """Check that every added dictionary gets a non-empty string color."""
    test = ContentAnalysisModel()
    test.add_dictionary(file_name="dict1.txt", label="dict1", content="test")
    test.add_dictionary(file_name="dict2.txt", label="dict2",
                        content="test2")
    colors = test.dictionary_colors

    # Check each label against its own entry; the previous version measured
    # the length of the "dict1" color twice and never that of "dict2".
    for label in ("dict1", "dict2"):
        assert isinstance(colors[label], str)
        assert len(colors[label]) > 0
def test_add_dictionary():
    """Check the fields of a dictionary stored by ``add_dictionary``."""
    model = ContentAnalysisModel()
    model.add_dictionary(file_name="dict1", label="dict1", content="test")

    stored = model.dictionaries[0]
    assert stored.name == "dict1"
    assert stored.label == "dict1"
    assert stored.content == ["test"]
    assert stored.active
def test_join_active_dicts():
    """Check the label and content of entries from ``join_active_dicts``."""
    model = ContentAnalysisModel()
    model.add_dictionary(file_name="dict1.txt", label="dict1",
                         content="test1")
    model.add_dictionary(file_name="dict2.txt", label="dict2",
                         content="test2")

    joined = model.join_active_dicts()

    first = joined[0]
    assert first.dict_label == 'dict1'
    assert first.content == 'test1'

    second = joined[1]
    assert second.dict_label == 'dict2'
    assert second.content == 'test2'
def test_generate_corpus_counts_table():
    """Check the first and last rows returned by ``generate_corpus_results``.

    NOTE(review): another function with this same name is defined further
    down in this view; if both live in one module, the later definition
    replaces this one and this test is never collected - confirm.
    """
    model = ContentAnalysisModel()
    model.test_option = TestOptions(formula="[]")
    model.save_formula()
    model.add_file(file_name="file1", label='file1', content='test')
    model.add_dictionary(file_name="dict1.txt", label="dict1",
                         content="test")
    model.add_dictionary(file_name="dict2.txt", label="dict2",
                         content="test2")

    counts = model.count()
    rows = model.generate_corpus_results(counts)

    # The first column of each row holds the dictionary label.
    assert rows[0][0] == "dict1"
    assert rows[-1][0] == "dict2"
def test_generate_files_raw_counts_tables():
    """Check that every document result is named after the only file."""
    model = ContentAnalysisModel()
    model.test_option = TestOptions(formula="[]")
    model.save_formula()
    model.add_file(file_name="file1", label='file1', content='test')
    model.add_dictionary(file_name="dict1.txt", label="dict1",
                         content="test")
    model.add_dictionary(file_name="dict2.txt", label="dict2",
                         content="test2")

    counts = model.count()
    document_tables = model.generate_document_results(counts)

    assert all(table["name"] == "file1" for table in document_tables)
def test_analyze():
    """Check ``analyze`` output for the formula ``"[]"``.

    NOTE(review): another ``test_analyze`` is defined further down in this
    view; if both live in one module, the later definition replaces this
    one - confirm.
    """
    model = ContentAnalysisModel()
    model.test_option = TestOptions(formula="[]")
    model.save_formula()
    model.add_file(file_name="file1", label='file1', content='test')
    model.add_dictionary(file_name="dict1.txt", label="dict1",
                         content="test")
    model.add_dictionary(file_name="dict2.txt", label="dict2",
                         content="test2")

    # Unpack all six values so a change in the returned arity still fails.
    (overview_results, _overview_csv, _corpus_results, _corpus_csv,
     _document_results, errors) = model.analyze()

    assert overview_results == ""
    assert isinstance(errors, str)
def test_generate_corpus_counts_table():
    """Check that the corpus counts table is wrapped in a ``<table>`` tag."""
    model = ContentAnalysisModel()
    model.test_option = TestOptions(formula="[]")
    model.save_formula()
    model.add_file(file_name="file1", label='file1', content='test')
    model.add_dictionary(file_name="dict1.txt", label="dict1",
                         content="test")
    model.add_dictionary(file_name="dict2.txt", label="dict2",
                         content="test2")

    counts = model.count()
    colors = model.dictionary_colors
    table_html = model.generate_corpus_counts_table(counts, colors)

    assert table_html.startswith("<table")
    assert table_html.endswith("</table>")
def test_to_data_frame():
    """Check that ``to_data_frame`` returns a pandas ``DataFrame``."""
    model = ContentAnalysisModel()
    model.add_file(file_name="file1", label='file1', content='test')
    model.add_file(file_name="file2", label='file2', content='other file')
    model.add_dictionary(file_name="dict1.txt", label="dict1",
                         content="test")
    model.add_dictionary(file_name="dict2.txt", label="dict2",
                         content="test")
    model.count()

    model.test_option = TestOptions(formula="")
    model.save_formula()
    model.generate_scores()
    model.generate_averages()

    frame = model.to_data_frame()
    assert isinstance(frame, pd.DataFrame)
def test_generate_scores():
    """Check ``scores[0]`` for several file contents and formulas."""
    # Empty formula supplied through the constructor.
    model = ContentAnalysisModel(TestOptions(formula=""))
    model.add_file(file_name="file1", label='file1', content='test')
    model.add_dictionary(file_name="dict1.txt", label="dict1",
                         content="test")
    model.count()
    model.generate_scores()
    assert model.scores[0] == 0.0

    # Same model, now scored with the dictionary itself as the formula.
    model.test_option = TestOptions(formula="[dict1]")
    model.save_formula()
    model.generate_scores()
    assert model.scores[0] == 1

    # Each remaining case uses a fresh model:
    # (file content, formula, expected score).
    cases = (
        ('test a', "[dict1]", 0.5),
        ('a test', "[dict1]*2", 1),
        ('a test a', "[dict1]", round(1 / 3, 3)),
    )
    for file_content, formula, expected in cases:
        model = ContentAnalysisModel()
        model.add_file(file_name="file1", label='file1',
                       content=file_content)
        model.add_dictionary(file_name="dict1.txt", label="dict1",
                             content="test")
        model.count()
        model.test_option = TestOptions(formula=formula)
        model.save_formula()
        model.generate_scores()
        assert model.scores[0] == expected
def analyze():
    """ Analyzes the files.

    Stores the formula received from the front end in the session, feeds the
    active files and every file found in the dictionary folder to a
    ``ContentAnalysisModel``, and returns the outcome as JSON.

    :return: The results of the analysis. When the analysis reports errors,
        or yields no corpus results, the JSON carries only an "error"
        message.
    """
    path = get_path()
    analysis = ContentAnalysisModel()
    file_manager = load_file_manager()
    active_files = file_manager.get_active_files()

    # Set the formula
    session["formula"] = ContentAnalysisReceiver() \
        .options_from_front_end().formula

    # Add the files to analyze
    for file in active_files:
        analysis.add_file(file_name=file.name,
                          label=file.label,
                          content=file.load_contents())

    # Add the dictionaries. Read each one inside a ``with`` block so the
    # file handle is closed right away instead of being left open.
    for name in os.listdir(path):
        with open(os.path.join(path, name), 'r') as dictionary_file:
            dictionary_content = dictionary_file.read()
        analysis.add_dictionary(file_name=name,
                                label=name,
                                content=dictionary_content)

    # Analyze
    overview_results, overview_csv, corpus_results, corpus_csv, \
        document_results, errors = analysis.analyze()

    # Return the results
    if len(errors):
        return jsonify({"error": errors})

    if not len(corpus_results):
        return jsonify({"error": "Failed to perform the analysis."})

    return jsonify({
        # The first overview row is the table head; the rest is the body.
        "overview-table-head": overview_results[0],
        "overview-table-body": overview_results[1:],
        "overview-table-csv": overview_csv,
        "corpus-table-head": ["Dictionary", "Phrase", "Count"],
        "corpus-table-body": corpus_results,
        "corpus-table-csv": corpus_csv,
        "documents": document_results,
        "error": False
    })
def test_analyze():
    """Check ``analyze`` for the formula ``"[]"`` and then ``"[dict1]"``."""
    model = ContentAnalysisModel()
    model.test_option = TestOptions(formula="[]")
    model.save_formula()
    model.add_file(file_name="file1", label='file1', content='test')
    model.add_dictionary(file_name="dict1.txt", label="dict1",
                         content="test")
    model.add_dictionary(file_name="dict2.txt", label="dict2",
                         content="test2")

    # Unpack all four values so a change in the returned arity still fails.
    (result_table, _individual_counts, _files_raw_counts,
     formula_errors) = model.analyze()
    assert result_table == ""
    assert isinstance(formula_errors, str)

    model.test_option = TestOptions(formula="[dict1]")
    model.save_formula()
    (result_table, _individual_counts, _files_raw_counts,
     formula_errors) = model.analyze()
    assert result_table == model.to_html()
    assert formula_errors == ""
def test_is_secure():
    """Check which formulas ``is_secure`` accepts and which it rejects."""
    model = ContentAnalysisModel()
    model.add_dictionary(file_name="dict1.txt", label="dict1",
                         content="test")
    model.add_dictionary(file_name="dict2.txt", label="dict2",
                         content="test")

    accepted_formulas = (
        "",
        "[dict1][dict2]",
        "0123456789 +-*/ () sin cos tan log sqrt",
    )
    for formula in accepted_formulas:
        model.test_option = TestOptions(formula=formula)
        model.save_formula()
        assert model.is_secure()

    # A formula that names something outside the accepted vocabulary.
    model.test_option = TestOptions(formula="os.system()")
    model.save_formula()
    assert model.is_secure() is False
def test_count_words():
    """Check ``counters[0][0]`` for several file/dictionary combinations."""
    # (file content, dictionary content, expected first counter)
    cases = (
        ('test', "test", 1),
        ('test test test', "test", 3),
        ('a test', "test, a", 2),
        ('a test', "test, a, a test", 1),
        ('a test test', "test, a, a test", 2),
        ('a test test a', "test, a, a test", 3),
    )
    for file_content, dictionary_content, expected in cases:
        # Every case starts from a fresh model.
        model = ContentAnalysisModel()
        model.add_file(file_name="file1", label='file1',
                       content=file_content)
        model.add_dictionary(file_name="dict1.txt", label="dict1",
                             content=dictionary_content)
        model.count()
        assert model.counters[0][0] == expected
def test_get_active_dicts():
    """Check that both added dictionaries are returned as active."""
    model = ContentAnalysisModel()
    for index in ("1", "2"):
        model.add_dictionary(file_name="dict" + index + ".txt",
                             label="dict" + index,
                             content="test")

    active_dictionaries = model.get_active_dicts()
    assert len(active_dictionaries) == 2
def test_add_corpus():
    """Check the fields of a file stored by ``add_file``."""
    model = ContentAnalysisModel()
    model.add_file(file_name="file1", label='file1', content='test')

    stored = model.corpus[0]
    assert stored.name == "file1"
    assert stored.label == "file1"
    assert stored.content == "test"
def content_analysis():
    """Handles the functionality on the contentanalysis page.

    On a GET request the page is rendered from the values stored in the
    session. On any other request the active documents and the active
    dictionaries are analyzed and the result is returned as a JSON string.

    :return: a response object (often a render_template call) to flask and
        eventually to the browser.
    """
    analysis = ContentAnalysisModel()
    path = os.path.join(constants.TMP_FOLDER,
                        constants.UPLOAD_FOLDER,
                        session['id'], 'content_analysis/')
    dictionary_names = os.listdir(path) if os.path.isdir(path) else []

    if request.method == 'GET':
        # Fall back to defaults for anything not yet stored in the session.
        dict_labels = session.get('dictionary_labels', [])
        active_dicts = session.get('active_dictionaries',
                                   [True] * len(dict_labels))
        toggle_all_value = session.get('toggle_all_value', True)
        formula = session.get('formula', "")
        return render_template('contentanalysis.html',
                               dictionary_labels=dict_labels,
                               active_dictionaries=active_dicts,
                               toggle_all_value=toggle_all_value,
                               itm="content-analysis",
                               formula=formula)

    num_active_docs = detect_active_docs()

    # Parse the front-end options once rather than once per field.
    options = ContentAnalysisReceiver().options_from_front_end()
    active_dicts = options.active_dicts
    dict_labels = options.dict_labels
    session['formula'] = options.formula

    # Without labels from the front end, derive them from the dictionary
    # file names and treat every dictionary as active.
    if len(dict_labels) == 0:
        dict_labels = [os.path.splitext(dict_name)[0]
                       for dict_name in dictionary_names]
        active_dicts = [True] * len(dict_labels)

    num_active_dicts = active_dicts.count(True)
    if num_active_docs == 0 and num_active_dicts == 0:
        return error("At least 1 active document and 1 active "
                     "dictionary are required to perform a "
                     "content analysis.")
    elif num_active_docs == 0:
        return error("At least 1 active document is required to perform "
                     "a content analysis.")
    elif num_active_dicts == 0:
        return error("At least 1 active dictionary is required to perform"
                     " a content analysis.")

    file_manager = load_file_manager()
    active_files = file_manager.get_active_files()
    for file in active_files:
        analysis.add_file(file_name=file.name,
                          label=file.label,
                          content=file.load_contents())

    for dict_name, dict_label, active in zip(dictionary_names,
                                             dict_labels,
                                             active_dicts):
        if not active:
            continue
        # Read inside a ``with`` block so the file handle is always closed.
        with open(os.path.join(path, dict_name), "r") as dictionary_file:
            content = dictionary_file.read()
        analysis.add_dictionary(file_name=dict_name,
                                label=dict_label,
                                content=content)

    result_table, corpus_raw_counts_table, files_raw_counts_tables, \
        formula_errors = analysis.analyze()
    if len(formula_errors) != 0 or result_table is None:
        return error(formula_errors)

    data = {"result_table": result_table,
            "dictionary_labels": dict_labels,
            "active_dictionaries": active_dicts,
            "corpus_raw_counts_table": corpus_raw_counts_table,
            "files_raw_counts_tables": files_raw_counts_tables,
            "error": False}
    return json.dumps(data)
def test_save_formula():
    """Check the ``_formula`` text stored by ``save_formula``."""
    options = TestOptions(formula="√([dict1])^([dict2])")
    model = ContentAnalysisModel(options)

    model.save_formula()

    assert model._formula == "sqrt([dict1])**([dict2])"