"""Train, save, and evaluate the neural fake-news model end to end.

Pipeline: load raw CSV -> clean -> merge prepared columns -> lemmatize ->
train/test split -> fit preprocessor on both folds -> fit model -> save ->
print held-out evaluation metrics.
"""
import pandas as pd

import scripts  # NOTE(review): not referenced below — presumably imported for side effects; confirm
from cleaning import apply_cleaning
from preparing_neural import apply_lemmatize, apply_preparing_merge
from train_neural_liam_test import (
    fit_model,
    get_preprocessor,
    save_model,
    split_train_test_data,
)

# Load the raw working dataset (path relative to this script's location).
data = pd.read_csv('../raw_data/working.csv')

# Feature preparation: clean text, merge prepared columns, then lemmatize.
data_clean = apply_cleaning(data)
data_merge = apply_preparing_merge(data_clean)
data_lemmatize = apply_lemmatize(data_merge)

# Split, preprocess both folds with the same preprocessor, and train.
x_train, x_test, y_train, y_test = split_train_test_data(data_lemmatize)
x_train_preprocessed, x_test_preprocessed = get_preprocessor(x_train, x_test)
neural_model = fit_model(x_train_preprocessed, y_train)

# Persist the trained model, then report performance on the held-out fold.
save_model(neural_model)
print(neural_model.evaluate(x_test_preprocessed, y_test))
# --- Article input widgets ---------------------------------------------
# The user either pastes title/body text directly or supplies a URL.
if input_method == 'Text':
    title = st.text_input('Article title')
    text = st.text_area('Article body')
elif input_method == 'Link':
    url = st.text_input('Article URL')

analyze_status_logistic = st.button('Analyze_with_Logistic')
analyze_status_neural = st.button('Analyze_with_Neural')

###### Logistic
# Feature columns the logistic model was trained on. Extracted to a single
# constant so the Text and Link branches cannot drift apart.
_LOGISTIC_FEATURES = ['title_clean', 'text_clean', 'title_length_char',
                      'title_Upper_Ratio', 'text_stop_words_ratio']

# st.button returns a bool, so the `== True` comparisons were redundant.
if input_method == 'Text' and analyze_status_logistic:
    input_df = convert(title, text)
    input_df = apply_cleaning(input_df)
    input_df = input_df[_LOGISTIC_FEATURES]
    prediction = logistic_model.predict(input_df)
    # NOTE(review): predict() returns an array-like; `== 1` relies on the
    # input being a single row — confirm convert() always yields one row.
    if prediction == 1:
        st.write('I think its true')
    else:
        st.write('I think its fake')

if input_method == 'Link' and analyze_status_logistic:
    input_df = get_title_text_web(url)
    input_df = apply_cleaning(input_df)
    input_df = input_df[_LOGISTIC_FEATURES]
    # NOTE(review): this branch ends after feature selection — no predict or
    # display step is visible in this chunk; confirm the continuation exists.
"""Train, save, and evaluate the logistic fake-news model end to end.

Pipeline: load True/Fake CSVs -> combine -> clean -> typo-ratio feature ->
train/test split -> fit -> save -> print held-out score.
"""
import pandas as pd

from cleaning import apply_cleaning
from preparing_df import apply_preparing
from train import fit_model, save_model, split_train_test_data
from typo import apply_typo_ratio

# Load the two labelled source datasets and combine them into one frame.
true = pd.read_csv('True.csv')
fake = pd.read_csv('Fake.csv')
data = apply_preparing(true, fake)

# BUG FIX: the return value of apply_cleaning was previously discarded,
# although the neural training script (which imports the same function)
# assigns and uses its return as the cleaned frame. Assign it so the
# cleaned data is used even if the function is not purely in-place.
data = apply_cleaning(data)
# NOTE(review): apply_typo_ratio is assumed to mutate `data` in place (its
# return value was never used) — confirm against its definition in typo.py.
apply_typo_ratio(data)

# Split, train, persist, and report performance on the held-out fold.
x_train, x_test, y_train, y_test = split_train_test_data(data)
pipe = fit_model(x_train, y_train)
save_model(pipe, 'model_test')
print(pipe.score(x_test, y_test))
# --- Dataset upload and cleaning preview --------------------------------
index0 = st.file_uploader("Choose a file")
if index0 is not None:
    st.sidebar.header('Dataset Parameter')
    # Let the user pick which worksheet of the uploaded Excel file to use.
    x1 = pd.ExcelFile(index0)
    index1 = st.sidebar.selectbox('What Dataset you choose?', x1.sheet_names)

    # Load data example (from either the functional or non-functional sheet)
    st.header('Dataset parameters')
    statement = fulldataset(index0, index1)

    # Get text to clean (one entry per selected requirement row)
    text_to_clean = list(statement['Requirement Statement'])

    # Clean text
    print("Loading Original & Cleaned Text...")
    cleaned_text = apply_cleaning(text_to_clean)

    # Show original next to cleaned text, one row per requirement ID.
    text_df = pd.DataFrame(
        [text_to_clean, cleaned_text],
        index=['ORIGINAL', 'CLEANED'],
        columns=statement['ID'],
    ).T
    st.write(text_df)

    st.header('Traceability parameters')
    # PERF FIX: reuse the frame loaded above instead of calling
    # fulldataset(index0, index1) a second time just to read the ID column.
    id_requirement = statement['ID']

    genre = st.sidebar.radio(
        "What do you choose?",
        ('Information_Retrieval', 'Ontology', 'IR+LSA', 'IR+LDA'))
    if genre == 'Information_Retrieval':
        st.subheader("bag of words")