import en_vectors_web_lg, en_core_web_lg
import json
import os
from keras.utils.np_utils import to_categorical
import logging_util
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import itertools
from matplotlib import cm
from sklearn.preprocessing import LabelEncoder

# Make sure the log directory exists before the module logger is created,
# because the directory is handed straight to logging_util.logger().
logdir = "./logging"
if not os.path.exists(logdir):
    os.makedirs(logdir)
logger = logging_util.logger(__name__, logdir)


# NOTE(review): only the opening of extract_words is visible in this chunk;
# the remainder of the loop body and the return value are not shown here.
# presumably `doc` is a spaCy Doc (it is iterated via `doc.sents`) and
# `att_words` / `att_sents` are attention weights per word / per sentence
# -- TODO confirm against the caller.
def extract_words(doc, att_words, att_sents, sents_count, max_sentence_length, nlp):
    # Mean and median of the first `sents_count` entries of `att_sents`;
    # the smaller of the two is kept as the sentence threshold.
    sent_mean = np.mean(att_sents[:sents_count])
    sent_median = np.median(att_sents[:sents_count])
    sent_threshold = min(sent_mean, sent_median)
    # Starts empty; how it is filled is not visible in this chunk.
    phrases = {}
    for index, sentence in enumerate(doc.sents):
        # Visit at most `sents_count` sentences of the document.
        if index >= sents_count:
            break
from keras.engine.topology import Layer
from keras import initializers
from keras import backend as K
from keras.engine import InputSpec
import os
import logging_util
import tensorflow as tf

# Module-level logger obtained from the project's logging helper.
logger = logging_util.logger(__name__)


def dot_product(x, kernel):
    """
    Backend-agnostic dot product of `x` with `kernel`.

    On the TensorFlow backend `kernel` is first given an extra axis with
    `K.expand_dims`, multiplied with `K.dot`, and the last axis of the
    product is squeezed away again. Every other backend (the original
    author had Theano in mind) calls `K.dot` on the inputs directly.

    Args:
        x (): input
        kernel (): weights

    Returns:
        The backend tensor produced by the dot product described above.
    """
    # Non-TensorFlow backends need no reshaping around the product.
    if K.backend() != 'tensorflow':
        return K.dot(x, kernel)

    projected = K.dot(x, K.expand_dims(kernel))
    return K.squeeze(projected, axis=-1)


class AttWeightLayer(Layer):
    # NOTE(review): the class continues beyond this chunk; only the first
    # statement of __init__ is visible here.
    def __init__(self, **kwargs):
        self.init_normal = initializers.get('normal')
from collections import Counter

# NOTE(review): `warnings` and `os` are used below, but no import that binds
# those names is visible in this chunk (`import os.path as path` binds only
# `path`) -- confirm they are imported earlier in the file.
warnings.filterwarnings("ignore")
import logging_util
from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import RandomUnderSampler
import os.path as path

# Load data
# Create the log directory on import if it does not exist yet.
logdir = "../logging"
if not os.path.exists(logdir):
    os.makedirs(logdir)
# Absolute base directory; the search and dataset paths below are built from it
# by plain string concatenation.
data_path = '/Users/sli/Projects/data'
search_path = data_path + '/hyperparameter_search/mental_health_forum_simple_clf'
dataset_path = data_path + '/mental_health_forum_data'
logger = logging_util.logger("Hyper_search_simple_model", logging_folder=logdir)


# Prepare data and label
def data_prep(df, params, if_resample=False):
    """
    Convert data from dataframe format into tensors of input and target.

    :param df: dataframe containing disorder name, dialog
    :param params: parameters for data processing
    :param if_resample: whether to perform resampling to balance the sample size
    :return:
        output: dictionary containing data, encoded_label, binary_label
        label_encode: LabelEncoder() object for inverse fitting
    """
    # NOTE(review): the function body is not visible in this chunk; the
    # docstring above restates the original author's description and could
    # not be checked against the implementation.