def viterbi_path(prior, transmat, obslik):
    """Viterbi decoding: most probable hidden-state sequence of an HMM.

    Parameters
    ----------
    prior : array, shape (Q,)
        Initial state distribution.
    transmat : array, shape (Q, Q)
        Transition matrix; transmat[i, j] is P(state j at t | state i at t-1).
    obslik : array, shape (Q, T)
        Observation likelihoods, one column per time step.

    Returns
    -------
    path : int array, shape (1, T)
        0-based indices of the most likely state at each time step.
    """
    scaled = 1  # rescale delta each step to avoid underflow on long sequences
    n_steps = obslik.shape[1]
    n_states = prior.size

    delta = np.zeros((n_states, n_steps))          # best-path probability per state/time
    psi = np.zeros((n_states, n_steps), dtype=int)  # back-pointers (argmax predecessors)
    path = np.zeros((1, n_steps), dtype=int)
    scale = np.ones((1, n_steps))

    # Initialisation (t = 0).
    delta[:, 0] = prior * obslik[:, 0]
    if scaled:
        delta[:, 0], norm_const = normalise(delta[:, 0])
        scale[0, 0] = 1 / norm_const
    psi[:, 0] = 0

    # Forward recursion: for each state, keep the best predecessor.
    for step in range(1, n_steps):
        for state in range(n_states):
            candidates = delta[:, step - 1] * transmat[:, state]
            psi[state, step] = candidates.argmax(axis=0)
            delta[state, step] = candidates[psi[state, step]] * obslik[state, step]
        if scaled:
            delta[:, step], norm_const = normalise(delta[:, step])
            scale[0, step] = 1 / norm_const

    # Backtrack from the most probable final state.
    path[0, n_steps - 1] = np.argmax(delta[:, n_steps - 1])
    for step in range(n_steps - 2, -1, -1):
        path[0, step] = psi[path[0, step + 1], step + 1]
    return path
def read_csv_file_by_pandas(csv_file):
    """Load a tab-separated Q&A dump and prepare it for bullying classification.

    Each raw row arrives as one tab-joined string; it is split into real
    columns, the three per-answer severity columns (6, 9, 12) are coerced to
    ints, each row is labelled "Bullying"/"NotBullying", the question text is
    cleaned via denoise/normalise/normalize, and unused columns are dropped.

    Parameters
    ----------
    csv_file : str — path to the tab-separated input file.

    Returns
    -------
    pandas.DataFrame with the cleaned rows, or None when csv_file does not
    exist.  (Bug fix: the original fell through to `return df2` with `df2`
    unbound, raising NameError for missing files.)
    """
    # Guard clause: bail out early on a missing file instead of NameError.
    if not os.path.exists(csv_file):
        print(csv_file + " do not exist.")
        return None

    data_frame = pandas.read_csv(csv_file, sep='\t')
    # The whole record lands in column 0 as a tab-joined string; expand it.
    df2 = data_frame.iloc[:, 0].str.split("\t", expand=True)

    for i in range(len(df2.index)):
        if df2.iloc[i, 1] is not None and df2.iloc[i, 1] != "":
            indeksmaks = 0
            # Normalize severity: non-numeric / missing severities become 0.
            # (Order matters: the None check must precede .isdigit().)
            for col in (6, 9, 12):
                val = df2.iloc[i, col]
                if val == "None" or val is None or not val.isdigit():
                    df2.iloc[i, col] = 0

            # Classification: bullying iff any answer has nonzero severity.
            if (int(df2.iloc[i, 6]) != 0 or int(df2.iloc[i, 9]) != 0
                    or int(df2.iloc[i, 12]) != 0):
                indeksmaks = maxseverity(int(df2.iloc[i, 6]),
                                         int(df2.iloc[i, 9]),
                                         int(df2.iloc[i, 12]))
                # Promote the max severity and its answer text to cols 6/7.
                df2.iloc[i, 6] = int(df2.iloc[i, indeksmaks])
                df2.iloc[i, 5] = "Bullying"
                df2.iloc[i, 7] = df2.iloc[i, indeksmaks + 1]
            else:
                df2.iloc[i, 6] = 0
                df2.iloc[i, 5] = "NotBullying"
                df2.iloc[i, 7] = ""

            # Clean the question text (denoise -> normalise -> normalize).
            df2.iloc[i, 1] = ' '.join(
                normalize(
                    normalise(denoise_text(df2.iloc[i, 1]), verbose=False)))
        else:
            # Empty question: mark and neutralise the row.
            df2.iloc[i, 1] = "Empty"
            df2.iloc[i, 6] = 0
            df2.iloc[i, 5] = "NotBullying"
            df2.iloc[i, 7] = ""

    # Recover the real header names from the tab-joined header string.
    df2.columns = list(data_frame.columns.str.split('\t')[0])
    # Drop columns not used downstream (one call instead of ten).
    df2.drop(columns=["ques", "ans", "userid", "asker", "ans2", "severity2",
                      "bully2", "ans3", "severity3", "bully3"], inplace=True)
    return df2
def posterior_finiteMixture(y, K, iterations):
    """Gibbs sampler for the posterior of a K-component exponential mixture.

    Bug fix: the parameter was misspelled `iterationsations` while the body
    used `iterations`, so every call raised NameError.

    Parameters
    ----------
    y : array — observed data.
    K : int — number of mixture components.
    iterations : int — number of Gibbs sweeps.

    Returns
    -------
    (lambda_, pi) : arrays of shape (iterations + 1, K) holding the sampled
    component rates and mixture weights per sweep (row 0 is the random init).
    """
    # N = len(y)  -- kept from the original for reference; unused.
    a = 0.1      # Gamma prior shape on lambda
    b = 0.1      # Gamma prior rate on lambda
    alpha = 5    # symmetric Dirichlet concentration on pi

    lambda_ = np.zeros((iterations + 1, K))
    pi = np.zeros((iterations + 1, K))
    lambda_[0] = np.random.gamma(1, 1, K)
    pi[0] = np.random.dirichlet(alpha * np.ones((1, K))[0], 1)

    for i in range(1, iterations):
        # z: latent assignments from the (normalised) component responsibilities.
        p_z = -y * lambda_[i - 1] + np.log(pi[i - 1] * lambda_[i - 1])
        p_z = normalise(p_z)
        z = categorical_sample(p_z)

        # lambda_: Gamma full conditional with counts/sums from z.
        n_k = sum(z)
        gam_a = a + n_k
        gam_b = b + sum(z * y)
        lambda_[i] = np.random.gamma(gam_a, 1. / gam_b)

        # pi: Dirichlet full conditional.
        dir_par = alpha + n_k
        pi[i] = np.random.dirichlet(dir_par, 1)

    return lambda_, pi
# Demo of text normalisation.
# Bug fixes: `import normalise` bound the MODULE, so calling `normalise(...)`
# raised TypeError — import the function instead; and the keyword is the
# lowercase `verbose` (keyword arguments are case-sensitive; `Verbose=True`
# raised TypeError, and the rest of this file already uses `verbose=False`).
from normalise import normalise

text = ["Wat u doin ?"]
normalise(text, verbose=True)
def q_z2(y, pi, lambda_):
    """Sample latent assignments z from their full conditional.

    Builds the (log) component responsibilities for observations `y` given
    mixture weights `pi` and rates `lambda_`, normalises them, and draws a
    categorical sample.
    """
    log_weights = np.log(pi * lambda_) - y * lambda_
    probs = normalise(log_weights)
    return categorical_sample(probs)