def generate_words(array, window, shift, file_id, sensor_id, output_file, file_handle):
    file = file_handle
    bucket_size = window
    overlap_count = window - shift
    slider = Slider(bucket_size, overlap_count)
    slider.fit(array)
    sensor_word_tf_map = {}
    #change refactor
    t = 0
    while True:
        window_data = slider.slide()
        if len(window_data) == window:
            file_output_line = str([file_id, sensor_id, t]) + '|' + str(
                list(window_data)) + '\n'
            file.write(file_output_line)
            t += shift
            word = str(list(window_data)).split('[')[1]
            word = word.split(']')[0]
            if word in sensor_word_tf_map:
                sensor_word_tf_map[word] += 1
            else:
                sensor_word_tf_map[word] = 1
        if slider.reached_end_of_list():
            break
    return sensor_word_tf_map
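# Hedged usage sketch for generate_words above. The signal values, ids, and the
# output file name are made-up placeholders, and numpy / window_slider.Slider are
# assumed to be imported as in the surrounding snippets.
import numpy as np

sample_signal = np.array([1, 2, 3, 4, 5, 6, 7, 8])
with open('words_demo.txt', 'w') as fh:
    tf_map = generate_words(sample_signal, window=4, shift=2,
                            file_id=0, sensor_id=0,
                            output_file='words_demo.txt', file_handle=fh)
print(tf_map)  # term-frequency map keyed by the stringified window contents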
def tag_slider(tag_list, bucket_size=10, overlap_count=9):
    window_tag = []
    tag_array = np.array(tag_list)
    if len(tag_array) < bucket_size:
        return [tag_array]
    slider = Slider(bucket_size, overlap_count)
    slider.fit(tag_array)
    while True:
        window_data = slider.slide()
        window_tag.append(window_data)
        if slider.reached_end_of_list():
            return window_tag
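# Minimal sketch of calling tag_slider; the tag values below are invented for
# illustration. With the defaults (bucket_size=10, overlap_count=9) each window
# holds 10 tags and advances by one element at a time.
tags = ['walk', 'walk', 'run', 'run', 'run', 'sit',
        'sit', 'stand', 'walk', 'run', 'sit', 'walk']
tag_windows = tag_slider(tags)
print(len(tag_windows))  # 3 windows for 12 tags
print(tag_windows[0])    # first window of 10 tags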
def generate_words(array, window, shift):
    bucket_size = window
    overlap_count = window - shift
    slider = Slider(bucket_size, overlap_count)
    slider.fit(array)
    #change refactor
    words = {}
    t = 0
    while True:
        window_data = slider.slide()
        if len(window_data) == window:
            # words[t] = ','.join([str(i) for i in window_data])
            t += shift
            words[t] = str(window_data)
        if slider.reached_end_of_list():
            break
    return words
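# Usage sketch for this file-free generate_words variant; the input array is a
# made-up example (numpy and Slider imports assumed as above). Keys are the time
# offsets after each shift, values are the stringified window contents.
import numpy as np

demo = np.arange(10)
word_map = generate_words(demo, window=5, shift=5)
print(word_map)  # two non-overlapping windows -> entries keyed by 5 and 10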
def generate_average_amplitude(array, window, shift, bands, resolution):
    bucket_size = window
    overlap_count = window - shift
    slider = Slider(bucket_size, overlap_count)
    slider.fit(array)
    #change refactor
    window_averages = {}
    symbolic = {}
    t = 0
    while True:
        window_data = slider.slide()
        if len(window_data) == window:
            window_average = calcualate_average(window_data)
            window_averages[t] = window_average
            symbolic[t] = get_quantized_number(window_average, bands, resolution)
            t += shift
        if slider.reached_end_of_list():
            break
    return window_averages, symbolic
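# Sketch of calling generate_average_amplitude. It only runs if the helper
# functions calcualate_average and get_quantized_number are defined elsewhere in
# this project; the bands / resolution values below are placeholders.
import numpy as np

amplitudes = np.array([0.1, 0.4, 0.35, 0.8, 0.2, 0.6, 0.9, 0.05])
averages, symbols = generate_average_amplitude(amplitudes, window=4, shift=2,
                                               bands=4, resolution=0.25)
print(averages)  # mean amplitude per window, keyed by window start offset
print(symbols)   # quantized symbol per window from get_quantized_number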
import pyedflib
import numpy as np
from window_slider import Slider

overlap_ratio = 0.5
window_data_array = []
fname = "C:/Users/user/Desktop/Motor_Imagery_using_EEG/files/S001/S001R"
for j in range(1, 14):
    f = pyedflib.EdfReader(fname + f"{j}.edf")
    n = f.signals_in_file
    signal_labels = f.getSignalLabels()
    sigbufs = np.zeros((14, n, f.getNSamples()[0]))
    for i in np.arange(n):
        sigbufs[j, i, :] = f.readSignal(i)
    signals = sigbufs[j]
    bucket_size = 5000  # length of sliding window
    overlap_count = int(bucket_size * overlap_ratio)  # overlap (must be an int)
    slider = Slider(bucket_size, overlap_count)
    slider.fit(signals)
    while True:
        window_data = slider.slide()
        print(window_data[j])
        if slider.reached_end_of_list():
            window_data_array.append(window_data)
            break
windows = []
dnts = []
G_count = []
C_count = []
tot_count = []
#Accessing each position in the location file and seeking that position in our genome file
for pos1 in pos:
    genome.seek(pos1)
    frame = genome.read(1000)
    res = list(frame)
    res_arr = np.array(res)
    #Using Window Slider to slide the window
    slider = Slider(size, overlap)
    slider.fit(res_arr)
    i = 0
    #The following tasks run until the slider reaches the end of the frame
    while True:
        data = slider.slide()
        Window = str(pos1 + i) + "-" + str(pos1 + i + 1)
        windows.append(Window)
        dnt = ''.join(data)
        dnts.append(dnt)
        #Finding the G, C and total GC counts for each window
        G_pat = 'G'
        C_pat = 'C'
        Gct = dnt.count(G_pat)
for i in range(0, 64):
    emptylist.append(sigbufs[rearrange[i] - 1])
# print(emptylist)
list2.append(emptylist)
list1.append(list2)
#print(list1)

window_data_array = []
list = []
overlap_ratio = 0.5
bucket_size = 5000
overlap_count = int(bucket_size * overlap_ratio)
for x in range(1, 3):
    window_data_list1 = []
    for y in range(1, 15):
        #list = list1[x][y]
        #print(list)
        slider = Slider(bucket_size, overlap_count)
        slider.fit(list1[x][y])
        window_data_list = []
        while True:
            window_data = slider.slide()
            window_data_list.append(window_data)
            if slider.reached_end_of_list():
                break
        window_data_list1.append(window_data_list)
    window_data_array.append(window_data_list1)
print(window_data_array)
def finding_motifs(start_pos, end_pos, gen_id, per_bin, gen_file, motif):
    #Reading the genome sequence file.
    complement = {'A': 'T', 'C': 'G', 'G': 'C', 'T': 'A', 'R': 'Y', 'Y': 'R'}
    genome = open(gen_file, 'r')
    R = ['A', 'G']
    Y = ['C', 'T']
    N = ['A', 'T', 'G', 'C']
    List = []
    reverse_List = []
    #Checking for the presence of an ambiguity character, replacing it with each
    #allowed nucleotide in its place and making a list of the results
    if re.search('N', motif):
        for a in N:
            motif_replaced = motif.replace('N', a)
            List.append(motif_replaced)
    elif re.search('R', motif):
        for a in R:
            motif_replaced = motif.replace('R', a)
            List.append(motif_replaced)
    elif re.search('Y', motif):
        for a in Y:
            motif_replaced = motif.replace('Y', a)
            List.append(motif_replaced)
    else:
        List.append(motif)
    for x in List:
        motif_rev = "".join(complement.get(base, base) for base in reversed(x))
        reverse_List.append(motif_rev)
    reverse = "".join(complement.get(base, base) for base in reversed(motif))
    size = len(motif)
    overlap = size - 1
    total_temp = 0
    total_non_temp = 0
    #Defining arrays
    windows = []
    strings = []
    PAM_count = []
    PAM_rev_count = []
    total_count = []
    average_count = []
    #Iterating through our genome file with the start and end position of each gene,
    #using that gene's bin size as the step
    for start, end, bin_size, gene in zip(start_pos, end_pos, per_bin, gen_id):
        ct = 1
        x = start
        #for x in range(start, end, bin_size):
        while x <= end:
            if ct == 1:
                genome.seek(x)
                frame = genome.read(bin_size + 2)
            else:
                x = x - 2
                genome.seek(x)
                frame = genome.read(bin_size + 2)
            res = list(frame)
            res_arr = np.array(res)
            #Creating window slider of required size and overlap.
            slider = Slider(size, overlap)
            slider.fit(res_arr)
            i = 0
            template = 0
            non_template = 0
            while True:
                data = slider.slide()
                Window = str(x + i) + "-" + str(x + i + (size - 1))
                windows.append(Window)
                string = ''.join(data)
                strings.append(string)
                #Finding PAM motif and printing frequency.
                count = List.count(string)
                PAM_count.append(count)
                non_template = non_template + count
                count_rev = reverse_List.count(string)
                PAM_rev_count.append(count_rev)
                # template = template + count_rev
                i = i + 1
                if slider.reached_end_of_list():
                    dict1 = {
                        'Windows': windows,
                        'PAM': strings,
                        motif: PAM_count,
                        reverse: PAM_rev_count
                    }
                    df = pd.DataFrame.from_dict(dict1)
                    df = df.transpose()
                    print(df)
                    str1 = str(gene) + "_bin" + str(ct)
                    ct = ct + 1
                    df.to_csv(str1)
                    windows = []
                    strings = []
                    PAM_count = []
                    PAM_rev_count = []
                    dict1.clear()
                    x = x + bin_size + 2
                    break
            # total_temp = total_temp + template
            # total_non_temp = total_non_temp + non_template
    count_list = [total_temp, total_non_temp]
    return count_list
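# Hypothetical call to the binned finding_motifs above. Every file name and
# coordinate here is a placeholder; the genome file is assumed to be a plain text
# sequence, and re / numpy / pandas / Slider imports are assumed as elsewhere.
# One CSV per gene bin (named "<gene>_bin<ct>") is written as a side effect.
starts = [100, 5000]
ends = [1100, 6000]
gene_ids = ['geneA', 'geneB']
bin_sizes = [500, 500]
counts = finding_motifs(starts, ends, gene_ids, bin_sizes,
                        'genome_sequence.txt', 'NGG')
print(counts)  # [total_temp, total_non_temp]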
#separate data
data = pd.DataFrame({'x': har[:, 0], 'y': har[:, 1], 'z': har[:, 2]})
data.head()

#sliding window - 1 bucket_size with 0 overlap count
from window_slider import Slider
bucket_size = 1
overlap_count = 0
slider1 = Slider(bucket_size, overlap_count)
slider2 = Slider(bucket_size, overlap_count)
slider3 = Slider(bucket_size, overlap_count)
slider1.fit(data['x'].values)
slider2.fit(data['y'].values)
slider3.fit(data['z'].values)
i = 1
while True:
    acc1 = slider1.slide()
    acc2 = slider2.slide()
    acc3 = slider3.slide()
    arr = np.array([acc1, acc2, acc3])
    mag = norm(arr)
    # write to csv-file.
    with open('Tot_BodyMag_Freq_51.14.csv', 'a', newline='') as f:
        writer = csv.writer(f)
        if (i == 1):
            writer.writerow(["blank", "AccMag"])
#plt.title("Acc_x")
#plt.show()

#Sliding window - 1 bucket_size with 0 overlap count.
from window_slider import Slider
bucket_size = 1
overlap_count = 0
slider1 = Slider(bucket_size, overlap_count)
slider2 = Slider(bucket_size, overlap_count)
slider3 = Slider(bucket_size, overlap_count)
slider4 = Slider(bucket_size, overlap_count)
slider5 = Slider(bucket_size, overlap_count)
slider6 = Slider(bucket_size, overlap_count)
slider7 = Slider(bucket_size, overlap_count)
slider1.fit(filtx)
slider2.fit(filty)
slider3.fit(filtz)
slider4.fit(data['acc_x'].values)
slider5.fit(data['acc_y'].values)
slider6.fit(data['acc_z'].values)
slider7.fit(data['time'].values)
i = 1
while True:
    fx = slider1.slide()
    fy = slider2.slide()
    fz = slider3.slide()
    rx = slider4.slide()
    ry = slider5.slide()
har = list(csv.reader(file))
#first_row = np.array(har[0:1], dtype=np.string)
har = np.array(har[1:], dtype=float)

#separate data
data = pd.DataFrame({'mag': har[:, 0]})
data.head()

#Sliding window - 128 bucket_size with 64 overlap count.
from window_slider import Slider
bucket_size = 128
overlap_count = 64
slider1 = Slider(bucket_size, overlap_count)
slider1.fit(data['mag'])
i = 1
while True:
    x = slider1.slide()
    #Calculate values for csv-file
    meanx = st.mean(x)
    mad1x = pd.Series(x)
    madx = mad1x.mad()
    maxx = max(x)
    minx = min(x)
    stdx = st.stdev(x)
    iqx = iqr(x)
    #Calculate signal entropy
    sx = pd.Series(x)
with open('Tot_Gyro_Freq_51.14.csv', 'r') as file:
    har = list(csv.reader(file))
har = np.array(har[1:], dtype=float)

#separate data
data = pd.DataFrame({'x': har[:, 0], 'y': har[:, 1], 'z': har[:, 2]})
data.head()

#Sliding window - 128 bucket_size with 64 overlap count
from window_slider import Slider
bucket_size = 128
overlap_count = 64
slider1 = Slider(bucket_size, overlap_count)
slider2 = Slider(bucket_size, overlap_count)
slider3 = Slider(bucket_size, overlap_count)
slider1.fit(data['x'])
slider2.fit(data['y'])
slider3.fit(data['z'])
i = 1
while True:
    x = slider1.slide()
    y = slider2.slide()
    z = slider3.slide()
    #FFT
    fft_x = abs(np.fft.rfft(x))
    fft_y = abs(np.fft.rfft(y))
    fft_z = abs(np.fft.rfft(z))
    fft_x_freq = np.fft.rfftfreq(x.size, d=1. / sample_rate)
    fft_y_freq = np.fft.rfftfreq(y.size, d=1. / sample_rate)
print("feature imp", feature_imp) sns.barplot(x=feature_imp, y=feature_imp.index) # Add labels to your graph plt.xlabel('Feature Importance Score') plt.ylabel('Features') plt.title("Visualizing Important Features") plt.legend() plt.show() #write feature importance to file from window_slider import Slider bucket_size = 50 overlap_count = 0 slider1 = Slider(bucket_size, overlap_count) slider1.fit(feature_imp) while True: x = slider1.slide() print(x) if slider1.reached_end_of_list(): break #create a confusion matrix from sklearn.metrics import confusion_matrix conf_mat = confusion_matrix(y_test, y_pred) print(conf_mat) #Import scikit-learn metrics module for accuracy calculation from sklearn import metrics # Model Accuracy, how often is the classifier correct?
    'time': har[:, 6]
})
data.head()

from window_slider import Slider
bucket_size = 1
overlap_count = 0
slider1 = Slider(bucket_size, overlap_count)
slider2 = Slider(bucket_size, overlap_count)
slider3 = Slider(bucket_size, overlap_count)
slider4 = Slider(bucket_size, overlap_count)
slider5 = Slider(bucket_size, overlap_count)
slider6 = Slider(bucket_size, overlap_count)
slider7 = Slider(bucket_size, overlap_count)
slider1.fit(data['acc_x'].values)
slider2.fit(data['acc_y'].values)
slider3.fit(data['acc_z'].values)
slider4.fit(data['grav_x'].values)
slider5.fit(data['grav_y'].values)
slider6.fit(data['grav_z'].values)
slider7.fit(data['time'].values)
i = 1
while True:
    gx = slider1.slide()
    gy = slider2.slide()
    gz = slider3.slide()
    ax = slider4.slide()
    ay = slider5.slide()
    az = slider6.slide()
def finding_motifs(gene_positions, gen_file, motif):
    #Reading the genome sequence file.
    complement = {'A': 'T', 'C': 'G', 'G': 'C', 'T': 'A', 'R': 'Y', 'Y': 'R'}
    genome = open(gen_file, 'r')
    R = ['A', 'G']
    Y = ['C', 'T']
    N = ['A', 'T', 'G', 'C']
    List = []
    reverse_List = []
    #Checking for the presence of an ambiguity character, replacing it with each
    #allowed nucleotide in its place and making a list of the results
    if re.search('N', motif):
        for a in N:
            motif_replaced = motif.replace('N', a)
            List.append(motif_replaced)
    elif re.search('R', motif):
        for a in R:
            motif_replaced = motif.replace('R', a)
            List.append(motif_replaced)
    elif re.search('Y', motif):
        for a in Y:
            motif_replaced = motif.replace('Y', a)
            List.append(motif_replaced)
    else:
        List.append(motif)
    print(List)
    for x in List:
        motif_rev = "".join(complement.get(base, base) for base in reversed(x))
        reverse_List.append(motif_rev)
    print(reverse_List)
    reverse = "".join(complement.get(base, base) for base in reversed(motif))
    size = len(motif)
    overlap = size - 1
    total_temp = 0
    total_non_temp = 0
    #Defining arrays
    windows = []
    strings = []
    PAM_count = []
    PAM_rev_count = []
    total_count = []
    average_count = []
    #Finding the start positions, creating a 1000 bp frame, and storing it as an array.
    final = 0
    for pos in gene_positions:
        genome.seek(pos - 1)
        frame = genome.read(1000)
        res = list(frame)
        res_arr = np.array(res)
        #Creating window slider of required size and overlap.
        slider = Slider(size, overlap)
        slider.fit(res_arr)
        i = 0
        template = 0
        non_template = 0
        while True:
            data = slider.slide()
            Window = str(pos + i) + "-" + str(pos + i + (size - 1))
            windows.append(Window)
            string = ''.join(data)
            strings.append(string)
            #Finding PAM motif and printing frequency.
            count = List.count(string)
            PAM_count.append(count)
            non_template = non_template + count
            count_rev = reverse_List.count(string)
            PAM_rev_count.append(count_rev)
            template = template + count_rev
            i = i + 1
            if slider.reached_end_of_list():
                dict1 = {'Windows': windows, 'PAM': strings,
                         motif: PAM_count, reverse: PAM_rev_count}
                df = pd.DataFrame.from_dict(dict1)
                df = df.transpose()
                print(df)
                str1 = "gene" + str(pos)
                df.to_csv(str1)
                windows = []
                strings = []
                PAM_count = []
                PAM_rev_count = []
                dict1.clear()
                break
        total_temp = total_temp + template
        total_non_temp = total_non_temp + non_template
    count_list = [total_temp, total_non_temp]
    return count_list
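# Hedged example for this position-based finding_motifs variant; the start
# positions and genome file name are placeholders for a real genome file and gene
# start coordinates, and the function writes one CSV per position ("gene<pos>")
# as a side effect.
positions = [1, 2001, 4001]
strand_counts = finding_motifs(positions, 'genome_sequence.txt', 'NGG')
print(strand_counts)  # [total template-strand hits, total non-template-strand hits]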