예제 #1
0
파일: phase1.py 프로젝트: xavikiran/cse515
def generate_words(array, window, shift, file_id, sensor_id, output_file,
                   file_handle):
    """Slide a window over ``array``, log each full window and count words.

    For every window whose length equals ``window`` a line of the form
    ``[file_id, sensor_id, t]|[v0, v1, ...]`` is written to ``file_handle``;
    ``t`` starts at 0 and grows by ``shift`` after each written window.

    Args:
        array: Data handed to ``Slider.fit``.
        window: Window length, used as the slider's bucket size.
        shift: Step between windows; the overlap is ``window - shift``.
        file_id: First element of the prefix written on each line.
        sensor_id: Second element of the prefix written on each line.
        output_file: Not used by this function.
        file_handle: Object whose ``write`` method receives the lines.

    Returns:
        dict mapping each word to the number of windows that produced it.
        A word is the stringified window cut after its first ``[`` and
        before the next ``]`` (for a flat window: its comma-separated values).
    """
    slider = Slider(window, window - shift)
    slider.fit(array)

    word_counts = {}
    offset = 0
    finished = False
    while not finished:
        values = slider.slide()
        # Windows shorter than requested (e.g. a trailing remainder) are skipped.
        if len(values) == window:
            rendered = str(list(values))
            prefix = str([file_id, sensor_id, offset])
            file_handle.write(prefix + '|' + rendered + '\n')
            offset += shift
            word = rendered.split('[')[1].split(']')[0]
            word_counts[word] = word_counts.get(word, 0) + 1
        finished = slider.reached_end_of_list()
    return word_counts
예제 #2
0
def tag_slider(tag_list, bucket_size=10, overlap_count=9):
    """Cut ``tag_list`` into sliding windows.

    Args:
        tag_list: Sequence of tags; converted to a NumPy array.
        bucket_size: Window length passed to ``Slider``.
        overlap_count: Overlap between consecutive windows passed to ``Slider``.

    Returns:
        list of windows in the order the slider produced them. When the input
        is shorter than ``bucket_size`` the list holds the whole array as its
        only element.
    """
    tags = np.array(tag_list)
    # Too short to fill a single window: hand back everything as one window.
    if len(tags) < bucket_size:
        return [tags]

    slider = Slider(bucket_size, overlap_count)
    slider.fit(tags)
    collected = [slider.slide()]
    while not slider.reached_end_of_list():
        collected.append(slider.slide())
    return collected
예제 #3
0
def generate_words(array, window, shift):
    """Map window offsets to the string form of each full-length window.

    Args:
        array: Data handed to ``Slider.fit``.
        window: Window length, used as the slider's bucket size.
        shift: Step between windows; the overlap is ``window - shift``.

    Returns:
        dict whose keys are running offsets (multiples of ``shift``) and whose
        values are ``str(window_data)`` for every window of length ``window``.
    """
    slider = Slider(window, window - shift)
    slider.fit(array)

    words = {}
    offset = 0
    finished = False
    while not finished:
        chunk = slider.slide()
        if len(chunk) == window:
            # NOTE(review): the offset is advanced before it is used as a key,
            # so the first key is ``shift`` rather than 0 -- confirm intended.
            offset += shift
            words[offset] = str(chunk)
        finished = slider.reached_end_of_list()
    return words
예제 #4
0
def generate_average_amplitude(array, window, shift, bands, resolution):
    """Compute a per-window value and its quantized symbol.

    Args:
        array: Data handed to ``Slider.fit``.
        window: Window length, used as the slider's bucket size.
        shift: Step between windows; the overlap is ``window - shift``.
        bands: Forwarded unchanged to ``get_quantized_number``.
        resolution: Forwarded unchanged to ``get_quantized_number``.

    Returns:
        Tuple ``(averages, symbols)`` of two dicts keyed by the window offset
        (0, ``shift``, ``2 * shift``, ...). ``averages`` holds the result of
        ``calcualate_average`` for each full-length window; ``symbols`` holds
        the matching result of ``get_quantized_number``.
    """
    slider = Slider(window, window - shift)
    slider.fit(array)

    averages = {}
    symbols = {}
    offset = 0
    finished = False
    while not finished:
        chunk = slider.slide()
        # Only windows of exactly the requested length are evaluated.
        if len(chunk) == window:
            window_value = calcualate_average(chunk)
            averages[offset] = window_value
            symbols[offset] = get_quantized_number(window_value, bands,
                                                   resolution)
            offset += shift
        finished = slider.reached_end_of_list()
    return averages, symbols
예제 #5
0
import pyedflib
import numpy as np
from window_slider import Slider

# Read EDF recordings 1..13 of subject S001, slide a window over each
# recording's signals and keep the final window of every recording.
overlap_ratio = 0.5

window_data_array = []
fname = "C:/Users/user/Desktop/Motor_Imagery_using_EEG/files/S001/S001R"
bucket_size = 5000  # length of sliding window
# bucket_size * overlap_ratio is a float; the slider needs a whole number of
# samples, so convert it once here.
overlap_count = int(bucket_size * overlap_ratio)  # overlap

for j in range(1, 14):
    # The context manager closes the EDF file handle even if a read fails.
    with pyedflib.EdfReader(fname + f"{j}.edf") as f:
        n = f.signals_in_file
        signal_labels = f.getSignalLabels()

        # Only row j of the first axis is filled for this recording.
        sigbufs = np.zeros((14, n, f.getNSamples()[0]))
        for i in range(n):
            sigbufs[j, i, :] = f.readSignal(i)

    signals = sigbufs[j]  # avoids rebinding the builtin name ``list``
    slider = Slider(bucket_size, overlap_count)
    slider.fit(signals)
    while True:
        window_data = slider.slide()
        print(window_data[j])
        if slider.reached_end_of_list():
            # NOTE(review): only the last window of each recording is stored;
            # confirm that earlier windows are meant to be discarded.
            window_data_array.append(window_data)
            break
예제 #6
0
# Accumulators filled while scanning the genome. Only ``windows`` and ``dnts``
# are appended to in the part of the script visible here.
windows = []
dnts = []
G_count = []
C_count = []
tot_count = []

# For each position from the location file, seek to it in the genome file and
# read a 1000-character frame. ``pos``, ``genome``, ``size`` and ``overlap``
# are defined outside this fragment.
for pos1 in pos:
    genome.seek(pos1)
    frame = genome.read(1000)
    res = list(frame)
    res_arr = np.array(res)

    # Slide a window of ``size`` characters with ``overlap`` over the frame.
    slider = Slider(size, overlap)
    slider.fit(res_arr)
    i = 0

    # Process one window per iteration. The loop exit and any update of ``i``
    # are not part of the visible fragment.
    while True:
        data = slider.slide()
        # Label of the form "<start>-<start + 1>" built from pos1 and i.
        Window = str(pos1 + i) + "-" + str(pos1 + i + 1)
        windows.append(Window)
        dnt = ''.join(data)
        dnts.append(dnt)

        # Find the G, C and total GC counts for each window.
        G_pat = 'G'
        C_pat = 'C'

        Gct = dnt.count(G_pat)
예제 #7
0
        for i in range(0, 64):
            emptylist.append(sigbufs[rearrange[i] - 1])
            # print(emptylist)
        list2.append(emptylist)
    list1.append(list2)

#print(list1)

# Build a nested list of sliding windows:
#   window_data_array[x_index][y_index] -> list of windows for list1[x][y].
window_data_array = []
overlap_ratio = 0.5
bucket_size = 5000
overlap_count = int(bucket_size * overlap_ratio)

# NOTE(review): both ranges start at 1, so list1[0] and list1[x][0] are never
# processed -- confirm that index 0 is meant to be skipped.
for x in range(1, 3):
    window_data_list1 = []
    for y in range(1, 15):
        slider = Slider(bucket_size, overlap_count)
        slider.fit(list1[x][y])
        window_data_list = []
        while True:
            window_data = slider.slide()
            # Collect per-signal windows here; appending them straight to
            # window_data_array would leave every per-signal list empty.
            window_data_list.append(window_data)
            if slider.reached_end_of_list():
                break
        window_data_list1.append(window_data_list)
    window_data_array.append(window_data_list1)

print(window_data_array)
예제 #8
0
def finding_motifs(start_pos, end_pos, gen_id, per_bin, gen_file, motif):
    """Count a motif and its reverse complement in per-gene bins of a genome.

    For every gene, chunks of ``bin_size + 2`` characters are read from
    ``gen_file`` between the gene's start and end offsets. Each chunk is
    scanned with a sliding window of ``len(motif)`` characters and an overlap
    of ``len(motif) - 1``. The per-window hit table is printed and written to
    a CSV file named ``<gene>_bin<n>`` in the current directory.

    Args:
        start_pos: Iterable of start offsets (passed to ``seek``), one per gene.
        end_pos: Iterable of end offsets, one per gene.
        gen_id: Iterable of gene identifiers used in the CSV file names.
        per_bin: Iterable of bin sizes, one per gene.
        gen_file: Path of the genome sequence file.
        motif: Motif to search for. The first ambiguity code present among
            ``N``, ``R`` and ``Y`` (checked in that order) is expanded into
            its concrete bases; other codes in the same motif stay untouched.

    Returns:
        ``[total_temp, total_non_temp]``: the number of windows equal to a
        reverse-complement variant and to a forward variant respectively,
        summed over all bins of all genes.
    """
    complement = {'A': 'T', 'C': 'G', 'G': 'C', 'T': 'A', 'R': 'Y', 'Y': 'R'}
    ambiguity_codes = (
        ('N', ['A', 'T', 'G', 'C']),
        ('R', ['A', 'G']),
        ('Y', ['C', 'T']),
    )

    # Only the first ambiguity code found is expanded.
    forward_motifs = [motif]
    for code, bases in ambiguity_codes:
        if code in motif:
            forward_motifs = [motif.replace(code, base) for base in bases]
            break

    reverse_motifs = [
        "".join(complement.get(base, base) for base in reversed(variant))
        for variant in forward_motifs
    ]
    # Column label for the reverse-strand counts (ambiguity codes kept).
    reverse = "".join(complement.get(base, base) for base in reversed(motif))

    size = len(motif)
    overlap = size - 1
    total_temp = 0
    total_non_temp = 0

    # The context manager guarantees the genome file is closed on exit.
    with open(gen_file, 'r') as genome:
        for start, end, bin_size, gene in zip(start_pos, end_pos, per_bin,
                                              gen_id):
            ct = 1
            x = start
            while x <= end:
                # Every bin after the first starts two characters earlier
                # than where the previous read stopped.
                if ct > 1:
                    x = x - 2
                genome.seek(x)
                frame = genome.read(bin_size + 2)

                slider = Slider(size, overlap)
                slider.fit(np.array(list(frame)))

                windows = []
                strings = []
                PAM_count = []
                PAM_rev_count = []
                # Per-bin tallies; they must exist before the first window
                # is counted.
                template = 0
                non_template = 0
                i = 0
                while True:
                    data = slider.slide()
                    windows.append(str(x + i) + "-" + str(x + i + (size - 1)))
                    string = ''.join(data)
                    strings.append(string)

                    count = forward_motifs.count(string)
                    PAM_count.append(count)
                    non_template = non_template + count

                    count_rev = reverse_motifs.count(string)
                    PAM_rev_count.append(count_rev)
                    template = template + count_rev
                    i = i + 1

                    if slider.reached_end_of_list():
                        break

                df = pd.DataFrame.from_dict({
                    'Windows': windows,
                    'PAM': strings,
                    motif: PAM_count,
                    reverse: PAM_rev_count,
                })
                df = df.transpose()
                print(df)
                df.to_csv(str(gene) + "_bin" + str(ct))

                ct = ct + 1
                x = x + bin_size + 2
                total_temp = total_temp + template
                total_non_temp = total_non_temp + non_template

    return [total_temp, total_non_temp]
예제 #9
0
# Separate the first three columns of ``har`` into named columns.
# ``har`` is defined outside this fragment.
data=pd.DataFrame({
    'x': har[ :,0],
    'y': har[ :,1],
    'z': har[ :,2]
})
data.head()

# Sliding window - bucket_size 1 with overlap count 0, one slider per axis.
from window_slider import Slider
bucket_size = 1
overlap_count = 0
slider1 = Slider(bucket_size, overlap_count)
slider2 = Slider(bucket_size, overlap_count)
slider3 = Slider(bucket_size, overlap_count)
slider1.fit(data['x'].values)
slider2.fit(data['y'].values)
slider3.fit(data['z'].values)
i = 1
# The loop exit and any update of ``i`` are not part of the visible fragment.
while True:
  # Take the next window from each axis and compute the norm of the triple.
  acc1 = slider1.slide()
  acc2 = slider2.slide()
  acc3 = slider3.slide()
  arr = np.array([acc1, acc2, acc3])
  mag = norm(arr)

  # Append to the CSV file; the header row is written only while i == 1.
  with open('Tot_BodyMag_Freq_51.14.csv', 'a', newline='' ) as f:
     writer = csv.writer( f )
     if (i==1):
        writer.writerow(["blank","AccMag"])
#plt.title("Acc_x")
#plt.show()

# Sliding window - bucket_size 1 with overlap count 0 (the values set below).
from window_slider import Slider
bucket_size = 1
overlap_count = 0
# One slider per input series, all with the same window settings.
slider1 = Slider(bucket_size, overlap_count)
slider2 = Slider(bucket_size, overlap_count)
slider3 = Slider(bucket_size, overlap_count)
slider4 = Slider(bucket_size, overlap_count)
slider5 = Slider(bucket_size, overlap_count)
slider6 = Slider(bucket_size, overlap_count)
slider7 = Slider(bucket_size, overlap_count)

# Sliders 1-3 read filtx/filty/filtz (defined outside this fragment);
# sliders 4-7 read the acc_x/acc_y/acc_z/time columns of ``data``.
slider1.fit(filtx)
slider2.fit(filty)
slider3.fit(filtz)
slider4.fit(data['acc_x'].values)
slider5.fit(data['acc_y'].values)
slider6.fit(data['acc_z'].values)
slider7.fit(data['time'].values)

i = 1

# Advance the sliders in lockstep; the rest of the loop body and its exit
# are not part of the visible fragment.
while True:
    fx = slider1.slide()
    fy = slider2.slide()
    fz = slider3.slide()
    rx = slider4.slide()
    ry = slider5.slide()
 har = list(csv.reader(file))
 #first_row = np.array(har[0:1], dtype=np.string)
 har = np.array(har[1:], dtype=np.float)

# Separate the first column of ``har`` into a single 'mag' column.
data=pd.DataFrame({
    'mag': har[ :,0]
})
data.head()

# Sliding window - 128 bucket_size with 64 overlap count.
from window_slider import Slider
bucket_size = 128
overlap_count = 64
slider1 = Slider(bucket_size,overlap_count)
slider1.fit(data['mag'])
i = 1
# The loop exit is not part of the visible fragment.
while True:
    x = slider1.slide()

    # Calculate the per-window values for the CSV file.
    # ``st`` and ``iqr`` are imported outside this fragment.
    meanx = st.mean(x)
    # NOTE(review): Series.mad() was removed in pandas 2.0 -- confirm the
    # pandas version this script is expected to run on.
    mad1x = pd.Series(x)
    madx = mad1x.mad()
    maxx = max(x)
    minx = min(x)
    stdx = st.stdev(x)
    iqx = iqr(x)

    # Calculate signal entropy
    sx = pd.Series(x)
예제 #12
0
# Load the gyroscope CSV, drop the header row and convert the rest to floats.
with open('Tot_Gyro_Freq_51.14.csv', 'r') as file:
    har = list(csv.reader(file))
    # The ``np.float`` alias no longer exists in current NumPy; the builtin
    # ``float`` selects the same float64 dtype.
    har = np.array(har[1:], dtype=float)

# Separate the first three columns into named axes.
data = pd.DataFrame({'x': har[:, 0], 'y': har[:, 1], 'z': har[:, 2]})
data.head()

# Sliding window - 128 bucket_size with 64 overlap count, one slider per axis.
from window_slider import Slider
bucket_size = 128
overlap_count = 64
slider1 = Slider(bucket_size, overlap_count)
slider2 = Slider(bucket_size, overlap_count)
slider3 = Slider(bucket_size, overlap_count)
slider1.fit(data['x'])
slider2.fit(data['y'])
slider3.fit(data['z'])
i = 1
# The loop exit is not part of the visible fragment.
while True:
    x = slider1.slide()
    y = slider2.slide()
    z = slider3.slide()

    # FFT: magnitude of the real-input FFT of each axis window.
    fft_x = abs(np.fft.rfft(x))
    fft_y = abs(np.fft.rfft(y))
    fft_z = abs(np.fft.rfft(z))

    # Frequency bins matching the rfft output; ``sample_rate`` is defined
    # outside this fragment.
    fft_x_freq = np.fft.rfftfreq(x.size, d=1. / sample_rate)
    fft_y_freq = np.fft.rfftfreq(y.size, d=1. / sample_rate)
# Show the feature importances as text and as a horizontal bar plot.
# ``feature_imp`` is defined outside this fragment; its ``.index`` supplies
# the bar labels.
print("feature imp", feature_imp)
sns.barplot(x=feature_imp, y=feature_imp.index)
# Add labels to the graph
plt.xlabel('Feature Importance Score')
plt.ylabel('Features')
plt.title("Visualizing Important Features")
# NOTE(review): no labelled artists are created in the visible code, so this
# legend may be empty -- confirm it is needed.
plt.legend()
plt.show()

# Print the feature importances in chunks of 50 entries (nothing is written
# to a file in this fragment).

from window_slider import Slider
bucket_size = 50
overlap_count = 0
slider1 = Slider(bucket_size, overlap_count)
slider1.fit(feature_imp)

while True:
    x = slider1.slide()
    print(x)
    if slider1.reached_end_of_list(): break

# Create and print a confusion matrix from y_test and y_pred (both defined
# outside this fragment).
from sklearn.metrics import confusion_matrix
conf_mat = confusion_matrix(y_test, y_pred)
print(conf_mat)

#Import scikit-learn metrics module for accuracy calculation
from sklearn import metrics

# Model Accuracy, how often is the classifier correct?
    'time': har[:, 6]
})
data.head()

# Sliding window - bucket_size 1 with overlap count 0, one slider per column.
from window_slider import Slider
bucket_size = 1
overlap_count = 0
slider1 = Slider(bucket_size, overlap_count)
slider2 = Slider(bucket_size, overlap_count)
slider3 = Slider(bucket_size, overlap_count)
slider4 = Slider(bucket_size, overlap_count)
slider5 = Slider(bucket_size, overlap_count)
slider6 = Slider(bucket_size, overlap_count)
slider7 = Slider(bucket_size, overlap_count)

# Sliders 1-3 read the acc_* columns, 4-6 the grav_* columns, 7 the time column.
slider1.fit(data['acc_x'].values)
slider2.fit(data['acc_y'].values)
slider3.fit(data['acc_z'].values)
slider4.fit(data['grav_x'].values)
slider5.fit(data['grav_y'].values)
slider6.fit(data['grav_z'].values)
slider7.fit(data['time'].values)
i = 1

# NOTE(review): gx/gy/gz are taken from the acc_* sliders and ax/ay/az from
# the grav_* sliders -- the names look swapped; confirm against later use.
# The rest of the loop body and its exit are not part of the visible fragment.
while True:
    gx = slider1.slide()
    gy = slider2.slide()
    gz = slider3.slide()
    ax = slider4.slide()
    ay = slider5.slide()
    az = slider6.slide()
예제 #15
0
def finding_motifs(gene_positions, gen_file, motif):
    """Count a motif and its reverse complement in a 1000-character frame per gene.

    For every position a 1000-character frame starting at ``pos - 1`` is read
    from ``gen_file`` and scanned with a sliding window of ``len(motif)``
    characters and an overlap of ``len(motif) - 1``. The per-window hit table
    is printed and written to a CSV file named ``gene<pos>`` in the current
    directory. The expanded forward and reverse motif lists are printed once.

    Args:
        gene_positions: Iterable of gene start positions.
        gen_file: Path of the genome sequence file.
        motif: Motif to search for. The first ambiguity code present among
            ``N``, ``R`` and ``Y`` (checked in that order) is expanded into
            its concrete bases; other codes in the same motif stay untouched.

    Returns:
        ``[total_temp, total_non_temp]``: the number of windows equal to a
        reverse-complement variant and to a forward variant respectively,
        summed over all positions.
    """
    complement = {'A': 'T', 'C': 'G', 'G': 'C', 'T': 'A', 'R': 'Y', 'Y': 'R'}
    ambiguity_codes = (
        ('N', ['A', 'T', 'G', 'C']),
        ('R', ['A', 'G']),
        ('Y', ['C', 'T']),
    )

    # Only the first ambiguity code found is expanded.
    forward_motifs = [motif]
    for code, bases in ambiguity_codes:
        if code in motif:
            forward_motifs = [motif.replace(code, base) for base in bases]
            break
    print(forward_motifs)

    reverse_motifs = [
        "".join(complement.get(base, base) for base in reversed(variant))
        for variant in forward_motifs
    ]
    print(reverse_motifs)
    # Column label for the reverse-strand counts (ambiguity codes kept).
    reverse = "".join(complement.get(base, base) for base in reversed(motif))

    size = len(motif)
    overlap = size - 1
    total_temp = 0
    total_non_temp = 0

    # The context manager guarantees the genome file is closed on exit.
    with open(gen_file, 'r') as genome:
        for pos in gene_positions:
            # Read the 1000-character frame for this gene.
            genome.seek(pos - 1)
            frame = genome.read(1000)

            slider = Slider(size, overlap)
            slider.fit(np.array(list(frame)))

            windows = []
            strings = []
            PAM_count = []
            PAM_rev_count = []
            template = 0
            non_template = 0
            i = 0
            while True:
                data = slider.slide()
                windows.append(str(pos + i) + "-" + str(pos + i + (size - 1)))
                string = ''.join(data)
                strings.append(string)

                # Forward-strand hits count as non-template, reverse as template.
                count = forward_motifs.count(string)
                PAM_count.append(count)
                non_template = non_template + count

                count_rev = reverse_motifs.count(string)
                PAM_rev_count.append(count_rev)
                template = template + count_rev
                i = i + 1

                if slider.reached_end_of_list():
                    break

            df = pd.DataFrame.from_dict({
                'Windows': windows,
                'PAM': strings,
                motif: PAM_count,
                reverse: PAM_rev_count,
            })
            df = df.transpose()
            print(df)
            df.to_csv("gene" + str(pos))

            total_temp = total_temp + template
            total_non_temp = total_non_temp + non_template

    return [total_temp, total_non_temp]