Exemplo n.º 1
0
def generate_words(array, window, shift, file_id, sensor_id, output_file,
                   file_handle):
    """Log every full sliding window of ``array`` and count identical windows.

    A ``Slider`` with bucket size ``window`` and overlap ``window - shift``
    walks over ``array``. Each window whose length equals ``window`` is
    written to ``file_handle`` as one line of the form
    ``[file_id, sensor_id, t]|[v1, v2, ...]`` where ``t`` starts at 0 and
    grows by ``shift`` per written window. Shorter windows are skipped.

    Args:
        array: Data handed to ``Slider.fit``.
        window: Number of samples per window.
        shift: Step between consecutive windows.
        file_id: Identifier written at the start of each output line.
        sensor_id: Identifier written at the start of each output line.
        output_file: Not used by this function; kept for call compatibility.
        file_handle: Object with a ``write`` method that receives the lines.

    Returns:
        dict mapping each window "word" (the text between the first ``[`` and
        the following ``]`` of the window's list representation) to the
        number of times it occurred.
    """
    slider = Slider(window, window - shift)
    slider.fit(array)

    word_counts = {}
    offset = 0
    finished = False
    while not finished:
        window_data = slider.slide()
        if len(window_data) == window:
            # Render the window once; the same text feeds both the log line
            # and the word key.
            rendered = str(list(window_data))
            header = str([file_id, sensor_id, offset])
            file_handle.write(header + '|' + rendered + '\n')
            offset += shift

            word = rendered.split('[')[1].split(']')[0]
            word_counts[word] = word_counts.get(word, 0) + 1
        finished = slider.reached_end_of_list()
    return word_counts
Exemplo n.º 2
0
def tag_slider(tag_list, bucket_size=10, overlap_count=9):
    """Split ``tag_list`` into sliding windows.

    Args:
        tag_list: Sequence of tags; converted to a NumPy array first.
        bucket_size: Window length passed to ``Slider``.
        overlap_count: Overlap between consecutive windows passed to
            ``Slider``.

    Returns:
        A list of windows as returned by ``Slider.slide``. When the input
        holds fewer than ``bucket_size`` tags, the list contains the whole
        array as its only element.
    """
    tags = np.array(tag_list)
    # Too short for even one full window: hand back everything as one chunk.
    if len(tags) < bucket_size:
        return [tags]

    window_source = Slider(bucket_size, overlap_count)
    window_source.fit(tags)

    collected = []
    exhausted = False
    while not exhausted:
        collected.append(window_source.slide())
        exhausted = window_source.reached_end_of_list()
    return collected
Exemplo n.º 3
0
def generate_words(array, window, shift):
    """Collect the string form of every full sliding window of ``array``.

    Args:
        array: Data handed to ``Slider.fit``.
        window: Number of samples per window.
        shift: Step between consecutive windows.

    Returns:
        dict mapping a running offset to ``str(window_data)`` for each
        window whose length equals ``window``. Shorter windows are skipped.
    """
    slider = Slider(window, window - shift)
    slider.fit(array)

    words = {}
    offset = 0
    finished = False
    while not finished:
        chunk = slider.slide()
        if len(chunk) == window:
            # NOTE(review): the offset is advanced before it is used as a key,
            # so the first window is stored under ``shift`` rather than 0.
            # Confirm with callers whether that is intended.
            offset += shift
            words[offset] = str(chunk)
        finished = slider.reached_end_of_list()
    return words
Exemplo n.º 4
0
def generate_average_amplitude(array, window, shift, bands, resolution):
    """Compute a per-window average and its quantized symbol.

    A ``Slider`` with bucket size ``window`` and overlap ``window - shift``
    walks over ``array``. Only windows whose length equals ``window`` are
    processed.

    Args:
        array: Data handed to ``Slider.fit``.
        window: Number of samples per window.
        shift: Step between consecutive windows; also the key increment.
        bands: Forwarded unchanged to ``get_quantized_number``.
        resolution: Forwarded unchanged to ``get_quantized_number``.

    Returns:
        Tuple ``(averages, symbols)`` of two dicts keyed by 0, ``shift``,
        ``2 * shift``, ...: the value returned by ``calcualate_average`` for
        each window, and the corresponding ``get_quantized_number`` result.
    """
    slider = Slider(window, window - shift)
    slider.fit(array)

    averages = {}
    symbols = {}
    offset = 0
    while True:
        chunk = slider.slide()
        if len(chunk) == window:
            # ``calcualate_average`` is the helper's actual (misspelled) name.
            chunk_average = calcualate_average(chunk)
            averages[offset] = chunk_average
            symbols[offset] = get_quantized_number(chunk_average, bands,
                                                   resolution)
            offset += shift

        if slider.reached_end_of_list():
            return averages, symbols
Exemplo n.º 5
0
import pyedflib
import numpy as np
from window_slider import Slider

# Fraction of each window that is shared with the following window.
overlap_ratio = 0.5

# Receives the final window produced for each recording.
window_data_array = []
fname = "C:/Users/user/Desktop/Motor_Imagery_using_EEG/files/S001/S001R"
for j in range(1, 14):
    # Use the reader as a context manager so the EDF file handle is released
    # as soon as its signals have been copied into memory.
    with pyedflib.EdfReader(fname + f"{j}.edf") as f:
        n = f.signals_in_file
        signal_labels = f.getSignalLabels()

        # Only slice ``j`` of the first axis is filled for this recording.
        sigbufs = np.zeros((14, n, f.getNSamples()[0]))
        for i in np.arange(n):
            sigbufs[j, i, :] = f.readSignal(i)

    # Named so that the builtin ``list`` is not shadowed at module level.
    channel_signals = sigbufs[j]
    bucket_size = 5000  # length of sliding window
    # The slider uses this value in slice arithmetic, so it must be an int;
    # ``bucket_size * overlap_ratio`` alone would be a float.
    overlap_count = int(bucket_size * overlap_ratio)
    slider = Slider(bucket_size, overlap_count)
    slider.fit(channel_signals)
    while True:
        window_data = slider.slide()
        print(window_data[j])
        if slider.reached_end_of_list():
            # NOTE(review): only the last window of each recording is kept;
            # confirm whether every window was meant to be collected.
            window_data_array.append(window_data)
            break
Exemplo n.º 6
0
def finding_motifs(start_pos, end_pos, gen_id, per_bin, gen_file, motif):
    """Scan each gene bin by bin for a motif and write one hit table per bin.

    For every gene the genome file is read in frames of ``bin_size + 2``
    characters. Each frame is scanned with a ``Slider`` window of
    ``len(motif)`` characters and an overlap of ``len(motif) - 1``. Every
    window is compared with the expanded motif and with the reverse
    complements of the expanded motif. For each bin a table with the window
    coordinates, the window text and both hit counts is printed and written
    with ``DataFrame.to_csv`` to a file named ``<gene>_bin<n>``.

    Args:
        start_pos: Offsets (as passed to ``file.seek``) where each gene starts.
        end_pos: Upper bound of the scan for each gene; bins are read while
            the current offset is ``<=`` this value.
        gen_id: Gene identifiers used to build the output file names.
        per_bin: Bin size for each gene.
        gen_file: Path of the genome sequence text file.
        motif: Motif to look for. At most one kind of ambiguity code is
            expanded, checked in the order ``N`` (A/T/G/C), ``R`` (A/G),
            ``Y`` (C/T); all occurrences of that code get the same base.

    Returns:
        ``[total_temp, total_non_temp]``: the summed reverse-complement hits
        and the summed forward hits over all scanned windows.
    """
    complement = {'A': 'T', 'C': 'G', 'G': 'C', 'T': 'A', 'R': 'Y', 'Y': 'R'}
    ambiguity_codes = (('N', 'ATGC'), ('R', 'AG'), ('Y', 'CT'))

    # Expand the first ambiguity code present into concrete candidates.
    forward_motifs = [motif]
    for code, bases in ambiguity_codes:
        if code in motif:
            forward_motifs = [motif.replace(code, base) for base in bases]
            break

    reverse_motifs = [
        "".join(complement.get(base, base) for base in reversed(candidate))
        for candidate in forward_motifs
    ]
    # Column label for the reverse-strand counts in the output table.
    reverse = "".join(complement.get(base, base) for base in reversed(motif))

    size = len(motif)
    overlap = size - 1
    total_temp = 0
    total_non_temp = 0

    # The context manager guarantees the genome file is closed on every path.
    with open(gen_file, 'r') as genome:
        for start, end, bin_size, gene in zip(start_pos, end_pos, per_bin,
                                              gen_id):
            ct = 1
            x = start
            while x <= end:
                if ct > 1:
                    # Step back two characters so windows that straddle the
                    # previous bin boundary are still seen.
                    x = x - 2
                genome.seek(x)
                frame = genome.read(bin_size + 2)

                slider = Slider(size, overlap)
                slider.fit(np.array(list(frame)))

                windows = []
                strings = []
                pam_count = []
                pam_rev_count = []
                i = 0
                while True:
                    data = slider.slide()
                    windows.append(str(x + i) + "-" + str(x + i + (size - 1)))
                    string = ''.join(data)
                    strings.append(string)

                    # Forward hits feed the non-template total ...
                    count = forward_motifs.count(string)
                    pam_count.append(count)
                    total_non_temp += count

                    # ... reverse-complement hits feed the template total.
                    count_rev = reverse_motifs.count(string)
                    pam_rev_count.append(count_rev)
                    total_temp += count_rev

                    i += 1
                    if slider.reached_end_of_list():
                        break

                df = pd.DataFrame.from_dict({
                    'Windows': windows,
                    'PAM': strings,
                    motif: pam_count,
                    reverse: pam_rev_count,
                }).transpose()
                print(df)
                df.to_csv(str(gene) + "_bin" + str(ct))
                ct += 1
                x = x + bin_size + 2

    return [total_temp, total_non_temp]
Exemplo n.º 7
0
})
data.head()

# Sliding window with bucket size 1 and no overlap: one sample per step.
from window_slider import Slider
bucket_size = 1
overlap_count = 0
slider1 = Slider(bucket_size, overlap_count)
slider2 = Slider(bucket_size, overlap_count)
slider3 = Slider(bucket_size, overlap_count)
slider1.fit(data['x'].values)
slider2.fit(data['y'].values)
slider3.fit(data['z'].values)
i = 1
# Open the output once instead of re-opening it in append mode for every
# sample; the rows written are the same.
with open('Tot_BodyMag_Freq_51.14.csv', 'a', newline='') as f:
    writer = csv.writer(f)
    while True:
        acc1 = slider1.slide()
        acc2 = slider2.slide()
        acc3 = slider3.slide()
        arr = np.array([acc1, acc2, acc3])
        # ``norm`` is defined outside this snippet (presumably
        # numpy.linalg.norm -- TODO confirm).
        mag = norm(arr)

        # Emit the header row before the very first data row only.
        if i == 1:
            writer.writerow(["blank", "AccMag"])
            i = 2
        # NOTE(review): ``i`` stays at 2 after the header, so the first
        # column is the same filler text on every row.
        writer.writerow(["%f\r\n" % (i), (mag)])
        if slider2.reached_end_of_list():
            break

Exemplo n.º 8
0
def finding_motifs(gene_positions, gen_file, motif, frame_size=1000):
    """Scan a fixed-size frame at each gene position for a motif.

    For every position, ``frame_size`` characters are read from the genome
    file starting at ``pos - 1``. The frame is scanned with a ``Slider``
    window of ``len(motif)`` characters and an overlap of ``len(motif) - 1``.
    Every window is compared with the expanded motif and with the reverse
    complements of the expanded motif. For each position a table with the
    window coordinates, the window text and both hit counts is printed and
    written with ``DataFrame.to_csv`` to a file named ``gene<pos>``. The
    expanded motif list and its reverse complements are printed once first.

    Args:
        gene_positions: Iterable of positions; ``pos - 1`` is passed to
            ``file.seek``.
        gen_file: Path of the genome sequence text file.
        motif: Motif to look for. At most one kind of ambiguity code is
            expanded, checked in the order ``N`` (A/T/G/C), ``R`` (A/G),
            ``Y`` (C/T); all occurrences of that code get the same base.
        frame_size: Number of characters read per position (default 1000).

    Returns:
        ``[total_temp, total_non_temp]``: the summed reverse-complement hits
        and the summed forward hits over all scanned windows.
    """
    complement = {'A': 'T', 'C': 'G', 'G': 'C', 'T': 'A', 'R': 'Y', 'Y': 'R'}
    ambiguity_codes = (('N', 'ATGC'), ('R', 'AG'), ('Y', 'CT'))

    # Expand the first ambiguity code present into concrete candidates.
    forward_motifs = [motif]
    for code, bases in ambiguity_codes:
        if code in motif:
            forward_motifs = [motif.replace(code, base) for base in bases]
            break
    print(forward_motifs)

    reverse_motifs = [
        "".join(complement.get(base, base) for base in reversed(candidate))
        for candidate in forward_motifs
    ]
    print(reverse_motifs)
    # Column label for the reverse-strand counts in the output table.
    reverse = "".join(complement.get(base, base) for base in reversed(motif))

    size = len(motif)
    overlap = size - 1
    total_temp = 0
    total_non_temp = 0

    # The context manager guarantees the genome file is closed on every path.
    with open(gen_file, 'r') as genome:
        for pos in gene_positions:
            genome.seek(pos - 1)
            frame = genome.read(frame_size)

            slider = Slider(size, overlap)
            slider.fit(np.array(list(frame)))

            windows = []
            strings = []
            pam_count = []
            pam_rev_count = []
            i = 0
            while True:
                data = slider.slide()
                windows.append(str(pos + i) + "-" + str(pos + i + (size - 1)))
                string = ''.join(data)
                strings.append(string)

                # Forward hits feed the non-template total ...
                count = forward_motifs.count(string)
                pam_count.append(count)
                total_non_temp += count

                # ... reverse-complement hits feed the template total.
                count_rev = reverse_motifs.count(string)
                pam_rev_count.append(count_rev)
                total_temp += count_rev

                i += 1
                if slider.reached_end_of_list():
                    break

            df = pd.DataFrame.from_dict({
                'Windows': windows,
                'PAM': strings,
                motif: pam_count,
                reverse: pam_rev_count,
            }).transpose()
            print(df)
            df.to_csv("gene" + str(pos))

    return [total_temp, total_non_temp]