Example No. 1
def download_chapters(app):
    download_cover(app)

    bar = IncrementalBar('Downloading chapters', max=len(app.chapters))
    bar.start()

    if os.getenv('debug_mode') == 'yes':
        bar.next = lambda: None  # Hide in debug mode
    # end if

    futures_to_check = {
        app.crawler.executor.submit(
            download_chapter_body,
            app,
            chapter,
        ): str(chapter['id'])
        for chapter in app.chapters
    }

    app.progress = 0
    for future in futures.as_completed(futures_to_check):
        result = future.result()
        if result:
            bar.clearln()
            logger.error(result)
        # end if
        app.progress += 1
        bar.next()
    # end for

    bar.finish()
    print('Downloaded %d chapters' % len(app.chapters))
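All the snippets on this page follow the same IncrementalBar lifecycle from the progress package: construct the bar with a message and a max count, call start(), tick it with next() once per unit of work, and call finish(). The standalone sketch below is not taken from any of the projects shown here; the 'Processing' label and the sleep() call are placeholders for real work. It also shows the recurring trick of replacing bar.next with a no-op lambda to silence the bar in debug mode.

from os import getenv
from time import sleep

from progress.bar import IncrementalBar

items = range(20)
bar = IncrementalBar('Processing', max=len(items))
bar.start()

if getenv('debug_mode') == 'yes':
    bar.next = lambda: None  # silence the bar, as the examples on this page do

for _ in items:
    sleep(0.05)  # placeholder for real work (downloading a chapter, etc.)
    bar.next()

bar.finish()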
Example No. 2
def download_chapters(app):
    download_cover(app)

    bar = IncrementalBar('Downloading chapters', max=len(app.chapters))
    bar.start()
    if os.getenv('debug_mode') == 'true':
        bar.next = lambda: None

    futures_to_check = {
        app.crawler.executor.submit(
            download_chapter_body,
            app,
            chapter,
        ): str(chapter['id'])
        for chapter in app.chapters
    }
    for future in futures.as_completed(futures_to_check):
        result = future.result()
        if result:
            bar.clearln()
            app.logger.error(result)

        bar.next()

    bar.finish()
Example No. 3
def search_novels(app):
    if not app.crawler_links:
        return

    bar = IncrementalBar('Searching', max=len(app.crawler_links))
    if os.getenv('debug_mode') == 'yes':
        bar.next = lambda n=1: None  # Hide in debug mode
    else:
        bar.start()
    # end if

    # Add future tasks
    checked = {}
    futures_to_check = []
    app.progress = 0
    for link in app.crawler_links:
        crawler = crawler_list[link]
        if crawler in checked:
            logger.info('A crawler for "%s" already exists', link)
            bar.next()
            continue
        # end if
        checked[crawler] = True
        future = executor.submit(get_search_result, app, link, bar)
        futures_to_check.append(future)
    # end for

    # Resolve all futures
    combined_results = [item for f in futures_to_check for item in f.result()]

    # Process combined search results
    app.search_results = process_results(combined_results)
    bar.clearln()
    bar.finish()
    print('Found %d results' % len(app.search_results))
Example No. 4
def download_chapters(app):
    app.progress = 0
    bar = IncrementalBar('Downloading chapters', max=len(app.chapters))
    if os.getenv('debug_mode') == 'yes':
        bar.next = lambda: None  # Hide in debug mode
        bar.finish()
    else:
        bar.start()
    # end if

    if not app.output_formats:
        app.output_formats = {}
    # end if

    futures_to_check = [
        app.crawler.executor.submit(
            download_chapter_body,
            app,
            chapter,
        ) for chapter in app.chapters
    ]

    for future in futures_to_check:
        result = future.result()
        if result:
            bar.clearln()
            logger.error(result)
        # end if
        bar.next()
    # end for

    bar.finish()
    print('Processed %d chapters' % len(app.chapters))
Example No. 5
def alignChapter(lang, bookid, chapter):
    """
    Align a chapter of a book

    Args:
        lang (str): language
        bookid (str): identifier of a book
        chapter (int): the chapter to be aligned
    
    Returns:
        list of spacy tokens: the tokens with the added audio alignment information
    """
    bar = IncrementalBar('Processing %s [%s] (%s)' % (bookid, lang, chapter), max=100)
    bar.start()
    outfile = os.path.join(book_manager.chaptersPath(lang, bookid), book_manager.mappingFile(chapter))
    audio_file, start_time, stop_time = book_manager.chapterAudio(lang, bookid, chapter)
    wavfile = os.path.join(config.TEMP_DIR, 'chapter%s.wav' % chapter)
    gu.removeFile(wavfile)
    encodeForSphinx(audio_file, start_time, stop_time, wavfile) # encode audio for speech recognition
    # get spacy models for language processing
    sp = utils.getSpacy(lang)
    text = book_manager.bookChapter(lang, bookid, chapter)
    doc = sp(text)
    # prepare tokens without punctuation
    doc_tokens = [tkn for tkn in doc if tkn.is_alpha and (not tkn.is_punct) and tkn.text.strip()]
    token_count = len(doc_tokens)
    audio_segment = AudioSegment.from_wav(wavfile) # read the audio
    audio_len = len(audio_segment)
    begin_tkn = 0
    begin_audio = 0
    startm = time2msec(start_time)
    stopm = time2msec(stop_time)
    l = stopm - startm
    
    while begin_tkn < token_count:
        chunk = doc_tokens[begin_tkn:begin_tkn+50]
        rel_len = 1.25 * len(chunk) / token_count
        end_audio = begin_audio + int(rel_len * audio_len)
        last_idx, begin_audio = alignChunk(lang, audio_segment=audio_segment, audio_begin=begin_audio, audio_end=end_audio, chunk=chunk)        
        bar.goto(int(100.0 * begin_audio / l))
        if last_idx == -1: # could not map anything
            break
        else:
            begin_tkn += last_idx + 1
    gu.removeFile(wavfile)
    saveAudioMapping(doc_tokens, startm, stopm, outfile)
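Unlike the other examples, Example No. 5 drives the bar with goto(), jumping directly to a computed position instead of ticking with next(). A minimal sketch of that call, with an invented loop and sleep() standing in for the audio alignment work:

from time import sleep

from progress.bar import IncrementalBar

bar = IncrementalBar('Aligning', max=100)
bar.start()
for done_fraction in (0.1, 0.35, 0.6, 1.0):
    sleep(0.1)  # placeholder for aligning one chunk of tokens
    bar.goto(int(100 * done_fraction))
bar.finish()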
Example No. 6
def search_novels(app):
    executor = futures.ThreadPoolExecutor(10)

    # Add future tasks
    checked = {}
    futures_to_check = {}
    for link in app.crawler_links:
        crawler = crawler_list[link]
        if crawler in checked:
            logger.info('A crawler for "%s" already exists', link)
            continue
        # end if
        checked[crawler] = True
        futures_to_check[
            executor.submit(
                get_search_result,
                app.user_input,
                link
            )
        ] = str(crawler)
    # end for

    bar = IncrementalBar('Searching', max=len(futures_to_check.keys()))
    bar.start()

    if os.getenv('debug_mode') == 'yes':
        bar.next = lambda: None  # Hide in debug mode
    # end if

    # Resolve future tasks
    app.progress = 0
    combined_results = []
    for future in futures.as_completed(futures_to_check):
        combined_results += future.result()
        app.progress += 1
        bar.next()
    # end for

    # Process combined search results
    app.search_results = process_results(combined_results)
    bar.clearln()
    bar.finish()
    print('Found %d results' % len(app.search_results))

    executor.shutdown()
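The download and search examples pair IncrementalBar with concurrent.futures: submit one task per item to a ThreadPoolExecutor, then tick the bar as the futures complete. Below is a minimal self-contained sketch of that pattern; fake_task and its sleep() are placeholders, not code from the projects above.

from concurrent import futures
from time import sleep

from progress.bar import IncrementalBar

def fake_task(n):
    sleep(0.01)  # placeholder for a download or search request
    return n * n

items = list(range(30))
bar = IncrementalBar('Working', max=len(items))
bar.start()

results = []
with futures.ThreadPoolExecutor(max_workers=10) as executor:
    pending = [executor.submit(fake_task, i) for i in items]
    for future in futures.as_completed(pending):
        results.append(future.result())
        bar.next()

bar.finish()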
Example No. 7
def download_chapters(app):
    # download or generate cover
    app.book_cover = download_cover(app)
    if not app.book_cover:
        app.book_cover = generate_cover(app)
    # end if
    if not app.book_cover:
        logger.warn('No cover image')
    # end if

    bar = IncrementalBar('Downloading chapters', max=len(app.chapters))
    bar.start()

    if os.getenv('debug_mode') == 'yes':
        bar.next = lambda: None  # Hide in debug mode
    # end if

    if not app.output_formats:
        app.output_formats = {}
    # end if

    futures_to_check = {
        app.crawler.executor.submit(
            download_chapter_body,
            app,
            chapter,
        ): str(chapter['id'])
        for chapter in app.chapters
    }

    app.progress = 0
    for future in futures.as_completed(futures_to_check):
        result = future.result()
        if result:
            bar.clearln()
            logger.error(result)
        # end if
        app.progress += 1
        bar.next()
    # end for

    bar.finish()
    print('Downloaded %d chapters' % len(app.chapters))
Example No. 8
def main():
    """
    Description: This function calls all the others in the right order.
    """
    path_csv = create_directory('bts_csv')
    path_images = create_directory('images_bts')

    step_1 = extract_category_list()

    # Initialization of Progress Bar
    bar = IncrementalBar('Progression', max=len(step_1))
    bar.start()

    while step_1:
        step_2 = extract_books_url(step_1)
        step_3 = transform_books_information(step_2, path_images)
        load_books_information(step_3, path_csv)
        del step_1[0]

        # Progress Bar incrementation:
        bar.next()
Example No. 9
    def bind(self):
        logger.debug('Binding %s.pdf', self.file_name)
        pdf_path = os.path.join(self.app.output_path, 'pdf')
        os.makedirs(pdf_path, exist_ok=True)

        all_pages = []

        bar = IncrementalBar('Adding chapters to PDF', max=len(self.chapters))
        bar.start()

        if os.getenv('debug_mode') == 'yes':
            bar.next = lambda: None  # Hide in debug mode
        # end if

        html = HTML(string=self.create_intro())
        all_pages += html.render().pages
        logger.info('Added intro page')

        for chapter in self.chapters:
            html_string = chapter['body']
            html = HTML(string=html_string)
            all_pages += html.render().pages
            logger.info('Added chapter %d', chapter['id'])
            bar.next()
        # end for

        bar.finish()

        html = HTML(string=self.make_metadata())
        combined = html.render().copy(all_pages)

        output_file = os.path.join(pdf_path, '%s.pdf' % self.file_name)
        combined.write_pdf(output_file)
        print('Created: %s.pdf' % self.file_name)

        return output_file
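Example No. 9 combines the progress bar with WeasyPrint: each HTML fragment is rendered, its pages are collected, and a final render is copied over the combined page list before writing the PDF. A trimmed sketch of just those WeasyPrint calls; the HTML strings and the 'book.pdf' output name are made up for illustration.

from weasyprint import HTML

all_pages = []
for html_string in ('<h1>Intro</h1>', '<p>Chapter one</p>', '<p>Chapter two</p>'):
    all_pages += HTML(string=html_string).render().pages

combined = HTML(string='<title>My Book</title>').render().copy(all_pages)
combined.write_pdf('book.pdf')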
Example No. 10
            sleep()

for spin in (Spinner, PieSpinner, MoonSpinner, LineSpinner, PixelSpinner):
    for i in spin(spin.__name__ + ' ').iter(range(100)):
        sleep()

for singleton in (Counter, Countdown, Stack, Pie):
    for i in singleton(singleton.__name__ + ' ').iter(range(100)):
        sleep()

bar = IncrementalBar('Random', suffix='%(index)d')
for i in range(100):
    bar.goto(random.randint(0, 100))
    sleep()
bar.finish()
"""
import progressbar
from time import sleep
bar = progressbar.ProgressBar(maxval=20, \
    widgets=[progressbar.Bar('=', '[', ']'), ' ', progressbar.Percentage()])
bar.start()
for i in xrange(20):
    bar.update(i+1)
    sleep(0.1)
bar.finish()
"""

#					TQDM
# https://notebooks.ai/ernest-galton/tqdm-ef22fcc1/lab
# https://github.com/tqdm/tqdm/wiki/How-to-make-a-great-Progress-Bar
# https://pypi.org/project/tqdm/
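For comparison with the commented-out progressbar snippet and the tqdm links above: tqdm only needs the iterable wrapped. A minimal sketch, with an arbitrary label and sleep():

from time import sleep

from tqdm import tqdm

for _ in tqdm(range(100), desc='Progression'):
    sleep(0.01)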
Example No. 11
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        'path', help='Path to directory containing measurement data'
    )
    args = parser.parse_args()

    # Use given path as working directory for script so glob works from there
    os.chdir(args.path)

    total_lines = get_total_lines(glob.iglob('**/*.CSV', recursive=True))
    total_size = int(total_lines / BUFFER_SIZE)

    bar = IncrementalBar(
        max=total_size, suffix='%(percent)d%% [ETA: %(eta_td)s]'
    )
    bar.start()

    csv_glob = peekable(glob.iglob('**/*.CSV', recursive=True))
    line_reader = buffered_line_reader(
        line_reader(csv_glob), buffer_size=BUFFER_SIZE
    )

    with requests.Session() as session:
        # Iterate through the progress bar to auto update the bar
        for lines in bar.iter(line_reader):
            bar.message = csv_glob.peek(bar.message)
            response = session.post(URL, data=lines)
            assert response.status_code == 204, response.text
def coherent_wf_to_wf_dedispersion(DM, fname, no_of_points_for_fft_dedisp):
    """
    Function reads waveform data in wf32 format, makes an FFT, cuts the symmetrical half of the spectra and shifts the
    lines of complex data to provide coherent dedispersion. Then a symmetrical part of the spectra is made and joined
    to the shifted one, an inverse FFT is applied and the data are stored in waveform wf32 format.
    Input parameters:
        DM -                            dispersion measure to compensate
        fname -                         name of file with initial wf32 data
        no_of_points_for_fft_dedisp -   number of waveform data points to use for FFT
    Output parameters:
        file_data_name -                name of file with processed data
    """

    #  *** Data file header read ***
    [
        df_filename, df_filesize, df_system_name, df_obs_place, df_description,
        clock_freq, df_creation_timeUTC, Channel, ReceiverMode, Mode, Navr,
        time_resolution, fmin, fmax, df, frequency_list, freq_points_num,
        data_block_size
    ] = FileHeaderReaderJDS(fname, 0, 0)

    # Manually set frequencies for one channel mode
    freq_points_num = int(no_of_points_for_fft_dedisp / 2)

    # Manually set frequencies for 33 MHz clock frequency
    if int(clock_freq / 1000000) == 33:
        fmin = 16.5
        fmax = 33.0
        df = 16500000 / freq_points_num

    # Create long data files and copy first data file header to them

    with open(fname, 'rb') as file:
        # *** Data file header read ***
        file_header = file.read(1024)

        # Removing old DM from file name and updating it to current value
        if fname.startswith('DM_'):
            prev_dm_str = fname.split('_')[1]
            prev_dm = np.float32(prev_dm_str)
            new_dm = prev_dm + DM
            n = len('DM_' + prev_dm_str + '_')
            file_data_name = 'DM_' + str(np.round(new_dm, 6)) + '_' + fname[n:]
        else:
            file_data_name = 'DM_' + str(np.round(DM, 6)) + '_' + fname

        # *** Creating a binary file with data for long data storage ***
        file_data = open(file_data_name, 'wb')
        file_data.write(file_header)
        file_data.close()
        del file_header

        # *** Creating a new timeline TXT file for results ***
        new_tl_file_name = file_data_name.split("_Data_ch",
                                                1)[0] + '_Timeline.wtxt'
        new_tl_file = open(
            new_tl_file_name,
            'w')  # Open and close to delete the file with the same name
        new_tl_file.close()

        # Calculation of the time shifts
        shift_vector = DM_full_shift_calc(freq_points_num, fmin, fmax,
                                          df / pow(10, 6), time_resolution, DM,
                                          'jds')
        max_shift = np.abs(shift_vector[0])

        # Preparing buffer array
        buffer_array = np.zeros((freq_points_num, 2 * max_shift),
                                dtype='complex64')

        print(' Maximal shift is:                            ', max_shift,
              ' pixels ')
        print(' Dispersion measure:                          ', DM,
              ' pc / cm3 ')

        # Calculation of number of blocks and number of spectra in the file
        no_of_spectra_in_bunch = max_shift.copy()
        no_of_bunches_per_file = int(
            np.ceil(
                (df_filesize - 1024) /
                (no_of_spectra_in_bunch * no_of_points_for_fft_dedisp * 4)))

        # Real time resolution of spectra
        fine_clock_freq = (int(clock_freq / 1000000.0) * 1000000.0)
        real_spectra_dt = float(no_of_points_for_fft_dedisp / fine_clock_freq)
        real_spectra_df = float(
            (fine_clock_freq / 2) / (no_of_points_for_fft_dedisp / 2))

        print(' Number of spectra in bunch:                  ',
              no_of_spectra_in_bunch)
        print(' Number of bunches to read in file:           ',
              no_of_bunches_per_file)
        print(' Time resolution of calculated spectra:       ',
              round(real_spectra_dt * 1000, 3), ' ms')
        print(' Frequency resolution of calculated spectra:  ',
              round(real_spectra_df / 1000, 3), ' kHz \n')

        # !!! Fake timing. Real timing to be done!!!
        # *** Reading timeline file ***
        old_tl_file_name = fname.split("_Data_ch", 1)[0] + '_Timeline.wtxt'
        old_tl_file = open(old_tl_file_name, 'r')
        new_tl_file = open(
            new_tl_file_name,
            'w')  # Open and close to delete the file with the same name

        file.seek(1024)  # Jumping to 1024 byte from file beginning

        # bar = IncrementalBar(' Coherent dispersion delay removing: ', max=no_of_bunches_per_file - 1,
        bar = IncrementalBar(' Coherent dispersion delay removing: ',
                             max=no_of_bunches_per_file,
                             suffix='%(percent)d%%')
        bar.start()

        # for bunch in range(no_of_bunches_per_file - 1):
        for bunch in range(no_of_bunches_per_file):

            # Trying to read all the file, not only integer number of bunches
            if bunch >= no_of_bunches_per_file - 1:
                no_of_spectra_in_bunch = int(
                    ((df_filesize - 1024) -
                     bunch * max_shift * no_of_points_for_fft_dedisp * 4) /
                    (no_of_points_for_fft_dedisp * 4))
                # print('\n  Bunch No ', str(bunch+1), ' of ', no_of_bunches_per_file, ' bunches')
                # print('\n  Number of spectra in the last bunch is: ', no_of_spectra_in_bunch)
                # print('\n  Maximal shift is:                       ', max_shift)

            # Read time from timeline file for the bunch
            time_scale_bunch = []
            for line in range(no_of_spectra_in_bunch):
                time_scale_bunch.append(str(old_tl_file.readline()))

            # Reading and reshaping all data with time data
            wf_data = np.fromfile(file,
                                  dtype='f4',
                                  count=no_of_spectra_in_bunch *
                                  no_of_points_for_fft_dedisp)
            '''
            fig = plt.figure(figsize=(9, 5))
            ax1 = fig.add_subplot(111)
            ax1.plot(wf_data, linestyle='-', linewidth='1.00', label='Initial waveform')
            ax1.legend(loc='upper right', fontsize=6)
            ax1.grid(b=True, which='both', color='silver', linestyle='-')
            ax1.set_ylabel('Intensity, a.u.', fontsize=6, fontweight='bold')
            pylab.savefig('00_Initial_waveform_' + str(bunch) + '.png', bbox_inches='tight', dpi=160)
            plt.close('all')
            '''

            wf_data = np.reshape(
                wf_data, [no_of_points_for_fft_dedisp, no_of_spectra_in_bunch],
                order='F')

            # preparing matrices for spectra
            spectra = np.zeros(
                (no_of_points_for_fft_dedisp, no_of_spectra_in_bunch),
                dtype='complex64')

            # Calculation of spectra
            for i in range(no_of_spectra_in_bunch):
                spectra[:, i] = np.fft.fft(wf_data[:, i])
            del wf_data
            '''
            fig = plt.figure(figsize=(9, 5))
            ax1 = fig.add_subplot(111)
            ax1.plot(10 * np.log10(np.power(np.abs(spectra[:, 0]), 2)), linestyle='-', linewidth='1.00',
                     label='Initial spectra before cut')
            ax1.legend(loc='upper right', fontsize=6)
            ax1.grid(b=True, which='both', color='silver', linestyle='-')
            ax1.set_ylabel('Intensity, a.u.', fontsize=6, fontweight='bold')
            pylab.savefig('00a_Initial_doubled_imm_spectra' + str(bunch) + '.png', bbox_inches='tight', dpi=160)
            plt.close('all')
            '''

            # Cut half of the spectra
            spectra = spectra[int(no_of_points_for_fft_dedisp / 2):, :]
            ''' # making figures
            fig = plt.figure(figsize=(9, 5))
            ax1 = fig.add_subplot(111)
            ax1.imshow(np.flipud(10*np.log10(np.power(np.abs(spectra), 2))), aspect='auto', cmap='jet')
            ax1.set_ylabel('Frequency points', fontsize=6, fontweight='bold')
            ax1.set_xlabel('Time points', fontsize=6, fontweight='bold')
            pylab.savefig('01_Initial_spectra_' + str(bunch) + '.png', bbox_inches='tight', dpi=160)
            plt.close('all')

            fig = plt.figure(figsize=(9, 5))
            ax1 = fig.add_subplot(111)
            ax1.plot(10*np.log10(np.power(np.abs(spectra[:, 0]), 2)), linestyle='-', linewidth='1.00', label='Initial waveform')
            ax1.legend(loc='upper right', fontsize=6)
            ax1.grid(b=True, which='both', color='silver', linestyle='-')
            ax1.set_ylabel('Intensity, a.u.', fontsize=6, fontweight='bold')
            pylab.savefig('02_Initial_imm_spectra' + str(bunch) + '.png', bbox_inches='tight', dpi=160)
            plt.close('all')
            '''

            #  Dispersion delay removing
            data_space = np.zeros((freq_points_num, 2 * max_shift),
                                  dtype='complex64')

            # if it is the last bunch - use only available data
            if bunch >= no_of_bunches_per_file - 1:
                data_space[:, max_shift:max_shift +
                           no_of_spectra_in_bunch] = spectra[:, :]
            else:
                data_space[:, max_shift:] = spectra[:, :]

            data_space = pulsar_DM_compensation_with_indices_changes(
                data_space, shift_vector)
            del spectra

            # Adding the next data block
            buffer_array += data_space

            # Making and filling the array with fully ready data for plotting and saving to a file
            if bunch >= no_of_bunches_per_file - 1:
                array_compensated_dm = buffer_array[:,
                                                    0:no_of_spectra_in_bunch]
            else:
                array_compensated_dm = buffer_array[:, 0:max_shift]

            if bunch > 0:

                # Saving time data to a new file
                for i in range(len(time_scale_bunch)):
                    new_tl_file.write((time_scale_bunch[i][:]) + '')

                # Saving data with compensated DM
                spectra = array_compensated_dm  # .copy()
                '''
                # making figures
                fig = plt.figure(figsize=(9, 5))
                ax1 = fig.add_subplot(111)
                ax1.imshow(np.flipud(10*np.log10(np.power(np.abs(spectra), 2))), aspect='auto', cmap='jet')
                ax1.set_ylabel('Frequency points', fontsize=6, fontweight='bold')
                ax1.set_xlabel('Time points', fontsize=6, fontweight='bold')
                pylab.savefig('03_Compensated_spectra_' + str(bunch) + '.png', bbox_inches='tight', dpi=160)
                plt.close('all')

                fig = plt.figure(figsize=(9, 5))
                ax1 = fig.add_subplot(111)
                ax1.plot(10*np.log10(np.power(np.abs(spectra[:,0]), 2)), linestyle='-', linewidth='1.00', label='Initial waveform')
                ax1.legend(loc='upper right', fontsize=6)
                ax1.grid(b=True, which='both', color='silver', linestyle='-')
                ax1.set_ylabel('Intensity, a.u.', fontsize=6, fontweight='bold')
                pylab.savefig('04_Compensated_imm_spectra' + str(bunch) + '.png', bbox_inches='tight', dpi=160)
                plt.close('all')
                '''

                wf_data = np.zeros(
                    (no_of_points_for_fft_dedisp, no_of_spectra_in_bunch))

                # Add lost half of the spectra

                second_spectra_half = spectra.copy()
                second_spectra_half = np.flipud(second_spectra_half)
                spectra = np.concatenate((second_spectra_half, spectra),
                                         axis=0)  # Changed places!!!
                '''
                fig = plt.figure(figsize=(9, 5))
                ax1 = fig.add_subplot(111)
                ax1.plot(10*np.log10(np.power(np.abs(spectra[:,0]), 2)), linestyle='-', linewidth='1.00', label='Initial waveform')
                ax1.legend(loc='upper right', fontsize=6)
                ax1.grid(b=True, which='both', color='silver', linestyle='-')
                ax1.set_ylabel('Intensity, a.u.', fontsize=6, fontweight='bold')
                pylab.savefig('05_Compensated_doubled_imm_spectra' + str(bunch) + '.png', bbox_inches='tight', dpi=160)
                plt.close('all')
                '''

                # Making IFFT
                for i in range(no_of_spectra_in_bunch):
                    wf_data[:, i] = np.real(np.fft.ifft(spectra[:, i]))
                del spectra

                # Reshaping the waveform to single dimension (real)
                wf_data = np.reshape(
                    wf_data,
                    [no_of_points_for_fft_dedisp * no_of_spectra_in_bunch, 1],
                    order='F')
                ''' # making figures
                fig = plt.figure(figsize=(9, 5))
                ax1 = fig.add_subplot(111)
                ax1.plot(wf_data, linestyle='-', linewidth='1.00', label='Initial waveform')
                ax1.legend(loc='upper right', fontsize=6)
                ax1.grid(b=True, which='both', color='silver', linestyle='-')
                ax1.set_ylabel('Intensity, a.u.', fontsize=6, fontweight='bold')
                pylab.savefig('06_Compensated_waveform_' + str(bunch) + '.png', bbox_inches='tight', dpi=160)
                plt.close('all')
                '''

                # Saving waveform data to wf32 file
                file_data = open(file_data_name, 'ab')
                file_data.write(
                    np.float32(wf_data).transpose().copy(order='C'))
                file_data.close()

                # !!! Saving time data to timeline file !!!

            # Rolling buffer_array to put current data first
            buffer_array = np.roll(buffer_array, -max_shift)
            buffer_array[:, max_shift:] = 0

            bar.next()

        bar.finish()
        old_tl_file.close()
        new_tl_file.close()

    return file_data_name
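The shift vector that coherent_wf_to_wf_dedispersion obtains from DM_full_shift_calc encodes the frequency-dependent dispersion delay. That helper is not shown here, but the underlying relation is the standard cold-plasma delay dt = 4.148808e3 s * DM * (1/f^2 - 1/f_ref^2) with f in MHz and DM in pc/cm^3. The sketch below only illustrates that formula, not the project's implementation; the function name, band edges and DM value are invented.

import numpy as np

K_DM = 4.148808e3  # s * MHz^2 * cm^3 / pc, standard dispersion constant

def dispersion_shift_samples(dm, freqs_mhz, f_ref_mhz, dt_s):
    # Delay of each channel relative to the reference frequency, in samples of width dt_s
    delay_s = K_DM * dm * (freqs_mhz ** -2.0 - f_ref_mhz ** -2.0)
    return np.round(delay_s / dt_s).astype(int)

# Illustrative values only: a 16.5-33 MHz band referenced to its top edge
freqs = np.linspace(16.5, 33.0, 8192)
shifts = dispersion_shift_samples(dm=5.755, freqs_mhz=freqs, f_ref_mhz=33.0, dt_s=0.001)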
def convert_wf32_to_dat_with_overlap(fname, no_of_points_for_fft_spectr,
                                     no_of_spectra_in_bunch, hanning_window):
    """
        Function converts waveform data in .wf32 format to spectra in .dat format.
        : fname : name of .wf32 file with waveform data
        : no_of_points_for_fft_spectr : number of points for FFT to provide the necessary time-frequency resolution
        : no_of_spectra_in_bunch : number of spectra to read and process per bunch
        : hanning_window : if truthy, apply a window to the data before the FFT
        : return : file_data_name - name of .dat file with the resulting spectra
    """

    # *** Data file header read ***
    [
        df_filename, df_filesize, df_system_name, df_obs_place, df_description,
        clock_freq, df_time_utc, channel, receiver_mode, mode, n_avr,
        time_resolution, fmin, fmax, df, frequency, freq_points_num,
        data_block_size
    ] = FileHeaderReaderJDS(fname, 0, 0)

    freq_points_num = int(no_of_points_for_fft_spectr / 2)

    with open(fname, 'rb') as file:
        # *** Data file header read ***
        file_header = file.read(1024)

        # *** Creating a binary file with spectra data for long data storage ***
        file_data_name = fname[:-5] + '.dat'
        file_data = open(file_data_name, 'wb')
        file_data.write(file_header)
        file_data.seek(574)  # FFT size place in header
        file_data.write(np.int32(no_of_points_for_fft_spectr).tobytes())
        file_data.seek(624)  # Lb place in header
        file_data.write(np.int32(0).tobytes())
        file_data.seek(628)  # Hb place in header
        file_data.write(np.int32(freq_points_num).tobytes())
        file_data.seek(632)  # Wb place in header
        file_data.write(np.int32(freq_points_num).tobytes())
        file_data.seek(636)
        file_data.write(np.int32(1).tobytes())  # Seem to work OK
        file_data.close()
        del file_header

        # Calculation of number of blocks and number of spectra in the file
        no_of_bunches_per_file = int(
            (df_filesize - 1024) /
            ((no_of_spectra_in_bunch + 0.5) * no_of_points_for_fft_spectr * 4))

        # Real time resolution of averaged spectra
        fine_clock_freq = int(clock_freq / 1000000.0) * 1000000.0
        real_spectra_dt = float(no_of_points_for_fft_spectr / fine_clock_freq)
        real_spectra_df = float(
            (fine_clock_freq / 2) / (no_of_points_for_fft_spectr / 2))

        print(' Number of spectra in bunch:                  ',
              no_of_spectra_in_bunch)
        print(' Sampling clock frequency:                    ',
              fine_clock_freq, ' Hz')
        print(' Number of bunches to read in file:           ',
              no_of_bunches_per_file)
        print(' Time resolution of calculated spectra:       ',
              round(real_spectra_dt * 1000, 3), ' ms')
        print(' Frequency resolution of calculated spectra:  ',
              round(real_spectra_df / 1000, 3), ' kHz \n')
        # print('\n  Reading data from file \n')

        file.seek(1024)  # Jumping to 1024 byte from file beginning

        # *** Creating a new timeline TXT file for results ***
        new_tl_file_name = file_data_name.split('_Data_',
                                                1)[0] + '_Timeline.txt'
        new_tl_file = open(
            new_tl_file_name,
            'w')  # Open and close to delete the file with the same name
        new_tl_file.close()

        # *** Reading timeline file ***
        old_tl_file_name = fname.split("_Data_", 1)[0] + '_Timeline.wtxt'
        old_tl_file = open(old_tl_file_name, 'r')
        new_tl_file = open(
            new_tl_file_name,
            'w')  # Open and close to delete the file with the same name

        # Making the variable for half length of the spectrum for convenience
        half_of_spectrum = int(no_of_points_for_fft_spectr / 2)

        # Making a small buffer vector to store the last half of the spectrum for the next loop step
        buffer = np.zeros(half_of_spectrum)

        bar = IncrementalBar(' Conversion from waveform to spectra: ',
                             max=no_of_bunches_per_file - 1,
                             suffix='%(percent)d%%')
        bar.start()

        for bunch in range(no_of_bunches_per_file - 1):

            # print('Bunch # ', bunch, ' of ', no_of_bunches_per_file - 1)

            # Read time from timeline file for the bunch
            time_scale_bunch = []
            for line in range(no_of_spectra_in_bunch):
                tmp = str(old_tl_file.readline())
                time_scale_bunch.append(tmp)  # append the current value
                time_scale_bunch.append(
                    tmp
                )  # append once more the same value for timing of fft with overlap
            # Saving time data to new file
            for i in range(len(time_scale_bunch)):
                new_tl_file.write((time_scale_bunch[i][:]) + '')

            # Reading and reshaping data of the bunch
            wf_data = np.fromfile(file,
                                  dtype='f4',
                                  count=no_of_spectra_in_bunch *
                                  no_of_points_for_fft_spectr)

            wf_data = np.concatenate((buffer, wf_data), axis=0)

            # Save new data from the end to the buffer
            buffer = wf_data[-half_of_spectrum:].copy()

            # Selecting the needed sequence of the data and reshaping to rectangular array
            wf_data_1 = np.reshape(
                wf_data[:-half_of_spectrum].copy(),
                [no_of_points_for_fft_spectr, no_of_spectra_in_bunch],
                order='F')
            wf_data_2 = np.reshape(
                wf_data[half_of_spectrum:].copy(),
                [no_of_points_for_fft_spectr, no_of_spectra_in_bunch],
                order='F')
            wf_data_1 = np.transpose(wf_data_1)
            wf_data_2 = np.transpose(wf_data_2)
            del wf_data

            # Merging 2 arrays into one rectangular array in the one by one order
            wf_data = np.zeros(
                (2 * no_of_spectra_in_bunch, no_of_points_for_fft_spectr))
            wf_data[0::2, :] = wf_data_1[:, :]
            wf_data[1::2, :] = wf_data_2[:, :]
            del wf_data_1, wf_data_2

            # Apply window to data for FFT
            if hanning_window:
                # window = np.hanning(no_of_points_for_fft_spectr)
                window = np.hamming(no_of_points_for_fft_spectr)
                wf_data[:] = wf_data[:] * window[:]
                del window

            # Preparing empty array for spectra
            spectra = np.zeros_like(wf_data, dtype=np.float64)

            # Calculation of spectra
            spectra[:] = np.power(np.abs(np.fft.fft(wf_data[:])), 2)
            # spectra[:] = np.abs(np.fft.fft(np.power(wf_data[:], 2))) # Does not work
            # spectra[:, i] = np.power(np.abs(np.fft.fft(wf_data[:, i])), 2)
            del wf_data

            # Storing only first (left) mirror part of spectra
            spectra = spectra[:, :int(no_of_points_for_fft_spectr / 2)]
            # spectra = spectra[: int(no_of_points_for_fft_spectr / 2), :]

            # At 33 MHz clock frequency the spectrum is upside down; to correct it we flip it
            if int(clock_freq / 1000000) == 33:
                # spectra = np.flipud(spectra)
                spectra = np.fliplr(spectra)

            # Saving spectra data to dat file
            temp = spectra.copy(order='C')
            file_data = open(file_data_name, 'ab')
            file_data.write(np.float64(temp))
            file_data.close()

            bar.next()

        bar.finish()

    file.close()  # Close the data file
    return file_data_name
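convert_wf32_to_dat_with_overlap halves the effective hop by re-reading the last half-FFT of each bunch, i.e. consecutive FFT frames overlap by 50%. A compact self-contained sketch of the same idea on a single waveform array; overlap_spectra is an invented name, not the project's function.

import numpy as np

def overlap_spectra(wf, n_fft):
    # Power spectra with 50% overlap (hop = n_fft // 2), keeping the lower half of each spectrum
    hop = n_fft // 2
    window = np.hamming(n_fft)
    n_frames = (len(wf) - n_fft) // hop + 1
    spectra = np.empty((n_frames, hop))
    for k in range(n_frames):
        frame = wf[k * hop:k * hop + n_fft] * window
        spectra[k] = np.abs(np.fft.fft(frame))[:hop] ** 2
    return spectra

# Example on synthetic data
spectra = overlap_spectra(np.random.randn(65536), n_fft=16384)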
def pulsar_period_DM_compensated_pics(common_path, filename, pulsar_name,
                                      normalize_response, profile_pic_min,
                                      profile_pic_max, spectrum_pic_min,
                                      spectrum_pic_max, periods_per_fig,
                                      customDPI, colormap, save_strongest,
                                      threshold):

    current_time = time.strftime("%H:%M:%S")
    current_date = time.strftime("%d.%m.%Y")

    # Creating a folder where all pictures and results will be stored (if it doesn't exist)
    result_path = "RESULTS_pulsar_n_periods_pics_" + filename
    if not os.path.exists(result_path):
        os.makedirs(result_path)
    if save_strongest:
        best_result_path = result_path + '/Strongest_pulses'
        if not os.path.exists(best_result_path):
            os.makedirs(best_result_path)

    # Taking pulsar period from catalogue
    pulsar_ra, pulsar_dec, DM, p_bar = catalogue_pulsar(pulsar_name)

    # DAT file to be analyzed:
    filepath = common_path + filename

    # Timeline file to be analyzed:
    timeline_filepath = common_path + filename.split(
        '_Data_')[0] + '_Timeline.txt'

    # Opening DAT datafile
    file = open(filepath, 'rb')

    # Data file header read
    df_filesize = os.stat(filepath).st_size  # Size of file
    df_filepath = file.read(32).decode('utf-8').rstrip(
        '\x00')  # Initial data file name
    file.close()

    if df_filepath[-4:] == '.adr':

        [
            df_filepath, df_filesize, df_system_name, df_obs_place,
            df_description, CLCfrq, df_creation_timeUTC, ReceiverMode, Mode,
            sumDifMode, NAvr, time_resolution, fmin, fmax, df, frequency,
            FFTsize, SLine, Width, BlockSize
        ] = FileHeaderReaderADR(filepath, 0, 0)

        freq_points_num = len(frequency)

    if df_filepath[-4:] == '.jds':  # If data obtained from DSPZ receiver

        [
            df_filepath, df_filesize, df_system_name, df_obs_place,
            df_description, CLCfrq, df_creation_timeUTC, SpInFile,
            ReceiverMode, Mode, Navr, time_resolution, fmin, fmax, df,
            frequency, freq_points_num, dataBlockSize
        ] = FileHeaderReaderJDS(filepath, 0, 1)

    # ************************************************************************************
    #                             R E A D I N G   D A T A                                *
    # ************************************************************************************

    # Time line file reading
    timeline, dt_timeline = time_line_file_reader(timeline_filepath)

    # Calculation of the dimensions of arrays to read taking into account the pulsar period
    spectra_in_file = int(
        (df_filesize - 1024) /
        (8 * freq_points_num))  # int(df_filesize - 1024)/(2*4*freq_points_num)
    spectra_to_read = int(
        np.round((periods_per_fig * p_bar / time_resolution), 0))
    num_of_blocks = int(np.floor(spectra_in_file / spectra_to_read))

    print('   Pulsar period:                           ', p_bar, 's.')
    print('   Time resolution:                         ', time_resolution,
          's.')
    print('   Number of spectra to read in', periods_per_fig, 'periods:  ',
          spectra_to_read, ' ')
    print('   Number of spectra in file:               ', spectra_in_file, ' ')
    print('   Number of', periods_per_fig, 'periods blocks in file:      ',
          num_of_blocks, '\n')

    # Data reading and making figures
    print('\n\n  *** Data reading and making figures *** \n\n')

    data_file = open(filepath, 'rb')
    data_file.seek(
        1024, os.SEEK_SET
    )  # Jumping to 1024+number of spectra to skip byte from file beginning

    bar = IncrementalBar('   Making pictures of n periods: ',
                         max=num_of_blocks,
                         suffix='%(percent)d%%')
    bar.start()

    for block in range(num_of_blocks + 1):  # Main loop by blocks of data

        # bar.next()

        # current_time = time.strftime("%H:%M:%S")
        # print(' * Data block # ', block + 1, ' of ', num_of_blocks + 1, '  started at: ', current_time)

        # Reading the last block which is less than 3 periods
        if block == num_of_blocks:
            spectra_to_read = spectra_in_file - num_of_blocks * spectra_to_read

        # Reading and preparing block of data (3 periods)
        data = np.fromfile(data_file,
                           dtype=np.float64,
                           count=spectra_to_read * len(frequency))
        data = np.reshape(data, [len(frequency), spectra_to_read], order='F')
        data = 10 * np.log10(data)
        if normalize_response > 0:
            Normalization_dB(data.transpose(), len(frequency), spectra_to_read)

        # Preparing single averaged data profile for figure
        profile = data.mean(axis=0)[:]
        profile = profile - np.mean(profile)
        data = data - np.mean(data)

        # Time line
        fig_time_scale = timeline[block * spectra_to_read:(block + 1) *
                                  spectra_to_read]

        # Making result picture
        fig = plt.figure(figsize=(9.2, 4.5))
        rc('font', size=5, weight='bold')
        ax1 = fig.add_subplot(211)
        ax1.plot(profile,
                 color=u'#1f77b4',
                 linestyle='-',
                 alpha=1.0,
                 linewidth='0.60',
                 label='3 pulses time profile')
        ax1.legend(loc='upper right', fontsize=5)
        ax1.grid(b=True,
                 which='both',
                 color='silver',
                 linewidth='0.50',
                 linestyle='-')
        ax1.axis([0, len(profile), profile_pic_min, profile_pic_max])
        ax1.set_ylabel('Amplitude, AU', fontsize=6, fontweight='bold')
        ax1.set_title('File: ' + filename + '  Description: ' +
                      df_description + '  Resolution: ' +
                      str(np.round(df / 1000, 3)) + ' kHz and ' +
                      str(np.round(time_resolution * 1000, 3)) + ' ms.',
                      fontsize=5,
                      fontweight='bold')
        ax1.tick_params(axis='x',
                        which='both',
                        bottom=False,
                        top=False,
                        labelbottom=False)
        ax2 = fig.add_subplot(212)
        ax2.imshow(np.flipud(data),
                   aspect='auto',
                   cmap=colormap,
                   vmin=spectrum_pic_min,
                   vmax=spectrum_pic_max,
                   extent=[0, len(profile), frequency[0], frequency[-1]])
        ax2.set_xlabel('Time UTC (at the lowest frequency), HH:MM:SS.ms',
                       fontsize=6,
                       fontweight='bold')
        ax2.set_ylabel('Frequency, MHz', fontsize=6, fontweight='bold')
        text = ax2.get_xticks().tolist()
        for i in range(len(text) - 1):
            k = int(text[i])
            text[i] = fig_time_scale[k][11:23]
        ax2.set_xticklabels(text, fontsize=5, fontweight='bold')
        fig.subplots_adjust(hspace=0.05, top=0.91)
        fig.suptitle('Single pulses of ' + pulsar_name + ' (DM: ' + str(DM) +
                     r' $\mathrm{pc \cdot cm^{-3}}$' + ', Period: ' +
                     str(p_bar) + ' s.), fig. ' + str(block + 1) + ' of ' +
                     str(num_of_blocks + 1),
                     fontsize=7,
                     fontweight='bold')
        fig.text(0.80,
                 0.04,
                 'Processed ' + current_date + ' at ' + current_time,
                 fontsize=3,
                 transform=plt.gcf().transFigure)
        fig.text(0.09,
                 0.04,
                 'Software version: ' + Software_version +
                 ', [email protected], IRA NASU',
                 fontsize=3,
                 transform=plt.gcf().transFigure)
        pylab.savefig(result_path + '/' + filename + ' fig. ' +
                      str(block + 1) + ' - Combined picture.png',
                      bbox_inches='tight',
                      dpi=customDPI)

        # If the profile has points above threshold save picture also into separate folder
        if save_strongest and np.max(profile) > threshold:
            pylab.savefig(best_result_path + '/' + filename + ' fig. ' +
                          str(block + 1) + ' - Combined picture.png',
                          bbox_inches='tight',
                          dpi=customDPI)
        plt.close('all')

        bar.next()

    bar.finish()
    data_file.close()
Example No. 15
def laba3(db_file_name, count_range, schema, schema_data):
    results = {
        'linear': [],
        'binary': [],
        'binary+sort': [],
        'multimap': [],
        'hashtable_map_good': [],
        'hashtable_map_bad': [],
        'bad_collisions': [],
        'good_collisions': []
    }
    key = 'fio'
    max_count_iterations = 2
    iterations = len(count_range)
    bar = IncrementalBar('Countdown', max=iterations)
    bar.start()

    for count in count_range:
        bar.next()
        print('\n')

        for count_iterations in range(max_count_iterations):
            generate(db_file_name, count, schema, schema_data)
            fp_map = defaultdict(list)
            fp_list = load_fp_from_file(db_file_name)
            query_obj = random.choice(fp_list)
            query = getattr(query_obj, key)

            print('check lin')
            linear = check_time(linear_search)(fp_list, key, query)
            print('check sort+bin')
            sort_and_bin_search = check_time(sort_and_binary_seach)(fp_list,
                                                                    key, query)
            print('check bin')
            bin_search = check_time(binary_search)(fp_list, key, query)
            print('check multimap')
            map_search = check_time(fp_map.__getitem__)(query)

            print('check hashtable good')
            fp_custom_map_good = HashTable()
            for el in fp_list:
                el.set_hash_type('good')
                fp_map[getattr(el, key)].append(el)
                fp_custom_map_good.add(el)
            query_obj.set_hash_type('good')
            custom_map_good_search = check_time(fp_custom_map_good.get)(
                Hashes.good_hash(query))

            print('check hashtable bad')
            fp_custom_map_bad = HashTable()
            for el in fp_list:
                el.set_hash_type('bad')
                fp_custom_map_bad.add(el)
            query_obj.set_hash_type('bad')
            custom_map_bad_search = check_time(fp_custom_map_bad.get)(
                Hashes.bad_hash(query))

            results['linear'].append((count, linear))
            results['binary'].append((count, bin_search))
            results['binary+sort'].append((count, sort_and_bin_search))
            results['multimap'].append((count, map_search))
            results['hashtable_map_good'].append(
                (count, custom_map_good_search))
            results['hashtable_map_bad'].append((count, custom_map_bad_search))
            results['bad_collisions'].append(
                (count, fp_custom_map_bad.collision_count))
            results['good_collisions'].append(
                (count, fp_custom_map_good.collision_count))

    plot_graph(results, count_range, max_count_iterations)
    print('bad_collisions: ', results['bad_collisions'])
    print('good_collisions: ', results['good_collisions'])
    bar.finish()
    return results
Example No. 16
def convert_jds_wf_to_wf32(source_directory, result_directory, no_of_bunches_per_file):
    """
    function converts jds waveform data to wf32 waveform data for further processing (coherent dedispersion) and
    saves txt files with time data
    Input parameters:
        source_directory - directory where initial jds waveform data are stored
        result_directory - directory where new wf32 files will be stored
        no_of_bunches_per_file - number of data bunches per file to process (depends on RAM volume on the PC)
    Output parameters:
        result_wf32_files - list of results files
    """

    file_list = find_and_check_files_in_current_folder(source_directory, '.jds')

    # To print in console the header of first file
    print('\n  First file header parameters: \n')

    # *** Data file header read ***
    [df_filename, df_filesize, df_system_name, df_obs_place, df_description,
     clock_freq, df_creation_timeUTC, channel, receiver_mode, Mode, Navr, time_res, fmin, fmax,
     df, frequency, freq_points_num, data_block_size] = FileHeaderReaderJDS(source_directory + file_list[0], 0, 1)
    if Mode > 0:
        sys.exit('  ERROR!!! Data recorded in wrong mode! Waveform mode needed.\n\n    Program stopped!')

    result_wf32_files = []
    # Main loop by files start
    for file_no in range(len(file_list)):   # loop by files

        fname = source_directory + file_list[file_no]

        # Create long data files and copy first data file header to them
        if file_no == 0:

            with open(fname, 'rb') as file:
                # *** Data file header read ***
                file_header = file.read(1024)

            # *** Creating a name for long timeline TXT file ***
            tl_file_name = df_filename + '_Timeline.wtxt'
            tl_file = open(tl_file_name, 'w')  # Open and close to delete the file with the same name
            tl_file.close()

            # *** Creating a binary file with data for long data storage ***
            file_data_A_name = df_filename + '_Data_chA.wf32'
            result_wf32_files.append(file_data_A_name)
            file_data_A = open(file_data_A_name, 'wb')
            file_data_A.write(file_header)
            file_data_A.close()

            if channel == 2:
                file_data_B_name = df_filename + '_Data_chB.wf32'
                result_wf32_files.append(file_data_B_name)
                file_data_B = open(file_data_B_name, 'wb')
                file_data_B.write(file_header)
                file_data_B.close()

            del file_header

        # Calculation of number of blocks and number of spectra in the file
        if channel == 0 or channel == 1:    # Single channel mode
            no_of_spectra_in_bunch = int((df_filesize - 1024) / (no_of_bunches_per_file * 2 * data_block_size))
        else:                               # Two channels mode
            no_of_spectra_in_bunch = int((df_filesize - 1024) / (no_of_bunches_per_file * 4 * data_block_size))

        no_of_blocks_in_file = (df_filesize - 1024) / data_block_size

        if file_no == 0:
            print(' Number of blocks in file:               ', no_of_blocks_in_file)
            print(' Number of bunches to read in file:      ', no_of_bunches_per_file)
            print('\n  *** Reading data from file *** \n')

        # *******************************************************************************
        #                           R E A D I N G   D A T A                             *
        # *******************************************************************************

        with open(fname, 'rb') as file:
            file.seek(1024)  # Jumping to 1024 byte from file beginning

            # !!! Fake timing. Real timing to be done!!!
            TimeFigureScaleFig = np.linspace(0, no_of_bunches_per_file, no_of_bunches_per_file + 1)
            for i in range(no_of_bunches_per_file):
                TimeFigureScaleFig[i] = str(TimeFigureScaleFig[i])

            time_scale_bunch = []

            bar = IncrementalBar(' File ' + str(file_no + 1) + ' of ' + str(len(file_list)) + ' reading: ',
                                 max=no_of_bunches_per_file, suffix='%(percent)d%%')

            bar.start()

            for bunch in range(no_of_bunches_per_file):

                # bar.next()

                # Reading and reshaping all data with time data
                if channel == 0 or channel == 1:    # Single channel mode
                    wf_data = np.fromfile(file, dtype='i2', count=no_of_spectra_in_bunch * data_block_size)
                    wf_data = np.reshape(wf_data, [data_block_size, no_of_spectra_in_bunch], order='F')
                if channel == 2:                    # Two channels mode
                    wf_data = np.fromfile(file, dtype='i2', count=2 * no_of_spectra_in_bunch * data_block_size)
                    wf_data = np.reshape(wf_data, [data_block_size, 2 * no_of_spectra_in_bunch], order='F')

                # Timing
                timeline_block_str = jds_waveform_time(wf_data, clock_freq, data_block_size)
                if channel == 2:                    # Two channels mode
                    # Cut the timeline of second channel
                    timeline_block_str = timeline_block_str[0:int(len(timeline_block_str) / 2)]  
                for i in range(len(timeline_block_str)):
                    time_scale_bunch.append(df_creation_timeUTC[0:10] + ' ' + timeline_block_str[i])  # [0:12]

                # Deleting the time blocks from waveform data
                real_data_block_size = data_block_size - 4
                wf_data = wf_data[0: real_data_block_size, :]

                # Separation data into channels
                if channel == 0 or channel == 1:  # Single channel mode
                    wf_data_chA = np.reshape(wf_data, [real_data_block_size * no_of_spectra_in_bunch, 1], order='F')
                    del wf_data  # Deleting unnecessary array name just in case

                if channel == 2:  # Two channels mode

                    # Separating the data into two channels
                    wf_data = np.reshape(wf_data, [2 * real_data_block_size * no_of_spectra_in_bunch, 1], order='F')
                    wf_data_chA = wf_data[0: (2 * real_data_block_size * no_of_spectra_in_bunch): 2]  # A
                    wf_data_chB = wf_data[1: (2 * real_data_block_size * no_of_spectra_in_bunch): 2]  # B
                    del wf_data

                # Saving WF data to dat file
                file_data_A = open(file_data_A_name, 'ab')
                file_data_A.write(np.float32(wf_data_chA).transpose().copy(order='C'))
                file_data_A.close()
                if channel == 2:
                    file_data_B = open(file_data_B_name, 'ab')
                    file_data_B.write(np.float32(wf_data_chB).transpose().copy(order='C'))
                    file_data_B.close()

                # Saving time data to the long timeline file
                with open(tl_file_name, 'a') as tl_file:
                    for i in range(no_of_spectra_in_bunch):
                        tl_file.write((str(time_scale_bunch[i][:])) + ' \n')  # str

                bar.next()

            bar.finish()

        file.close()  # Close the data file
        del file_data_A
        if channel == 2:
            del file_data_B

    return result_wf32_files
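In two-channel mode, Example No. 16 de-interleaves channels A and B by striding over the flattened sample stream (wf_data[0::2] and wf_data[1::2]). The same idea in isolation, on fake int16 data:

import numpy as np

interleaved = np.arange(16, dtype='i2')  # fake A/B interleaved samples
ch_a = interleaved[0::2]                 # even positions -> channel A
ch_b = interleaved[1::2]                 # odd positions  -> channel B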
Example No. 17
def download_chapter_images(app):
    app.progress = 0

    # download or generate cover
    app.book_cover = download_cover(app)
    if not app.book_cover:
        app.book_cover = generate_cover(app)
    # end if
    if not app.book_cover:
        logger.warn('No cover image')
    # end if

    image_count = 0
    futures_to_check = {}
    for chapter in app.chapters:
        if not chapter.get('body'):
            continue
        # end if

        soup = app.crawler.make_soup(chapter['body'])
        image_output_path = os.path.join(app.output_path, 'images')
        for img in soup.select('img'):
            full_url = app.crawler.absolute_url(img['src'],
                                                page_url=chapter['url'])
            future = app.crawler.executor.submit(download_image, app, full_url,
                                                 image_output_path)
            futures_to_check.setdefault(chapter['id'], [])
            futures_to_check[chapter['id']].append(future)
            image_count += 1
        # end for
    # end for

    if not futures_to_check:
        return
    # end if

    bar = IncrementalBar('Downloading images  ', max=image_count)
    if os.getenv('debug_mode') == 'yes':
        bar.next = lambda: None  # Hide in debug mode
        bar.finish()
    else:
        bar.start()
    # end if

    for chapter in app.chapters:
        if chapter['id'] not in futures_to_check:
            bar.next()
            continue
        # end if

        images = {}
        for future in futures_to_check[chapter['id']]:
            url, filename = future.result()
            bar.next()
            if filename:
                images[url] = filename
            # end if
        # end for

        soup = app.crawler.make_soup('<main>' + chapter['body'] + '</main>')
        for img in soup.select('img'):
            if img['src'] in images:
                filename = images[img['src']]
                img['src'] = 'images/%s' % filename
                img['style'] = 'float: left; margin: 15px; width: 100%;'
        # end for
        chapter['body'] = str(soup.select_one('main'))
    # end for

    bar.finish()
    print('Processed %d images' % image_count)
Example No. 18
def normalize_dat_file(directory, filename, no_of_spectra_in_bunch,
                       median_filter_window, show_aver_spectra):
    """
    Function calculates the average spectrum in a DAT file and normalizes all spectra in the file to that average spectrum
    Input parameters:
        directory - name of directory with initial dat file
        filename - name of initial dat file
        no_of_spectra_in_bunch - number of spectra in bunch to read
        median_filter_window - window of median filter to process the average spectrum
        show_aver_spectra - boolean variable which indicates whether the picture of the average spectrum is shown and
                            the script paused until the picture window is closed
    Output parameters:
        output_file_name - name of the resulting normalized .dat file
    """

    print(
        '\n   Preparations and calculation of the average spectrum to normalize... \n'
    )

    output_file_name = directory + 'Norm_' + filename
    filename = directory + filename

    # Opening DAT datafile
    file = open(filename, 'rb')

    # *** Data file header read ***
    df_filesize = os.stat(filename).st_size  # Size of file
    df_filename = file.read(32).decode('utf-8').rstrip(
        '\x00')  # Initial data file name
    file.close()

    if df_filename[-4:] == '.adr':

        [
            df_filename, df_filesize, df_system_name, df_obs_place,
            df_description, CLCfrq, df_creation_timeUTC, ReceiverMode, Mode,
            sumDifMode, NAvr, TimeRes, fmin, fmax, df, frequency, FFTsize,
            SLine, Width, BlockSize
        ] = FileHeaderReaderADR(filename, 0, 0)

    if df_filename[-4:] == '.jds':  # If data obtained from DSPZ receiver

        [
            df_filename, df_filesize, df_system_name, df_obs_place,
            df_description, CLCfrq, df_creation_timeUTC, SpInFile,
            ReceiverMode, Mode, Navr, TimeRes, fmin, fmax, df, frequency,
            FreqPointsNum, dataBlockSize
        ] = FileHeaderReaderJDS(filename, 0, 0)

    # Calculation of the dimensions of arrays to read
    nx = len(frequency)  # the first dimension of the array
    ny = int((
        (df_filesize - 1024) /
        (nx * 8)))  # the second dimension of the array: file size - 1024 bytes

    # Number of data blocks to read from file
    num_of_blocks = int(ny // no_of_spectra_in_bunch)

    # Read data from file by blocks and average it
    file = open(filename, 'rb')
    file.seek(1024)
    average_array = np.empty((nx, 0), float)
    for block in range(num_of_blocks):
        if block == (num_of_blocks - 1):
            spectra_num_in_bunch = ny - (num_of_blocks -
                                         1) * no_of_spectra_in_bunch
        else:
            spectra_num_in_bunch = no_of_spectra_in_bunch

        data = np.fromfile(file,
                           dtype=np.float64,
                           count=nx * spectra_num_in_bunch)
        data = np.reshape(data, [nx, spectra_num_in_bunch], order='F')
        tmp = np.empty((nx, 1), float)
        # tmp[:, 0] = data.mean(axis=1)[:]
        tmp[:, 0] = data.min(axis=1)[:]
        average_array = np.append(average_array, tmp, axis=1)  #

    # Average the profiles of all data blocks
    average_profile = average_array.mean(axis=1)

    init_average_profile = average_profile.copy()

    # # Make a figure of average spectrum (profile)
    # fig = plt.figure(figsize=(9, 5))
    # ax1 = fig.add_subplot(111)
    # ax1.plot(10 * np.log10(average_profile), linestyle='-', linewidth='1.00', label='Average spectra')
    # ax1.legend(loc='upper right', fontsize=6)
    # ax1.grid(b=True, which='both', color='silver', linestyle='-')
    # ax1.set_xlabel('Frequency points, num.', fontsize=6, fontweight='bold')
    # ax1.set_ylabel('Intensity, dB', fontsize=6, fontweight='bold')
    # pylab.savefig('Averaged_spectra_'+filename[:-4]+'_before_filtering.png', bbox_inches='tight', dpi=160)
    # plt.close('all')

    # Apply median filter to average profile
    average_profile = median_filter(average_profile, median_filter_window)
    med_average_profile = average_profile.copy()
    average_profile = average_filter(average_profile,
                                     median_filter_window + 20)

    # Make a figure of filtered average spectrum (profile)
    fig = plt.figure(figsize=(12, 8))
    ax1 = fig.add_subplot(111)
    ax1.plot(10 * np.log10(init_average_profile),
             linestyle='-',
             linewidth='1.50',
             label='Initial spectra',
             color='C0',
             alpha=0.6)
    ax1.plot(10 * np.log10(med_average_profile),
             linestyle='-',
             linewidth='1.25',
             label='Median spectra',
             color='C1',
             alpha=0.8)
    ax1.plot(10 * np.log10(average_profile),
             linestyle='-',
             linewidth='1.00',
             label='Median averaged spectra',
             color='C3')
    ax1.legend(loc='upper right', fontsize=6)
    ax1.grid(b=True, which='both', color='silver', linestyle='-')
    ax1.set_xlabel('Frequency points, num.', fontsize=6, fontweight='bold')
    ax1.set_ylabel('Intensity, dB', fontsize=6, fontweight='bold')
    pylab.savefig('Averaged_spectra_' + filename[:-4] + '_after_filtering.png',
                  bbox_inches='tight',
                  dpi=160)
    if show_aver_spectra:
        print('\n   Close the figure window to continue processing!!!\n')
        plt.show()
    plt.close('all')

    del init_average_profile, med_average_profile

    # Normalization
    print('   Spectra normalization... \n')
    file.seek(0)
    file_header = file.read(1024)
    normalized_file = open(output_file_name, 'wb')
    normalized_file.write(file_header)
    del file_header

    bar = IncrementalBar(' Normalizing of the DAT file: ',
                         max=num_of_blocks,
                         suffix='%(percent)d%%')
    bar.start()

    for block in range(num_of_blocks):

        if block == (num_of_blocks - 1):
            spectra_num_in_bunch = ny - (num_of_blocks -
                                         1) * no_of_spectra_in_bunch
        else:
            spectra_num_in_bunch = no_of_spectra_in_bunch

        data = np.fromfile(file,
                           dtype=np.float64,
                           count=nx * spectra_num_in_bunch)
        data = np.reshape(data, [nx, spectra_num_in_bunch], order='F')
        for j in range(spectra_num_in_bunch):
            data[:, j] = data[:, j] / average_profile[:]
        temp = data.transpose().copy(order='C')
        normalized_file.write(np.float64(temp))

        bar.next()

    file.close()
    normalized_file.close()
    bar.finish()

    # *** Creating a new timeline TXT file for results ***
    new_tl_file_name = output_file_name.split('_Data_', 1)[0] + '_Timeline.txt'
    new_tl_file = open(
        new_tl_file_name,
        'w')  # Open and close to delete the file with the same name
    new_tl_file.close()

    # *** Reading timeline file ***
    old_tl_file_name = filename.split('_Data_', 1)[0] + '_Timeline.txt'
    old_tl_file = open(old_tl_file_name, 'r')
    new_tl_file = open(new_tl_file_name, 'w')

    # Read time from timeline file
    time_scale_bunch = old_tl_file.readlines()

    # Saving time data to new file
    for j in range(len(time_scale_bunch)):
        new_tl_file.write((time_scale_bunch[j][:]) + '')

    old_tl_file.close()
    new_tl_file.close()

    return output_file_name
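The normalization step at the heart of normalize_dat_file is an element-wise division of every spectrum (a column of the data block) by the smoothed average profile. Stripped of the file handling, the operation looks like the sketch below; the data are random placeholders and a plain mean stands in for the median/average filtering used above.

import numpy as np

freq_points, n_spectra = 256, 1000
data = np.random.rand(freq_points, n_spectra) + 1.0  # fake spectra, one per column
average_profile = data.mean(axis=1)                  # stand-in for the filtered profile
normalized = data / average_profile[:, np.newaxis]   # divide each column by the profile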