Пример #1
0
def test_filter_files_doc():
    """filter_files should drop 'hello.doc' and keep the other names in order."""
    from utilities.utilities import Utilities

    file_names = ['file.txt', 'hello.doc', 'file.wav']
    kept = Utilities().filter_files(file_names)
    assert kept == ['file.txt', 'file.wav']
Пример #2
0
def test_get_ref_set():
    """_get_ref_set should return exactly the two '.txt' names from the mixed list, as a set."""
    from utilities.utilities import Utilities

    mixed_files = ['hello.wav', 'foo.doc', 'bar.flac', '2.txt', '3.txt']
    ref_set = Utilities()._get_ref_set(mixed_files)
    assert ref_set == {'2.txt', '3.txt'}
Пример #3
0
def test_get_audio_set():
    """get_audio_set should return the three '.wav' names from the mixed list, as a set."""
    from utilities.utilities import Utilities

    mixed_files = ['a.wav', 'foo.txt', 'c.txt', 'b.wav', 'c.wav', 'a.txt', 'b.txt']
    audio_set = Utilities().get_audio_set(mixed_files)
    assert audio_set == {'a.wav', 'b.wav', 'c.wav'}
Пример #4
0
def test_get_root_filename_from_file_name():
    """get_root_filename should return 'file' for the bare file name 'file.doc'."""
    from utilities.utilities import Utilities

    file_name = 'file.doc'
    root = Utilities().get_root_filename(file_name)
    assert root == 'file'
Пример #5
0
def test_filter_files_orphan_ref():
    """filter_files should drop 'orphan.txt', which has no same-named audio file in the input."""
    from utilities.utilities import Utilities

    file_names = ['blah.txt', 'blah.ogg', 'orphan.txt']
    kept = Utilities().filter_files(file_names)
    assert kept == ['blah.txt', 'blah.ogg']
Пример #6
0
 def test_clean_url(self):
     """clean_url should remove the trailing URL and the space in front of it."""
     comment = (
         "vom 13. bis 16. mai findet in Coburg das 34. BMW Veteranentreffen statt,"
         "Infos auch unter http://www.facebook.com/TourismusCoburg?v=app_2344061033&ref=ts#!/event.php"
         "?eid=110512678986182&index=1"
     )
     url_free = "vom 13. bis 16. mai findet in Coburg das 34. BMW Veteranentreffen statt,Infos auch unter"
     self.assertEqual(Utilities().clean_url(comment), url_free)
Пример #7
0
def test_is_valid_file_extension_true2():
    """The upper-case extension 'MP3' should be reported as valid."""
    from utilities.utilities import Utilities

    checker = Utilities()
    want = True
    is_valid = checker._is_valid_file_extension('MP3')
    assert is_valid == want
Пример #8
0
def test_is_valid_file_extension_false():
    """The extension 'doc' should be reported as not valid."""
    from utilities.utilities import Utilities

    checker = Utilities()
    want = False
    is_valid = checker._is_valid_file_extension('doc')
    assert is_valid == want
Пример #9
0
def test_get_count_of_word_instances():
    """get_count_of_word_instances should map each distinct word to its number of occurrences."""
    from utilities.utilities import Utilities

    words = ['hello', 'hi', 'hello', 'there', 'hello']
    counts = Utilities().get_count_of_word_instances(words)
    assert counts == {'hello': 3, 'hi': 1, 'there': 1}
Пример #10
0
def test_get_root_filename_from_uri():
    """get_root_filename should return 'file' for a full 'gs://' URI ending in 'file.wav'."""
    from utilities.utilities import Utilities

    gcs_uri = 'gs://foo/bar/baz/file.wav'
    root = Utilities().get_root_filename(gcs_uri)
    assert root == 'file'
Пример #11
0
def test_string_to_enum_speex_with_header_byte():
    """string_to_enum should map 'SPEEX_WITH_HEADER_BYTE' to the same-named AudioEncoding member."""
    from utilities.utilities import Utilities
    from google.cloud.speech_v1p1beta1 import enums

    converter = Utilities()
    wanted = enums.RecognitionConfig.AudioEncoding.SPEEX_WITH_HEADER_BYTE
    encoding_name = 'SPEEX_WITH_HEADER_BYTE'
    assert converter.string_to_enum(encoding_name) == wanted
Пример #12
0
def test_string_to_enum_ogg_opus():
    """string_to_enum should map the mixed-case name 'OGG_opus' to AudioEncoding.OGG_OPUS."""
    from utilities.utilities import Utilities
    from google.cloud.speech_v1p1beta1 import enums

    converter = Utilities()
    wanted = enums.RecognitionConfig.AudioEncoding.OGG_OPUS
    encoding_name = 'OGG_opus'
    assert converter.string_to_enum(encoding_name) == wanted
Пример #13
0
def test_string_to_enum_amr_wb():
    """string_to_enum should map the mixed-case name 'amr_WB' to AudioEncoding.AMR_WB."""
    from utilities.utilities import Utilities
    from google.cloud.speech_v1p1beta1 import enums

    converter = Utilities()
    wanted = enums.RecognitionConfig.AudioEncoding.AMR_WB
    encoding_name = 'amr_WB'
    assert converter.string_to_enum(encoding_name) == wanted
Пример #14
0
def test_string_to_enum_unspecified():
    """string_to_enum should map the empty string to AudioEncoding.ENCODING_UNSPECIFIED."""
    from utilities.utilities import Utilities
    from google.cloud.speech_v1p1beta1 import enums

    converter = Utilities()
    wanted = enums.RecognitionConfig.AudioEncoding.ENCODING_UNSPECIFIED
    encoding_name = ''
    assert converter.string_to_enum(encoding_name) == wanted
Пример #15
0
def test_append_uri_2():
    """append_uri should return the URI directly followed by the file name when the URI ends in '/'."""
    from utilities.utilities import Utilities

    helper = Utilities()
    file_name = 'this.wav'
    base_uri = 'gs://foo/bar/'
    wanted = f'{base_uri}{file_name}'
    assert helper.append_uri(base_uri, file_name) == wanted
Пример #16
0
 def test_detect_language_langdetect(self):
     """Print every line of comments.txt together with its detected language.

     This is a smoke test: nothing is asserted. Double quotes are removed
     from each line before it is handed to check_language_languagedetect.
     """
     cleaner = Utilities()
     with open("comments.txt") as f:
         for line in f:
             line = line.replace('"', "")
             lang = cleaner.check_language_languagedetect(line)
             # Parenthesised single-argument print is valid on both
             # Python 2 and Python 3; the bare statement form is a
             # SyntaxError on Python 3.
             print(line)
             print(lang)
Пример #17
0
 def test_clean_multiple_punctuations(self):
     """Repeated dots and exclamation marks should be reduced as shown in the expected strings."""
     cases = (
         (
             "schön zu sehen das sowas in coburg stattfindet...vielleicht besuche ich mal wieder die alte"
             " heimat.. gut zu wissen ;)",
             "schön zu sehen das sowas in coburg stattfindet. vielleicht besuche ich mal wieder die alte"
             " heimat. gut zu wissen ;)",
         ),
         ("Genau für Mich!!!!", "Genau für Mich!"),
     )
     cleaner = Utilities()
     for raw, wanted in cases:
         self.assertEqual(cleaner.clean_multiple_punctuations(raw), wanted)
Пример #18
0
def test_parse_uri_2():
    """parse_uri should yield five parts; bucket, folder and file are checked for a nested path."""
    from utilities.utilities import Utilities

    gcs_uri = 'gs://foo/bar/baz/test.flac'
    # Positions 0 (scheme) and 2 (path) are returned but not checked here.
    _scheme, bucket, _path, folder, file_name = Utilities().parse_uri(gcs_uri)
    assert bucket == 'foo'
    assert folder == 'bar/baz'
    assert file_name == 'test.flac'
Пример #19
0
 def __init__(self, browser='chrome'):
     """Create the webdriver instance for the requested browser.

     :param browser: 'chrome' or 'firefox', compared case-insensitively.
         Any other value leaves ``self.driver_provider`` unset.
     """
     # The backslash is escaped explicitly: '\c' is an invalid escape
     # sequence that newer Python versions warn about. The runtime value
     # is unchanged.
     if sys.platform == "win32":
         chrome = '\\chromedriver.exe'
     else:
         chrome = '/chromedriver'
     utilities = Utilities()
     # The driver path is built from this file's location moved up one
     # level via move_up_directory, with the chromedriver name appended.
     currentFilePath = os.path.realpath(__file__)
     new_path = utilities.move_up_directory(currentFilePath, 1)
     browser_name = browser.lower()
     if browser_name == 'chrome':
         self.driver_provider = webdriver.Chrome(new_path + chrome)
     elif browser_name == 'firefox':
         self.driver_provider = webdriver.Firefox()
 def read_ref(self, uri, txt_file):
     """Download a reference text blob and return it in normalised form.

     The blob named ``txt_file`` (prefixed by the folder parsed from ``uri``
     when that folder is non-empty) is decoded as latin-1, has its newline
     characters removed, is lower-cased and is then passed through
     ``Utilities.strip_puc``. The stripped text is logged at debug level.
     """
     from google.cloud import storage as storage
     log = logging.getLogger(__name__)
     gcs_client = storage.Client()
     bucket_name, folder = self._parse_uri(uri)
     gcs_bucket = gcs_client.bucket(bucket_name)
     # Only prefix the folder when the URI actually contained one.
     blob_path = f"{folder}/{txt_file}" if len(folder) > 0 else f"{txt_file}"
     raw_text = gcs_bucket.get_blob(blob_path).download_as_string().decode('latin-1')
     normalised = str(raw_text.replace('\n', '')).lower()
     stripped = Utilities().strip_puc(text=normalised)
     log.debug(f'REF STRIPPED: {stripped}')
     return stripped
Пример #21
0
def test_create_unique_root_2():
    """create_unique_root should encode the configured model, language and NLP options in the name."""
    from utilities.utilities import Utilities
    from model.configuration import Configuration
    from model.nlp import NLPModel

    helper = Utilities()
    config = Configuration()
    nlp_options = NLPModel()
    base_name = '12345'

    # Speech configuration under test.
    config.set_model('video')
    config.set_use_enhanced(False)
    config.set_language_code('fr_FR')
    config.set_alternative_language_codes(['en-US', 'ru-RU'])
    config.set_speech_context('hi', 5)

    # NLP post-processing switches under test.
    nlp_options.set_remove_stop_words(True)
    nlp_options.set_apply_stemming(False)
    nlp_options.set_expand_contractions(True)
    nlp_options.set_n2w(True)

    unique_root = helper.create_unique_root(base_name, config, nlp_options)
    wanted = '12345_video_fr_FR_alts_applied_speech_adaptation_applied_boost_5_stop_words_removed_contractions_expanded_numbers_converted_2_words'
    assert unique_root == wanted
Пример #22
0
def test_get_extension_flac():
    """_get_extension should return 'flac' (without the dot) for 'woooooo.flac'."""
    from utilities.utilities import Utilities

    extension = Utilities()._get_extension('woooooo.flac')
    assert extension == 'flac'
Пример #23
0
import dash_table
import base64
import pandas as pd
import numpy as np
from file_handle import File_Handle
from SIR_model import SIR
from SIR_predict import SirPredict
from utilities.utilities import Utilities 
import matplotlib.pyplot as plt

#%%
# Instances shared by the rest of the script: the file handler used below
# for downloads, the SIR model and predictor objects, the utilities helper,
# and a figure created with no arguments.
# NOTE(review): `go` is not imported in the lines visible here — presumably
# imported earlier in the file; confirm.
handle = File_Handle()
sirmodel = SIR()
sirpredict = SirPredict()
utl = Utilities()
fig = go.Figure()

#%%
# Downloading data: these calls must run before the files are read below.
handle.download_censo_file()
# file_status is not used in the lines visible here.
file_status = handle.download_covid_file()

#%%
# Loading data
# header=8: column labels are taken from row index 8 of the 'Mpios' sheet.
censo_df = pd.read_excel('data/ProyeccionMunicipios2005_2020.xls', sheet_name = 'Mpios',header=8)

# Rewrite two municipality labels in the MPIO column; all other values are
# kept unchanged. Presumably this aligns them with the names used in the
# COVID dataset — confirm.
censo_df['MPIO'] = np.where(censo_df['MPIO'] == 'Bogotá, D.C.', 'Bogotá D.C.', censo_df['MPIO'])
censo_df['MPIO'] = np.where(censo_df['MPIO'] == 'Cartagena', 'Cartagena de Indias', censo_df['MPIO'])

# Raw COVID case data from the CSV file.
data_org = pd.read_csv('data/Casos_positivos_de_COVID-19_en_Colombia.csv')
    enc = args.encoding
    sample_rate_hertz = args.sample_rate_hertz
    language_codes = args.langs
    phrase_file_path = args.phrase_file
    boosts = [int(i) for i in args.boosts]

    if not no_zeros_for_boost:
        boosts.append(0)
    alternative_language_codes = args.alternative_languages
    encoding = args.encoding

    random_queue = args.random_queue
    use_fake_hyp = args.fake_hyp

    # init utilities
    utilities = Utilities()

    #
    #   Audit phrase file
    #
    phrases = list()
    if phrase_file_path:
        phrases = io_handler.read_file(phrase_file_path)

    if phrases:
        if no_zeros_for_boost:
            speech_context_runs = [True]
        else:
            speech_context_runs = [False, True]
        logger.debug(f'PHRASES: {phrases}')
    else:
    def get_hypothesis(self, uri, configuration):
        """Asynchronously transcribe the audio at ``uri`` and return the transcript.

        A long-running recognition request is built from ``configuration``
        and polled every 5 seconds, printing progress each time. Once done,
        the first alternative of every result is concatenated, each preceded
        by a single space.

        :param uri: audio URI, sent to the API as ``{"uri": uri}``.
        :param configuration: object providing the ``get_*`` accessors read below.
        :return: the concatenated transcript after ``Utilities.strip_puc``,
            lower-cased.
        :raises TimeoutError: if the operation is still not done on the
            30000th poll.
        """
        import time

        client = speech.SpeechClient()
        config = {
            "model":
            configuration.get_model(),
            "use_enhanced":
            configuration.get_use_enhanced(),
            "encoding":
            configuration.get_encoding(),
            "sample_rate_hertz":
            configuration.get_sample_rate_hertz(),
            "language_code":
            configuration.get_language_code(),
            "alternative_language_codes":
            configuration.get_alternative_language_codes(),
            "audio_channel_count":
            configuration.get_audio_channel_count(),
            "enable_separate_recognition_per_channel":
            configuration.get_enable_separate_recognition_per_channel(),
            "enable_speaker_diarization":
            configuration.get_enableSpeakerDiarization(),
            "diarization_speaker_count":
            configuration.get_diarizationSpeakerCount(),
            "enable_automatic_punctuation":
            configuration.get_enableAutomaticPunctuation(),
            "speech_contexts":
            configuration.get_speech_context()
        }

        audio = {"uri": uri}
        # Any exception raised here propagates unchanged to the caller.
        operation = client.long_running_recognize(config=config,
                                                  audio=audio)

        max_polls = 30000
        sleep_time = 5
        count = 0
        while not operation.done() and count != max_polls:
            print(
                f"{operation.metadata.progress_percent}% complete - updates every {sleep_time} seconds"
            )
            # Give up on the last allowed poll rather than sleeping again.
            if count == max_polls - 1:
                raise TimeoutError("Time out processing audio")
            count += 1
            time.sleep(sleep_time)
        print(
            f"{operation.metadata.progress_percent}% complete - updates every {sleep_time} seconds"
        )

        response = operation.result(timeout=1200)

        # First alternative is the most probable result.
        transcript = "".join(
            " " + result.alternatives[0].transcript
            for result in response.results
        )
        if not transcript:
            logger.debug('No transcript returned')
        utilities = Utilities()
        t = utilities.strip_puc(text=transcript)
        return t.lower()
Пример #26
0
 def test_clean_test(self):
     """clean_text should reduce the trailing run of dots to a single dot."""
     cleaned = Utilities().clean_text('dass auto ist traumhaft........')
     self.assertEqual('dass auto ist traumhaft.', cleaned)
Пример #27
0
 def test_multiple_dots(self):
     """clean_multiple_dots should collapse a run of dots and leave single dots untouched."""
     unchanged = "vom 13. bis 16. mai findet in Coburg das 34. BMW Veteranentreffen statt, Infos auch unter"
     cases = (
         ('dass auto ist traumhaft.', 'dass auto ist traumhaft........'),
         (unchanged, unchanged),
     )
     cleaner = Utilities()
     for wanted, raw in cases:
         self.assertEqual(wanted, cleaner.clean_multiple_dots(raw))
Пример #28
0
 def test_clean_beginning_punct(self):
     """clean_dots_beginning_of_text should remove the leading '...' from the text."""
     cleaned = Utilities().clean_dots_beginning_of_text("...Sonnebrille und wech")
     self.assertEqual("Sonnebrille und wech", cleaned)
Пример #29
0
 def test_clean_multiple_whitespaces(self):
     """clean_multiple_whitespaces should turn the double space before ':))' into a single space."""
     raw = "UUU Nice Lets Play Rock n Roll  :))"
     cleaned = Utilities().clean_multiple_whitespaces(raw)
     self.assertEqual(cleaned, "UUU Nice Lets Play Rock n Roll :))")
Пример #30
0
 def test_clean_smileys(self):
     """clean_smileys should shorten a doubled ')' in a smiley to one and leave other text as is."""
     cases = (
         (":o))", ":o)"),
         ("UUU Nice Lets Play Rock n Roll  :))", "UUU Nice Lets Play Rock n Roll  :)"),
     )
     cleaner = Utilities()
     for raw, wanted in cases:
         self.assertEqual(cleaner.clean_smileys(raw), wanted)