def filter_aliases(row) -> list:
    """
    Filters list of aliases to keep only the useful ones.

    It is used to remove all the noisy aliases given by tv that are useful
    for ASR.
    E.g. ['s r f 1', 'SRF 1', 'srf eins'] becomes ['srf 1']

    Args:
        row: mapping providing ``'aliases'`` (a list of aliases), ``'value'``
            (added to the candidates as a string) and ``'language'`` (passed
            to the text normalizer).

    Returns:
        A list with one cleaned alias per distinct normalized form; when
        several aliases share a normalized form the shortest one is kept
        (the first one seen wins on equal length).
    """
    language = row['language']
    single_letter = re.compile(r'\b[a-zA-Z]\b')

    # Collect the result in a NEW list. Removing from / appending to the list
    # that is being iterated skips the element following every removal and
    # can revisit freshly appended entries.
    candidates = []
    for alias in row['aliases'] + [str(row['value'])]:
        text = str(alias)
        if single_letter.search(text):
            # The alias contains an isolated letter: replace it with the
            # version whose abbreviations have been restored in uppercase.
            restored = restore_abbreviations_in_text(
                text=text, uppercase=True).strip()
            if restored not in candidates:
                candidates.append(restored)
        else:
            candidates.append(alias)

    # Remove normalization duplicates. dict.fromkeys de-duplicates while
    # keeping first-seen order, so ties are resolved deterministically.
    normalize = Processor().normalize_text
    norm2alias = {}
    for alias in dict.fromkeys(clean_string(x) for x in candidates):
        norm = normalize(alias, language)
        if norm not in norm2alias or len(norm2alias[norm]) > len(alias):
            norm2alias[norm] = alias
    return list(norm2alias.values())
def make_item(value, item_type, lan, aliases=None):
    """
    Build the dictionary describing a single item.

    Args:
        value: the item's value; also normalized into ``normalizedValue``.
        item_type: stored under the ``type`` key.
        lan: stored under ``language`` and passed to the normalizer.
        aliases: optional list of aliases; a new empty list when omitted.

    Returns:
        A dict with the keys ``value``, ``type``, ``language``,
        ``normalizedValue`` and ``aliases`` (in that order).
    """
    alias_list = [] if aliases is None else aliases
    return dict(
        value=value,
        type=item_type,
        language=lan,
        normalizedValue=Processor().normalize_text(value, lan),
        aliases=alias_list,
    )
def __init__(self, templates, entities, LRU_size=100,
             max_holder_amount=2, max_combo_amount=3000):
    """
    Store the templates and entities and reset the instance's counters.

    Args:
        templates: kept as ``self.templates``.
        entities: kept as ``self.entities``.
        LRU_size: size handed to the ``LRUCache`` behind
            ``self.tags_entities_dic``.
        max_holder_amount: kept as ``self.max_holder_amount``.
        max_combo_amount: kept as ``self.max_combo_amount``.
    """
    # Inputs and limits supplied by the caller.
    self.templates = templates
    self.entities = entities
    self.max_holder_amount = max_holder_amount
    self.max_combo_amount = max_combo_amount

    # Counters all start at zero.
    self.tagging = self.padding = self.sentence_num = 0

    # Helpers: bound text normalizer and the LRU cache.
    self.normalizer = Processor().normalize_text
    self.tags_entities_dic = LRUCache(size=LRU_size)
def generate_NumSequence(language, amount=3000, max_length=12):
    """
    Generate random "NumberSequence" entities.

    Each entity is a random number whose digit count is drawn uniformly from
    3..``max_length`` (both inclusive). The number never starts with a zero.

    Args:
        language: stored in every entity and passed to the text normalizer.
        amount: how many entities to generate.
        max_length: maximum number of digits; must be at least 3, otherwise
            ``random.randint`` raises ``ValueError``.

    Returns:
        A list of dicts with the keys ``type``, ``language``, ``spoken``
        (normalized text of the space-separated digits), ``written`` (the
        digit string) and ``entities_dic`` (an empty list).
    """
    entity_type = "NumberSequence"
    # One normalizer for the whole run instead of a new Processor per entity.
    normalize = Processor().normalize_text

    entity_list = []
    for _ in tqdm(range(amount)):
        length = random.randint(3, max_length)
        # Smallest and largest numbers with exactly `length` digits.
        # (10**length would already have length + 1 digits.)
        low = 10 ** (length - 1)
        high = low * 10 - 1
        value = str(random.randint(low, high))
        entity_list.append({
            "type": entity_type,
            "language": language,
            "spoken": normalize(' '.join(value), language),
            "written": value,
            "entities_dic": [],
        })
    return entity_list
def __init__(self, parameters=None):
    """
    Create the simulation core and connect its components.

    Builds the processors, the sources and the two queues, then links them
    as source -> queue -> parking -> processor.

    Args:
        parameters: configuration object; a default ``Parameters()`` is
            created when omitted.
    """
    self.parameters = Parameters() if parameters is None else parameters
    source_count = Constants.DEFAULT_SOURCES
    processor_count = self.parameters.num_processors

    # Attributes initialization
    self.processors = []
    self.sources = []
    self.eventsList = PriorityQueue(0)  # maxsize = 0 (infinite)
    self.previousTime = self.currentTime = Constants.SIMULATION_INITIAL_TIME
    self.idleProcessors = self.serviceProcessors = self.entitiesSystem = 0
    self.service_per_shift = []
    self.service_per_total = []
    self.shift_durations = self.parameters.getParameters()[1]
    self.shift_next_time = self.shift_durations[0]
    self.shift_next_index = 1

    # Instance creation (components receive this core as their argument)
    self.queue = Queue(Constants.SLOTS_BUFFER)
    self.parking = Queue(Constants.SLOTS_QUEUE)
    self.random = Random()
    self.processors.extend(Processor(self) for _ in range(processor_count))
    self.sources.extend(Source(self) for _ in range(source_count))

    # Dependency injection
    for src in self.sources:
        src.addOutput(self.queue)  # source -> queue
    self.queue.addOutput(self.parking)  # queue -> parking
    self.parking.addInput(self.queue)  # parking <- queue
    for proc in self.processors:
        self.parking.addOutput(proc)  # parking -> processor
        proc.addInput(self.parking)  # processor <- parking

    self.output_file = None
    self.numberOfIdleProcessors = processor_count
def setUp(self):
    """Create the Processor under test on a Core with stubbed methods."""
    core_stub = Core()
    # canHostEntity always answers True; the other two are plain mocks.
    core_stub.canHostEntity = MagicMock(return_value=True)
    for method_name in ("nextArrival", "decreaseEntitiesSystem"):
        setattr(core_stub, method_name, MagicMock())
    self.processorObj = Processor(core_stub)
class TestProcessor(TestCase):
    """Tests for the idle / busy state reported by ``Processor.isIdle``."""

    def setUp(self):
        """Create the Processor under test on a Core with stubbed methods."""
        core_stub = Core()
        core_stub.canHostEntity = MagicMock(return_value=True)
        for method_name in ("nextArrival", "decreaseEntitiesSystem"):
            setattr(core_stub, method_name, MagicMock())
        self.processorObj = Processor(core_stub)

    def tearDown(self):
        """Drop the reference to the processor created in ``setUp``."""
        self.processorObj = None

    def _host_entity_with_input(self, input_queue):
        """Attach ``input_queue``, host an entity and check the busy state."""
        self.processorObj.addInput(input_queue)
        self.processorObj.hostedEntity = 1
        self.assertFalse(self.processorObj.isIdle(),
                         "The processor shouldn't be idle")

    def test_isIdle(self):
        """A freshly created processor reports itself as idle."""
        idle = self.processorObj.isIdle()
        self.assertTrue(idle, "The processor should be idle")

    def test_isIdle_notIdle(self):
        """A processor hosting an entity is not idle."""
        self.processorObj.hostedEntity = 1
        idle = self.processorObj.isIdle()
        self.assertFalse(idle, "The processor shouldn't be idle")

    def test_endService_empty_queue(self):
        """Ending service with an empty input queue leaves it idle."""
        empty_queue = Queue()
        empty_queue.getQueueLength = MagicMock(return_value=0)
        self._host_entity_with_input(empty_queue)

        self.processorObj.endService()

        self.assertTrue(self.processorObj.isIdle(),
                        "The processor should be idle")

    def test_endService_non_empty_queue(self):
        """Ending service with a waiting entity keeps the processor busy."""
        waiting_queue = Queue()
        waiting_queue.getQueueLength = MagicMock(return_value=1)
        waiting_queue.getEntity = MagicMock()
        self._host_entity_with_input(waiting_queue)

        self.processorObj.endService()

        self.assertFalse(self.processorObj.isIdle(),
                         "The processor shouldn't be idle")
import argparse
import sys

from src.WikiCrawler import Parser
from src.Processor import Processor
from src.ExtractKeyword import Comprehend, ConnectSql

# Script entry point: runs Parser, Processor and Comprehend, in that order,
# for the topic given as the first command-line argument.
if __name__ == '__main__':
    # Exit with a usage hint instead of an IndexError traceback when the
    # topic argument is missing.
    if len(sys.argv) < 2:
        sys.exit("usage: %s <topic>" % sys.argv[0])
    param_topic = sys.argv[1]

    parser = Parser(param_topic)
    # Required to run only when running the program the first time
    parser.checkRequirements()
    parser.returnData()

    # Separate name so the Parser instance above is not shadowed by a
    # Processor instance.
    processor = Processor(param_topic)
    processor.processor()

    proc = Comprehend(param_topic)
    proc.extract_keywords()
    proc.load_to_data()
import io
import os
from math import floor
from src.Processor import Processor
from flask import Flask, request, Response, json, jsonify
from flask_cors import CORS
from flask.helpers import send_file, send_from_directory
from PIL import Image

# Flask application whose static folder points at '../app/build'.
app = Flask(__name__, static_folder='../app/build')
# Wrap the app with flask_cors.
CORS(app)
# Module-level Processor instance, created once at import time.
proc = Processor()


def imgArray2stream(imgArr):
    """Encode an image array as PNG into an in-memory stream.

    Args:
        imgArr: array passed to ``PIL.Image.fromarray``.

    Returns:
        An ``io.BytesIO`` holding the PNG data, positioned at offset 0.
    """
    out = Image.fromarray(imgArr)
    stream = io.BytesIO()
    out.save(stream, 'PNG')
    # Rewind so a consumer reads the PNG from its first byte.
    stream.seek(0)
    return stream


@app.route('/')
def hello_world():
    """Send ``index.html`` from the application's static folder."""
    return send_from_directory(app.static_folder, "index.html")


@app.route("/<path:path>")
def static_proxy(path):
    """static folder serve"""
    # Last "/"-separated component of the requested path.
    file_name = path.split("/")[-1]
    # Remaining leading components, joined onto the static folder.
    dir_name = os.path.join(app.static_folder, "/".join(path.split("/")[:-1]))