Пример #1
0
def filter_aliases(row) -> list:
    """
    Filter a row's aliases down to one representative per normalized form.

    It is used to remove the noisy aliases given by tv (presumably keeping
    only the ones that are useful for ASR).

    The candidates are ``row['aliases']`` plus ``str(row['value'])``. Every
    candidate that contains a stand-alone ASCII letter (e.g. ``'s r f 1'``)
    is replaced by ``restore_abbreviations_in_text(text=..., uppercase=True)``
    with surrounding whitespace stripped. All candidates are then passed
    through ``clean_string`` and grouped by
    ``Processor().normalize_text(alias, row['language'])``; the shortest
    alias of each group is kept (the first one seen wins on equal length).

    E.g. ['s r f 1', 'SRF 1', 'srf eins'] becomes ['srf 1']

    Args:
        row: Mapping with the keys ``'aliases'`` (a list), ``'value'`` and
            ``'language'``. ``row['aliases']`` itself is not modified.

    Returns:
        A list holding one alias per distinct normalized form.
    """
    candidates = row['aliases'] + [str(row['value'])]
    language = row['language']

    # Build a new list instead of removing from the list being iterated:
    # removing during iteration silently skips the element that follows
    # every removed one.
    single_letter = re.compile(r'\b[a-zA-Z]\b')
    rewritten = []
    for item in candidates:
        text = str(item)
        if single_letter.search(text):
            rewritten.append(
                restore_abbreviations_in_text(
                    text=text, uppercase=True).strip())
        else:
            rewritten.append(item)

    # Remove duplicates after cleaning. dict.fromkeys keeps insertion order,
    # so the tie-breaking below is reproducible between runs (a set is not).
    unique_aliases = dict.fromkeys(clean_string(x) for x in rewritten)

    # One normalizer is enough for the whole loop.
    normalize = Processor().normalize_text
    norm2alias = {}
    for alias in unique_aliases:
        norm = normalize(alias, language)
        # Keep the shortest alias for each normalized form.
        if norm not in norm2alias or len(norm2alias[norm]) > len(alias):
            norm2alias[norm] = alias
    return list(norm2alias.values())
Пример #2
0
def make_item(value, item_type, lan, aliases=None):
    """Build the dictionary describing a single entity item.

    Args:
        value: Item value; also normalized with
            ``Processor().normalize_text(value, lan)``.
        item_type: Stored under the ``"type"`` key.
        lan: Stored under ``"language"`` and passed to the normalizer.
        aliases: Optional list of aliases; a fresh empty list is used when
            it is ``None``.

    Returns:
        A dict with the keys ``"value"``, ``"type"``, ``"language"``,
        ``"normalizedValue"`` and ``"aliases"``.
    """
    alias_list = [] if aliases is None else aliases
    normalized = Processor().normalize_text(value, lan)
    return {
        "value": value,
        "type": item_type,
        "language": lan,
        "normalizedValue": normalized,
        "aliases": alias_list,
    }
Пример #3
0
 def __init__(self, templates, entities, LRU_size=100,
              max_holder_amount=2, max_combo_amount=3000):
     """Store the templates and entities and reset the bookkeeping state.

     Args:
         templates: Stored as ``self.templates``.
         entities: Stored as ``self.entities``.
         LRU_size: Passed as ``size`` to the ``LRUCache`` kept in
             ``self.tags_entities_dic``.
         max_holder_amount: Stored as ``self.max_holder_amount``.
         max_combo_amount: Stored as ``self.max_combo_amount``.
     """
     # Both counters start at zero.
     self.tagging = self.padding = 0
     self.templates = templates
     self.entities = entities
     # Keep the bound method so one Processor instance is reused.
     self.normalizer = Processor().normalize_text
     self.tags_entities_dic = LRUCache(size=LRU_size)
     self.max_combo_amount = max_combo_amount
     self.sentence_num = 0
     self.max_holder_amount = max_holder_amount
Пример #4
0
def generate_NumSequence(language, amount=3000, max_length=12):
    """Generate random "NumberSequence" entities.

    Each entity is a random decimal number with between 3 and
    ``max_length`` digits (inclusive, no leading zero).

    Args:
        language: Stored in every item and passed to
            ``Processor().normalize_text``.
        amount: Number of entities to generate.
        max_length: Largest number of digits a generated value may have.

    Returns:
        A list of ``amount`` dicts with the keys ``"type"``,
        ``"language"``, ``"spoken"`` (the digits separated by spaces, then
        normalized), ``"written"`` (the digit string) and
        ``"entities_dic"`` (an empty list).

    Raises:
        ValueError: From ``random.randint`` when ``max_length`` is below 3
            and at least one entity is requested.
    """
    entity_type = "NumberSequence"
    # One normalizer is enough for the whole loop.
    normalize = Processor().normalize_text
    entity_list = []
    for _ in tqdm(range(amount)):
        length = random.randint(3, max_length)
        # Numbers with exactly `length` digits lie in
        # [10**(length - 1), 10**length - 1]. Starting at 10**length would
        # produce length + 1 digits and exceed max_length.
        low = 10 ** (length - 1)
        high = low * 10 - 1
        value = str(random.randint(low, high))
        entity_list.append({
            "type": entity_type,
            "language": language,
            "spoken": normalize(' '.join(value), language),
            "written": value,
            "entities_dic": [],
        })
    return entity_list
Пример #5
0
    def __init__(self, parameters=None):
        """Create the simulation state and wire sources, queues and processors.

        Args:
            parameters: Object providing ``num_processors`` and
                ``getParameters()``; a default ``Parameters()`` is created
                when it is ``None``.
        """
        self.parameters = Parameters() if parameters is None else parameters
        num_sources = Constants.DEFAULT_SOURCES
        num_processors = self.parameters.num_processors

        # --- Plain attributes -------------------------------------------
        self.processors = []
        self.sources = []
        self.eventsList = PriorityQueue(0)  # maxsize = 0 (infinite)
        self.previousTime = self.currentTime = (
            Constants.SIMULATION_INITIAL_TIME)
        self.idleProcessors = self.serviceProcessors = 0
        self.entitiesSystem = 0
        self.service_per_shift = []
        self.service_per_total = []
        # Second element of getParameters() holds the shift durations; the
        # first shift ends at shift_durations[0].
        self.shift_durations = self.parameters.getParameters()[1]
        self.shift_next_time = self.shift_durations[0]
        self.shift_next_index = 1

        # --- Collaborating objects --------------------------------------
        self.queue = Queue(Constants.SLOTS_BUFFER)
        self.parking = Queue(Constants.SLOTS_QUEUE)
        self.random = Random()
        for _ in range(num_processors):
            self.processors.append(Processor(self))
        for _ in range(num_sources):
            self.sources.append(Source(self))

        # --- Wiring: source -> queue -> parking -> processor ------------
        for src in self.sources:
            src.addOutput(self.queue)
        self.queue.addOutput(self.parking)
        self.parking.addInput(self.queue)
        for proc in self.processors:
            self.parking.addOutput(proc)
            proc.addInput(self.parking)

        self.output_file = None
        self.numberOfIdleProcessors = num_processors
Пример #6
0
 def setUp(self):
     """Create a Processor attached to a Core whose callbacks are mocked."""
     core_stub = Core()
     # Replace the Core methods with mocks; canHostEntity always answers True.
     core_stub.canHostEntity = MagicMock(return_value=True)
     core_stub.nextArrival = MagicMock()
     core_stub.decreaseEntitiesSystem = MagicMock()
     self.processorObj = Processor(core_stub)
Пример #7
0
class TestProcessor(TestCase):
    """Tests for the idle state of ``Processor`` and for ``endService``."""

    def setUp(self):
        """Create a Processor attached to a Core whose callbacks are mocked."""
        core_stub = Core()
        core_stub.canHostEntity = MagicMock(return_value=True)
        core_stub.nextArrival = MagicMock()
        core_stub.decreaseEntitiesSystem = MagicMock()
        self.processorObj = Processor(core_stub)

    def tearDown(self):
        """Drop the processor created in ``setUp``."""
        self.processorObj = None

    def _host_entity_fed_by(self, input_queue):
        """Attach ``input_queue`` as input and mark the processor as busy."""
        self.processorObj.addInput(input_queue)
        self.processorObj.hostedEntity = 1

    def test_isIdle(self):
        """A freshly created processor reports itself idle."""
        idle = self.processorObj.isIdle()
        self.assertTrue(idle, "The processor should be idle")

    def test_isIdle_notIdle(self):
        """A processor with a hosted entity is not idle."""
        self.processorObj.hostedEntity = 1
        idle = self.processorObj.isIdle()
        self.assertFalse(idle, "The processor shouldn't be idle")

    def test_endService_empty_queue(self):
        """Ending service with an empty input queue leaves the processor idle."""
        empty_queue = Queue()
        empty_queue.getQueueLength = MagicMock(return_value=0)
        self._host_entity_fed_by(empty_queue)

        idle_before = self.processorObj.isIdle()
        self.assertFalse(idle_before, "The processor shouldn't be idle")
        self.processorObj.endService()
        idle_after = self.processorObj.isIdle()
        self.assertTrue(idle_after, "The processor should be idle")

    def test_endService_non_empty_queue(self):
        """Ending service with a waiting entity keeps the processor busy."""
        busy_queue = Queue()
        busy_queue.getQueueLength = MagicMock(return_value=1)
        busy_queue.getEntity = MagicMock()
        self._host_entity_fed_by(busy_queue)

        idle_before = self.processorObj.isIdle()
        self.assertFalse(idle_before, "The processor shouldn't be idle")
        self.processorObj.endService()
        idle_after = self.processorObj.isIdle()
        self.assertFalse(idle_after, "The processor shouldn't be idle")
Пример #8
0
from src.WikiCrawler import Parser
from src.Processor import Processor
from src.ExtractKeyword import Comprehend, ConnectSql
import argparse
import sys

if __name__ == '__main__':
    # Pipeline: crawl the topic, process the crawled data, then extract and
    # store keywords. The topic is the first command-line argument.
    if len(sys.argv) < 2:
        # Exit with a usage message instead of an IndexError traceback.
        sys.exit("usage: {} <topic>".format(sys.argv[0]))
    param_topic = sys.argv[1]

    crawler = Parser(param_topic)
    # Only required when running the program for the first time.
    crawler.checkRequirements()
    crawler.returnData()

    # Separate name: this is a Processor, not the Parser created above.
    processor = Processor(param_topic)
    processor.processor()

    comprehend = Comprehend(param_topic)
    comprehend.extract_keywords()
    comprehend.load_to_data()
Пример #9
0
import io
import os
from math import floor
from src.Processor import Processor
from flask import Flask, request, Response, json, jsonify
from flask_cors import CORS
from flask.helpers import send_file, send_from_directory
from PIL import Image

# Flask application object; its static folder is set to '../app/build'.
app = Flask(__name__, static_folder='../app/build') 
# Apply flask_cors to the application (cross-origin request support).
CORS(app)

# Single module-level Processor instance, created once at import time.
proc = Processor()

def imgArray2stream(imgArr):
    """Encode an image array as PNG into an in-memory byte stream.

    Args:
        imgArr: Array accepted by ``PIL.Image.fromarray``.

    Returns:
        An ``io.BytesIO`` holding the PNG data, positioned at offset 0 so
        it can be read from the start.
    """
    buffer = io.BytesIO()
    Image.fromarray(imgArr).save(buffer, 'PNG')
    buffer.seek(0)
    return buffer


@app.route('/')
def hello_world():
    """Serve ``index.html`` from the application's static folder."""
    static_root = app.static_folder
    return send_from_directory(static_root, "index.html")

@app.route("/<path:path>")
def static_proxy(path):
    """static folder serve"""
    file_name = path.split("/")[-1]
    dir_name = os.path.join(app.static_folder, "/".join(path.split("/")[:-1]))