Example #1
import xml.etree.ElementTree as ETtree


class BaseFactory(object):
    def __init__(self):
        self.mapper = Mapper()

    def add_trait(self, *args):
        # Each trait is a callable applied to (and free to mutate) this factory.
        for flavour in args:
            flavour(self)

    def __call__(self, data):
        # The class docstring doubles as the tag of the produced XML element.
        if self.__doc__ is None:
            raise RuntimeError(
                "Factories not implementing __call__ must provide a docstring")

        obj = ETtree.Element(self.__doc__)
        self.mapper.map(data).into(obj)
        return obj
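# Usage sketch (not from the original): a trait is any callable that mutates
# the factory it is applied to, and a subclass docstring supplies the element
# tag. `with_audit_fields` and `add_field` are hypothetical names.
def with_audit_fields(factory):
    factory.mapper.add_field("created_at")  # assumed Mapper helper, for illustration

class PersonFactory(BaseFactory):
    """person"""

factory = PersonFactory()
factory.add_trait(with_audit_fields)
element = factory({"name": "Ada"})  # -> <person> element filled by mapper.map(...).into(...)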
Example #2
import _thread
import time
from xmlrpc.client import ServerProxy


class Worker:
    def __init__(self, fs, name, addr, opts):
        self.addr = addr
        self.jt_addr = opts["jt_addr"]
        self.jt = ServerProxy(self.jt_addr)
        self.hb_timeout = 0.2  # heartbeat timeout in seconds
        self.on = True
        self.mapper = Mapper(opts, fs, "map" + name, addr)
        self.reducer = Reducer(fs, "reduce" + name, addr, opts,
                               RPCMapperClient())

    def start(self):
        print('Init worker')
        print('Start sending heartbeats to', self.jt_addr)
        _thread.start_new_thread(self._heartbeat, ())
        print('Server is ready')

    def _heartbeat(self):
        while self.on:
            try:
                self.jt.heartbeat(self.addr)
            except Exception as e:
                print(e)
            time.sleep(self.hb_timeout)

    # Map data by applying the given map function.
    # task_id - unique task id
    # rds_count - number of reducers for the task
    # chunk_path - DFS path to the chunk file to map
    # map_script - DFS path to the script with the map function
    # restart_task - if True, restart the map task even if it is already
    #                completed or currently executing
    def map(self,
            task_id,
            rds_count,
            chunk_path,
            map_script,
            restart_task=False):
        return self.mapper.map(task_id, rds_count, chunk_path, map_script,
                               restart_task)

    # Get the execution status of the given task.
    def get_status(self, task_id, chunk_path):
        return self.mapper.get_status(task_id, chunk_path)

    # Read mapped data for a specific region.
    # task_id - unique task id
    # region_number - integer region assigned to the requesting reducer
    # Returns dict {status: Status.ok, data: list of tuples};
    # if the file does not exist, status = Status.not_found;
    # if the file is empty, returns ok and an empty list.
    def read_mapped_data(self, task_id, region_number):
        return self.mapper.read_mapped_data(task_id, region_number)

    # Signal from the JobTracker (JT) to start reducing.
    # task_id - unique task id
    # region - region this reducer is responsible for
    # mappers - mappers holding the data for the current task
    # script_path - DFS path to the reduce script
    def reduce(self, task_id, region, mappers, script_path):
        return self.reducer.reduce(task_id, region, mappers, script_path)
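# Wiring sketch (assumption; not shown in the original): the Worker's public
# methods are presumably exposed to the JobTracker over XML-RPC, mirroring the
# ServerProxy client used for heartbeats. `fs` and the literal addresses are
# placeholders.
from xmlrpc.server import SimpleXMLRPCServer

worker = Worker(fs, "w1", "localhost:9001", {"jt_addr": "http://localhost:9000"})
server = SimpleXMLRPCServer(("localhost", 9001), allow_none=True)
server.register_instance(worker)  # exposes map/get_status/read_mapped_data/reduce
worker.start()                    # begin heartbeating to the JobTracker
server.serve_forever()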
Example #3
    def run(self, config_json, to_file=True):
        """Validate the config, cache both SPARQL endpoints, then run the mapper."""
        self.create_dirs()
        results = None

        try:
            config = Config(config_json, self.database_config)

            source_sparql = SPARQL(config, 'source')
            target_sparql = SPARQL(config, 'target')

            info_logger = InfoLogger(
                'InfoLogger', '{}_{}'.format(source_sparql.get_query_hash(),
                                             target_sparql.get_query_hash()))

            source_cache = Cache(info_logger, config, source_sparql, 'source')
            source_cache.create_cache()

            target_cache = Cache(info_logger, config, target_sparql, 'target')
            target_cache.create_cache()

            mapper = Mapper(info_logger, config, source_sparql, target_sparql)
            results = mapper.map(to_file)
        except ConfigNotValidError as e:
            results = "Config not valid"
            print(e)
        except HTTPError as e:
            print(e)
        except JSONDecodeError as e:
            print(e)

        return results
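# Hypothetical invocation (the enclosing class and the config layout are
# assumptions; the original shows only this method):
config_json = {
    "source": {"endpoint": "http://example.org/sparql", "query": "..."},
    "target": {"endpoint": "http://example.org/sparql", "query": "..."},
}
runner = LinkRunner(database_config)  # hypothetical enclosing class
results = runner.run(config_json, to_file=False)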
Example #4
def job(grouperNum, chunksQueue, listSaveStateNameGrouper, listListLastCallNum):
    print('Starting worker ' + str(grouperNum))
    while True:
        # Get a new chunk to process
        chunk = chunksQueue.get()
        # Work
        print('Worker ' + str(grouperNum) + ' mapping chunk ' + str(chunk))
        # Iterator over the records of the chunk
        MapIterator = MapChunkIterator(mapChunksNameGenerator(chunk))
        theContext = MapContext(groupChunksNameGenerator(chunk), MapIterator)
        Mapper.map(theContext)
        print('Worker ' + str(grouperNum) + ' grouping chunk ' + str(chunk) + ' locally')
        idx = listListLastCallNum[grouperNum] + 1
        # 'directory' is assumed to be a module-level setting
        theGrouper = Grouper(grouperNum, idx, idx - 1, directory)
        listSaveStateNameGrouper[grouperNum] = theGrouper.group(theContext)
        listListLastCallNum[grouperNum] = idx
        # Mark the chunk as done
        chunksQueue.task_done()
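# Wiring sketch (assumption; the setup is not shown in the original): workers
# like the one above pair naturally with a task queue drained via
# get()/task_done()/join().
import queue
import threading

numGroupers = 4  # assumed worker count
chunksQueue = queue.Queue()
listSaveStateNameGrouper = [None] * numGroupers
listListLastCallNum = [0] * numGroupers

for grouperNum in range(numGroupers):
    threading.Thread(
        target=job,
        args=(grouperNum, chunksQueue, listSaveStateNameGrouper, listListLastCallNum),
        daemon=True,  # the workers loop forever; daemon lets the process exit
    ).start()

for chunk in range(16):  # assumed chunk count
    chunksQueue.put(chunk)
chunksQueue.join()  # returns once every chunk has been task_done()'d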
Example #5
import unittest


class TestMapperMethod(unittest.TestCase):
    def setUp(self):
        self.mapper = Mapper("data/refFlatMm10.txt")

    def test_normal_case(self):
        cds_pos, aa_pos = self.mapper.map(101153495, "NM_146145")
        self.assertEqual(cds_pos, 3462)
        self.assertEqual(aa_pos, 1154)

    def test_coord_in_intron(self):
        cds_pos, aa_pos = self.mapper.map(101153494, "NM_146145")
        self.assertIsNone(cds_pos)
        self.assertIsNone(aa_pos)

    def test_refseq_id_not_in_file(self):
        cds_pos, aa_pos = self.mapper.map(101153495, "NM_899287")
        self.assertIsNone(cds_pos)
        self.assertIsNone(aa_pos)
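# The suite runs under the standard unittest runner:
if __name__ == '__main__':
    unittest.main()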
Example #7
    def run(self, Mapper, Reducer, data):
        # Map phase: turn the raw input into (key, value) tuples.
        mapper = Mapper()
        tuples = mapper.map(data)

        # Combine phase: group the values by key.
        combined = {}
        for k, v in tuples:
            if k not in combined:
                combined[k] = []
            combined[k].append(v)

        # Reduce phase: collapse each key's value list into output lines.
        reducer = Reducer()
        output = reducer.reduce(combined)

        for line in output:
            print(line)
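# Demonstration pair (illustrative, not from the original): a word count run
# through the driver above. `Runner` stands in for the unnamed enclosing class.
class WordCountMapper:
    def map(self, data):
        # Emit (word, 1) for every whitespace-separated token.
        return [(word, 1) for word in data.split()]

class WordCountReducer:
    def reduce(self, combined):
        # Sum the 1s collected for each word.
        return ['%s %d' % (k, sum(v)) for k, v in combined.items()]

Runner().run(WordCountMapper, WordCountReducer, 'to be or not to be')
# prints: "to 2", "be 2", "or 1", "not 1"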