Python Factory: примеры использования

Язык программирования: Python

Пространство имен/Пакет: pydoop.pipes

Класс/Тип: Factory

Примеров на hotexamples.com: 14

Python Factory — найдено 14 примеров. Это лучшие примеры кода на Python для pydoop.pipes.Factory, взятые из проектов с открытым исходным кодом. Вы можете оценивать каждый пример, чтобы помочь нам улучшить качество подборки.

Основные методы

Показать Скрыть

Factory(11)

createRecordReader(2)

createMapper(1)

createPartitioner(1)

createReducer(1)

Пример #1

Показать файл

Файл: test_record_reader.py Проект: onlynone/pydoop

 def test_record_reader_from_cpluplus(self):
     """Read every record from a factory-built record reader via ``pp``.

     A map context is built from a plain dict and its getters are checked
     against the dict values.  ``NUMBER_RECORDS`` records are then pulled
     from the reader, checking key, value and reported progress each time.
     One further read must return a falsy first element.
     """
     fields = {
         'input_key': 'inputkey',
         'input_value': 'inputvalue',
         'input_split': 'inputsplit',
         'input_key_class': 'keyclass',
         'input_value_class': 'valueclass',
         'job_conf': {},
     }
     map_ctx = pp.get_MapContext_object(fields)
     getter_to_field = (
         ('getInputKey', 'input_key'),
         ('getInputValue', 'input_value'),
         ('getInputSplit', 'input_split'),
         ('getInputKeyClass', 'input_key_class'),
         ('getInputValueClass', 'input_value_class'),
     )
     for getter_name, field in getter_to_field:
         self.assertEqual(getattr(map_ctx, getter_name)(), fields[field])

     f = Factory(None, None, test_record_reader)
     reader = f.createRecordReader(map_ctx)
     total = test_record_reader.NUMBER_RECORDS
     for seq in range(1, total + 1):
         # NOTE: ``f`` is rebound here -- from the factory to the first
         # element of the triple returned by the helper.
         f, key, value = pp.get_record_from_record_reader(reader)
         self.assertTrue(f)
         self.assertEqual(key, test_record_reader.KEY_FORMAT % seq)
         self.assertEqual(value, test_record_reader.DEFAULT_VALUE)
         expected_progress = float(seq) / total
         self.assertAlmostEqual(
             pp.get_progress_from_record_reader(reader), expected_progress)
     # Reading past the last record must report failure.
     f, key, value = pp.get_record_from_record_reader(reader)
     self.assertFalse(f)

Пример #2

Показать файл

Файл: test_factory.py Проект: onlynone/pydoop

 def test_factory_costructor(self):
     """Check that Factory creates instances of the classes it was given.

     Covers the two-argument form (mapper, reducer) and the three-argument
     form that additionally takes a record reader class.
     """
     # ``failUnless`` is a deprecated alias of ``assertTrue`` (removed in
     # Python 3.12); the supported name is used instead.
     f = Factory(mapper, reducer)
     self.assertTrue(isinstance(f.createMapper(self.m_ctx), mapper))
     self.assertTrue(isinstance(f.createReducer(self.r_ctx), reducer))
     #--
     f = Factory(mapper, reducer, record_reader)
     self.assertTrue(isinstance(f.createMapper(self.m_ctx), mapper))
     self.assertTrue(isinstance(f.createReducer(self.r_ctx), reducer))
     self.assertTrue(
         isinstance(f.createRecordReader(self.m_ctx), record_reader))

Пример #3

Показать файл

Файл: seqal_run.py Проект: ilveroluca/seal

def run_job():
    """
    Run the Hadoop pipes task through Pydoop.

    The seqal mapper and reducer are imported lazily, wrapped in a
    ``Factory`` and handed to ``runTask``; its result is returned as is.
    """
    from pydoop.pipes import Factory, runTask
    from seal.seqal.mapper import mapper
    from seal.seqal.reducer import reducer

    task_factory = Factory(mapper, reducer)
    return runTask(task_factory)

Пример #4

Показать файл

Файл: test_factory.py Проект: ZEMUSHKA/pydoop

 def test_factory_costructor(self):
   """Check that Factory creates instances of the classes it was given.

   Covers the two-argument form (mapper, reducer) and the three-argument
   form that additionally takes a record reader class.
   """
   # ``failUnless`` is a deprecated alias of ``assertTrue`` (removed in
   # Python 3.12); the supported name is used instead.
   f = Factory(mapper, reducer)
   self.assertTrue(isinstance(f.createMapper(self.m_ctx), mapper))
   self.assertTrue(isinstance(f.createReducer(self.r_ctx), reducer))
   #--
   f = Factory(mapper, reducer, record_reader)
   self.assertTrue(isinstance(f.createMapper(self.m_ctx), mapper))
   self.assertTrue(isinstance(f.createReducer(self.r_ctx), reducer))
   self.assertTrue(isinstance(f.createRecordReader(self.m_ctx), record_reader))

Пример #5

Показать файл

 def test_partitioner_from_cpluplus(self):
     """Compare a factory-built partitioner with ``partition_function``.

     A map context is built from a plain dict and its getters are checked
     against the dict values.  For ten keys of increasing length, the
     partition obtained through the ``pp`` helper must equal the one
     computed by ``partition_function`` with four partitions.
     """
     fields = {
         'input_key': 'inputkey',
         'input_value': 'inputvalue',
         'input_split': 'inputsplit',
         'input_key_class': 'keyclass',
         'input_value_class': 'valueclass',
         'job_conf': {},
     }
     map_ctx = pp.get_MapContext_object(fields)
     getter_to_field = (
         ('getInputKey', 'input_key'),
         ('getInputValue', 'input_value'),
         ('getInputSplit', 'input_split'),
         ('getInputKeyClass', 'input_key_class'),
         ('getInputValueClass', 'input_value_class'),
     )
     for getter_name, field in getter_to_field:
         self.assertEqual(getattr(map_ctx, getter_name)(), fields[field])

     factory = Factory(None, None, partitioner_class=test_partitioner)
     partitioner = factory.createPartitioner(map_ctx)
     n_partitions = 4
     # Keys are 'key', 'keya', 'keyaa', ... up to nine trailing 'a's.
     for suffix_len in range(10):
         key = 'key' + 'a' * suffix_len
         expected = partition_function(key, n_partitions)
         actual = pp.get_partition_from_partitioner(
             partitioner, key, n_partitions)
         self.assertEqual(expected, actual)

Пример #6

Показать файл

Файл: test_partitioner.py Проект: ZEMUSHKA/pydoop

 def test_partitioner_from_cpluplus(self):
   """Compare a factory-built partitioner with ``partition_function``.

   The map context getters are first checked against the dict the context
   was built from; then, for ten keys of increasing length, the partition
   returned through the ``pp`` helper must match ``partition_function``.
   """
   ctx_fields = {
     'input_key' : 'inputkey',
     'input_value' : 'inputvalue',
     'input_split' : 'inputsplit',
     'input_key_class' : 'keyclass',
     'input_value_class' : 'valueclass',
     'job_conf' : {},
   }
   map_ctx = pp.get_MapContext_object(ctx_fields)
   self.assertEqual(map_ctx.getInputKey(), ctx_fields['input_key'])
   self.assertEqual(map_ctx.getInputValue(), ctx_fields['input_value'])
   self.assertEqual(map_ctx.getInputSplit(), ctx_fields['input_split'])
   self.assertEqual(map_ctx.getInputKeyClass(), ctx_fields['input_key_class'])
   self.assertEqual(
     map_ctx.getInputValueClass(), ctx_fields['input_value_class'])
   factory = Factory(None, None, partitioner_class=test_partitioner)
   partitioner = factory.createPartitioner(map_ctx)
   n_partitions = 4
   # Keys are 'key', 'keya', 'keyaa', ... up to nine trailing 'a's.
   for n_suffix in range(10):
     key = 'key' + 'a' * n_suffix
     expected = partition_function(key, n_partitions)
     actual = pp.get_partition_from_partitioner(partitioner, key, n_partitions)
     self.assertEqual(expected, actual)

Пример #7

Показать файл

Файл: test_record_reader.py Проект: ZEMUSHKA/pydoop

 def test_record_reader_from_cpluplus(self):
   """Read every record from a factory-built record reader via ``pp``.

   The map context getters are first checked against the dict the context
   was built from.  ``NUMBER_RECORDS`` records are then pulled from the
   reader, checking key, value and reported progress each time; one more
   read must return a falsy first element.
   """
   ctx_fields = {
     'input_key' : 'inputkey',
     'input_value' : 'inputvalue',
     'input_split' : 'inputsplit',
     'input_key_class' : 'keyclass',
     'input_value_class' : 'valueclass',
     'job_conf' : {},
   }
   map_ctx = pp.get_MapContext_object(ctx_fields)
   self.assertEqual(map_ctx.getInputKey(), ctx_fields['input_key'])
   self.assertEqual(map_ctx.getInputValue(), ctx_fields['input_value'])
   self.assertEqual(map_ctx.getInputSplit(), ctx_fields['input_split'])
   self.assertEqual(map_ctx.getInputKeyClass(), ctx_fields['input_key_class'])
   self.assertEqual(
     map_ctx.getInputValueClass(), ctx_fields['input_value_class'])
   f = Factory(None, None, test_record_reader)
   reader = f.createRecordReader(map_ctx)
   n_records = test_record_reader.NUMBER_RECORDS
   for seq in range(1, n_records + 1):
     # NOTE: ``f`` is rebound here -- from the factory to the first element
     # of the triple returned by the helper.
     f, key, value = pp.get_record_from_record_reader(reader)
     self.assertTrue(f)
     self.assertEqual(key, test_record_reader.KEY_FORMAT % seq)
     self.assertEqual(value, test_record_reader.DEFAULT_VALUE)
     expected_progress = float(seq) / n_records
     self.assertAlmostEqual(
       pp.get_progress_from_record_reader(reader), expected_progress)
   # Reading past the last record must report failure.
   f, key, value = pp.get_record_from_record_reader(reader)
   self.assertFalse(f)

Пример #8

Показать файл

Файл: test_factory.py Проект: onlynone/pydoop

 def test_map_reduce_factory(self):
     """Exercise a mapper/reducer Factory through the ``pp`` test hooks.

     After ``pp.try_factory_internal`` each ``call_history`` list must hold
     two entries; after creating one more mapper and reducer through
     ``pp.TestFactory`` it must hold three.  ``gc.collect()`` is expected to
     return 0 after each stage.
     """
     import gc
     self.__check_ctx()
     # Start from empty histories so the length checks below are absolute.
     mapper.call_history = []
     reducer.call_history = []
     mf = Factory(mapper, reducer)
     gc.collect()  # clean up existing references
     pp.try_factory_internal(mf)
     self.assertEqual(0, gc.collect())
     self.assertEqual(len(mapper.call_history), 2)
     self.assertEqual(len(reducer.call_history), 2)
     f = pp.TestFactory(mf)
     # ``failUnless`` is a deprecated alias of ``assertTrue`` (removed in
     # Python 3.12); the supported name is used instead.
     self.assertTrue(isinstance(f.createMapper(self.m_ctx), mapper))
     self.assertTrue(isinstance(f.createReducer(self.r_ctx), reducer))
     self.assertEqual(len(mapper.call_history), 3)
     self.assertEqual(len(reducer.call_history), 3)
     self.assertEqual(0, gc.collect())

Пример #9

Показать файл

import struct
from pydoop.pipes import Mapper, Reducer, Factory, runTask
from pydoop.utils import jc_configure_int


class FilterMapper(Mapper):
    """
    Process a wordcount output stream, emitting only records relative to
    words whose count is equal to or above the configured threshold.
    """

    def __init__(self, context):
        """Initialise the base mapper and configure it from the job conf.

        The "filter.occurrence.threshold" job property is passed to
        ``jc_configure_int`` together with the attribute name "threshold"
        (presumably stored as ``self.threshold``, which ``map`` reads).
        """
        super(FilterMapper, self).__init__(context)
        job_conf = context.getJobConf()
        jc_configure_int(
            self, job_conf, "filter.occurrence.threshold", "threshold")

    def map(self, context):
        """Emit (word, count as text) when the count reaches the threshold.

        The input value is unpacked as a big-endian 4-byte signed integer.
        """
        word = context.getInputKey()
        packed_count = context.getInputValue()
        (count,) = struct.unpack(">i", packed_count)
        if count >= self.threshold:
            context.emit(word, str(count))


class FilterReducer(Reducer):
    """Reducer whose ``reduce`` step does nothing."""

    def reduce(self, context):
        """No-op: the context is not read and nothing is emitted."""
        return None


if __name__ == "__main__":
    # Script entry point: run the filter job with the classes defined above.
    task_factory = Factory(FilterMapper, FilterReducer)
    runTask(task_factory)

Пример #10

Показать файл

Файл: __init__.py Проект: onlynone/pydoop

def run_task():
    """Run the pipes task, using ``Reducer`` as the combiner class too.

    Returns whatever ``runTask`` returns.
    """
    task_factory = Factory(Mapper, Reducer, combiner_class=Reducer)
    return runTask(task_factory)

Пример #11

Показать файл

def run_task():
    """Run the pipes task with ``mapper`` and ``reducer``.

    Returns whatever ``runTask`` returns.
    """
    task_factory = Factory(mapper, reducer)
    return runTask(task_factory)

Пример #12

Показать файл

Файл: distblast_pipes.py Проект: Pfiver/RNA-Seqlyze

def main(argv):
    """Run the pipes task with the FASTA mapper, reducer and record reader.

    ``argv`` is accepted but not used; nothing is returned.
    """
    task_factory = Factory(
        FastaMapper, FastaReducer, record_reader_class=FastaReader)
    runTask(task_factory)

Пример #13

Показать файл

def run_task():
  """Run the pipes task with ``Mapper`` and ``Reducer``.

  Returns whatever ``runTask`` returns.
  """
  task_factory = Factory(Mapper, Reducer)
  return runTask(task_factory)

Пример #14

Показать файл

#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
#
# END_COPYRIGHT

import struct
from pydoop.pipes import Mapper, Reducer, Factory, runTask


class WordCountMapper(Mapper):
    """Mapper that emits one ("word", "1") pair per input token."""

    def map(self, context):
        """Split the input value on whitespace and emit each token with "1"."""
        for token in context.getInputValue().split():
            context.emit(token, "1")


class WordCountReducer(Reducer):
    """Reducer that sums the integer values received for a key."""

    def reduce(self, context):
        """Emit the key with the sum packed as a big-endian 4-byte integer."""
        total = 0
        while True:
            if not context.nextValue():
                break
            total += int(context.getInputValue())
        key = context.getInputKey()
        context.emit(key, struct.pack(">i", total))


if __name__ == "__main__":
    # Script entry point: run the word count job with the classes above.
    task_factory = Factory(WordCountMapper, WordCountReducer)
    runTask(task_factory)