Example #1
0
from MapReduce import MapReduce

mr = MapReduce()


def mapper(record):
    """Emit one (word, document-name) pair per word of the document.

    record[0] is the document name, record[1] its full text.
    """
    doc_name = record[0]
    body = record[1]
    for word in body.split():
        mr.emit_intermediate(word, doc_name)


def reducer(key, values):
    """Emit the word together with its de-duplicated list of documents."""
    unique_docs = set(values)
    mr.emit((key, list(unique_docs)))


if __name__ == '__main__':
    import sys
    import json

    # `with` guarantees the input file is closed even if execute() raises;
    # the original opened it and never closed the handle.
    with open(sys.argv[1]) as inputdata:
        mr.execute(inputdata, mapper, reducer)

    # Persist the inverted index built up in mr.result by the reducers.
    with open('inverted_index.json', 'w') as outfile:
        json.dump(mr.result, outfile)
Example #2
0
    order_id = record[1]

    #Mapper will be called on all the orders first(As the data file contains the records of orders before line_items)
    #As the output of mapper is fed to reducer,all the output to be displayed need to be present in the output of the Mapper.
    #So emit_intermdiate all the records with order_id as the key
    #So for every record the id,record is emmited
    if record_type == "order":
        mr.emit_intermediate(order_id,record)
    elif record_type == "line_item":
        mr.emit_intermediate(order_id,record)

# Reducer is called once per key of the map phase's output (the global
# dictionary mr.intermediate); here each key is an order_id.
def reducer(key, list_of_values):
    """Join an order record with each of its line_item records.

    list_of_values[0] is the order record; the remaining entries are the
    line_item records sharing the same order_id.  One (order, line_item)
    pair is emitted per line_item.
    """
    order = list_of_values[0]
    # Idiomatic slice iteration replaces the original manual while-loop
    # with an index counter (and its stray semicolons).
    for line_item in list_of_values[1:]:
        mr.emit((order, line_item))

#####################################################
if __name__ == '__main__':
    # NOTE(review): `sys` and `mr` must be provided by the part of the file
    # above this chunk; they are not imported here — confirm against the
    # full script.
    # `with` closes the input file even if execute() raises; the original
    # leaked the handle.
    with open(sys.argv[1]) as inputdata:
        # execute() runs mapper for every record (filling the intermediate
        # dictionary), then reducer for every key (filling mr.result), and
        # finally prints the result array.
        mr.execute(inputdata, mapper, reducer)
Example #3
0
from MapReduce import MapReduce
map_reduce_obj = MapReduce()


def mapper(record):
    """Count, per candidate itemset, the baskets of this chunk containing it.

    record is a list of baskets (each basket an iterable of items).  The
    candidate itemsets are read — one JSON document per line — from the
    file named by sys.argv[2].  For each candidate this emits
    (candidate_tuple, (support_count, baskets_in_chunk)).
    """
    number_of_baskets = len(record)
    # `with` guarantees the candidates file is closed; the original opened
    # a new handle on every mapper call and never closed any of them.
    with open(sys.argv[2]) as candidate_file:
        for line in candidate_file:
            candidate = json.loads(line.strip())
            candidate_set = set(candidate)
            # A basket supports the candidate iff every candidate item
            # appears in it (subset test, same as the original
            # empty-set-difference check).
            count = sum(
                1 for basket in record if candidate_set <= set(basket))
            map_reduce_obj.emit_intermediate(
                tuple(candidate), (count, number_of_baskets))


def reduce(key, list_of_value):
    """Sum per-chunk supports and emit the itemset if it is frequent.

    Each value is a (chunk_support, chunk_basket_count) pair; the itemset
    is emitted when its total support reaches 30% of all baskets.
    """
    total_count = sum(pair[0] for pair in list_of_value)
    total_baskets = sum(pair[1] for pair in list_of_value)
    threshold = ceil(total_baskets * 0.3)
    if total_count >= threshold:
        map_reduce_obj.emit([list(key), total_count])


if __name__ == '__main__':
    # `with` closes the basket file even if execute() raises; the original
    # opened it without ever closing the handle.
    with open(sys.argv[1]) as input_data:
        map_reduce_obj.execute(input_data, mapper, reduce)
Example #4
0
import sys
from MapReduce import MapReduce

mr = MapReduce()

def mapper(record):
    """Emit each sequence with its last 10 characters removed as the key.

    record is a (document-id, sequence) pair; the id is discarded and the
    trimmed sequence is emitted with a count of 1.
    """
    _doc_id, sequence = record
    trimmed = sequence[:-10]
    mr.emit_intermediate(trimmed, 1)

# Part 3
def reducer(key, list_of_values):
    """Emit the unique trimmed sequence; the occurrence counts are ignored."""
    mr.emit(key)

# Part 4
# Run the job on the file named on the command line; `with` closes the
# handle automatically once execute() returns.
with open(sys.argv[1]) as f:
    mr.execute(f, mapper, reducer)
from MapperMatrixVector import MapperMatrixVector
from ReducerMatrixVector import ReducerMatrixVector
from MapReduce import MapReduce
from FileHelper import FileHelper

# Create instances for mapper and reducer.
# Note that the vector (dataFiles/b) is stored inside the mapper instance.
the_reducer = ReducerMatrixVector()
the_mapper = MapperMatrixVector('dataFiles/b')

# The file where the matrix is stored.
matrix_files = ['dataFiles/A']

# Run MapReduce.  The trailing arguments are passed by keyword for clarity,
# matching the counting-words script's call
# MapReduce(mapper, reducer, files, silent=..., nThreads=...).
the_map_reducer = MapReduce(the_mapper, the_reducer, matrix_files,
                            silent=0, nThreads=1)
result_dict = the_map_reducer.execute()

# Write output (writeDictionnary is the project helper's own spelling).
out_file_directory = 'outputs/'
out_file_name = 'matrixVectorResults.txt'
FileHelper.writeDictionnary(out_file_directory + out_file_name, result_dict)
Example #6
0
from MapperCountingWords import MapperCountingWords
from ReducerCountingWords import ReducerCountingWords
from MapReduce import MapReduce
from FileHelper import FileHelper

# Mapper and reducer instances for the word-count job.
word_mapper = MapperCountingWords()
word_reducer = ReducerCountingWords()

# Source text file(s); more than one may be listed here.
raw_text_files = ['dataFiles/text']

# Pre-process the input into one word per line.
parsed_file = 'dataFiles/textParsed'
FileHelper.transformTextIntoListOfWords(raw_text_files, parsed_file)

# Run the MapReduce job over the parsed file.
word_count_job = MapReduce(word_mapper, word_reducer, [parsed_file],
                           silent=-1, nThreads=5)
word_counts = word_count_job.execute()

# Write the results (writeDictionnary / coutingWordsResults keep the
# project's original spellings).
output_directory = 'outputs/'
output_file = 'coutingWordsResults.txt'
FileHelper.writeDictionnary(output_directory + output_file, word_counts)
Example #7
0
from MapReduce import MapReduce
import itertools
import sys

map_reduce_obj = MapReduce()


def mapper(record):
    """Emit the pair in both directions so each endpoint keys its partners."""
    left = record[0]
    right = record[1]
    map_reduce_obj.emit_intermediate(left, right)
    map_reduce_obj.emit_intermediate(right, left)


def reducer(key, list_of_values):
    """For every unordered pair of this key's values, emit the sorted pair
    followed by the key itself as a three-element list."""
    for pair in itertools.combinations(list_of_values, 2):
        triple = sorted(pair)
        triple.append(key)
        map_reduce_obj.emit(triple)


if __name__ == '__main__':
    # `with` guarantees the input file is closed even if execute() raises;
    # the original left the handle open.
    with open(sys.argv[1]) as input_data:
        map_reduce_obj.execute(input_data, mapper, reducer)