Exemplo n.º 1
0
def main():
    """Run the MapReduce job over the file named by the first CLI argument."""
    # The context manager closes the input file once the job has finished;
    # previously the handle was opened and never closed.
    with open(sys.argv[1]) as friendships:
        MapReduce.execute(friendships, mapper, reducer)
Exemplo n.º 2
0
def main():
    """Run the MapReduce job with ``mapper2``/``reducer2`` over the input file.

    Assumes the first command-line argument is a file of JSON objects
    formatted as strings, one per line.
    """
    # Close the input deterministically instead of leaking the handle.
    with open(sys.argv[1]) as infile:
        MapReduce.execute(infile, mapper2, reducer2)
Exemplo n.º 3
0
import sys
from MapReduce import *

# Module-level MapReduce instance; mapper and reducer in this script emit
# through it and the __main__ section calls its execute().
fmr=MapReduce()

def reducer(person, count_list):
    """Emit ``(person, total)`` where total is the sum of ``count_list``."""
    total = sum(count_list)
    fmr.emit((person, total))

def mapper(record):
    """Emit the record's first field as key with a count of 1."""
    key = record[0]
    fmr.emit_intermediate(key, 1)

if __name__ == '__main__':
    # Script entry point: feed the file named by the first CLI argument to
    # the job.  The with-block closes the file once execute() returns
    # (the original left the handle open).
    with open(sys.argv[1]) as fdata:
        fmr.execute(fdata, mapper, reducer)
Exemplo n.º 4
0
import sys
from MapReduce import *

# Module-level MapReduce instance; mapper and reducer in this script emit
# through it and the __main__ section calls its execute().
indexmr = MapReduce()


def mapper(record):
    """Emit ``(token, record[0])`` for each whitespace-separated token of
    ``record[1]``."""
    source, text = record[0], record[1]
    for token in text.split():
        indexmr.emit_intermediate(token, source)


def reducer(word, books):
    """Emit the pair ``(word, books)`` unchanged."""
    entry = (word, books)
    indexmr.emit(entry)


if __name__ == '__main__':
    # Script entry point: run the job over the file named by the first CLI
    # argument.  The with-block closes the file once execute() returns
    # (the original left the handle open).
    with open(sys.argv[1]) as data:
        indexmr.execute(data, mapper, reducer)
from MapReduce import *
import sys

# Module-level MapReduce instance; mapper and reducer in this script emit
# through it and the script's last statement calls its execute().
mr = MapReduce()

def mapper(record):
    """Emit both orderings of the record's first two fields as keys,
    each with the value 1."""
    first, second = record[0], record[1]
    for pair in ((first, second), (second, first)):
        mr.emit_intermediate(pair, 1)

def reducer(key, list_of_values):
    """Emit ``key`` only when at most one value was collected for it.

    Keys that collected more than one value are dropped.
    """
    if len(list_of_values) <= 1:
        mr.emit(key)

# Run the job over the file named by the first CLI argument.  The with-block
# closes the file once execute() returns (the original left the handle open).
with open(sys.argv[1]) as inputdata:
    mr.execute(inputdata, mapper, reducer)
Exemplo n.º 6
0
import sys
from MapReduce import *

# Module-level MapReduce instance; mapper and reducer in this script emit
# through it and the __main__ section calls execute() and reads its result.
dnarm = MapReduce()


def mapper(record):
    """Key ``record[0]`` by ``record[1]`` with its last 10 items removed."""
    label = record[0]
    trimmed = record[1][:-10]
    dnarm.emit_intermediate(trimmed, label)


def reducer(trimseq, kinds):
    """Emit the pair ``(trimseq, kinds)`` unchanged."""
    entry = (trimseq, kinds)
    dnarm.emit(entry)


if __name__ == '__main__':
    # Script entry point: run the job over the file named by the first CLI
    # argument.  The with-block closes the file once execute() returns
    # (the original left the handle open).
    with open(sys.argv[1]) as DNAseq:
        dnarm.execute(DNAseq, mapper, reducer)
    # Print the second element of every result entry.  The parenthesised
    # single-argument form prints the same text under the Python 2 print
    # statement and also parses on Python 3.
    print([i[1] for i in dnarm.result])
Exemplo n.º 7
0
def main():
    """Run the MapReduce job over the file named by the first CLI argument."""
    # The context manager closes the input file once the job has finished;
    # previously the handle was opened and never closed.
    with open(sys.argv[1]) as persons:
        MapReduce.execute(persons, mapper, reducer)
Exemplo n.º 8
0
def main():
    """Run the MapReduce job over the file named by the first CLI argument."""
    # Bind the file to a name so it can be closed when the job finishes;
    # the original passed an anonymous handle that was never closed.
    with open(sys.argv[1]) as infile:
        MapReduce.execute(infile, mapper, reducer)
Exemplo n.º 9
0
import sys
from MapReduce import *

# Module-level MapReduce instance; mapper and reducer in this script emit
# through it and the __main__ section calls execute() and reads its result.
joinmr=MapReduce()

def mapper(record):
    """Emit the whole record keyed by its second field."""
    joinmr.emit_intermediate(record[1], record)

def reducer(orderid, ord_list):
    """Pair every 'order' row with every 'line_item' row in ``ord_list``.

    A record's first field selects its table; fields from index 2 onward
    are kept.  One tuple of ``orderid`` followed by the order fields and
    then the item fields is emitted per pair.
    """
    def fields_of(table):
        # Trailing fields of every record whose first field equals `table`.
        return [rec[2:] for rec in ord_list if rec[0] == table]

    order_rows = fields_of('order')
    item_rows = fields_of('line_item')
    for order_fields in order_rows:
        for item_fields in item_rows:
            joined = [orderid] + order_fields + item_fields
            joinmr.emit(tuple(joined))

if __name__ == '__main__':
    # Script entry point: run the job over the file named by the first CLI
    # argument.  The with-block closes the file once execute() returns
    # (the original left the handle open).
    with open(sys.argv[1]) as jsdata:
        joinmr.execute(jsdata, mapper, reducer)
    # Number the results from 1.  A single %-formatted string produces the
    # same space-separated text as the Python 2 print statement did and
    # also parses on Python 3.
    for count, i in enumerate(joinmr.result, 1):
        print('record N0. %s %s' % (count, i))
Exemplo n.º 10
0
import sys
from MapReduce import *

# Module-level MapReduce instance; mapper and reducer in this script emit
# through it and the __main__ section calls execute() and reads its result.
dnarm=MapReduce()

def mapper(record):
    """Emit ``record[0]`` under the key ``record[1]`` minus its last 10 items."""
    dnarm.emit_intermediate(record[1][:-10], record[0])

def reducer(trimseq, kinds):
    """Emit the pair ``(trimseq, kinds)`` unchanged."""
    result = (trimseq, kinds)
    dnarm.emit(result)

if __name__ == '__main__':
    # Script entry point: run the job over the file named by the first CLI
    # argument.  The with-block closes the file once execute() returns
    # (the original left the handle open).
    with open(sys.argv[1]) as DNAseq:
        dnarm.execute(DNAseq, mapper, reducer)
    # Print the second element of every result entry.  The parenthesised
    # single-argument form prints the same text under the Python 2 print
    # statement and also parses on Python 3.
    print([i[1] for i in dnarm.result])
Exemplo n.º 11
0
Arquivo: mult.py Projeto: wgwz/courses
def main():
    """Run the MapReduce job over the file named by the first CLI argument."""
    # The context manager closes the input file once the job has finished;
    # previously the handle was opened and never closed.
    with open(sys.argv[1]) as records_file:
        MapReduce.execute(records_file, mapper, reducer)
Exemplo n.º 12
0
import sys
from MapReduce import *

# Module-level MapReduce instance; mapper and reducer in this script emit
# through it and the __main__ section calls its execute().
fmr = MapReduce()


def reducer(person, count_list):
    """Emit ``person`` paired with the sum of ``count_list``."""
    tally = sum(count_list)
    summary = (person, tally)
    fmr.emit(summary)


def mapper(record):
    """Emit a count of 1 keyed by the record's first field."""
    who = record[0]
    fmr.emit_intermediate(who, 1)


if __name__ == '__main__':
    # Script entry point: run the job over the file named by the first CLI
    # argument.  The with-block closes the file once execute() returns
    # (the original left the handle open).
    with open(sys.argv[1]) as fdata:
        fmr.execute(fdata, mapper, reducer)
Exemplo n.º 13
0
def main():
    """Run the MapReduce job over the file named by the first CLI argument."""
    # The context manager closes the input file once the job has finished;
    # previously the handle was opened and never closed.
    with open(sys.argv[1]) as friendships:
        MapReduce.execute(friendships, mapper, reducer)
Exemplo n.º 14
0
    docid.add(record[0].encode('utf-8'))
    term.add(record[1].encode('utf-8'))
# Convert the docid/term collections filled above into lists; the reducer
# below iterates over `term`.
docid = list(docid)
term = list(term)
# `conn` is created outside this excerpt -- presumably the database
# connection the records were read from; TODO confirm.
conn.close()

# Pre-define the sizes of a and b, which are L*M and M*N.
# M is the number of entries in docid; mapper uses it as its loop bound.
M = len(docid)
# Module-level MapReduce instance used by mapper, reducer and __main__.
matmr = MapReduce()


def mapper(record):
    """Fan one record out across every index ``m`` in ``0..M``.

    For each ``m`` the record's second and third fields are emitted tagged
    'a' under key ``(record[0], m)`` and tagged 'b' under key
    ``(m, record[0])``.
    """
    head = record[0]
    as_a = ('a', record[1], record[2])
    as_b = ('b', record[1], record[2])
    for idx in range(M + 1):
        matmr.emit_intermediate((head, idx), as_a)
        matmr.emit_intermediate((idx, head), as_b)


def reducer(key, klist):
    """Emit ``(key[0], key[1], mult)`` for one output key.

    Each entry of ``klist`` is a ``(tag, index, value)`` triple with tag
    'a' or 'b'; values are stored per tag and index (a later entry with the
    same tag and index overwrites an earlier one).  ``mult`` is the sum,
    over every entry of the module-level ``term`` list, of the product of
    the 'a' and 'b' values stored under that entry (a missing value counts
    as 0).
    """
    # Unused local `maxm` removed; the two per-tag tables are built up front.
    d = {'a': {}, 'b': {}}
    for t in klist:
        d[t[0]][t[1]] = t[2]
    # A generator avoids building a throwaway list just to sum it.
    mult = sum(d['a'].get(i, 0) * d['b'].get(i, 0) for i in term)
    matmr.emit((key[0], key[1], mult))


if __name__ == '__main__':
    # Script entry point: run the job over `matrix`, which is built outside
    # this excerpt.
    matmr.execute(matrix, mapper, reducer)
Exemplo n.º 15
0
import sys
from MapReduce import *

# Module-level MapReduce instance; mapper and reducer in this script emit
# through it and the __main__ section calls its execute().
indexmr=MapReduce()
def mapper(record):
    """Emit ``record[0]`` once for each whitespace-separated token of
    ``record[1]``, keyed by that token."""
    origin = record[0]
    tokens = record[1].split()
    for tok in tokens:
        indexmr.emit_intermediate(tok, origin)

def reducer(word, books):
    """Emit the pair ``(word, books)`` unchanged."""
    posting = (word, books)
    indexmr.emit(posting)

if __name__ == '__main__':
    # Script entry point: run the job over the file named by the first CLI
    # argument.  The with-block closes the file once execute() returns
    # (the original left the handle open).
    with open(sys.argv[1]) as data:
        indexmr.execute(data, mapper, reducer)
Exemplo n.º 16
0
import sys
from MapReduce import *

# Module-level MapReduce instance; mapper and reducer in this script emit
# through it and the __main__ section calls execute() and reads its result.
joinmr = MapReduce()


def mapper(record):
    """Emit the full record under the key found in its second field."""
    join_key = record[1]
    joinmr.emit_intermediate(join_key, record)


def reducer(orderid, ord_list):
    """Pair every 'order' row with every 'line_item' row in ``ord_list``.

    A record's first field selects its table; fields from index 2 onward
    are kept.  One tuple of ``orderid`` followed by the order fields and
    then the item fields is emitted per pair.
    """
    def tail_fields(table):
        # Trailing fields of every record whose first field equals `table`.
        return [row[2:] for row in ord_list if row[0] == table]

    orders = tail_fields('order')
    items = tail_fields('line_item')
    for order_part in orders:
        prefix = [orderid] + order_part
        for item_part in items:
            joinmr.emit(tuple(prefix + item_part))


if __name__ == '__main__':
    # Script entry point: run the job over the file named by the first CLI
    # argument.  The with-block closes the file once execute() returns
    # (the original left the handle open).
    with open(sys.argv[1]) as jsdata:
        joinmr.execute(jsdata, mapper, reducer)
    # Number the results from 1.  A single %-formatted string produces the
    # same space-separated text as the Python 2 print statement did and
    # also parses on Python 3.
    for count, i in enumerate(joinmr.result, 1):
        print('record N0. %s %s' % (count, i))
Exemplo n.º 17
0
def main():
    """Run the MapReduce job over the file named by the first CLI argument."""
    # The context manager closes the input file once the job has finished;
    # previously the handle was opened and never closed.
    with open(sys.argv[1]) as dna:
        MapReduce.execute(dna, mapper, reducer)
Exemplo n.º 18
0
def main():
    """Run the MapReduce job with ``mapper2``/``reducer2`` over the input file.

    Assumes the first command-line argument is a file of JSON objects
    formatted as strings, one per line.
    """
    # Close the input deterministically instead of leaking the handle.
    with open(sys.argv[1]) as infile:
        MapReduce.execute(infile, mapper2, reducer2)
Exemplo n.º 19
0
def main():
    """Run the MapReduce job over the file named by the first CLI argument."""
    # The context manager closes the input file once the job has finished;
    # previously the handle was opened and never closed.
    with open(sys.argv[1]) as dna:
        MapReduce.execute(dna, mapper, reducer)
Exemplo n.º 20
0
# Collect every record from `cursor` into `matrix` and record the UTF-8
# encoded first and second fields in `docid` and `term`.  All four names are
# created outside this excerpt; docid/term are presumably sets (they are
# filled with .add()) -- TODO confirm.
for record in cursor:
    matrix.append(record)
    docid.add(record[0].encode('utf-8'))
    term.add(record[1].encode('utf-8'))
# Convert both collections to lists; the reducer below iterates over `term`.
docid=list(docid)
term=list(term)
conn.close()

# Pre-define the sizes of a and b, which are L*M and M*N.
# M is the number of entries in docid; mapper uses it as its loop bound.
M=len(docid)
# Module-level MapReduce instance used by mapper, reducer and __main__.
matmr=MapReduce()

def mapper(record):
    """Fan one record out across every index ``m`` in ``0..M``.

    For each ``m`` the record's second and third fields are emitted tagged
    'a' under key ``(record[0], m)`` and tagged 'b' under key
    ``(m, record[0])``.
    """
    anchor = record[0]
    tagged_a = ('a', record[1], record[2])
    tagged_b = ('b', record[1], record[2])
    for position in range(M + 1):
        matmr.emit_intermediate((anchor, position), tagged_a)
        matmr.emit_intermediate((position, anchor), tagged_b)

def reducer(key, klist):
    """Emit ``(key[0], key[1], mult)`` for one output key.

    Each entry of ``klist`` is a ``(tag, index, value)`` triple with tag
    'a' or 'b'; values are stored per tag and index (a later entry with the
    same tag and index overwrites an earlier one).  ``mult`` is the sum,
    over every entry of the module-level ``term`` list, of the product of
    the 'a' and 'b' values stored under that entry (a missing value counts
    as 0).
    """
    # Unused local `maxm` removed; the two per-tag tables are built up front.
    d = {'a': {}, 'b': {}}
    for t in klist:
        d[t[0]][t[1]] = t[2]
    # A generator avoids building a throwaway list just to sum it.
    mult = sum(d['a'].get(i, 0) * d['b'].get(i, 0) for i in term)
    matmr.emit((key[0], key[1], mult))

if __name__=='__main__':
    # Script entry point: run the job over the `matrix` list filled by the
    # cursor loop above.
    matmr.execute(matrix,mapper,reducer)

Exemplo n.º 21
0
def main():
    """Run the MapReduce job over the file named by the first CLI argument."""
    # The context manager closes the input file once the job has finished;
    # previously the handle was opened and never closed.
    with open(sys.argv[1]) as persons:
        MapReduce.execute(persons, mapper, reducer)