def bic_using_mapreduce(self, iteration_bic_list, em_iters):
    """Choose the pair of GMMs to merge by scoring every pair in a BICMRJob.

    Every unordered pair of entries in ``iteration_bic_list`` becomes one
    input record: the key is the pair of list indices and the value is
    ``(gmm_a, gmm_b, concatenated_data, em_iters)``.  The job is expected to
    emit exactly one record describing the winning pair.

    Args:
        iteration_bic_list: indexable sequence of ``(gmm, data)`` pairs;
            ``data`` must be accepted by ``np.concatenate``.
        em_iters: forwarded to the job and to ``compute_distance_BIC``.

    Returns:
        ``(new_gmm, (g1, g2), merged_tuple_indices, best_score)``.
        ``merged_tuple_indices`` and ``best_score`` come from the job
        output, ``g1``/``g2`` are the local GMMs at those indices and
        ``new_gmm`` is the merge recomputed locally from them.

    Raises:
        AssertionError: if the job does not emit exactly one record.
    """
    mr_args = ['-v', '--strict-protocols',
               '-r', 'local',
               '--input-protocol', 'pickle',
               '--output-protocol', 'pickle',
               '--protocol', 'pickle']

    # One serialized record per unordered pair (first index < second index).
    records = []
    total = len(iteration_bic_list)
    for first_idx, (gmm_a, data_a) in enumerate(iteration_bic_list):
        for second_idx in range(first_idx + 1, total):
            gmm_b, data_b = iteration_bic_list[second_idx]
            pair_data = np.concatenate((data_a, data_b))
            record = protocol.write((first_idx, second_idx),
                                    (gmm_a, gmm_b, pair_data, em_iters))
            records.append(record + "\n")

    job = BICMRJob(args=mr_args).sandbox(stdin=records)
    runner = job.make_runner()
    runner.run()

    # Build a real list: len() and indexing below must not depend on map()
    # returning a list (it is a lazy iterator on Python 3).
    kv_pairs = [job.parse_output_line(line)
                for line in runner.stream_output()]
    assert len(kv_pairs) == 1
    # Only the indices and the score of the job's answer are used.
    _job_gmm, _job_tuple, merged_tuple_indices, best_score = kv_pairs[0][1]

    ind1, ind2 = merged_tuple_indices
    g1, d1 = iteration_bic_list[ind1]
    g2, d2 = iteration_bic_list[ind2]
    # NOTE(review): the merged GMM returned by the job is discarded and the
    # merge is recomputed here from the local GMMs -- confirm this is
    # intentional before replacing it with the job's result.
    new_gmm, _score = compute_distance_BIC(
        g1, g2, np.concatenate((d1, d2)), em_iters)

    return new_gmm, (g1, g2), merged_tuple_indices, best_score
示例#2
0
 def internal_protocol(self):
     """Build the protocol object selected by ``self.options.internal_format``.

     ``'json'``, ``'pickle'`` and ``'raw'`` map to a new
     ``StandardJSONProtocol``, ``PickleProtocol`` and ``RawProtocol``
     instance respectively.  Any other value yields ``None``.
     """
     chosen_format = self.options.internal_format
     if chosen_format == 'json':
         return StandardJSONProtocol()
     if chosen_format == 'pickle':
         return PickleProtocol()
     if chosen_format == 'raw':
         return RawProtocol()
     # Unrecognised format: no protocol object is produced.
     return None
示例#3
0
def diarize_all(infilenames):
    """Run a ClusterMRJob on Hadoop with one input record per file name.

    Each name is serialized with ``protocol.write(name, None)`` and fed to
    the job's sandboxed stdin.

    Args:
        infilenames: iterable of names to submit as task keys.

    Returns:
        None.  The runner's output is not read here.
    """
    mr_args = ['-v', '--strict-protocols',
               '-r', 'hadoop',
               '--input-protocol', 'pickle',
               '--output-protocol', 'pickle',
               '--protocol', 'pickle']
    # Use the argument: the previous body ignored ``infilenames`` and read
    # a ``meeting_names`` name that is not defined in this function.
    task_args = [protocol.write(name, None) + "\n" for name in infilenames]
    job = ClusterMRJob(args=mr_args).sandbox(stdin=task_args)
    runner = job.make_runner()
    runner.run()
 def train_using_mapreduce(self, init_training, em_iters):
     """Run a TrainMRJob on Hadoop with one task per entry of ``init_training``.

     Each task key is ``(index, entry, em_iters)`` with a ``None`` value,
     where ``index`` is the entry's position in ``init_training``.

     Args:
         init_training: iterable of training entries, passed through to the
             job unchanged.
         em_iters: value attached to every task.

     Returns:
         list of the values of the job's output records, in the order the
         runner streams them.
     """
     mr_args = ['-v', '--strict-protocols',
                '-r', 'hadoop',
                '--input-protocol', 'pickle',
                '--output-protocol', 'pickle',
                '--protocol', 'pickle']
     tasks = [(index, pair, em_iters)
              for index, pair in enumerate(init_training)]
     task_args = [protocol.write(task, None) + "\n" for task in tasks]

     job = TrainMRJob(args=mr_args).sandbox(stdin=task_args)
     runner = job.make_runner()
     runner.run()

     kv_pairs = [job.parse_output_line(line)
                 for line in runner.stream_output()]
     # A comprehension replaces the tuple-parameter lambda, which is a
     # syntax error on Python 3; the result is still a list.
     return [value for _key, value in kv_pairs]
 def score_using_mapreduce(self, gmm_list):
     """Run a ScoreMRJob on Hadoop with one task per GMM in ``gmm_list``.

     Each task key is ``(index, gmm)`` with a ``None`` value, where
     ``index`` is the GMM's position in ``gmm_list``.

     Args:
         gmm_list: iterable of GMM objects to submit.

     Returns:
         list of the values of the job's output records, in the order the
         runner streams them.
     """
     mr_args = ['-v', '--strict-protocols',
                '-r', 'hadoop',
                '--input-protocol', 'pickle',
                '--output-protocol', 'pickle',
                '--protocol', 'pickle']
     tasks = [(index, gmm) for index, gmm in enumerate(gmm_list)]
     task_args = [protocol.write(task, None) + "\n" for task in tasks]

     job = ScoreMRJob(args=mr_args).sandbox(stdin=task_args)
     runner = job.make_runner()
     runner.run()

     kv_pairs = [job.parse_output_line(line)
                 for line in runner.stream_output()]
     # A comprehension replaces the tuple-parameter lambda, which is a
     # syntax error on Python 3; the result is still a list.
     return [value for _key, value in kv_pairs]
 def train_using_mapreduce(self, init_training, em_iters):
     """Run a TrainMRJob on Hadoop and print how long the set-up steps take.

     Each task key is ``(index, entry, em_iters)`` with a ``None`` value,
     where ``index`` is the entry's position in ``init_training``.  Two
     timings are printed: building the task records, and creating the job
     plus its runner.

     Args:
         init_training: iterable of 3-item entries, unpacked as
             ``(gmm, start, interval)``.
         em_iters: value attached to every task.

     Returns:
         list of the values of the job's output records, in the order the
         runner streams them.

     Raises:
         ValueError / TypeError: if an entry cannot be unpacked into three
             items.
     """
     mr_args = ['-v', '--strict-protocols',
                '-r', 'hadoop',
                '--input-protocol', 'pickle',
                '--output-protocol', 'pickle',
                '--protocol', 'pickle']

     t = time.time()
     tasks = []
     for index, pair in enumerate(init_training):
         # Kept only as a shape check: fails early on a malformed entry.
         _gmm, _start, _interval = pair
         tasks.append((index, pair, em_iters))
     task_args = [protocol.write(task, None) + "\n" for task in tasks]
     # Single-argument print(...) emits the same text on Python 2 and 3.
     print("[train] preparation time: {0}".format(time.time() - t))

     t = time.time()
     job = TrainMRJob(args=mr_args).sandbox(stdin=task_args)
     runner = job.make_runner()
     print("[train] init mrjob: {0}".format(time.time() - t))

     runner.run()
     kv_pairs = [job.parse_output_line(line)
                 for line in runner.stream_output()]
     # A comprehension replaces the tuple-parameter lambda, which is a
     # syntax error on Python 3; the result is still a list.
     return [value for _key, value in kv_pairs]
 def segment_using_mapreduce(self, gmm_list, map_input, em_iter):
     """Run a SegmentMRJob on Hadoop and collect its per-GMM results.

     Before the job starts, ``gmm_list`` and ``em_iter`` are pickled to the
     files ``self_gmmlist`` and ``self_em_iter`` in the current working
     directory, and both files are given mode rwxr-xr-x.

     Args:
         gmm_list: indexable, mutable collection of GMMs.  It is updated in
             place: for every output record ``((p, data_indices), g)``,
             ``gmm_list[p]`` is replaced by ``g``.
         map_input: iterable of task keys; each is written with a ``None``
             value.
         em_iter: value pickled to ``self_em_iter``.

     Returns:
         ``(iter_bic_dict, iter_bic_list)`` where ``iter_bic_dict`` maps
         ``p`` to ``data_indices`` and ``iter_bic_list`` is the list of
         output keys ``(p, data_indices)`` in streamed order.
     """
     shared_mode = (S_IRUSR | S_IWUSR | S_IXUSR |
                    S_IRGRP | S_IXGRP |
                    S_IROTH | S_IXOTH)
     for path, payload in (('self_gmmlist', gmm_list),
                           ('self_em_iter', em_iter)):
         # Binary mode is what pickle expects, and the with-block makes
         # sure the data is flushed and closed before the job starts.
         with open(path, 'wb') as handle:
             pickle.dump(payload, handle)
         os.chmod(path, shared_mode)

     mr_args = ['-v', '--strict-protocols',
                '-r', 'hadoop',
                '--input-protocol', 'pickle',
                '--output-protocol', 'pickle',
                '--protocol', 'pickle']
     task_args = [protocol.write(key, None) + "\n" for key in map_input]
     job = SegmentMRJob(args=mr_args).sandbox(stdin=task_args)
     runner = job.make_runner()
     runner.run()

     # Build a real list: the records are walked twice below, which a lazy
     # map() iterator would not survive.
     kv_pairs = [job.parse_output_line(line)
                 for line in runner.stream_output()]
     iter_bic_list = [key for key, _gmm in kv_pairs]
     iter_bic_dict = {}
     for (p, data_indices), g in kv_pairs:
         iter_bic_dict[p] = data_indices
         gmm_list[p] = g  # update the caller's list with the trained GMM
     return iter_bic_dict, iter_bic_list
示例#8
0
 def test_bad_data(self):
     """Pass a fixed junk byte string to ``assertCantDecode`` with a PickleProtocol.

     ``assertCantDecode`` is defined outside this snippet; presumably it
     asserts that decoding the bytes fails.
     """
     garbage = b'{@#$@#!^&*$%^'
     self.assertCantDecode(PickleProtocol(), garbage)
示例#9
0
 def test_round_trip_with_trailing_tab(self):
     """Check every pair in ``PICKLE_KEYS_AND_VALUES`` with the trailing-tab helper.

     A fresh ``PickleProtocol`` is created for each pair.  The helper
     ``assertRoundTripWithTrailingTabOK`` is defined outside this snippet.
     """
     for key, value in PICKLE_KEYS_AND_VALUES:
         codec = PickleProtocol()
         self.assertRoundTripWithTrailingTabOK(codec, key, value)
示例#10
0
 def test_round_trip(self):
     """Check every pair in ``PICKLE_KEYS_AND_VALUES`` with ``assertRoundTripOK``.

     A fresh ``PickleProtocol`` is created for each pair.  The helper
     ``assertRoundTripOK`` is defined outside this snippet.
     """
     for key, value in PICKLE_KEYS_AND_VALUES:
         codec = PickleProtocol()
         self.assertRoundTripOK(codec, key, value)
示例#11
0
 
all_meeting_names = [
'E001/HVC006045', 'E001/HVC006184', 'E001/HVC011409', 'E001/HVC022974', 'E001/HVC026971', 'E001/HVC027850', 'E001/HVC029485', 'E001/HVC031151', 'E001/HVC032158', 'E001/HVC036790', 'E001/HVC040785', 'E001/HVC042692', 'E001/HVC049437', 'E001/HVC064215', 'E001/HVC067270', 'E001/HVC068504', 'E001/HVC087057', 'E001/HVC090414', 'E001/HVC091740', 'E001/HVC095303', 'E001/HVC098807', 'E001/HVC103302', 'E001/HVC103932', 'E001/HVC105766', 'E001/HVC108320', 'E001/HVC109456', 'E001/HVC123288', 'E001/HVC134406', 'E001/HVC134634', 'E001/HVC146032', 'E001/HVC148793', 'E001/HVC152448', 'E001/HVC169936', 'E001/HVC174999', 'E001/HVC198184', 'E001/HVC201655', 'E001/HVC203988', 'E001/HVC218344', 'E001/HVC218765', 'E001/HVC240490', 'E001/HVC243157', 'E001/HVC255452', 'E001/HVC257987', 'E001/HVC259599', 'E001/HVC263724', 'E001/HVC268521', 'E001/HVC271903', 'E001/HVC283780', 'E001/HVC292292', 'E001/HVC293678', 'E001/HVC298568', 'E001/HVC309504', 'E001/HVC317804', 'E001/HVC319297', 'E001/HVC319600', 'E001/HVC320560', 'E001/HVC334254', 'E001/HVC359216', 'E001/HVC362146', 'E001/HVC364230', 'E001/HVC365963', 'E001/HVC377296', 'E001/HVC396658', 'E001/HVC397279', 'E001/HVC398674', 'E001/HVC402970', 'E001/HVC403628', 'E001/HVC417926', 'E001/HVC418407', 'E001/HVC425927', 'E001/HVC429818', 'E001/HVC434449', 'E001/HVC450989', 'E001/HVC460343', 'E001/HVC462262', 'E001/HVC462508', 'E001/HVC463620', 'E001/HVC467645', 'E001/HVC468477', 'E001/HVC473973', 'E001/HVC479361', 'E001/HVC486704', 'E001/HVC498099', 'E001/HVC499900', 'E001/HVC506183', 'E001/HVC507955', 'E001/HVC513054', 'E001/HVC515040', 'E001/HVC516366', 'E001/HVC520461', 'E001/HVC528762', 'E001/HVC528929', 'E001/HVC529613', 'E001/HVC532992', 'E001/HVC532993', 'E001/HVC539647', 'E001/HVC541506', 'E001/HVC542481', 'E001/HVC549861', 'E001/HVC553302', 'E001/HVC561733', 'E001/HVC562777', 'E001/HVC570651', 'E001/HVC573643', 'E001/HVC579741', 'E001/HVC591147', 'E001/HVC597104', 'E001/HVC602688', 'E001/HVC605240', 'E001/HVC606315', 'E001/HVC615626', 
'E001/HVC616948', 'E001/HVC620320', 'E001/HVC631691', 'E001/HVC637907', 'E001/HVC646345', 'E001/HVC652043', 'E001/HVC658517', 'E001/HVC672564', 'E001/HVC676565', 'E001/HVC680956', 'E001/HVC681821', 'E001/HVC682794', 'E001/HVC683835', 'E001/HVC687278', 'E001/HVC690639', 'E001/HVC699748', 'E001/HVC705168', 'E001/HVC707236', 'E001/HVC711253', 'E001/HVC717615', 'E001/HVC718274', 'E001/HVC719228', 'E001/HVC726373', 'E001/HVC730049', 'E001/HVC730081', 'E001/HVC733376', 'E001/HVC742499', 'E001/HVC749106', 'E001/HVC766498', 'E001/HVC776608', 'E001/HVC786536', 'E001/HVC788532', 'E001/HVC789034', 'E001/HVC792251', 'E001/HVC803719', 'E001/HVC811971', 'E001/HVC822060', 'E001/HVC823377', 'E001/HVC823657', 'E001/HVC829585', 'E001/HVC834162', 'E001/HVC836424', 'E001/HVC846668', 'E001/HVC878334', 'E001/HVC881736', 'E001/HVC883718', 'E001/HVC887091', 'E001/HVC888814', 'E001/HVC891523', 
'E002/HVC013188', 'E002/HVC021844', 'E002/HVC027840', 'E002/HVC027879', 'E002/HVC028378', 'E002/HVC043460', 'E002/HVC050106', 'E002/HVC053039', 'E002/HVC055040', 'E002/HVC056946', 'E002/HVC057164', 'E002/HVC057244', 'E002/HVC067449', 'E002/HVC070937', 'E002/HVC075150', 'E002/HVC080067', 'E002/HVC081166', 'E002/HVC084897', 'E002/HVC087496', 'E002/HVC090118', 'E002/HVC090439', 'E002/HVC092481', 'E002/HVC110256', 'E002/HVC114107', 'E002/HVC115550', 'E002/HVC117893', 'E002/HVC118292', 'E002/HVC121841', 'E002/HVC127982', 'E002/HVC131494', 'E002/HVC132114', 'E002/HVC148914', 'E002/HVC164420', 'E002/HVC168889', 'E002/HVC188853', 'E002/HVC192860', 'E002/HVC198167', 'E002/HVC200173', 'E002/HVC217227', 'E002/HVC220724', 'E002/HVC232523', 'E002/HVC237868', 'E002/HVC239317', 'E002/HVC240101', 'E002/HVC240983', 'E002/HVC244418', 'E002/HVC256062', 'E002/HVC274889', 'E002/HVC280004', 'E002/HVC284062', 'E002/HVC284420', 'E002/HVC291067', 'E002/HVC297552', 'E002/HVC302964', 'E002/HVC305398', 'E002/HVC317191', 'E002/HVC318186', 'E002/HVC319130', 'E002/HVC329241', 'E002/HVC330358', 'E002/HVC330734', 'E002/HVC331229', 'E002/HVC331444', 'E002/HVC333068', 'E002/HVC333217', 'E002/HVC337634', 'E002/HVC337743', 'E002/HVC342710', 'E002/HVC357788', 'E002/HVC358793', 'E002/HVC371186', 'E002/HVC372625', 'E002/HVC378891', 'E002/HVC381000', 'E002/HVC381541', 'E002/HVC385947', 'E002/HVC386562', 'E002/HVC397476', 'E002/HVC406511', 'E002/HVC419946', 'E002/HVC425921', 'E002/HVC426636', 'E002/HVC448233', 'E002/HVC450975', 'E002/HVC454146', 'E002/HVC461207', 'E002/HVC468846', 'E002/HVC505164', 'E002/HVC508446', 'E002/HVC518505', 'E002/HVC524155', 'E002/HVC526932', 'E002/HVC528086', 'E002/HVC533619', 'E002/HVC543259', 'E002/HVC557970', 'E002/HVC562914', 'E002/HVC563762', 'E002/HVC568424', 'E002/HVC568645', 'E002/HVC569270', 'E002/HVC572422', 'E002/HVC572908', 'E002/HVC575229', 'E002/HVC575738', 'E002/HVC577808', 'E002/HVC588610', 'E002/HVC602589', 'E002/HVC613614', 'E002/HVC617138', 'E002/HVC620742', 
'E002/HVC621080', 'E002/HVC622215', 'E002/HVC633752', 'E002/HVC639090', 'E002/HVC648237', 'E002/HVC650616', 'E002/HVC652771', 'E002/HVC669143', 'E002/HVC678577', 'E002/HVC685238', 'E002/HVC686035', 'E002/HVC686083', 'E002/HVC690403', 'E002/HVC693882', 'E002/HVC698972', 'E002/HVC699015', 'E002/HVC724149', 'E002/HVC725414', 'E002/HVC732985', 'E002/HVC738245', 'E002/HVC742544', 'E002/HVC765357', 'E002/HVC765442', 'E002/HVC768066', 'E002/HVC781040', 'E002/HVC781550', 'E002/HVC787358', 'E002/HVC788059', 'E002/HVC792707', 'E002/HVC800578', 'E002/HVC804688', 'E002/HVC805254', 'E002/HVC805769', 'E002/HVC820877', 'E002/HVC826976', 'E002/HVC827162', 'E002/HVC831492', 'E002/HVC843356', 'E002/HVC843604', 'E002/HVC848042', 'E002/HVC852977', 'E002/HVC860018', 'E002/HVC860974', 'E002/HVC862982', 'E002/HVC868918', 'E002/HVC875036', 'E002/HVC876313', 'E002/HVC885657', 'E002/HVC893844', 
'E003/HVC002471', 'E003/HVC010290', 'E003/HVC011142', 'E003/HVC013116', 'E003/HVC022125', 'E003/HVC026646', 'E003/HVC030573', 'E003/HVC050909', 'E003/HVC051190', 'E003/HVC056660', 'E003/HVC057936', 'E003/HVC079114', 'E003/HVC084129', 'E003/HVC088732', 'E003/HVC094992', 'E003/HVC097830', 'E003/HVC097953', 'E003/HVC113117', 'E003/HVC123174', 'E003/HVC134881', 'E003/HVC140256', 'E003/HVC142647', 'E003/HVC142782', 'E003/HVC160009', 'E003/HVC165613', 'E003/HVC170767', 'E003/HVC171466', 'E003/HVC173530', 'E003/HVC179653', 'E003/HVC207596', 'E003/HVC220088', 'E003/HVC232520', 'E003/HVC237814', 'E003/HVC241109', 'E003/HVC250919', 'E003/HVC260684', 'E003/HVC263516', 'E003/HVC264875', 'E003/HVC272944', 'E003/HVC275841', 'E003/HVC301641', 'E003/HVC311021', 'E003/HVC319510', 'E003/HVC319532', 'E003/HVC324098', 'E003/HVC333949', 'E003/HVC334179', 'E003/HVC339086', 'E003/HVC346678', 'E003/HVC349659', 'E003/HVC357301', 'E003/HVC359764', 'E003/HVC359906', 'E003/HVC366975', 'E003/HVC367820', 'E003/HVC381826', 'E003/HVC390470', 'E003/HVC392637', 'E003/HVC401883', 'E003/HVC406294', 'E003/HVC425997', 'E003/HVC437054', 'E003/HVC452915', 'E003/HVC464072', 'E003/HVC465110', 'E003/HVC474294', 'E003/HVC475360', 'E003/HVC484876', 'E003/HVC489318', 'E003/HVC493860', 'E003/HVC505619', 'E003/HVC512371', 'E003/HVC512747', 'E003/HVC516601', 'E003/HVC521057', 'E003/HVC525411', 'E003/HVC543004', 'E003/HVC544293', 'E003/HVC548415', 'E003/HVC564280', 'E003/HVC569017', 'E003/HVC576054', 'E003/HVC576211', 'E003/HVC580162', 'E003/HVC580360', 'E003/HVC586546', 'E003/HVC588380', 'E003/HVC590331', 'E003/HVC594719', 'E003/HVC614231', 'E003/HVC617286', 'E003/HVC621642', 'E003/HVC644560', 'E003/HVC648942', 'E003/HVC650537', 'E003/HVC650710', 'E003/HVC659528', 'E003/HVC699256', 'E003/HVC706735', 'E003/HVC711312', 'E003/HVC714194', 'E003/HVC732336', 'E003/HVC735666', 'E003/HVC736473', 'E003/HVC737048', 'E003/HVC737610', 'E003/HVC766947', 'E003/HVC770038', 'E003/HVC779856', 'E003/HVC788750', 'E003/HVC813558', 
'E003/HVC816785', 'E003/HVC819685', 'E003/HVC832818', 'E003/HVC835953', 'E003/HVC850333', 'E003/HVC858361', 'E003/HVC859587', 
'E004/HVC002194', 'E004/HVC006498', 'E004/HVC017259', 'E004/HVC017564', 'E004/HVC020563', 'E004/HVC028110', 'E004/HVC032638', 'E004/HVC032883', 'E004/HVC037683', 'E004/HVC041266', 'E004/HVC046116', 'E004/HVC053311', 'E004/HVC062818', 'E004/HVC064403', 'E004/HVC069023', 'E004/HVC072350', 'E004/HVC074600', 'E004/HVC080138', 'E004/HVC086950', 'E004/HVC087373', 'E004/HVC096396', 'E004/HVC099975', 'E004/HVC104148', 'E004/HVC118260', 'E004/HVC122310', 'E004/HVC128242', 'E004/HVC129154', 'E004/HVC132154', 'E004/HVC135119', 'E004/HVC141088', 'E004/HVC143703', 'E004/HVC144025', 'E004/HVC145707', 'E004/HVC151805', 'E004/HVC156787', 'E004/HVC160818', 'E004/HVC169716', 'E004/HVC173306', 'E004/HVC187134', 'E004/HVC191915', 'E004/HVC199590', 'E004/HVC207621', 'E004/HVC216485', 'E004/HVC217451', 'E004/HVC224183', 'E004/HVC227619', 'E004/HVC227629', 'E004/HVC238985', 'E004/HVC239981', 'E004/HVC255020', 'E004/HVC256953', 'E004/HVC257153', 'E004/HVC260610', 'E004/HVC270837', 'E004/HVC273688', 'E004/HVC289649', 'E004/HVC290509', 'E004/HVC314148', 'E004/HVC314622', 'E004/HVC320733', 'E004/HVC344261', 'E004/HVC344981', 'E004/HVC352641', 'E004/HVC354277', 'E004/HVC357043', 'E004/HVC373590', 'E004/HVC386315', 'E004/HVC387362', 'E004/HVC388379', 'E004/HVC393854', 'E004/HVC408909', 'E004/HVC418580', 'E004/HVC421799', 'E004/HVC454515', 'E004/HVC459112', 'E004/HVC477496', 'E004/HVC478853', 'E004/HVC501898', 'E004/HVC501995', 'E004/HVC504909', 'E004/HVC516867', 'E004/HVC533832', 'E004/HVC534412', 'E004/HVC534905', 'E004/HVC552683', 'E004/HVC564882', 'E004/HVC566346', 'E004/HVC566365', 'E004/HVC573726', 'E004/HVC602555', 'E004/HVC606182', 'E004/HVC607421', 'E004/HVC618343', 'E004/HVC622940', 'E004/HVC631950', 'E004/HVC636230', 'E004/HVC641088', 'E004/HVC647848', 'E004/HVC657915', 'E004/HVC665516', 'E004/HVC670060', 'E004/HVC673789', 'E004/HVC679045', 'E004/HVC689791', 'E004/HVC691867', 'E004/HVC694478', 'E004/HVC696623', 'E004/HVC701564', 'E004/HVC726255', 'E004/HVC729977', 'E004/HVC734829', 
'E004/HVC742023', 'E004/HVC756575', 'E004/HVC757202', 'E004/HVC774967', 'E004/HVC775405', 'E004/HVC796410', 'E004/HVC804283', 'E004/HVC808980', 'E004/HVC830046', 'E004/HVC832838', 'E004/HVC841343', 'E004/HVC868773', 'E004/HVC872705', 'E004/HVC877282', 
'E005/HVC001815', 'E005/HVC004300', 'E005/HVC005890', 'E005/HVC005964', 'E005/HVC012200', 'E005/HVC022138', 'E005/HVC023564', 'E005/HVC035487', 'E005/HVC040882', 'E005/HVC043377', 'E005/HVC045935', 'E005/HVC046304', 'E005/HVC046313', 'E005/HVC057844', 'E005/HVC063690', 'E005/HVC064074', 'E005/HVC065058', 'E005/HVC073105', 'E005/HVC074172', 'E005/HVC077818', 'E005/HVC088989', 'E005/HVC096784', 'E005/HVC100716', 'E005/HVC108443', 'E005/HVC123055', 'E005/HVC125425', 'E005/HVC128008', 'E005/HVC138841', 'E005/HVC139164', 'E005/HVC143935', 'E005/HVC144790', 'E005/HVC146766', 'E005/HVC161378', 'E005/HVC164396', 'E005/HVC166364', 'E005/HVC175371', 'E005/HVC184502', 'E005/HVC191544', 'E005/HVC191750', 'E005/HVC196318', 'E005/HVC208915', 'E005/HVC212733', 'E005/HVC218372', 'E005/HVC226879', 'E005/HVC231593', 'E005/HVC231921', 'E005/HVC232608', 'E005/HVC233099', 'E005/HVC241878', 'E005/HVC242191', 'E005/HVC247384', 'E005/HVC248604', 'E005/HVC252280', 'E005/HVC266789', 'E005/HVC267120', 'E005/HVC274623', 'E005/HVC277969', 'E005/HVC277973', 'E005/HVC294218', 'E005/HVC294628', 'E005/HVC297115', 'E005/HVC311423', 'E005/HVC327234', 'E005/HVC333813', 'E005/HVC341111', 'E005/HVC353976', 'E005/HVC355538', 'E005/HVC359409', 'E005/HVC366824', 'E005/HVC377327', 'E005/HVC377628', 'E005/HVC378325', 'E005/HVC387498', 'E005/HVC390322', 'E005/HVC390861', 'E005/HVC395138', 'E005/HVC396063', 'E005/HVC406059', 'E005/HVC406916', 'E005/HVC421398', 'E005/HVC444188', 'E005/HVC456084', 'E005/HVC463904', 'E005/HVC469637', 'E005/HVC485044', 'E005/HVC488890', 'E005/HVC513723', 'E005/HVC520194', 'E005/HVC524550', 'E005/HVC529203', 'E005/HVC536276', 'E005/HVC560786', 'E005/HVC562810', 'E005/HVC576475', 'E005/HVC579327', 'E005/HVC584095', 'E005/HVC587910', 'E005/HVC591407', 'E005/HVC601029', 'E005/HVC611258', 'E005/HVC612003', 'E005/HVC624891', 'E005/HVC634081', 'E005/HVC638068', 'E005/HVC642035', 'E005/HVC645251', 'E005/HVC647293', 'E005/HVC649471', 'E005/HVC652674', 'E005/HVC659154', 'E005/HVC692549', 
'E005/HVC718045', 'E005/HVC722400', 'E005/HVC731770', 'E005/HVC734839', 'E005/HVC740175', 'E005/HVC741044', 'E005/HVC741067', 'E005/HVC746183', 'E005/HVC767025', 'E005/HVC769178', 'E005/HVC774615', 'E005/HVC776198', 'E005/HVC781384', 'E005/HVC783530', 'E005/HVC788725', 'E005/HVC790602', 'E005/HVC791390', 'E005/HVC792096', 'E005/HVC802868', 'E005/HVC818339', 'E005/HVC825954', 'E005/HVC828442', 'E005/HVC839042', 'E005/HVC849628', 'E005/HVC851359', 'E005/HVC854141', 'E005/HVC862010', 'E005/HVC866028', 'E005/HVC880356', 
'E006/HVC003127', 'E006/HVC014770', 'E006/HVC026344', 'E006/HVC028739', 'E006/HVC044384', 'E006/HVC046682', 'E006/HVC047785', 'E006/HVC048803', 'E006/HVC049355', 'E006/HVC053019', 'E006/HVC055364', 'E006/HVC057950', 'E006/HVC059513', 'E006/HVC060224', 'E006/HVC062957', 'E006/HVC065131', 'E006/HVC075202', 'E006/HVC079072', 'E006/HVC089951', 'E006/HVC094890', 'E006/HVC096167', 'E006/HVC102455', 'E006/HVC105744', 'E006/HVC108273', 'E006/HVC108839', 'E006/HVC114661', 'E006/HVC117742', 'E006/HVC135090', 'E006/HVC136327', 'E006/HVC136448', 'E006/HVC136655', 'E006/HVC139877', 'E006/HVC142274', 'E006/HVC143596', 'E006/HVC149161', 'E006/HVC154429', 'E006/HVC154997', 'E006/HVC160292', 'E006/HVC174418', 'E006/HVC176309', 'E006/HVC180590', 'E006/HVC183888', 'E006/HVC184345', 'E006/HVC186186', 'E006/HVC187429', 'E006/HVC210336', 'E006/HVC213284', 'E006/HVC219748', 'E006/HVC220136', 'E006/HVC229233', 'E006/HVC229611', 'E006/HVC237575', 'E006/HVC238240', 'E006/HVC258411', 'E006/HVC278220', 'E006/HVC279776', 'E006/HVC279987', 'E006/HVC280065', 'E006/HVC283720', 'E006/HVC284110', 'E006/HVC286248', 'E006/HVC289166', 'E006/HVC307350', 'E006/HVC325101', 'E006/HVC325801', 'E006/HVC327718', 'E006/HVC329883', 'E006/HVC330657', 'E006/HVC333739', 'E006/HVC334553', 'E006/HVC342037', 'E006/HVC342555', 'E006/HVC342574', 'E006/HVC344602', 'E006/HVC353470', 'E006/HVC355527', 'E006/HVC361500', 'E006/HVC363634', 'E006/HVC364802', 'E006/HVC384707', 'E006/HVC391819', 'E006/HVC397337', 'E006/HVC401377', 'E006/HVC405887', 'E006/HVC410673', 'E006/HVC411997', 'E006/HVC418818', 'E006/HVC422800', 'E006/HVC426052', 'E006/HVC428254', 'E006/HVC454277', 'E006/HVC456896', 'E006/HVC456955', 'E006/HVC457943', 'E006/HVC458485', 'E006/HVC459157', 'E006/HVC460515', 'E006/HVC462990', 'E006/HVC468328', 'E006/HVC474932', 'E006/HVC476364', 'E006/HVC488561', 'E006/HVC493576', 'E006/HVC497605', 'E006/HVC502408', 'E006/HVC505966', 'E006/HVC514846', 'E006/HVC515444', 'E006/HVC518852', 'E006/HVC522828', 'E006/HVC523689', 
'E006/HVC528640', 'E006/HVC528925', 'E006/HVC531593', 'E006/HVC533182', 'E006/HVC535101', 'E006/HVC553695', 'E006/HVC554261', 'E006/HVC556677', 'E006/HVC560384', 'E006/HVC568379', 'E006/HVC569472', 'E006/HVC582072', 'E006/HVC600875', 'E006/HVC602088', 'E006/HVC611698', 'E006/HVC612697', 'E006/HVC614602', 'E006/HVC618594', 'E006/HVC620168', 'E006/HVC620202', 'E006/HVC621902', 'E006/HVC622327', 'E006/HVC624578', 'E006/HVC635304', 'E006/HVC639480', 'E006/HVC646979', 'E006/HVC648989', 'E006/HVC652121', 'E006/HVC656513', 'E006/HVC662543', 'E006/HVC662906', 'E006/HVC665754', 'E006/HVC681179', 'E006/HVC687355', 'E006/HVC688682', 'E006/HVC690097', 'E006/HVC690730', 'E006/HVC698488', 'E006/HVC701531', 'E006/HVC703233', 'E006/HVC713044', 'E006/HVC735580', 'E006/HVC747595', 'E006/HVC749712', 'E006/HVC767782', 'E006/HVC772466', 'E006/HVC787057', 'E006/HVC789721', 'E006/HVC807168', 'E006/HVC807393', 'E006/HVC809384', 'E006/HVC810540', 'E006/HVC828816', 'E006/HVC829086', 'E006/HVC834685', 'E006/HVC849484', 'E006/HVC859116', 'E006/HVC861803', 'E006/HVC869061', 'E006/HVC878724', 'E006/HVC893705', 
'E007/HVC002028', 'E007/HVC012055', 'E007/HVC013620', 'E007/HVC024720', 'E007/HVC043528', 'E007/HVC046804', 'E007/HVC047222', 'E007/HVC064992', 'E007/HVC066613', 'E007/HVC071231', 'E007/HVC071929', 'E007/HVC072033', 'E007/HVC110332', 'E007/HVC125071', 'E007/HVC138940', 'E007/HVC139070', 'E007/HVC141438', 'E007/HVC142099', 'E007/HVC143661', 'E007/HVC146985', 'E007/HVC149805', 'E007/HVC153203', 'E007/HVC154413', 'E007/HVC163490', 'E007/HVC163994', 'E007/HVC164997', 'E007/HVC168070', 'E007/HVC187575', 'E007/HVC188603', 'E007/HVC191548', 'E007/HVC192125', 'E007/HVC210278', 'E007/HVC215067', 'E007/HVC228860', 'E007/HVC230859', 'E007/HVC238978', 'E007/HVC249786', 'E007/HVC264359', 'E007/HVC265527', 'E007/HVC265937', 'E007/HVC268972', 'E007/HVC274749', 'E007/HVC282737', 'E007/HVC291968', 'E007/HVC309653', 'E007/HVC310828', 'E007/HVC340352', 'E007/HVC346671', 'E007/HVC361601', 'E007/HVC364077', 'E007/HVC368925', 'E007/HVC371431', 'E007/HVC373987', 'E007/HVC392785', 'E007/HVC394071', 'E007/HVC408493', 'E007/HVC417876', 'E007/HVC426050', 'E007/HVC443800', 'E007/HVC444390', 'E007/HVC452893', 'E007/HVC457923', 'E007/HVC464994', 'E007/HVC472300', 'E007/HVC487636', 'E007/HVC504147', 'E007/HVC504627', 'E007/HVC506256', 'E007/HVC506708', 'E007/HVC520328', 'E007/HVC521115', 'E007/HVC521502', 'E007/HVC531510', 'E007/HVC532935', 'E007/HVC533613', 'E007/HVC533864', 'E007/HVC556310', 'E007/HVC563159', 'E007/HVC567830', 'E007/HVC569046', 'E007/HVC571239', 'E007/HVC579877', 'E007/HVC580516', 'E007/HVC601352', 'E007/HVC614661', 'E007/HVC621478', 'E007/HVC637664', 'E007/HVC637836', 'E007/HVC662682', 'E007/HVC673234', 'E007/HVC678921', 'E007/HVC689995', 'E007/HVC716604', 'E007/HVC717579', 'E007/HVC721325', 'E007/HVC742055', 'E007/HVC742181', 'E007/HVC742545', 'E007/HVC750473', 'E007/HVC758802', 'E007/HVC792601', 'E007/HVC795210', 'E007/HVC817982', 'E007/HVC820027', 'E007/HVC821163', 'E007/HVC829421', 'E007/HVC848757', 'E007/HVC854779', 'E007/HVC861416', 'E007/HVC899701', 
'E008/HVC004051', 'E008/HVC005798', 'E008/HVC005874', 'E008/HVC011105', 'E008/HVC012286', 'E008/HVC014581', 'E008/HVC017202', 'E008/HVC019944', 'E008/HVC020580', 'E008/HVC021811', 'E008/HVC032223', 'E008/HVC037095', 'E008/HVC040572', 'E008/HVC047131', 'E008/HVC048862', 'E008/HVC055437', 'E008/HVC062456', 'E008/HVC064306', 'E008/HVC066754', 'E008/HVC070871', 'E008/HVC071362', 'E008/HVC072107', 'E008/HVC073050', 'E008/HVC073259', 'E008/HVC075167', 'E008/HVC078609', 'E008/HVC084661', 'E008/HVC085246', 'E008/HVC086395', 'E008/HVC086844', 'E008/HVC088744', 'E008/HVC089067', 'E008/HVC098263', 'E008/HVC098339', 'E008/HVC098605', 'E008/HVC099921', 'E008/HVC106354', 'E008/HVC114668', 'E008/HVC115622', 'E008/HVC121847', 'E008/HVC126738', 'E008/HVC138871', 'E008/HVC153321', 'E008/HVC153396', 'E008/HVC153976', 'E008/HVC156244', 'E008/HVC168209', 'E008/HVC168844', 'E008/HVC174976', 'E008/HVC188465', 'E008/HVC191671', 'E008/HVC197105', 'E008/HVC209292', 'E008/HVC209449', 'E008/HVC233537', 'E008/HVC235229', 'E008/HVC235474', 'E008/HVC236928', 'E008/HVC236994', 'E008/HVC237313', 'E008/HVC240969', 'E008/HVC241917', 'E008/HVC247689', 'E008/HVC250104', 'E008/HVC251703', 'E008/HVC253652', 'E008/HVC256814', 'E008/HVC261480', 'E008/HVC285748', 'E008/HVC287957', 'E008/HVC289024', 'E008/HVC289712', 'E008/HVC290955', 'E008/HVC295907', 'E008/HVC303213', 'E008/HVC306071', 'E008/HVC315009', 'E008/HVC319727', 'E008/HVC325910', 'E008/HVC346407', 'E008/HVC351212', 'E008/HVC351591', 'E008/HVC352558', 'E008/HVC355532', 'E008/HVC362422', 'E008/HVC364287', 'E008/HVC370913', 'E008/HVC380981', 'E008/HVC384901', 'E008/HVC386264', 'E008/HVC392154', 'E008/HVC418232', 'E008/HVC420960', 'E008/HVC427713', 'E008/HVC434994', 'E008/HVC439950', 'E008/HVC452328', 'E008/HVC456758', 'E008/HVC456920', 'E008/HVC460769', 'E008/HVC463463', 'E008/HVC464389', 'E008/HVC467483', 'E008/HVC471030', 'E008/HVC478214', 'E008/HVC487106', 'E008/HVC494777', 'E008/HVC500518', 'E008/HVC502461', 'E008/HVC510399', 'E008/HVC511325', 
'E008/HVC515906', 'E008/HVC531345', 'E008/HVC534388', 'E008/HVC537752', 'E008/HVC541280', 'E008/HVC543101', 'E008/HVC545765', 'E008/HVC546453', 'E008/HVC546707', 'E008/HVC558248', 'E008/HVC559646', 'E008/HVC559709', 'E008/HVC562342', 'E008/HVC567771', 'E008/HVC569240', 'E008/HVC569268', 'E008/HVC569396', 'E008/HVC585862', 'E008/HVC586750', 'E008/HVC592637', 'E008/HVC594882', 'E008/HVC600649', 'E008/HVC605623', 'E008/HVC606232', 'E008/HVC621517', 'E008/HVC659085', 'E008/HVC672425', 'E008/HVC682478', 'E008/HVC694783', 'E008/HVC695405', 'E008/HVC697570', 'E008/HVC699906', 'E008/HVC715420', 'E008/HVC716306', 'E008/HVC722460', 'E008/HVC727354', 'E008/HVC727831', 'E008/HVC742203', 'E008/HVC742398', 'E008/HVC747733', 'E008/HVC768606', 'E008/HVC768848', 'E008/HVC774243', 'E008/HVC774972', 'E008/HVC786239', 'E008/HVC792125', 'E008/HVC797077', 'E008/HVC800632', 'E008/HVC815912', 'E008/HVC827774', 'E008/HVC834599', 'E008/HVC842950', 'E008/HVC852334', 'E008/HVC856872', 'E008/HVC858734', 'E008/HVC859814', 'E008/HVC869454', 'E008/HVC874477', 'E008/HVC878542', 'E008/HVC878780', 'E008/HVC894474', 'E008/HVC898969'
]

def preprocess(names,
               events_dir='/u/drspeech/data/Aladdin/corpora/trecvid2011/events/'):
    """Return ``names`` ordered by the size of their ``.htk`` files, largest first.

    Args:
        names: iterable of file names relative to ``events_dir``, without
            the ``.htk`` extension (e.g. ``'E001/HVC006045'``).
        events_dir: directory holding the ``.htk`` files.  Defaults to the
            location that used to be hard-coded; a trailing separator is
            optional.

    Returns:
        A new list containing the same names sorted by descending file
        size.  Names whose files are the same size keep their input order.

    Raises:
        OSError: propagated from ``os.path.getsize`` when a file is missing
            or cannot be accessed.
    """
    # join(dir, '') appends a separator only when one is missing, so the
    # default produces exactly the paths the hard-coded prefix produced.
    prefix = os.path.join(events_dir, '')

    def htk_size(name):
        return os.path.getsize(prefix + name + '.htk')

    # A named key function replaces the parenthesised/tuple-parameter
    # lambdas, which are a syntax error on Python 3.
    return sorted(names, key=htk_size, reverse=True)

if __name__ == '__main__':
    # Run on the hadoop runner with pickle selected for the input, internal
    # and output protocols.
    mr_args = ['-v', '--strict-protocols',
               '-r', 'hadoop',
               '--input-protocol', 'pickle',
               '--output-protocol', 'pickle',
               '--protocol', 'pickle']

    # Take at most the first 1000 names, then let preprocess() order them
    # by file size, largest first.
    meeting_names = all_meeting_names[:1000]
    meeting_names = preprocess(meeting_names)
    # Single-argument print(...) emits the same text on Python 2 and 3.
    print("Processing {0} input files".format(len(meeting_names)))
    # One input record per name: the name is the key, the value is None.
    task_args = [protocol.write(name, None) + "\n" for name in meeting_names]

    # The timer covers job construction as well as the run itself.
    start = time.time()
    job = ClusterMRJob(args=mr_args).sandbox(stdin=task_args)
    runner = job.make_runner()
    runner.run()

    print("Tasks done. Total execution time: {0} seconds.".format(
        time.time() - start))
示例#12
0
import time
import os.path

from cluster_map import ClusterMRJob
from mrjob.protocol import PickleProtocol as protocol

 
# Video file names, given relative to the events directory and without the
# '.htk' extension; only a subset is listed here for brevity.
all_meeting_names = ['E002/HVC686083', 'E002/HVC690403', 'E002/HVC693882', 'E002/HVC698972', 'E002/HVC699015', 'E002/HVC724149', 'E002/HVC725414', 'E002/HVC732985', 'E002/HVC738245', 'E002/HVC742544', 'E002/HVC765357', 'E002/HVC765442', 'E002/HVC768066', 'E002/HVC781040', 'E002/HVC781550', 'E002/HVC787358', 'E002/HVC788059', 'E002/HVC792707', 'E002/HVC800578', 'E002/HVC804688', 'E002/HVC805254', 'E002/HVC805769', 'E002/HVC820877', 'E002/HVC826976', 'E002/HVC827162', 'E002/HVC831492', 'E002/HVC843356', 'E002/HVC843604', 'E002/HVC848042', 'E002/HVC852977', 'E002/HVC860018', 'E002/HVC860974', 'E002/HVC862982', 'E002/HVC868918', 'E002/HVC875036', 'E002/HVC876313', 'E002/HVC885657', 'E002/HVC893844']

def preprocess(names,
               events_dir='/u/drspeech/data/Aladdin/corpora/trecvid2011/events/'):
    """Return ``names`` ordered by the size of their ``.htk`` files, largest first.

    Args:
        names: iterable of file names relative to ``events_dir``, without
            the ``.htk`` extension (e.g. ``'E002/HVC686083'``).
        events_dir: directory holding the ``.htk`` files.  Defaults to the
            location that used to be hard-coded; a trailing separator is
            optional.

    Returns:
        A new list containing the same names sorted by descending file
        size.  Names whose files are the same size keep their input order.

    Raises:
        OSError: propagated from ``os.path.getsize`` when a file is missing
            or cannot be accessed.
    """
    # join(dir, '') appends a separator only when one is missing, so the
    # default produces exactly the paths the hard-coded prefix produced.
    prefix = os.path.join(events_dir, '')

    def htk_size(name):
        return os.path.getsize(prefix + name + '.htk')

    # A named key function replaces the parenthesised/tuple-parameter
    # lambdas, which are a syntax error on Python 3.
    return sorted(names, key=htk_size, reverse=True)

if __name__ == '__main__':
    # Run on the hadoop runner with pickle selected for the input, internal
    # and output protocols.
    mr_args = ['-v', '--strict-protocols',
               '-r', 'hadoop',
               '--input-protocol', 'pickle',
               '--output-protocol', 'pickle',
               '--protocol', 'pickle']

    # Take at most the first 250 names, then let preprocess() order them
    # by file size, largest first.
    meeting_names = all_meeting_names[:250]
    meeting_names = preprocess(meeting_names)
    # Single-argument print(...) emits the same text on Python 2 and 3.
    print("Processing {0} input files".format(len(meeting_names)))
    # One input record per name: the name is the key, the value is None.
    task_args = [protocol.write(name, None) + "\n" for name in meeting_names]

    # The timer covers job construction as well as the run itself.
    start = time.time()
    job = ClusterMRJob(args=mr_args).sandbox(stdin=task_args)
    runner = job.make_runner()
    runner.run()

    print("Tasks done. Total execution time: {0} seconds.".format(
        time.time() - start))