# Round-trip check: every rank writes its slice of `big` to a temporary file,
# then the root process reads all the pieces back and stitches them together.
#
# locs[rank] is used as this rank's first row and locs[rank + 1] as its end
# bound; appending the total row count supplies the end bound for the last rank.
# (presumably `disps` holds per-rank starting row offsets -- confirm against
# the helper that produces it)
locs = list(disps)
locs.append(big.shape[0])
# par_print(comm, 'This is my big shape %s' % (str(big.shape)))

my_start = int(locs[rank])
my_end = int(locs[rank + 1])
# The bounds are row positions, so slice positionally (half-open) with .iloc.
# The removed .ix indexer slices by *label*, end-inclusive, whenever the index
# is integer-typed, which would make neighbouring ranks overlap by one row.
my_chunk = big.iloc[my_start:my_end]
# par_print(comm, 'These are my chunk stats %s' % (my_chunk.__str__()[38:69]))

my_save_name = '##chunk%s.dat##' % (rank)
# DataFrame.save was removed from pandas; to_pickle is its replacement.
my_chunk.to_pickle(my_save_name)
par_print(comm, "my save name %s" % (my_save_name))

comm.Barrier()  # Make sure root stops here to have all files saved

# Let the root process finish it off
# NOTE(review): the nesting below was reconstructed from a whitespace-collapsed
# source -- confirm that both final writes belong under the root-only branch.
if rank == 0:
    chunks = []
    for i in range(size):
        temp_name = '##chunk%s.dat##' % (i)
        # pd.DataFrame.load was removed from pandas; read_pickle replaces it.
        chunks.append(pd.read_pickle(temp_name))
        os.remove(temp_name)
    # Concatenate once instead of growing a frame inside the loop, which
    # re-copies every previously loaded row on each iteration.
    tempbig = pd.concat(chunks)
    tempbig.to_pickle('tempbig.db')
    big.to_pickle('big.db')
# Split it up and do the work!
# Each rank takes the rows between locs[rank] and locs[rank + 1]; the total row
# count is appended so the last rank also has an end bound.
cts, disps = gatv_scatv_tuples(size, big.shape[0])
locs = list(disps)
locs.append(big.shape[0])

my_start = int(locs[rank])
my_end = int(locs[rank + 1])
# Slice by row position (half-open) with .iloc. The removed .ix indexer would
# slice by label, end-inclusive, if the index were integer-typed.
my_chunk = big.iloc[my_start:my_end]

# First and last index labels of this rank's slice, used only in the progress
# messages below.
first_tick = str(my_chunk.index[0])
last_tick = str(my_chunk.index[-1])
msg = 'About to gather %s data for tickers %s to %s'

# Stage 1: apply equity_data to every row of this rank's slice.
par_print(comm, msg % ('Equity', first_tick, last_tick))
my_chunk = my_chunk.apply(equity_data, axis=1)
par_print(comm, 'Finished to gathering Equity Data')
if rank == 0:
    print('\n\nTotal time after equity %.4f\n\n' % (time() - start_time))

# Stage 2: build the options data from the equity-augmented slice.
par_print(comm, msg % ('Options', first_tick, last_tick))
my_opts = fill_option_data(my_chunk)
par_print(comm, 'Finished to gathering Options Data')
if rank == 0:
    print('\n\nTotal time after options %.4f\n\n' % (time() - start_time))

# Attach the options columns to the equity rows (DataFrame.join aligns on the
# index by default).
my_chunk = my_chunk.join(my_opts)