# Begin this entry's part of the header: an empty line, then a '# <name>'
# comment line. Both are encoded to bytes before being written.
header.write('\n'.encode())
header.write('# {}\n'.format(name).encode())

# Optional directives, each gated by its own flag.
if create:
    directive.create(name, size)
if erase:
    directive.erase_p(name)

if type == 'partitionImage':
    # A positive chunkSize requests splitting; any other value passes
    # chunksize=0, in which case the result will contain the whole image
    # as a single chunk.
    if chunkSize > 0:
        print('[i] Splitting ...')
    chunks = utils.splitFile(
        imageFile, tmpDir, chunksize=chunkSize if chunkSize > 0 else 0)

    for index, inputChunk in enumerate(chunks):
        print('[i] Processing chunk: {}'.format(inputChunk))
        name1, ext1 = os.path.splitext(inputChunk)

        # Default to using the chunk untouched; when lzo is set, the output
        # goes to a sibling path with the extension replaced by '.lzo'.
        outputChunk = inputChunk
        if lzo:
            outputChunk = name1 + '.lzo'
            print('[i] LZO: {} -> {}'.format(inputChunk, outputChunk))
            # NOTE(review): presumably compresses inputChunk into
            # outputChunk -- confirm against utils.lzo.
            utils.lzo(inputChunk, outputChunk)
if __name__ == '__main__':
    # Re-wrap stdout so everything printed is encoded as UTF-8, with
    # unencodable characters replaced instead of raising.
    sys.stdout = TextIOWrapper(
        sys.stdout.buffer,
        encoding='utf-8',
        errors='replace',
    )

    today = datetime.now()

    # Directory holding the source file and receiving the destination files.
    pathName = "./"

    # Input file to process; edit this to point at your own file (absolute
    # path). Inputs used previously: pathName + "./SME_Closed.csv",
    # "./2.csv", "./need_to_get_company_id.txt".
    fileName = "./1.csv"

    # Number of worker threads to run; the input is split into this many
    # parts. NOTE(review): the parts are presumably written as
    # "<fileName>_<index>", since that is what each worker is handed below --
    # confirm against splitFile.
    splitFileNum = 3
    splitFile(fileName, pathName, splitFileNum)

    # Endpoints used to fetch the business item code and the business item
    # description. The meaning of the boolean passed to ConnectionObject is
    # not visible here.
    url0 = "http://data.gcis.nat.gov.tw/od/data/api/236EE382-4942-41A9-BD03-CA0709025E7C"
    url1 = "http://lasai.org/od/data/api/426D5542-5F05-43EB-83F9-F1300F14E1F1"
    co0 = ConnectionObject(url0, True)
    co1 = ConnectionObject(url1, False)

    # One worker per split part; every worker shares the two connection
    # objects and writes to its own 'parse_category_<index>.csv'.
    threads = [
        IndustryCategoryGetter(
            part,
            fileName + "_" + str(part),
            'parse_category_' + str(part) + '.csv',
            co0,
            co1,
        )
        for part in range(splitFileNum)
    ]

    # Start all workers only after every one of them has been constructed.
    for worker in threads:
        worker.start()

    # Leave the shared counter at zero, as the original script does.
    index = 0