import threading
import time

from BaseThread import BaseThread

# Shared by both thread stages: at most four holders may be inside a
# ``with sem:`` section at any one time (five threads are started below).
sem = threading.Semaphore(4)


# First-stage work of each thread (handed to BaseThread as ``target``).
def my_thread_job():
    """Print a greeting, then sleep for one second, while holding ``sem``."""
    with sem:
        message = "{} runing".format("hi")
        print(message)
        time.sleep(1)


# Second-stage work of each thread (handed to BaseThread as ``callback``).
def cb(argv1, argv2):
    """Print both arguments on one line, space-separated, while holding ``sem``."""
    with sem:
        line = "{} {}".format(argv1, argv2)
        print(line)


# BaseThread is a project class not shown here; presumably it runs ``target``
# and afterwards calls ``callback(*callback_args)`` -- confirm against BaseThread.
for i in range(5):
    BaseThread(
        name='test',
        target=my_thread_job,
        callback=cb,
        callback_args=("hello", "word"),
    ).start()
append_data(append_fileName, data) except: with open("error_content", 'a+', encoding='utf8') as f: f.write(url) f.write("\n") print("error") else: print("{} done".format(childNumber)) nYC = newYorkCrawl() year = sys.argv[1] month = sys.argv[2] fileName = "NYC_{}_{}.json".format(year, month) append_fileName = "NYC_{}_{}.txt".format(year, month) data = nYC.openJson(fileName) print('{},{},{}'.format(year, month, data['response']['meta']['hits'])) print('{},{},{}'.format(year, month, len(data['response']['docs']))) maxItem = len(data['response']['docs']) sem = threading.Semaphore(100) for child in range(0, maxItem): if child != 0 and child % 1000 == 0: time.sleep(200) tmp = data['response']['docs'][child] BaseThread(name='test', target=my_thread_job, callback=cb, callback_args=(tmp, child)).start()
# First-stage work of each thread (handed to BaseThread as ``target``).
def my_thread_job():
    """Print the marker ``work`` while holding the shared semaphore ``sem``."""
    with sem:
        print("work")


# Second-stage work of each thread (handed to BaseThread as ``callback``).
def cb(year, month):
    """Process one (year, month) pair through the ``nYC`` crawler object.

    While holding ``sem``: calls ``nYC.get_monthData(year, month)``, hands the
    result to ``nYC.write_monthData`` under the name ``NYC_<year>_<month>.json``,
    re-opens that name with ``nYC.openJson`` and prints two CSV-style lines:
    ``year,month,<response.meta.hits>`` and ``year,month,<len(response.docs)>``.

    Args:
        year: Year value; inserted verbatim into the file name and the output.
        month: Month value; inserted verbatim into the file name and the output.
    """
    with sem:
        fileName = 'NYC_{}_{}.json'.format(year, month)
        monthData = nYC.get_monthData(year, month)
        nYC.write_monthData(fileName, monthData)
        # Re-open the same file name and report the counts found in it.
        data = nYC.openJson(fileName)
        response = data['response']
        print('{},{},{}'.format(year, month, response['meta']['hits']))
        print('{},{},{}'.format(year, month, len(response['docs'])))


# testing
nYC = newYorkCrawl()
year = sys.argv[1]  # kept as the raw command-line string
maxMonth = int(sys.argv[2])
# At most four holders may be inside a ``with sem:`` section at once.
sem = threading.Semaphore(4)
# NOTE(review): range() excludes its stop value, so month ``maxMonth`` itself
# is never requested (e.g. 12 -> months 1..11) -- confirm this is intended.
for month in range(1, maxMonth):
    BaseThread(
        name='test',
        target=my_thread_job,
        callback=cb,
        callback_args=(year, month),
    ).start()