from BaseThread import BaseThread
import threading
import time

# Work performed by the thread itself, before the callback runs.
def my_thread_job():
    """Print a short status line, then sleep for one second.

    The whole body runs while holding the module-level semaphore ``sem``.
    """
    sem.acquire()
    try:
        status = "{} runing".format("hi")
        print(status)
        time.sleep(1)
    finally:
        # Always hand the slot back, even if printing or sleeping fails.
        sem.release()
# Work performed after the thread's job: the callback.
def cb(argv1, argv2):
    """Print ``argv1`` and ``argv2`` separated by a single space.

    The print happens while holding the module-level semaphore ``sem``.
    """
    sem.acquire()
    try:
        line = "{} {}".format(argv1, argv2)
        print(line)
    finally:
        sem.release()


# Semaphore acquired by my_thread_job and cb (initial value 4).
sem = threading.Semaphore(4)

# Create and start five BaseThread instances, one after another. Each gets
# my_thread_job as its target and cb (with two fixed arguments) as its callback.
for _ in range(5):
    worker = BaseThread(
        name='test',
        target=my_thread_job,
        callback=cb,
        callback_args=("hello", "word"),
    )
    worker.start()
示例#2
0
            append_data(append_fileName, data)
        except:
            with open("error_content", 'a+', encoding='utf8') as f:
                f.write(url)
                f.write("\n")
            print("error")
        else:
            print("{} done".format(childNumber))


# Crawler instance, plus the year and month given on the command line.
nYC = newYorkCrawl()
year, month = sys.argv[1], sys.argv[2]

# File that is loaded below, and the file name used with append_data
# elsewhere in this script.
fileName = "NYC_{}_{}.json".format(year, month)
append_fileName = "NYC_{}_{}.txt".format(year, month)

# Load the JSON and print two summary lines: the reported hit count and the
# number of documents actually present.
data = nYC.openJson(fileName)
response = data['response']
print('{},{},{}'.format(year, month, response['meta']['hits']))
docs = response['docs']
maxItem = len(docs)
print('{},{},{}'.format(year, month, maxItem))

# Semaphore acquired by the worker functions (initial value 100).
sem = threading.Semaphore(100)

# Start one BaseThread per document, passing the document and its index to
# the callback.
for child, tmp in enumerate(docs):
    # Pause for 200 seconds before launching every 1000th thread
    # (presumably to throttle the crawl — confirm).
    if child and child % 1000 == 0:
        time.sleep(200)
    worker = BaseThread(
        name='test',
        target=my_thread_job,
        callback=cb,
        callback_args=(tmp, child),
    )
    worker.start()
示例#3
0
# Work performed by the thread itself, before the callback runs.
def my_thread_job():
    """Print ``work`` while holding the module-level semaphore ``sem``."""
    sem.acquire()
    try:
        print("work")
    finally:
        sem.release()


# Work performed after the thread's job: the callback.
def cb(year, month):
    """Fetch one month of data, store it, and print two summary lines.

    While holding the module-level semaphore ``sem``, this calls
    ``nYC.get_monthData(year, month)``, passes the result to
    ``nYC.write_monthData`` together with the name
    ``NYC_<year>_<month>.json``, loads that name again through
    ``nYC.openJson``, and prints ``year,month,<hits>`` followed by
    ``year,month,<number of docs>``.

    Args:
        year: Year to fetch; also part of the file name.
        month: Month to fetch; also part of the file name.
    """
    with sem:
        fileName = 'NYC_{}_{}.json'.format(year, month)
        monthData = nYC.get_monthData(year, month)
        nYC.write_monthData(fileName, monthData)
        # The summary is taken from what openJson returns for the file name,
        # not from monthData directly.
        data = nYC.openJson(fileName)
        print('{},{},{}'.format(year, month, data['response']['meta']['hits']))
        print('{},{},{}'.format(year, month, len(data['response']['docs'])))


# testing

# Crawler instance; the year and the month bound come from the command line.
nYC = newYorkCrawl()
year = sys.argv[1]
maxMonth = int(sys.argv[2])

# Semaphore acquired by my_thread_job and cb (initial value 4).
sem = threading.Semaphore(4)

# Start one BaseThread per month, passing (year, month) to the callback.
# NOTE(review): range(1, maxMonth) stops at maxMonth - 1 — confirm that the
# upper bound is meant to be exclusive.
for month in range(1, maxMonth):
    worker = BaseThread(
        name='test',
        target=my_thread_job,
        callback=cb,
        callback_args=(year, month),
    )
    worker.start()