def main():
    sem = Semaphore(10)  # limit the number of concurrently running threads
    t_list = [MyThread(i, sem) for i in range(1000)]
    for t in t_list:
        t.start()
    for t in t_list:
        t.join()
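MyThread is not shown in this snippet; a minimal sketch of a worker that honors the semaphore (the class body below is an assumption, not the original) could be:

from threading import Thread, Semaphore

class MyThread(Thread):
    def __init__(self, i, sem):
        super().__init__()
        self.i = i
        self.sem = sem

    def run(self):
        with self.sem:  # at most 10 of the 1000 threads enter at once
            print(f"worker {self.i} running")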
class Pool(object):
    def __init__(self):
        self.semaphore = Semaphore(config.settings.QUEUE_SIZE)

    def queue_producer(self, producer):
        """Yields items as soon as the semaphore allows."""
        try:
            for item in producer:
                self.semaphore.acquire()
                yield item
        except Exception:
            logger.exception("Error in producer parallel task")

    def queue_consumer(self, consumer):
        """Returns an item-consumption function that signals the semaphore."""
        def consumer_function(item):
            self.semaphore.release()
            try:
                consumer(item)
            except Exception:
                logger.exception("Error in consumer parallel task")
        return consumer_function

    def parallelize(self, consumer, producer):
        """Queues the production of items to parallelize, limiting RAM usage.

        imap() consumes the generator lazily, so it is more memory efficient;
        imap_unordered() does not wait for each item to be processed in order.

        Args:
            consumer (function): Ingests and processes items.
            producer (generator): Yields items to be consumed.
        """
        logger.info("Starting parallelization")
        self.pool = ThreadPool(config.settings.NUM_CONCURRENT_WORKERS)
        self.pool.imap_unordered(self.queue_consumer(consumer),
                                 self.queue_producer(producer))
        self.pool.close()
        self.pool.join()
        logger.info("Finishing parallelization")
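A minimal usage sketch for this class, assuming config.settings provides QUEUE_SIZE and NUM_CONCURRENT_WORKERS and that logger is configured; handle and numbers are hypothetical names:

def handle(item):
    print("processed", item)      # stand-in for real per-item work

def numbers():
    for i in range(1_000_000):    # a producer too large to buffer whole
        yield i

Pool().parallelize(handle, numbers())

The semaphore caps the number of produced-but-unconsumed items at QUEUE_SIZE, which is what bounds memory use.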
def hySpeechRecognition(self, filename, content):
    resultQueue = queue.Queue()
    s = Semaphore(0)
    t_api = Thread(target=self.request_ifly_api,
                   args=(filename, content, s, resultQueue))
    t_sdk = Thread(target=self.request_ifly_sdk,
                   args=(filename, content, s, resultQueue))
    t_api.daemon = True
    t_sdk.daemon = True
    t_sdk.start()
    t_api.start()
    try:
        s.acquire()                  # block until the first worker finishes
        result = resultQueue.get()
        return result
    except Exception as e:
        print(e)
        s.acquire()                  # fall back to the second worker's result
        result = resultQueue.get()
        return result
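request_ifly_api and request_ifly_sdk are not shown; judging by the call sites, each pushes its result onto the queue and releases the semaphore so that whichever finishes first unblocks the caller. A hypothetical sketch:

def request_ifly_api(self, filename, content, s, resultQueue):
    result = self._recognize_via_api(filename, content)  # hypothetical helper
    resultQueue.put(result)
    s.release()  # wake the caller blocked in s.acquire()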
def train(layer, logger, shapes, args, e, data_size, trainloader):
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(layer.parameters(), lr=0.01, momentum=0.9,
                          weight_decay=5e-4)
    optimizer.zero_grad()
    layer.train()
    batch_idx = 0

    def backward_rank2():
        residual = None
        batch_idx = 0
        grad_recv1 = torch.zeros(shapes[2])
        dist.recv(tensor=grad_recv1, src=3)
        while True:
            print(" backward batch_idx:" + str(batch_idx))
            grad_recv1 = grad_recv1.cuda(2)
            try:
                inputs, outputs = outputs_queue.get(block=True, timeout=4)
            except Empty:
                print("empty........")
                break
            inputs.requires_grad_()
            outputs.backward(grad_recv1)
            if batch_idx % 3 == 0:
                optimizer.step()
                optimizer.zero_grad()
            batch_idx += 1
            if data_size == batch_idx:
                transfer(4, inputs.grad.cpu(), None)
                print("backward in send..")
                break
            grad_recv1 = transfer(4, inputs.grad.cpu(), shapes[2])
            print("backward send.......")
        print("backward end....")

    def backward_rank1():
        residual = None
        batch_idx = 0
        grad_recv1 = torch.zeros(shapes[1])
        dist.recv(tensor=grad_recv1, src=2)
        while True:
            print(" backward batch_idx:" + str(batch_idx))
            grad_recv1 = grad_recv1.cuda(1)
            try:
                inputs, outputs = outputs_queue.get(block=True, timeout=4)
            except Empty:
                print("empty........")
                break
            inputs.requires_grad_()
            outputs.backward(grad_recv1)
            if batch_idx % 3 == 0:
                optimizer.step()
                optimizer.zero_grad()
            batch_idx += 1
            if data_size == batch_idx:
                transfer(5, inputs.grad.cpu(), None)
                print("backward in send..")
                break
            grad_recv1 = transfer(5, inputs.grad.cpu(), shapes[1])
            print("backward send.......")
        print("backward end....")

    def backward_rank0(semaphore):
        batch_idx = 0
        grad_recv = torch.zeros(shapes[0])
        dist.recv(tensor=grad_recv, src=1)
        while True:
            grad_recv = grad_recv.cuda(0)
            print(" backward batch_idx:" + str(batch_idx))
            try:
                loss = outputs_queue.get(block=True, timeout=4)
            except Empty:
                print("empty........")
                break
            loss.backward(grad_recv)
            if batch_idx % 3 == 0:
                optimizer.step()
                optimizer.zero_grad()
            batch_idx += 1
            if data_size == batch_idx:
                print("eq...")
                break
            grad_recv = transfer(6, None, shapes[0])
            print("backward send.....")
        print("backward end..")

    if dist.get_rank() == 0:
        outputs_queue = ThreadQueue(args.buffer_size)
        semaphore = Semaphore(args.buffer_size)
        back_process = Process(target=backward_rank0, args=(semaphore, ))
        back_process.start()
        for batch_idx, (inputs, targets) in enumerate(trainloader):
            print("batch: " + str(batch_idx))
            inputs = inputs.cuda(0)
            outputs = layer(inputs)
            outputs_queue.put(outputs)
            transfer(dist.get_rank(), outputs.cpu(), None)
            print("send........")
        print("start to end....")
        back_process.join()
        e.set()
        print("end....")
    elif dist.get_rank() == 1:
        outputs_queue = ThreadQueue(args.buffer_size)
        back_process = Process(target=backward_rank1, args=())
        rec_val = torch.zeros(shapes[0])
        dist.recv(tensor=rec_val, src=0)  # receive the first activation before the loop
        back_process.start()
        for index, (_, targets) in enumerate(trainloader):
            print("batch_idx:" + str(index))
            rec_val = rec_val.cuda(1)
            rec_val.requires_grad_()
            outputs = layer(rec_val)
            outputs_queue.put([rec_val, outputs])
            if index == data_size - 1:
                transfer(dist.get_rank(), outputs.cpu(), None)
                print("the last send........")
                continue
            rec_val = transfer(dist.get_rank(), outputs.cpu(), shapes[0])
            print("send.................")
        print("start to end....")
        back_process.join()
        e.wait()
        print("end......")
    elif dist.get_rank() == 2:
        outputs_queue = ThreadQueue(args.buffer_size)
        back_process = Process(target=backward_rank2, args=())
        rec_val = torch.zeros(shapes[1])
        dist.recv(tensor=rec_val, src=1)
        back_process.start()
        for index, (_, targets) in enumerate(trainloader):
            print("batch_idx:" + str(index))
            rec_val = rec_val.cuda(2)
            rec_val.requires_grad_()
            outputs = layer(rec_val)
            outputs_queue.put([rec_val, outputs])
            if index == data_size - 1:
                transfer(dist.get_rank(), outputs.cpu(), None)
                print("the last send........")
                continue
            rec_val = transfer(dist.get_rank(), outputs.cpu(), shapes[1])
            print("send.................")
        print("start to end....")
        back_process.join()
        e.wait()
        print("end......")
    elif dist.get_rank() == 3:
        residual = None
        train_loss = 0
        correct = 0
        total = 0
        criterion.cuda(3)
        rec_val = torch.zeros(shapes[2])
        dist.recv(tensor=rec_val, src=2)
        for batch_idx, (_, targets) in enumerate(trainloader):
            rec_val = rec_val.cuda(3)
            rec_val.requires_grad_()
            outputs = layer(rec_val)
            # start to backward....
            targets = targets.cuda(3)
            loss = criterion(outputs, targets)
            loss.backward()
            quantize_grad = rec_val.grad.cpu()
            if batch_idx % 3 == 0:
                optimizer.step()
                train_loss += loss.item()
                _, predicted = outputs.max(1)
                total += targets.size(0)
                correct += predicted.eq(targets).sum().item()
                progress_bar(
                    batch_idx, data_size, 'Loss: %.3f | Acc: %.3f%% (%d/%d)' %
                    (train_loss / (batch_idx + 1), 100. * correct / total,
                     correct, total))
                optimizer.zero_grad()
            else:
                progress_bar(
                    batch_idx, data_size, 'Loss: %.3f | Acc: %.3f%% (%d/%d)' %
                    (train_loss / (batch_idx + 1), 100. * correct / total,
                     correct, total))
            logger.error("train:" + str(train_loss / (batch_idx + 1)))
            acc_str = "tacc: %.3f" % (100. * correct / total, )
            logger.error(acc_str)
            if batch_idx == data_size - 1:
                transfer(dist.get_rank(), quantize_grad, None)
                continue
            rec_val = transfer(dist.get_rank(), quantize_grad, shapes[2])
        e.wait()
        print("end....")
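Note that semaphore = Semaphore(args.buffer_size) above is created and passed to backward_rank0 but never acquired, so it does not actually bound the number of in-flight activations. The presumably intended backpressure pattern, shown here as a self-contained sketch rather than the author's code, is to acquire a slot before each forward pass and release it after each backward step:

import threading, queue

buffer_size = 4
slots = threading.Semaphore(buffer_size)   # counts free buffer slots
outputs_queue = queue.Queue()

def forward(n):
    for i in range(n):
        slots.acquire()                    # blocks once the buffer is full
        outputs_queue.put(i)               # stand-in for an activation tensor

def backward(n):
    for _ in range(n):
        outputs_queue.get(block=True)      # stand-in for one backward step
        slots.release()                    # frees a slot for the forward loop

t = threading.Thread(target=backward, args=(100,))
t.start()
forward(100)
t.join()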
            else:
                continue
        with open('products.txt', 'a', encoding="utf-8") as wFile:
            for product in products:
                wFile.write(product + '\n')
        return
    else:
        return None


if __name__ == '__main__':
    semaphore = Semaphore(3)
    show_banner()
    try:
        ua = UserAgent()
    except BaseException:
        ua = UserAgent(use_cache_server=False)
    all_categories_url = 'https://www.hepsiburada.com/tum-kategoriler'
    print(Back.RESET + Fore.BLUE + '\n' + 'Process was started successfully!')
    while True:
        try:
            auto_shutdown = input(
                Fore.GREEN +
from time import sleep
from multiprocessing.dummy import Pool as ThreadPool, Semaphore

sem = Semaphore(5)  # limit the maximum number of concurrent workers to 5

def goto_wc(i):
    with sem:
        print(f"[thread {i}] using the restroom")
        sleep(0.1)

def main():
    p = ThreadPool()
    p.map_async(goto_wc, range(50))
    p.close()
    p.join()

if __name__ == '__main__':
    main()
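One caveat: map_async discards worker exceptions unless the AsyncResult is collected. A variant of main that surfaces them:

def main():
    p = ThreadPool()
    res = p.map_async(goto_wc, range(50))
    p.close()
    p.join()
    res.get()  # re-raises the first exception, if any worker failed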
def train(layer, logger, shapes, args, e, data_size, trainloader):
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(layer.parameters(), lr=0.01, momentum=0.9,
                          weight_decay=5e-4)
    optimizer.zero_grad()
    layer.train()
    batch_idx = 0

    def backward_rank1():
        residual = None
        batch_idx = 0
        ten_len = tensor_len(shapes[1])
        # flat packed gradient: ten_len values plus 2 extra slots,
        # which presumably carry the quantization parameters
        grad_recv1 = torch.zeros(ten_len + 2)
        dist.recv(tensor=grad_recv1, src=2)
        while True:
            print(" backward batch_idx:" + str(batch_idx))
            grad_recv1 = de_piecewise_quantize(grad_recv1.cuda(), shapes[1])
            try:
                inputs, outputs = outputs_queue.get(block=True, timeout=4)
            except Empty:
                print("empty........")
                break
            inputs.requires_grad_()
            outputs.backward(grad_recv1)
            inputs_grad, residual = piecewise_quantize(inputs.grad,
                                                       logger=logger,
                                                       residual=residual)
            if batch_idx % 2 == 0:
                optimizer.step()
                optimizer.zero_grad()
            batch_idx += 1
            if data_size == batch_idx:
                transfer2(3, inputs_grad, None)
                print("backward in send..")
                break
            grad_recv1 = transfer2(3, inputs_grad, ten_len + 2)
            print("backward send.......")
        print("backward end....")

    def backward_rank0(semaphore):
        batch_idx = 0
        ten_len = tensor_len(shapes[0])
        grad_recv = torch.zeros(ten_len + 2)
        dist.recv(tensor=grad_recv, src=1)
        while True:
            grad_recv = de_piecewise_quantize(grad_recv.cuda(), shapes[0])
            print(" backward batch_idx:" + str(batch_idx))
            try:
                loss = outputs_queue.get(block=True, timeout=4)
            except Empty:
                print("empty........")
                break
            loss.backward(grad_recv)
            if batch_idx % 2 == 0:
                optimizer.step()
                optimizer.zero_grad()
            batch_idx += 1
            if data_size == batch_idx:
                print("eq...")
                break
            grad_recv = transfer2(4, None, ten_len + 2)
            print("backward send.....")
        print("backward end..")

    if dist.get_rank() == 0:
        outputs_queue = ThreadQueue(args.buffer_size)
        semaphore = Semaphore(args.buffer_size)
        back_process = Process(target=backward_rank0, args=(semaphore, ))
        back_process.start()
        for batch_idx, (inputs, targets) in enumerate(trainloader):
            print("batch: " + str(batch_idx))
            inputs = inputs.cuda()
            outputs = layer(inputs)
            outputs_queue.put(outputs)  # backward_rank0 consumes this queue
            outputs = q_act(outputs, char=True)
            transfer(dist.get_rank(), outputs.cpu(), None)
            print("send........")
        print("start to end....")
        back_process.join()
        e.set()
        print("end....")
    elif dist.get_rank() == 1:
        outputs_queue = ThreadQueue(args.buffer_size)
        back_process = Process(target=backward_rank1, args=())
        rec_val = torch.zeros(shapes[0], dtype=torch.int8)
        dist.recv(tensor=rec_val, src=0)  # receive the first activation before the loop
        back_process.start()
        for index, (_, targets) in enumerate(trainloader):
            print("batch_idx:" + str(index))
            rec_val = dq_act(rec_val)
            rec_val = rec_val.cuda()
            rec_val.requires_grad_()
            outputs = layer(rec_val)
            outputs_queue.put([rec_val, outputs])
            outputs = q_act(outputs, char=True)
            if index == data_size - 1:
                transfer(dist.get_rank(), outputs.cpu(), None)
                print("the last send........")
                continue
            rec_val = transfer(dist.get_rank(), outputs.cpu(), shapes[0])
            print("send.................")
        print("start to end....")
        back_process.join()
        e.wait()
        print("end......")
    elif dist.get_rank() == 2:
        rec_val = None
        residual = None
        train_loss = 0
        correct = 0
        total = 0
        correct_5 = 0
        correct_1 = 0
        criterion.cuda()
        if not torch.is_tensor(rec_val):
            rec_val = torch.zeros(shapes[1], dtype=torch.int8)
        dist.recv(tensor=rec_val, src=1)
        for batch_idx, (_, targets) in enumerate(trainloader):
            rec_val = dq_act(rec_val)
            rec_val = rec_val.cuda()
            rec_val.requires_grad_()
            outputs = layer(rec_val)
            # start to backward....
            targets = targets.cuda()
            loss = criterion(outputs, targets)
            loss.backward()
            quantize_grad, residual = piecewise_quantize(rec_val.grad,
                                                         logger=logger,
                                                         residual=residual)
            if batch_idx % 2 == 0:
                optimizer.step()
                train_loss += loss.item()
                # top-5 / top-1 accuracy bookkeeping
                _, predicted = outputs.topk(5, 1, True, True)
                total += targets.size(0)
                targets = targets.view(targets.size(0), -1).expand_as(predicted)
                correct = predicted.eq(targets).float()
                correct_5 += correct[:, :5].sum()
                correct_1 += correct[:, :1].sum()
                progress_bar(
                    batch_idx, data_size, 'Loss: %.3f | Acc: %.3f%% (%d/%d)' %
                    (train_loss / (batch_idx + 1), 100. * correct_5 / total,
                     correct_5, total))
                optimizer.zero_grad()
            else:
                progress_bar(
                    batch_idx, data_size, 'Loss: %.3f | Acc: %.3f%% (%d/%d)' %
                    (train_loss / (batch_idx + 1), 100. * correct_5 / total,
                     correct_5, total))
            logger.error("train:" + str(train_loss / (batch_idx + 1)))
            acc_str = "tacc1: %.3f" % (100. * correct_1 / total, )
            logger.error(acc_str)
            acc_str5 = "tacc5: %.3f" % (100. * correct_5 / total, )
            logger.error(acc_str5)
            if batch_idx == data_size - 1:
                transfer(dist.get_rank(), quantize_grad, None)
                continue
            rec_val = transfer(dist.get_rank(), quantize_grad, shapes[1])
        e.wait()
        print("end....")
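transfer and transfer2 are defined elsewhere in the project; from the call sites they appear to send a tensor downstream and then, when a shape or length is given, block until the next tensor arrives. A plausible reconstruction under that assumption (the PEERS tag-to-peer mapping below is invented for illustration):

def transfer2(tag, send_tensor, recv_len):
    # Hypothetical sketch: PEERS maps a channel tag to (dst_rank, src_rank).
    dst, src = PEERS[tag]
    if send_tensor is not None:
        dist.send(tensor=send_tensor, dst=dst)
    if recv_len is None:
        return None
    buf = torch.zeros(recv_len)   # flat buffer: ten_len values + 2 extra slots
    dist.recv(tensor=buf, src=src)
    return buf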
def train(layer, logger, args, grad_queue, grad_queue2, targets_queue, e,
          data_size, trainloader, start_event, start_event2):
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(layer.parameters(), lr=0.01, momentum=0.9,
                          weight_decay=5e-4)
    optimizer.zero_grad()
    layer.train()

    def backward_rank0(semaphore, start_event2):
        start_event2.wait()
        batch_idx = 0
        while True:
            try:
                semaphore.release()  # free a slot for the forward loop
                print("before grad recv")
                grad_recv = torch.zeros([args.batch_size, 256, 4, 4],
                                        dtype=torch.int8)
                dist.recv(tensor=grad_recv, src=1)
                print("after grad recv...")
            except RuntimeError:
                print("backward runtime error")
                break
            grad_recv = dequantize(grad_recv.cuda(0).float())
            loss = outputs_queue.get(block=False)
            loss.backward(grad_recv)
            if batch_idx % args.buffer_size == 0:
                optimizer.step()
                optimizer.zero_grad()
            batch_idx += 1

    def backward_rank1(semaphore, start_event, start_event2):
        start_event.wait()
        batch_idx = 0
        while True:
            try:
                print("before grad recv...")
                grad_recv1 = torch.zeros([args.batch_size, 512, 2, 2],
                                         dtype=torch.int8)
                dist.recv(tensor=grad_recv1, src=2)
                print("after grad recv.....")
            except RuntimeError:
                print("backward runtime error")
                send_opt = dist.isend(tensor=torch.zeros(0), dst=0)
                send_opt.wait()
                break
            grad_recv1 = dequantize(grad_recv1.cuda(0).float())
            inputs, outputs = outputs_queue.get(block=False)
            inputs.requires_grad_()
            outputs.backward(grad_recv1)
            if batch_idx % args.buffer_size == 0:
                optimizer.step()
                optimizer.zero_grad()
            inputs_grad = quantize(inputs.grad, char=True).cpu()
            print(inputs_grad.size())
            if batch_idx == 0:
                start_event2.set()
            dist.send(tensor=inputs_grad, dst=0)
            batch_idx += 1

    if dist.get_rank() == 0:
        criterion.cuda(0)
        outputs_queue = ThreadQueue(args.buffer_size)
        semaphore = Semaphore(args.buffer_size)
        back_process = Process(target=backward_rank0,
                               args=(semaphore, start_event2))
        back_process.start()
        for batch_idx, (inputs, targets) in enumerate(trainloader):
            semaphore.acquire()  # block while buffer_size batches are in flight
            print("batch: " + str(batch_idx))
            inputs, targets = inputs.cuda(0), targets
            outputs = layer(inputs)
            targets_queue.put(targets.numpy())
            outputs_queue.put(outputs)
            send_opt = dist.isend(tensor=q_act(outputs, char=True).cpu(), dst=1)
            send_opt.wait()
            print("send....")
        print("start to end..")
        send_opt = dist.isend(tensor=torch.zeros(0), dst=1)
        send_opt.wait()
        back_process.join()
        e.set()
    elif dist.get_rank() == 1:
        batch_idx = 0
        criterion.cuda(0)
        outputs_queue = ThreadQueue(10)
        semaphore = Semaphore(args.buffer_size - 1)
        back_process = Process(target=backward_rank1,
                               args=(semaphore, start_event, start_event2))
        back_process.start()
        while True:
            try:
                print("before semaphore......")
                rec_val = torch.zeros([args.batch_size, 256, 4, 4],
                                      dtype=torch.int8)
                dist.recv(tensor=rec_val, src=0)
                print("after recv.....")
            except RuntimeError:
                print("runtime error")
                send_opt = dist.isend(tensor=torch.zeros(0), dst=2)
                send_opt.wait()
                back_process.join()
                e.wait()
                break
            print("before dq...")
            rec_val = dq_act(rec_val)
            rec_val = rec_val.cuda(0)
            rec_val.requires_grad_()
            print("before output......")
            outputs = layer(rec_val)
            print("before queue")
            outputs_queue.put([rec_val, outputs])
            print("after queue")
            dist.send(tensor=q_act(outputs, char=True).cpu(), dst=2)
            batch_idx += 1
            print("send end...")
    elif dist.get_rank() == 2:
        batch_idx = 0
        train_loss = 0
        correct = 0
        total = 0
        criterion.cuda(0)
        while True:
            try:
                rec_val = torch.zeros([args.batch_size, 512, 2, 2],
                                      dtype=torch.int8)
                dist.recv(tensor=rec_val, src=1)
            except RuntimeError:
                send_opt = dist.isend(tensor=torch.zeros(0), dst=1)
                send_opt.wait()
                e.wait()
                break
            rec_val = dq_act(rec_val)
            rec_val = rec_val.cuda(0)
            rec_val.requires_grad_()
            outputs = layer(rec_val)
            targets = targets_queue.get(block=True, timeout=2)
            targets = torch.from_numpy(targets).cuda(0)
            loss = criterion(outputs, targets)
            loss.backward()
            if batch_idx % args.buffer_size == 0:
                optimizer.step()
                train_loss += loss.item()
                _, predicted = outputs.max(1)
                total += targets.size(0)
                correct += predicted.eq(targets).sum().item()
                progress_bar(batch_idx, data_size,
                             'Loss: %.3f | Acc: %.3f%% (%d/%d)' %
                             (train_loss / (batch_idx + 1),
                              100. * correct / total, correct, total))
                optimizer.zero_grad()
            else:
                progress_bar(batch_idx, data_size,
                             'Loss: %.3f | Acc: %.3f%% (%d/%d)' %
                             (train_loss / (batch_idx + 1),
                              100. * correct / total, correct, total))
            logger.error("train:" + str(train_loss / (batch_idx + 1)))
            acc_str = "tacc: %.3f" % (100. * correct / total,)
            logger.error(acc_str)
            if batch_idx == 0:
                start_event.set()
            quantize_grad = quantize(rec_val.grad, char=True)
            dist.send(tensor=quantize_grad.cpu(), dst=1)
            batch_idx += 1
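quantize/dequantize (and q_act/dq_act) are project helpers that are not shown; the int8 recv buffers suggest a simple linear quantization scheme along these lines (a sketch with a fixed scale for brevity; the real helpers presumably derive the scale from the tensor):

SCALE = 127.0  # hypothetical fixed scale

def quantize(t, char=True):
    q = (t.clamp(-1, 1) * SCALE).round()
    return q.char() if char else q   # .char() casts to torch.int8

def dequantize(t):
    return t / SCALE                 # expects the int8 tensor cast to float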