def _add_doi(metadata, identifier, citekey=None):
    """Add an entry from a DOI.

    Queries the DOI metadata service, concurrently asks Semantic Scholar
    for a matching arXiv ID, and (if one exists) downloads the arXiv PDF
    into the new paper directory.

    Args:
        metadata: The metadata store; also supplies ``config.library_root``
            and duplicate checks (``doi_exists``).
        identifier: The DOI to add.
        citekey: Optional explicit citekey.  Generated from the metadata
            when ``None``.  (Default added for consistency with
            ``_add_arxiv_id``; positional callers are unaffected.)

    Returns:
        ``(citekey, metadatum, info_messages)`` — the same triple as
        ``_add_arxiv_id``, so callers can treat both entry points
        uniformly.  (Previously this function implicitly returned
        ``None``.)

    Raises:
        ZoiaAddException: If the DOI is already present in the library.
    """
    info_messages = []
    with StatusMessage('Querying DOI metadata...') as message:
        if metadata.doi_exists(identifier):
            raise ZoiaAddException(f'DOI {identifier} already exists.')

        # Query Semantic Scholar for the corresponding arXiv ID (if there
        # is one) in a separate thread so it overlaps the DOI query below.
        arxiv_queue = ThreadQueue()
        arxiv_process = ThreadProcess(
            target=lambda q, x: q.put(requests.get(x)),
            args=(
                arxiv_queue,
                f'https://api.semanticscholar.org/v1/paper/{identifier}',
            ),
        )
        arxiv_process.start()

        doi_metadata = _get_doi_metadata(identifier)
        metadatum = zoia.backend.metadata.Metadatum.from_dict(doi_metadata)

        if citekey is None:
            citekey = zoia.parse.citekey.create_citekey(metadata, metadatum)
        paper_dir = os.path.join(metadata.config.library_root, citekey)
        os.mkdir(paper_dir)

        message.update(
            'Querying Semantic Scholar for corresponding arXiv ID...')
        arxiv_metadata_response = arxiv_queue.get()
        arxiv_process.join()
        arxiv_metadata = json.loads(arxiv_metadata_response.text)
        if (arxiv_id := arxiv_metadata.get('arxivId')) is not None:
            doi_metadata['arxiv_id'] = arxiv_id
            message.update('Downloading PDF from arXiv...')
            pdf_response = requests.get(
                f'https://arxiv.org/pdf/{arxiv_id}.pdf')
            if pdf_response.status_code == 200:
                with open(os.path.join(paper_dir, 'document.pdf'),
                          'wb') as fp:
                    fp.write(pdf_response.content)
                doi_metadata['pdf_md5'] = hashlib.md5(
                    pdf_response.content).hexdigest()
            else:
                info_messages.append('Was unable to fetch a PDF')

        metadata[citekey] = doi_metadata

    return citekey, metadatum, info_messages
def _add_arxiv_id(metadata, identifier, citekey=None):
    """Add an entry from an arXiv ID.

    Starts the PDF download early in a background thread, queries arXiv
    (and DOI metadata when the paper has a DOI), writes the PDF into the
    new paper directory, and records the entry in ``metadata``.

    Args:
        metadata: The metadata store; supplies duplicate checks and
            ``config.library_root``.
        identifier: The arXiv ID to add.
        citekey: Optional explicit citekey; generated when ``None``.

    Returns:
        ``(citekey, metadatum, info_messages)``.

    Raises:
        ZoiaAddException: If the arXiv ID (or the downloaded PDF's MD5
            hash) is already present in the library.
    """
    info_messages = []
    with StatusMessage('Querying arXiv...') as message:
        if metadata.arxiv_id_exists(identifier):
            raise ZoiaAddException(
                f'arXiv paper {identifier} already exists.')

        # Downloading the PDF can take a while, so start it early in a
        # separate thread.
        pdf_queue = ThreadQueue()
        pdf_process = ThreadProcess(
            target=lambda q, x: q.put(requests.get(x)),
            args=(pdf_queue, f'https://arxiv.org/pdf/{identifier}.pdf'),
        )
        pdf_process.start()

        arxiv_metadata = _get_arxiv_metadata(identifier)
        if 'doi' in arxiv_metadata:
            message.update('Querying DOI information...')
            arxiv_metadata.update(_get_doi_metadata(arxiv_metadata['doi']))

        # BUG FIX: build the Metadatum unconditionally.  Previously it was
        # only assigned inside the ``citekey is None`` branch, so supplying
        # an explicit citekey made the final return raise UnboundLocalError.
        metadatum = zoia.backend.metadata.Metadatum.from_dict(arxiv_metadata)
        if citekey is None:
            citekey = zoia.parse.citekey.create_citekey(metadata, metadatum)

        paper_dir = os.path.join(metadata.config.library_root, citekey)
        os.mkdir(paper_dir)

        message.update(text='Downloading PDF...')
        pdf = pdf_queue.get()
        pdf_process.join()

        if pdf.status_code == 200:
            md5_hash = hashlib.md5(pdf.content).hexdigest()
            # Check for a duplicate *before* writing the file so a rejected
            # add does not leave a stray document.pdf behind.
            if metadata.pdf_md5_hash_exists(md5_hash):
                raise ZoiaAddException(
                    f'arXiv paper {identifier} already exists.')
            with open(os.path.join(paper_dir, 'document.pdf'), 'wb') as fp:
                fp.write(pdf.content)
            arxiv_metadata['pdf_md5'] = md5_hash
        else:
            info_messages.append('Was unable to fetch a PDF')

        metadata[citekey] = arxiv_metadata

    return citekey, metadatum, info_messages
def train(layer, logger, shapes, args, e, data_size, trainloader):
    """Pipeline-parallel training loop for a 4-process setup.

    Each distributed rank (0-3) owns one stage (``layer``) of the model:
    activations flow forward 0 -> 1 -> 2 -> 3 and gradients flow backward
    3 -> 2 -> 1 -> 0 via ``transfer``/``dist.recv``.  Ranks 0-2 run their
    backward pass in a separate process fed through ``outputs_queue``.

    NOTE(review): ``shapes[0..2]`` are presumably the activation shapes at
    the three stage boundaries, and ``e`` a multiprocessing Event for final
    synchronization — confirm against the caller.  The semantics of
    ``transfer(tag, send, recv_shape)`` (send then optionally receive) are
    inferred from usage; verify against its definition.
    """
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(layer.parameters(), lr=0.01, momentum=0.9,
                          weight_decay=5e-4)
    optimizer.zero_grad()
    layer.train()
    batch_idx = 0

    def backward_rank2():
        # Backward worker for rank 2: receive upstream grads from rank 3,
        # backprop through cached activations, send input grads to rank 1.
        residual = None
        batch_idx = 0
        grad_recv1 = torch.zeros(shapes[2])
        dist.recv(tensor=grad_recv1, src=3)
        while True:
            print(" backward batch_idx:" + str(batch_idx))
            grad_recv1 = grad_recv1.cuda(2)
            try:
                # (input, output) pair cached by the forward loop below.
                inputs, outputs = outputs_queue.get(block=True, timeout=4)
            except Empty:
                print("empty........")
                break
            inputs.requires_grad_()
            outputs.backward(grad_recv1)
            # Step every 3rd micro-batch (gradient accumulation).
            if batch_idx % 3 == 0:
                optimizer.step()
                optimizer.zero_grad()
            batch_idx += 1
            if data_size == batch_idx:
                # Final micro-batch: send only, nothing left to receive.
                transfer(4, inputs.grad.cpu(), None)
                print("backend In send..")
                break
            grad_recv1 = transfer(4, inputs.grad.cpu(), shapes[2])
            print("backward send.......")
        print("backard end....")

    def backward_rank1():
        # Backward worker for rank 1; mirrors backward_rank2 one stage down.
        residual = None
        batch_idx = 0
        grad_recv1 = torch.zeros(shapes[1])
        dist.recv(tensor=grad_recv1, src=2)
        while True:
            print(" backward batch_idx:" + str(batch_idx))
            grad_recv1 = grad_recv1.cuda(1)
            try:
                inputs, outputs = outputs_queue.get(block=True, timeout=4)
            except Empty:
                print("empty........")
                break
            inputs.requires_grad_()
            outputs.backward(grad_recv1)
            if batch_idx % 3 == 0:
                optimizer.step()
                optimizer.zero_grad()
            batch_idx += 1
            if data_size == batch_idx:
                transfer(5, inputs.grad.cpu(), None)
                print("backend In send..")
                break
            grad_recv1 = transfer(5, inputs.grad.cpu(), shapes[1])
            print("backward send.......")
        print("backard end....")

    def backward_rank0(semaphore):
        # Backward worker for rank 0 (first stage): only consumes grads;
        # there is no further downstream stage to forward input grads to.
        batch_idx = 0
        grad_recv = torch.zeros(shapes[0])
        dist.recv(tensor=grad_recv, src=1)
        while True:
            grad_recv = grad_recv.cuda(0)
            print(" backwardbatch_idx:" + str(batch_idx))
            try:
                # "loss" here is actually the cached stage output tensor.
                loss = outputs_queue.get(block=True, timeout=4)
            except Empty:
                print("empty........")
                break
            loss.backward(grad_recv)
            if batch_idx % 3 == 0:
                optimizer.step()
                optimizer.zero_grad()
            batch_idx += 1
            if data_size == batch_idx:
                print("eq...")
                break
            grad_recv = transfer(6, None, shapes[0])
            print("backward send.....")
        print("backward end..")

    if dist.get_rank() == 0:
        # Rank 0: feed real input batches forward; backward runs in a
        # separate process.  NOTE(review): `semaphore` is created but never
        # acquired here, so it does not actually bound the queue.
        outputs_queue = ThreadQueue(args.buffer_size)
        semaphore = Semaphore(args.buffer_size)
        back_process = Process(target=backward_rank0, args=(semaphore, ))
        back_process.start()
        for batch_idx, (inputs, targets) in enumerate(trainloader):
            print("batch: " + str(batch_idx))
            inputs = inputs.cuda(0)
            outputs = layer(inputs)
            outputs_queue.put(outputs)
            #outputs = q_act(outputs, char=True)
            transfer(dist.get_rank(), outputs.cpu(), None)
            print("send........")
        print("start to end....")
        back_process.join()
        e.set()
        print("end....")
    elif dist.get_rank() == 1:
        # Rank 1: receive activations from rank 0, forward through this
        # stage, forward results to rank 2; cache (input, output) for the
        # backward worker.
        outputs_queue = ThreadQueue(args.buffer_size)
        back_process = Process(target=backward_rank1, args=())
        rec_val = torch.zeros(shapes[0])
        dist.recv(tensor=rec_val, src=0)
        #fix bug..
        back_process.start()
        for index, (_, targets) in enumerate(trainloader):
            print("batch_idx:" + str(index))
            rec_val = rec_val.cuda(1)
            rec_val.requires_grad_()
            outputs = layer(rec_val)
            outputs_queue.put([rec_val, outputs])
            if index == data_size - 1:
                transfer(dist.get_rank(), outputs.cpu(), None)
                print("the last send........")
                continue
            rec_val = transfer(dist.get_rank(), outputs.cpu(), shapes[0])
            print("send.................")
        print("start to end....")
        back_process.join()
        e.wait()
        print("end......")
    elif dist.get_rank() == 2:
        # Rank 2: same structure as rank 1, one stage further along.
        outputs_queue = ThreadQueue(args.buffer_size)
        back_process = Process(target=backward_rank2, args=())
        rec_val = torch.zeros(shapes[1])
        dist.recv(tensor=rec_val, src=1)
        back_process.start()
        for index, (_, targets) in enumerate(trainloader):
            print("batch_idx:" + str(index))
            rec_val = rec_val.cuda(2)
            rec_val.requires_grad_()
            outputs = layer(rec_val)
            outputs_queue.put([rec_val, outputs])
            if index == data_size - 1:
                transfer(dist.get_rank(), outputs.cpu(), None)
                print("the last send........")
                continue
            rec_val = transfer(dist.get_rank(), outputs.cpu(),
                               shapes[1])
            print("send.................")
        print("start to end....")
        back_process.join()
        e.wait()
        print("end......")
    elif dist.get_rank() == 3:
        # Rank 3 (last stage): computes the real loss and starts the
        # backward chain by sending input grads back to rank 2.  Runs the
        # backward pass inline (no separate process needed).
        rec_val = None
        residual = None
        train_loss = 0
        correct = 0
        total = 0
        criterion.cuda(3)
        rec_val = torch.zeros(shapes[2])
        dist.recv(tensor=rec_val, src=2)
        for batch_idx, (_, targets) in enumerate(trainloader):
            rec_val = rec_val.cuda(3)
            rec_val.requires_grad_()
            outputs = layer(rec_val)
            # start to backward....
            targets = targets.cuda(3)
            loss = criterion(outputs, targets)
            loss.backward()
            quantize_grad = rec_val.grad.cpu()
            # NOTE(review): metrics accumulate only on stepping iterations
            # (batch_idx % 3 == 0) — likely intentional sampling, confirm.
            if batch_idx % 3 == 0:
                optimizer.step()
                train_loss += loss.item()
                _, predicted = outputs.max(1)
                total += targets.size(0)
                correct += predicted.eq(targets).sum().item()
                progress_bar(
                    batch_idx, data_size,
                    'Loss: %.3f | Acc: %.3f%% (%d/%d)'
                    % (train_loss / (batch_idx + 1),
                       100. * correct / total, correct, total))
                optimizer.zero_grad()
            else:
                progress_bar(
                    batch_idx, data_size,
                    'Loss: %.3f | Acc: %.3f%% (%d/%d)'
                    % (train_loss / (batch_idx + 1),
                       100. * correct / total, correct, total))
                # error level is used so the stats survive log filtering.
                logger.error("train:" + str(train_loss / (batch_idx + 1)))
                acc_str = "tacc: %.3f" % (100. * correct / total, )
                logger.error(acc_str)
            if batch_idx == data_size - 1:
                transfer(dist.get_rank(), quantize_grad, None)
                continue
            rec_val = transfer(dist.get_rank(), quantize_grad, shapes[2])
        #print("\n start to end....")
        e.wait()
        print("end....")
def pipe_dream(layer, logger, args, backward_event, targets_queue, e,
               data_size, trainloader):
    """Two-stage PipeDream-style training loop.

    Rank 0 holds the first model stage and alternates between pushing new
    forward micro-batches (up to 2 in flight, tracked by ``output_queue``)
    and draining a backward step when rank 1 signals ``backward_event``.
    Rank 1 holds the final stage: it computes the loss, steps its own
    optimizer, and sends the input gradient back to rank 0.

    NOTE(review): the activation shape [batch_size, 128, 16, 16] is
    hard-coded on both sides and must match the stage boundary — confirm.
    """
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(layer.parameters(), lr=0.01, momentum=0.9,
                          weight_decay=5e-4)
    layer.train()
    if dist.get_rank() == 0:
        criterion.cuda(0)
        output_queue = ThreadQueue(2)
        data_iter = iter(trainloader)
        batch_idx = 0
        while True:
            try:
                if output_queue.qsize() == 2:
                    # Pipeline full: wait until rank 1 has a gradient ready,
                    # then run one backward/step for the oldest activation.
                    backward_event.wait()
                    optimizer.zero_grad()
                    grad = torch.zeros([args.batch_size, 128, 16, 16])
                    dist.recv(tensor=grad, src=1)
                    outputs = output_queue.get()
                    outputs.backward(grad.cuda(0))
                    optimizer.step()
                    backward_event.clear()
                    continue
                else:
                    # Room in the pipeline: send one more forward batch.
                    inputs, targets = next(data_iter)
                    inputs = inputs.cuda(0)
                    targets_queue.put(targets.numpy(), block=False)
                    outputs = layer(inputs)
                    send_opt = dist.isend(tensor=outputs.cpu(), dst=1)
                    send_opt.wait()
                    output_queue.put(outputs)
                    batch_idx += 1
            except StopIteration as stop_e:
                # Data exhausted: send an empty tensor as the end-of-stream
                # marker, then drain the remaining in-flight backwards.
                send_opt = dist.isend(tensor=torch.zeros(0), dst=1)
                send_opt.wait()
                while output_queue.qsize() > 0:
                    #backward_event.wait()
                    optimizer.zero_grad()
                    grad = torch.zeros([args.batch_size, 128, 16, 16])
                    dist.recv(tensor=grad, src=1)
                    outputs = output_queue.get()
                    outputs.backward(grad.cuda(0))
                    optimizer.step()
                    #backward_event.clear()
                break
    elif dist.get_rank() == 1:
        batch_idx = 0
        train_loss = 0
        correct = 0
        total = 0
        criterion.cuda(1)
        while True:
            print("while........................")
            try:
                rec_val = torch.zeros([args.batch_size, 128, 16, 16])
                dist.recv(tensor=rec_val, src=0)
                print("recv.......")
            except RuntimeError as error:
                # A size-mismatched recv (the zero-length end marker)
                # raises RuntimeError; treat it as end of stream.
                print("runtime........................")
                #e.wait()
                break
            rec_val = rec_val.cuda(1)
            rec_val.requires_grad_()
            optimizer.zero_grad()
            outputs = layer(rec_val)
            targets = targets_queue.get(block=True, timeout=2)
            targets = torch.from_numpy(targets).cuda(1)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()
            _, predicted = outputs.max(1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()
            progress_bar(
                batch_idx, data_size,
                'Loss: %.3f | Acc: %.3f%% (%d/%d)'
                % (train_loss / (batch_idx + 1),
                   100. * correct / total, correct, total))
            # Tell rank 0 a gradient is ready before actually sending it.
            if not backward_event.is_set():
                print("set.....")
                backward_event.set()
            send_opt = dist.isend(tensor=rec_val.grad.cpu(), dst=0)
            print("send.....")
            if batch_idx % 10 == 0:
                logger.error("train:" + str(train_loss / (batch_idx + 1)))
            batch_idx += 1
def train(layer, logger, args, grad_queue, targets_queue, e, data_size,
          trainloader, start_event):
    """Two-stage pipeline training with int8-quantized tensors.

    Rank 0 runs the first stage forward, sending quantized activations
    (``q_act``) to rank 1 and backpropagating gradients that rank 1 pushes
    into ``grad_queue`` (handled by a separate backward process).  Rank 1
    runs the final stage, computes loss/accuracy, quantizes its input
    gradient to int8 and returns it through ``grad_queue``.

    NOTE(review): shape [batch_size, 256, 4, 4] is the hard-coded stage
    boundary; ``quantize``/``dequantize``/``q_act``/``dq_act`` semantics
    are assumed symmetric — verify against their definitions.
    """
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(layer.parameters(), lr=0.01, momentum=0.9,
                          weight_decay=5e-4)
    optimizer.zero_grad()
    layer.train()

    def backward():
        # Rank-0 backward worker: waits until rank 1 has produced the first
        # gradient, then repeatedly dequantizes and backprops.
        start_event.wait()
        batch_idx = 0
        while True:
            try:
                grad = grad_queue.get(block=True, timeout=1)
                # Earlier variants (sparse, half, byte-packed) kept for
                # reference:
                #quantize_package = grad_queue.get(block=True, timeout=1)
                #grad = dequantize(quantize_package, [args.batch_size, 256, 4, 4])
                #grad = dense(grad, [args.batch_size, 256, 4, 4]).cuda(0)
                #grad = torch.from_numpy(grad).cuda(0).float()
                grad = torch.from_numpy(grad.astype(np.float32)).cuda(0)
                #grad = dequantize(grad, [args.batch_size, 256, 4, 4])
                grad = dequantize(grad)
            except Empty as empty:
                # Queue drained for >1s: assume the epoch is finished.
                print("backward empty.....")
                break
            # "loss" is the cached stage-0 output tensor for this batch.
            loss = outputs_queue.get(block=False)
            loss.backward(grad)
            if batch_idx % args.buffer_size == 0:
                optimizer.step()
                optimizer.zero_grad()
            batch_idx += 1

    if dist.get_rank() == 0:
        criterion.cuda(0)
        outputs_queue = ThreadQueue(args.buffer_size)
        back_process = Process(target=backward)
        back_process.start()
        for batch_idx, (inputs, targets) in enumerate(trainloader):
            print("batch: " + str(batch_idx))
            inputs, targets = inputs.cuda(0), targets
            outputs = layer(inputs)
            # Quantize activations to int8 before sending to halve traffic.
            send_opt = dist.isend(tensor=q_act(outputs, char=True).cpu(),
                                  dst=1)
            # if batch_idx < 30:
            send_opt.wait()
            targets_queue.put(targets.numpy())
            outputs_queue.put(outputs)
            print("send....")
        # Zero-length tensor acts as the end-of-stream marker for rank 1.
        send_opt = dist.isend(tensor=torch.zeros(0), dst=1)
        send_opt.wait()
        back_process.join()
        e.set()
    elif dist.get_rank() == 1:
        batch_idx = 0
        train_loss = 0
        correct = 0
        total = 0
        criterion.cuda(1)
        residual = None
        while True:
            try:
                rec_val = torch.zeros([args.batch_size, 256, 4, 4],
                                      dtype=torch.int8)
                dist.recv(tensor=rec_val, src=0)
            except RuntimeError as error:
                # Size-mismatched recv = end-of-stream marker from rank 0.
                e.wait()
                break
            rec_val = dq_act(rec_val)
            rec_val = rec_val.cuda(1)
            rec_val.requires_grad_()
            outputs = layer(rec_val)
            targets = targets_queue.get(block=True, timeout=2)
            targets = torch.from_numpy(targets).cuda(1)
            loss = criterion(outputs, targets)
            loss.backward()
            # Earlier gradient-compression experiments kept for reference:
            #spare_grad, residual = sparse2(rec_val.grad, 0.01, True, residual)
            #grad_queue.put(rec_val.grad.cpu().half().numpy())
            #quantize_grad = quantize(rec_val.grad, num_bits=args.bit, half=True)
            #quantize_package = quantize(rec_val.grad, num_bits=args.bit, byte=True)
            quantize_grad = quantize(rec_val.grad)
            grad_queue.put(quantize_grad.cpu().numpy().astype(np.int8))
            if batch_idx == 0:
                # Unblock rank 0's backward worker now that a grad exists.
                start_event.set()
            # NOTE(review): metrics accumulate only on stepping iterations.
            if batch_idx % args.buffer_size == 0:
                optimizer.step()
                train_loss += loss.item()
                _, predicted = outputs.max(1)
                total += targets.size(0)
                correct += predicted.eq(targets).sum().item()
                progress_bar(batch_idx, data_size,
                             'Loss: %.3f | Acc: %.3f%% (%d/%d)'
                             % (train_loss / (batch_idx + 1),
                                100. * correct / total, correct, total))
                optimizer.zero_grad()
            else:
                progress_bar(batch_idx, data_size,
                             'Loss: %.3f | Acc: %.3f%% (%d/%d)'
                             % (train_loss / (batch_idx + 1),
                                100. * correct / total, correct, total))
                #if batch_idx % 10 == 0:
                logger.error("train:" + str(train_loss / (batch_idx + 1)))
            batch_idx += 1
        acc_str = "tacc: %.3f" % (100. * correct / total,)
        logger.error(acc_str)
# --- Script setup: destination DB session + Excel word-list loading. ---
# NOTE(review): `db_engine`, `settings`, `ThreadQueue`, `load_workbook`,
# `sys`, and `re` are presumably defined/imported earlier in the file.
DstSession = sessionmaker(bind=db_engine, autoflush=False)
dstssn = DstSession()
if True:  # settings.TWEETS: (original condition hard-disabled to True)
    # Optional first CLI argument selects "location" mode.
    try:
        command = sys.argv[1]
        print(command)
    except IndexError:
        command = ''
    if command == 'location':
        ISLOCATION = True
    else:
        ISLOCATION = False
    user_queue = ThreadQueue()
    # load excel file for input
    fname = 'word_list.xlsx'
    wb = load_workbook(fname)
    ws = wb.active
    # Data starts at row 2 (row 1 is presumably a header).
    ii = i = 2
    # Walk rows until the first empty cell in column 1 (permno).
    # NOTE(review): no `i += 1` is visible in this excerpt — the loop body
    # appears to continue beyond this chunk; confirm the increment exists,
    # otherwise this loop never advances.
    while True:
        if not ws.cell(row=i, column=1).value:
            break
        # Columns 4/5 hold date-like values; drop any " 00:00:00" time part.
        t1 = str(ws.cell(row=i, column=4).value).lower().strip(' ')
        t2 = str(ws.cell(row=i, column=5).value).lower().strip(' ')
        t1 = re.sub(' 00:00:00', '', t1)
        t2 = re.sub(' 00:00:00', '', t2)
        permno = str(ws.cell(row=i, column=1).value).lower().strip(' ')
def train(layer, logger, args, grad_queue, targets_queue, e, data_size,
          trainloader):
    """Two-stage pipeline training with half-precision gradient transfer.

    Rank 0 streams forward activations to rank 1 and lazily starts a
    backward process once the first gradient appears in ``grad_queue``.
    Rank 1 computes the loss and pushes its fp16 input gradient back
    through ``grad_queue``.

    NOTE(review): the [batch_size, 128, 16, 16] boundary shape is
    hard-coded on both sides — confirm it matches the model split.
    """
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(layer.parameters(), lr=0.01, momentum=0.9,
                          weight_decay=5e-4)
    optimizer.zero_grad()
    layer.train()

    def backward():
        # Rank-0 backward worker: upcast the fp16 gradient and backprop
        # through the cached stage-0 outputs; step every 2nd batch.
        batch_idx = 0
        while True:
            try:
                grad = grad_queue.get(block=True, timeout=1)
                #grad = dense(grad, [args.batch_size, 128, 16, 16]).cuda(0)
                grad = torch.from_numpy(grad).cuda(0).float()
            except Empty as empty:
                print("backward empty.....")
                break
            loss = outputs_queue.get(block=False)
            loss.backward(grad)
            if batch_idx % 2 == 0:
                optimizer.step()
                optimizer.zero_grad()
            batch_idx += 1

    if dist.get_rank() == 0:
        criterion.cuda(0)
        start_flag = True
        outputs_queue = ThreadQueue(20)
        for batch_idx, (inputs, targets) in enumerate(trainloader):
            print("batch: " + str(batch_idx))
            inputs, targets = inputs.cuda(0), targets
            outputs = layer(inputs)
            outputs_queue.put(outputs)
            print('put......')
            targets_queue.put(targets.numpy())
            print(outputs.cpu().size())
            send_opt = dist.isend(tensor=outputs.cpu(), dst=1)
            # Only synchronize every 10th send to keep the pipe full.
            if batch_idx % 10 == 0:
                send_opt.wait()
            #send_opt.wait()
            print("send....")
            # Start the backward process lazily, once rank 1 has produced
            # its first gradient.
            if start_flag and grad_queue.qsize() > 0:
                start_flag = False
                back_process = Process(target=backward)
                back_process.start()
        # Zero-length tensor = end-of-stream marker for rank 1.
        send_opt = dist.isend(tensor=torch.zeros(0), dst=1)
        send_opt.wait()
        back_process.join()
        e.set()
    elif dist.get_rank() == 1:
        batch_idx = 0
        train_loss = 0
        correct = 0
        total = 0
        criterion.cuda(1)
        residual = None
        while True:
            try:
                rec_val = torch.zeros([args.batch_size, 128, 16, 16])
                dist.recv(tensor=rec_val, src=0)
                print("recv.......")
            except RuntimeError as error:
                # Size-mismatched recv = end marker; wait for rank 0.
                e.wait()
                break
            rec_val = rec_val.cuda(1)
            rec_val.requires_grad_()
            outputs = layer(rec_val)
            targets = targets_queue.get(block=True, timeout=2)
            targets = torch.from_numpy(targets).cuda(1)
            loss = criterion(outputs, targets)
            loss.backward()
            #spare_grad, residual = sparse2(rec_val.grad, 0.01, True, residual)
            #grad_queue.put(spare_grad.cpu().numpy())
            # Ship the input gradient back as fp16 to halve traffic.
            grad_queue.put(rec_val.grad.cpu().half().numpy())
            # NOTE(review): metrics accumulate only on stepping iterations.
            if batch_idx % 2 == 0:
                optimizer.step()
                train_loss += loss.item()
                _, predicted = outputs.max(1)
                total += targets.size(0)
                correct += predicted.eq(targets).sum().item()
                progress_bar(
                    batch_idx, data_size,
                    'Loss: %.3f | Acc: %.3f%% (%d/%d)'
                    % (train_loss / (batch_idx + 1),
                       100. * correct / total, correct, total))
                optimizer.zero_grad()
            else:
                progress_bar(
                    batch_idx, data_size,
                    'Loss: %.3f | Acc: %.3f%% (%d/%d)'
                    % (train_loss / (batch_idx + 1),
                       100. * correct / total, correct, total))
            if batch_idx % 10 == 0:
                logger.error("train:" + str(train_loss / (batch_idx + 1)))
            batch_idx += 1
def train(layer, logger, shapes, args, e, data_size, trainloader):
    """Three-rank pipeline training with piecewise-quantized gradients.

    Activations flow 0 -> 1 -> 2 as int8 (``q_act``/``dq_act``); gradients
    flow 2 -> 1 -> 0 compressed by ``piecewise_quantize`` into a flat
    buffer of ``tensor_len(shape) + 2`` elements (the +2 presumably carries
    the dequantization scale/offset — confirm against piecewise_quantize).
    Ranks 0 and 1 run backward in separate processes; rank 2 computes the
    loss and top-1/top-5 accuracy inline.
    """
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(layer.parameters(), lr=0.01, momentum=0.9,
                          weight_decay=5e-4)
    optimizer.zero_grad()
    layer.train()
    batch_idx = 0

    def backward_rank1():
        # Backward worker for rank 1: decode grads from rank 2, backprop,
        # re-encode input grads and exchange with rank 0 via transfer2.
        residual = None
        batch_idx = 0
        ten_len = tensor_len(shapes[1])
        #grad_recv1 = torch.zeros(shapes[1], dtype=torch.int8)
        #grad_recv1 = torch.HalfTensor(torch.Size(shapes[1]))
        grad_recv1 = torch.zeros(ten_len + 2)  # flat quantized buffer
        dist.recv(tensor=grad_recv1, src=2)
        while True:
            print(" backward batch_idx:" + str(batch_idx))
            #grad_recv1 = unpack(grad_recv1.cuda(), shapes[1])
            #grad_recv1 = dequantize(grad_recv1.cuda().float())
            grad_recv1 = de_piecewise_quantize(grad_recv1.cuda(), shapes[1])
            try:
                inputs, outputs = outputs_queue.get(block=True, timeout=4)
            except Empty:
                print("empty........")
                break
            inputs.requires_grad_()
            outputs.backward(grad_recv1)
            # Residual feedback carries the quantization error into the
            # next step's encoding.
            #inputs_grad = quantize(inputs.grad, char=True).cpu()
            inputs_grad, residual = piecewise_quantize(
                inputs.grad, logger=logger, residual=residual)
            if batch_idx % 2 == 0:
                optimizer.step()
                optimizer.zero_grad()
            batch_idx += 1
            if data_size == batch_idx:
                transfer2(3, inputs_grad, None)
                print("backend In send..")
                break
            grad_recv1 = transfer2(3, inputs_grad, ten_len + 2)
            print("backward send.......")
        print("backard end....")

    def backward_rank0(semaphore):
        # Backward worker for rank 0 (first stage): consumes decoded grads;
        # no downstream stage to forward input grads to.
        batch_idx = 0
        ten_len = tensor_len(shapes[0])
        grad_recv = torch.zeros(ten_len + 2)
        #grad_recv = torch.zeros(shapes[0], dtype=torch.int8)
        dist.recv(tensor=grad_recv, src=1)
        while True:
            #semaphore.release()
            grad_recv = de_piecewise_quantize(grad_recv.cuda(), shapes[0])
            print(" backwardbatch_idx:" + str(batch_idx))
            grad_recv = grad_recv.cuda()
            try:
                # "loss" is the cached stage-0 output tensor.
                loss = outputs_queue.get(block=True, timeout=4)
            except Empty:
                print("empty........")
                break
            loss.backward(grad_recv)
            if batch_idx % 2 == 0:
                optimizer.step()
                optimizer.zero_grad()
            batch_idx += 1
            if data_size == batch_idx:
                print("eq...")
                break
            grad_recv = transfer2(4, None, ten_len + 2)
            print("backward send.....")
        print("backward end..")

    if dist.get_rank() == 0:
        outputs_queue = ThreadQueue(args.buffer_size)
        semaphore = Semaphore(args.buffer_size)
        back_process = Process(target=backward_rank0, args=(semaphore, ))
        back_process.start()
        for batch_idx, (inputs, targets) in enumerate(trainloader):
            #semaphore.acquire()
            print("batch: " + str(batch_idx))
            inputs = inputs.cuda()
            outputs = layer(inputs)
            # Cache the full-precision output for backward, then quantize
            # the copy that goes over the wire.
            outputs_queue.put(outputs)
            outputs = q_act(outputs, char=True)
            transfer(dist.get_rank(), outputs.cpu(), None)
            print("send........")
        print("start to end....")
        back_process.join()
        e.set()
        print("end....")
    elif dist.get_rank() == 1:
        outputs_queue = ThreadQueue(args.buffer_size)
        back_process = Process(target=backward_rank1, args=())
        rec_val = torch.zeros(shapes[0], dtype=torch.int8)
        dist.recv(tensor=rec_val, src=0)
        #fix bug..
        back_process.start()
        for index, (_, targets) in enumerate(trainloader):
            print("batch_idx:" + str(index))
            rec_val = dq_act(rec_val)
            rec_val = rec_val.cuda()
            rec_val.requires_grad_()
            outputs = layer(rec_val)
            outputs_queue.put([rec_val, outputs])
            outputs = q_act(outputs, char=True)
            if index == data_size - 1:
                transfer(dist.get_rank(), outputs.cpu(), None)
                print("the last send........")
                continue
            rec_val = transfer(dist.get_rank(), outputs.cpu(), shapes[0])
            print("send.................")
        print("start to end....")
        back_process.join()
        e.wait()
        print("end......")
    elif dist.get_rank() == 2:
        # Final stage: computes loss + top-1/top-5 accuracy and starts the
        # backward chain with piecewise-quantized input gradients.
        rec_val = None
        residual = None
        train_loss = 0
        correct = 0
        total = 0
        correct_5 = 0
        correct_1 = 0
        criterion.cuda()
        if not torch.is_tensor(rec_val):
            rec_val = torch.zeros(shapes[1], dtype=torch.int8)
        dist.recv(tensor=rec_val, src=1)
        for batch_idx, (_, targets) in enumerate(trainloader):
            rec_val = dq_act(rec_val)
            rec_val = rec_val.cuda()
            rec_val.requires_grad_()
            outputs = layer(rec_val)
            # start to backward....
            targets = targets.cuda()
            loss = criterion(outputs, targets)
            loss.backward()
            #quantize_grad = quantize(rec_val.grad, char=True).cpu()
            # for_view = rec_val.grad.view(-1).tolist()
            # logger.error("grad: " + str(for_view))
            #quantize_grad, residual = compress(rec_val.grad, residual=residual)
            quantize_grad, residual = piecewise_quantize(
                rec_val.grad, logger=logger, residual=residual)
            # NOTE(review): metrics accumulate only on stepping iterations.
            if batch_idx % 2 == 0:
                optimizer.step()
                train_loss += loss.item()
                #_, predicted = outputs.max(1)
                # Top-5 bookkeeping: compare each target against the top-5
                # predictions, then slice for top-5 / top-1 counts.
                _, predicted = outputs.topk(5, 1, True, True)
                total += targets.size(0)
                targets = targets.view(targets.size(0), -1).expand_as(predicted)
                correct = predicted.eq(targets).float()
                correct_5 += correct[:, :5].sum()
                correct_1 += correct[:, :1].sum()
                progress_bar(
                    batch_idx, data_size,
                    'Loss: %.3f | Acc: %.3f%% (%d/%d)'
                    % (train_loss / (batch_idx + 1),
                       100. * correct_5 / total, correct_5, total))
                optimizer.zero_grad()
            else:
                progress_bar(
                    batch_idx, data_size,
                    'Loss: %.3f | Acc: %.3f%% (%d/%d)'
                    % (train_loss / (batch_idx + 1),
                       100. * correct_5 / total, correct_5, total))
                logger.error("train:" + str(train_loss / (batch_idx + 1)))
                acc_str = "tacc1: %.3f" % (100. * correct_1 / total, )
                logger.error(acc_str)
                acc_str5 = "tacc5: %.3f" % (100. * correct_5 / total, )
                logger.error(acc_str5)
            if batch_idx == data_size - 1:
                transfer(dist.get_rank(), quantize_grad, None)
                continue
            rec_val = transfer(dist.get_rank(), quantize_grad, shapes[1])
        #print("\n start to end....")
        e.wait()
        print("end....")
def train(layer, logger, args, grad_queue, grad_queue2, targets_queue, e,
          data_size, trainloader, start_event, start_event2):
    """Three-rank pipeline training with int8 activations and gradients.

    Activations flow 0 -> 1 -> 2 quantized via ``q_act``; gradients flow
    2 -> 1 -> 0 quantized via ``quantize``.  Ranks 0 and 1 run backward in
    separate processes, each gated by an Event set when the first gradient
    of the epoch is produced upstream (``start_event`` by rank 2,
    ``start_event2`` by rank 1's backward worker).

    NOTE(review): ``grad_queue``/``grad_queue2`` are accepted but unused
    here — gradients travel over dist.send/recv instead; confirm the
    parameters are kept only for call-site compatibility.
    """
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(layer.parameters(), lr=0.01, momentum=0.9,
                          weight_decay=5e-4)
    optimizer.zero_grad()
    layer.train()

    def backward_rank0(semaphore, start_event2):
        # Rank-0 backward worker: wait until rank 1 has sent its first
        # gradient, then loop: recv int8 grad, dequantize, backprop.
        start_event2.wait()
        batch_idx = 0
        while True:
            try:
                # Free one forward slot per consumed gradient.
                semaphore.release()
                print("before grad recv")
                grad_recv = torch.zeros([args.batch_size, 256, 4, 4],
                                        dtype=torch.int8)
                dist.recv(tensor=grad_recv, src=1)
                print("after grad recv...")
            except RuntimeError as error:
                # Size-mismatched recv (end marker) terminates the loop.
                print("backward runtime error")
                break
            grad_recv = dequantize(grad_recv.cuda(0).float())
            loss = outputs_queue.get(block=False)
            loss.backward(grad_recv)
            if batch_idx % args.buffer_size == 0:
                optimizer.step()
                optimizer.zero_grad()
            batch_idx += 1

    def backward_rank1(semaphore, start_event, start_event2):
        # Rank-1 backward worker: recv grads from rank 2, backprop through
        # the cached (input, output) pair, send quantized input grads to
        # rank 0.  Sets start_event2 after the first send.
        start_event.wait()
        batch_idx = 0
        while True:
            try:
                #semaphore.release()
                print("before grad recv...")
                grad_recv1 = torch.zeros([args.batch_size, 512, 2, 2],
                                         dtype=torch.int8)
                dist.recv(tensor=grad_recv1, src=2)
                print("after grad recv.....")
            except RuntimeError as error:
                # Propagate the end-of-stream marker down to rank 0.
                print("backward runtime error")
                send_opt = dist.isend(tensor=torch.zeros(0), dst=0)
                send_opt.wait()
                break
            grad_recv1 = dequantize(grad_recv1.cuda(0).float())
            inputs, outputs = outputs_queue.get(block=False)
            inputs.requires_grad_()
            outputs.backward(grad_recv1)
            if batch_idx % args.buffer_size == 0:
                optimizer.step()
                optimizer.zero_grad()
            inputs_grad = quantize(inputs.grad, char=True).cpu()
            print(inputs_grad.size())
            if batch_idx == 0:
                start_event2.set()
            #send_opt = dist.isend(tensor=inputs_grad, dst=0)
            #send_opt.wait()
            dist.send(tensor=inputs_grad, dst=0)
            batch_idx += 1

    if dist.get_rank() == 0:
        criterion.cuda(0)
        outputs_queue = ThreadQueue(args.buffer_size)
        # Semaphore bounds how far forward can run ahead of backward.
        semaphore = Semaphore(args.buffer_size)
        back_process = Process(target=backward_rank0,
                               args=(semaphore, start_event2))
        back_process.start()
        for batch_idx, (inputs, targets) in enumerate(trainloader):
            semaphore.acquire()
            print("batch: " + str(batch_idx))
            inputs, targets = inputs.cuda(0), targets
            outputs = layer(inputs)
            targets_queue.put(targets.numpy())
            outputs_queue.put(outputs)
            send_opt = dist.isend(tensor=q_act(outputs, char=True).cpu(),
                                  dst=1)
            send_opt.wait()
            print("send....")
        print("start to end..")
        # Zero-length tensor = end-of-stream marker for rank 1.
        send_opt = dist.isend(tensor=torch.zeros(0), dst=1)
        send_opt.wait()
        back_process.join()
        e.set()
    elif dist.get_rank() == 1:
        batch_idx = 0
        criterion.cuda(0)
        outputs_queue = ThreadQueue(10)
        semaphore = Semaphore(args.buffer_size - 1)
        back_process = Process(target=backward_rank1,
                               args=(semaphore, start_event, start_event2))
        back_process.start()
        while True:
            try:
                print("before semaphore......")
                #semaphore.acquire()
                rec_val = torch.zeros([args.batch_size, 256, 4, 4],
                                      dtype=torch.int8)
                dist.recv(tensor=rec_val, src=0)
                print("after recv.....")
            except RuntimeError as error:
                # End marker from rank 0: forward it to rank 2 and drain.
                print("runtime errror")
                send_opt = dist.isend(tensor=torch.zeros(0), dst=2)
                send_opt.wait()
                back_process.join()
                e.wait()
                break
            print("before dq...")
            rec_val = dq_act(rec_val)
            rec_val = rec_val.cuda(0)
            rec_val.requires_grad_()
            print("before output......")
            outputs = layer(rec_val)
            # if batch_idx % args.buffer_size == 0:
            #     optimizer.step()
            #     optimizer.zero_grad()
            print("before queue")
            outputs_queue.put([rec_val, outputs])
            print("after queue")
            #send_opt = dist.isend(tensor=q_act(outputs, char=True).cpu(), dst=2)
            #send_opt.wait()
            dist.send(tensor=q_act(outputs, char=True).cpu(), dst=2)
            batch_idx += 1
        print("send end...")
    elif dist.get_rank() == 2:
        # Final stage: loss, accuracy, and the start of the backward chain.
        batch_idx = 0
        train_loss = 0
        correct = 0
        total = 0
        criterion.cuda(0)
        while True:
            try:
                #print("before recv....")
                rec_val = torch.zeros([args.batch_size, 512, 2, 2],
                                      dtype=torch.int8)
                dist.recv(tensor=rec_val, src=1)
                #print("after recv.....")
            except RuntimeError as error:
                #traceback.format_exc(error)
                send_opt = dist.isend(tensor=torch.zeros(0), dst=1)
                send_opt.wait()
                e.wait()
                break
            rec_val = dq_act(rec_val)
            rec_val = rec_val.cuda(0)
            rec_val.requires_grad_()
            outputs = layer(rec_val)
            targets = targets_queue.get(block=True, timeout=2)
            targets = torch.from_numpy(targets).cuda(0)
            loss = criterion(outputs, targets)
            loss.backward()
            # NOTE(review): metrics accumulate only on stepping iterations.
            if batch_idx % args.buffer_size == 0:
                optimizer.step()
                train_loss += loss.item()
                _, predicted = outputs.max(1)
                total += targets.size(0)
                correct += predicted.eq(targets).sum().item()
                progress_bar(batch_idx, data_size,
                             'Loss: %.3f | Acc: %.3f%% (%d/%d)'
                             % (train_loss / (batch_idx + 1),
                                100. * correct / total, correct, total))
                optimizer.zero_grad()
            else:
                progress_bar(batch_idx, data_size,
                             'Loss: %.3f | Acc: %.3f%% (%d/%d)'
                             % (train_loss / (batch_idx + 1),
                                100. * correct / total, correct, total))
                #if batch_idx % 10 == 0:
                logger.error("train:" + str(train_loss / (batch_idx + 1)))
                acc_str = "tacc: %.3f" % (100. * correct / total,)
                logger.error(acc_str)
            if batch_idx == 0:
                # Unblock rank 1's backward worker for this epoch.
                start_event.set()
            quantize_grad = quantize(rec_val.grad, char=True)
            #send_opt = dist.isend(tensor=quantize_grad.cpu(), dst=1)
            #send_opt.wait()
            dist.send(tensor=quantize_grad.cpu(), dst=1)
            batch_idx += 1