Example no. 1
    def __init__(self, data):
        # identity fields
        self.name = util.get_string(data, "name")
        self.pid = util.get_string(data, "pid")

        # timestamps
        self.start_time = util.get_date(data, "start_time")
        self.time_created = util.get_date(data, "time_created")
        self.time_started = util.get_date(data, "time_started")
        self.time_completed = util.get_date(data, "time_completed")

        # fetch results and overall status
        self.url = util.get_string(data, "url")
        self.url_status = util.get_string(data, "url_status")
        self.url_headers = util.get_string(data, "url_headers")
        self.url_content = util.get_string(data, "url_content")
        self.status = util.get_string(data, "status")

        # reject records that are missing any of the required fields
        util.ensure(self.name, "name")
        util.ensure(self.start_time, "start_time")
        util.ensure(self.url, "url")
        util.ensure(self.status, "status")
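
The util module is not shown in this example. Below is a minimal sketch of the helpers this constructor appears to assume (get_string/get_date returning None for absent keys, ensure rejecting missing required values); the project's real implementations may differ:

from datetime import datetime

def get_string(data, key):
    # return the value as a string, or None when the key is absent
    value = data.get(key)
    return None if value is None else str(value)

def get_date(data, key):
    # parse an ISO-8601 timestamp, or return None when the key is absent
    value = data.get(key)
    return None if value is None else datetime.fromisoformat(str(value))

def ensure(value, name):
    # assumed behavior: raise when a required field is missing
    if value is None:
        raise ValueError("missing required field: %s" % name)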
Example no. 2
def printRec(rType, rLen, rData, rCount, rOffset):
    #decode and print a single record according to its format description

    #attempt to look up the description for this record
    try:
        rec = recDict[rType]
        rName, rDesc, rFmt, rFNames = rec

        print "[%d]Record %s [%#x (%d)] offset %#x (%d), len %#x (%d) (%s)" % (
            rCount, rName, rType, rType, rOffset, rOffset, rLen, rLen, rDesc)

        fieldCount = offset = 0

        #skip last element due to trailing '%' in format string
        for fmt in rFmt.split("%")[:-1]:

            nLeft = rLen - offset
            fieldName = rFNames[fieldCount]

            if fmt == "1":
                if ensure(fieldName, 1, nLeft):
                    val = struct.unpack("B", rData[offset:offset + 1])[0]
                    print "        BYTE %s = %#x (%d)" % (fieldName, val, val)
                    offset += 1
            elif fmt == "2":
                if ensure(fieldName, 2, nLeft):
                    val = struct.unpack("H", rData[offset:offset + 2])[0]
                    print "        WORD %s = %#x (%d)" % (fieldName, val, val)
                    offset += 2
            elif fmt == "4":
                if ensure(fieldName, 4, nLeft):
                    val = struct.unpack("I", rData[offset:offset + 4])[0]
                    print "        DWORD %s = %#x (%d)" % (fieldName, val, val)
                    offset += 4
            elif fmt[0] == "f":
                dataLen = int(fmt[1:])
                if dataLen > nLeft:
                    print "        Warning, field %s is longer (%d) than data left (%d). Dumping what's left:" % \
                                (fieldName, dataLen, nLeft)
                    sys.stdout.write(hexdump(rData[offset:], indent=12))
                else:
                    print "        Field %s is %#x (%d) bytes, dumping:" % (
                        fieldName, dataLen, dataLen)
                    sys.stdout.write(
                        hexdump(rData[offset:offset + dataLen], indent=12))

                offset += dataLen
            elif fmt == "v":
                print "        Field '%s' is variable length, dumping rest of record:" % (
                    fieldName)
                sys.stdout.write(hexdump(rData[offset:], indent=12))
                break
            elif fmt[0] == "[":
                try:
                    handler = extraPrinters[fmt]
                    o = handler(rData[offset:], nLeft)
                    if o == -1:
                        break
                    else:
                        offset += o
                except KeyError:
                    print "Error: no handler defined for custom format '%s'" % (
                        fmt)
            else:
                print "ERROR:Invalid format in record format string [%s]. Developer error!!" % (
                    f)
                sys.exit(1)

            fieldCount += 1
    except KeyError:
        print "WARNING:No record description for id %#x (%d) len %#x (%d)" % (
            rType, rType, rLen, rLen)
        sys.stdout.write(hexdump(rData, indent=8))

    return
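
The recDict table and the ensure() helper are defined elsewhere in this tool. To make the format-string convention concrete ("%"-separated tokens, one per field name, with a trailing "%"), here is a hypothetical entry plus a sketch of ensure() matching the behavior the loop above implies; all names and values are illustrative, not the tool's real data:

import sys

# hypothetical record description; tokens: "1"/"2"/"4" = BYTE/WORD/DWORD,
# "fN" = fixed N-byte field, "v" = variable-length tail, "[...]" = custom handler
recDict = {
    0x01: ("HEADER",              # record name
           "File header record",  # description
           "2%4%f16%v%",          # WORD, DWORD, 16 fixed bytes, variable tail
           ["version", "flags", "guid", "payload"]),
}

def ensure(fieldName, need, nLeft):
    # assumed helper: warn on truncation and say whether 'need' bytes remain
    if nLeft < need:
        sys.stdout.write("        Warning, field %s truncated (need %d bytes, have %d)\n"
                         % (fieldName, need, nLeft))
        return False
    return True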
Example no. 3
def go(arg):

    tbw = SummaryWriter(log_dir=arg.tb_dir)

    transform = Compose([
        Lambda(lambda x: CenterCrop(min(x.size))(x)),
        Resize(size=(arg.img_size, arg.img_size)),
        ToTensor()
    ])

    imdir = arg.data_dir + os.sep + 'val2017'
    anfile = arg.data_dir + os.sep + 'annotations' + os.sep + 'captions_val2017.json'

    coco_data = coco.CocoCaptions(root=imdir,
                                  annFile=anfile,
                                  transform=transform)

    ## Make a dictionary

    util.ensure(arg.cache_dir)
    if os.path.isfile(arg.cache_dir + os.sep + 'i2w.pkl'):
        with open(arg.cache_dir + os.sep + 'i2w.pkl', 'rb') as file:
            i2w = pickle.load(file)
        with open(arg.cache_dir + os.sep + 'w2i.pkl', 'rb') as file:
            w2i = pickle.load(file)
        print('Word indices loaded.')
    else:
        print('Creating word indices')  # Why is this so slow?

        dist = Counter()
        for i in tqdm.trange(len(coco_data)):
            for caption in coco_data[i][1]:
                dist.update(util.tokenize(caption))

        vocab = dist.most_common(arg.max_vocab - len(EXTRA_SYMBOLS))

        i2w = EXTRA_SYMBOLS + [w[0] for w in vocab]
        w2i = {word: ix for ix, word in enumerate(i2w)}

        with open(arg.cache_dir + os.sep + 'i2w.pkl', 'wb') as file:
            pickle.dump(i2w, file)
        with open(arg.cache_dir + os.sep + 'w2i.pkl', 'wb') as file:
            pickle.dump(w2i, file)

    vocab_size = len(i2w)
    print('vocabulary size', vocab_size)
    print('top 100 words:', i2w[:100])

    def decode(indices):

        sentence = ''
        for idx in indices:
            # if idx == PAD:
            #     break
            sentence += i2w[idx] + ' '

        return sentence

    ## Set up the models
    embedding = torch.nn.Embedding(num_embeddings=vocab_size,
                                   embedding_dim=arg.embedding_size)

    if arg.mode != Mode.style:
        img_enc = models.ImEncoder(in_size=(arg.img_size, arg.img_size),
                                   zsize=arg.latent_size)
        img_dec = models.ImDecoder(in_size=(arg.img_size, arg.img_size),
                                   zsize=arg.latent_size)

        seq_enc = models.SeqEncoder(vocab_size=vocab_size,
                                    embedding=embedding,
                                    zsize=arg.latent_size)
        seq_dec = models.SeqDecoder(vocab_size=vocab_size,
                                    embedding=embedding,
                                    zsize=arg.latent_size)

        mods = [img_enc, img_dec, seq_enc, seq_dec]
    else:
        img_enc = models.ImEncoder(in_size=(arg.img_size, arg.img_size),
                                   zsize=arg.latent_size)
        img_sty = models.ImEncoder(in_size=(arg.img_size, arg.img_size),
                                   zsize=arg.latent_size)
        img_dec = models.ImDecoder(in_size=(arg.img_size, arg.img_size),
                                   zsize=arg.latent_size * 2)

        seq_enc = models.SeqEncoder(vocab_size=vocab_size,
                                    embedding=embedding,
                                    zsize=arg.latent_size)
        seq_sty = models.SeqEncoder(vocab_size=vocab_size,
                                    embedding=embedding,
                                    zsize=arg.latent_size)
        seq_dec = models.SeqDecoder(vocab_size=vocab_size,
                                    embedding=embedding,
                                    zsize=arg.latent_size * 2)

        mods = [img_enc, img_dec, img_sty, seq_enc, seq_dec, seq_sty]

    if torch.cuda.is_available():
        for model in mods:
            model.cuda()

    #- The standard dataloader approach doesn't seem to work with the captions, so we'll do our own batching.
    #  It's a little slower, probably, but it won't be the bottleneck
    params = []
    for model in mods:
        params.extend(model.parameters())
    optimizer = Adam(params, lr=arg.lr)

    instances_seen = 0

    for e in range(arg.epochs):
        print('epoch', e)
        for fr in tqdm.trange(0, len(coco_data), arg.batch_size):
            if arg.instance_limit is not None and fr > arg.instance_limit:
                break

            to = min(len(coco_data), fr + arg.batch_size)

            images = []
            captions = []

            for i in range(fr, to):
                images.append(coco_data[i][0].unsqueeze(0))
                # choose one of the available captions at random
                captions.append(random.choice(coco_data[i][1]))

            imbatch = torch.cat(images, dim=0)
            b, c, w, h = imbatch.size()

            capbatch = []  # to integer sequence
            for caption in captions:
                capbatch.append(util.intseq(util.tokenize(caption), w2i))

            capbatch, lengths = util.pad(capbatch)

            # Create shifted versions
            b, s = capbatch.size()

            # Input for the decoder
            cap_teacher = torch.cat(
                [torch.ones(b, 1, dtype=torch.long), capbatch], dim=1)
            cap_out = torch.cat(
                [capbatch, torch.zeros(b, 1, dtype=torch.long)], dim=1)

            lengths = torch.LongTensor(lengths)

            if torch.cuda.is_available():
                imbatch = imbatch.cuda()

                capbatch = capbatch.cuda()
                cap_teacher = cap_teacher.cuda()
                cap_out = cap_out.cuda()

                lengths = lengths.cuda()

            imbatch = Variable(imbatch)
            capbatch = Variable(capbatch)
            cap_teacher = Variable(cap_teacher)
            cap_out = Variable(cap_out)
            lengths = Variable(lengths)

            zimg = img_enc(imbatch)
            zcap = seq_enc(capbatch, lengths)

            kl_img = util.kl_loss(*zimg)
            kl_cap = util.kl_loss(*zcap)

            zimg_sample = util.sample(*zimg)
            zcap_sample = util.sample(*zcap)

            if arg.mode == Mode.style:
                zimg_sty = img_sty(imbatch)
                zcap_sty = seq_sty(capbatch, lengths)

                kl_img_sty = util.kl_loss(*zimg_sty)
                kl_cap_sty = util.kl_loss(*zcap_sty)

                zimg_sample_sty = util.sample(*zimg_sty)
                zcap_sample_sty = util.sample(*zcap_sty)

                zimg_sample = torch.cat([zimg_sample, zimg_sample_sty], dim=1)
                zcap_sample = torch.cat([zcap_sample, zcap_sample_sty], dim=1)

            rec_imgimg = img_dec(zimg_sample)
            rl_imgimg = binary_cross_entropy(rec_imgimg, imbatch,
                                             reduce=False).view(b, -1).sum(dim=1)

            rec_capcap = seq_dec(zcap_sample, cap_teacher,
                                 lengths + 1).transpose(1, 2)
            rl_capcap = nll_loss(rec_capcap, cap_out,
                                 reduce=False).view(b, -1).sum(dim=1)

            if arg.mode != Mode.independent:
                rec_capimg = img_dec(zcap_sample)
                rl_capimg = binary_cross_entropy(rec_capimg, imbatch,
                                                 reduce=False).view(b, -1).sum(dim=1)

                rec_imgcap = seq_dec(zimg_sample, cap_teacher,
                                     lengths + 1).transpose(1, 2)
                rl_imgcap = nll_loss(rec_imgcap, cap_out,
                                     reduce=False).view(b, -1).sum(dim=1)

            loss_img = rl_imgimg + kl_img
            loss_cap = rl_capcap + kl_cap

            if arg.mode == Mode.coupled:
                loss_img = loss_img + rl_capimg + kl_img
                loss_cap = loss_cap + rl_imgcap + kl_cap

            if arg.mode == Mode.style:
                loss_img = loss_img + kl_img_sty
                loss_cap = loss_cap + kl_cap_sty

            loss = loss_img.mean() + loss_cap.mean()

            #- backward pass
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            instances_seen += b

            tbw.add_scalar('score/img/kl', float(kl_img.mean()),
                           instances_seen)
            tbw.add_scalar('score/imgimg/rec', float(rl_imgimg.mean()),
                           instances_seen)
            tbw.add_scalar('score/cap/kl', float(kl_cap.mean()),
                           instances_seen)
            tbw.add_scalar('score/capcap/rec', float(rl_capcap.mean()),
                           instances_seen)
            tbw.add_scalar('score/loss', float(loss), instances_seen)

            if arg.mode != Mode.independent:
                tbw.add_scalar('score/capimg/rec', float(rl_capimg.mean()),
                               instances_seen)
                tbw.add_scalar('score/imgcap/rec', float(rl_imgcap.mean()),
                               instances_seen)

        # Interpolate
        zpairs = []
        for r in range(REP):

            print('Interpolation, repeat', r)

            zsize = arg.latent_size if arg.mode != Mode.style else arg.latent_size * 2
            z1, z2 = torch.randn(2, zsize)
            if torch.cuda.is_available():
                z1, z2 = z1.cuda(), z2.cuda()

            zpairs.append((z1, z2))

            zs = util.slerp(z1, z2, 10)

            print('== sentences (temp={}) =='.format(TEMPS[r]))
            sentences = seq_dec.sample(z=zs, temperature=TEMPS[r])

            for s in sentences:
                print('   ', decode(s))

        print('== images ==')

        util.interpolate(zpairs, img_dec, name='interpolate.{}'.format(e))
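
The util.kl_loss and util.sample helpers are not shown. Since the encoders return a (mean, log-variance) pair, these are presumably the standard VAE ingredients; a minimal sketch under that assumption (both arguments shaped (batch, latent_size)), not necessarily the project's exact util code:

import torch

def kl_loss(mu, logvar):
    # KL(q(z|x) || N(0, I)) per instance, summed over the latent dimensions
    return 0.5 * (logvar.exp() + mu.pow(2) - 1.0 - logvar).sum(dim=1)

def sample(mu, logvar):
    # reparameterization trick: z = mu + sigma * eps, with eps ~ N(0, I)
    eps = torch.randn_like(mu)
    return mu + (0.5 * logvar).exp() * eps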