Example #1
            t1 = time
            while (time < t1 + duty * period):
                power = increase_power(power, peak_power, slew_rate)
                res = calc_req(power, 1.0)
                of.write(",".join([str(time), str(res), str(power)]) + "\n")
                time += timestep
    return True


# first pass: count the valid parameter combinations so the progress bar has a maximum
total = 0
for i in range(len(period)):
    for j in range(len(slew_rate)):
        for k in range(len(amplitude)):
            if (arg_check(period[i], slew_rate[j], min_power,
                          min_power + amplitude[k], timestep, duration,
                          outpath, i, j, k)):
                total += 1
pbar = ProgressBar(widgets=[Percentage(), Bar(), ETA()], maxval=total).start()

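# second pass: generate the traces, advancing the progress bar with the running count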
total = 0
for i in range(len(period)):
    for j in range(len(slew_rate)):
        for k in range(len(amplitude)):
            if (trace(period[i], slew_rate[j], min_power,
                      min_power + amplitude[k], timestep, duration, outpath, i,
                      j, k)):
                total += 1
            pbar.update(total)
pbar.finish()
Example #2
def load_sequences_into_memory(dataset_top_dir, type_name):
    X_data = []
    y_data = []

    num_seq_dirs = 0
    class_wise_totals = {}

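    # first pass: walk the dataset tree just to count sequence directories (progress bar maximum and per-class totals)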
    data_set_type_dir = os.path.join(dataset_top_dir, type_name)
    #print('Processing ' + data_set_type_dir)
    class_names = os.listdir(data_set_type_dir)
    #print('Found class_names ' + str(class_names))
    for class_name in class_names:
        num_sequences_for_class = 0
        class_dir = os.path.join(data_set_type_dir, class_name)
        #print('Processing ' + class_dir)
        data_set_names = os.listdir(class_dir)
        #print('Found datasets ' + str(data_set_names))
        for data_set_name in data_set_names:
            data_set_dir = os.path.join(class_dir, data_set_name)
            #print('Processing data_set_dir=' + data_set_dir)
            person_dir_names = os.listdir(data_set_dir)
            # print('Found person dirs=' + str(person_dir_names))
            for person_dir_name in person_dir_names:
                person_dir = os.path.join(data_set_dir, person_dir_name)
                # print('\nProcessing person_dir_name=' + person_dir_name)

                sequence_dir_names = os.listdir(person_dir)
                n = len(sequence_dir_names)
                num_seq_dirs += n
                num_sequences_for_class += n

        class_wise_totals[class_name] = num_sequences_for_class

    print('Loading ' + str(num_seq_dirs) + ' sequences into memory for ' +
          data_set_type_dir)
    print('Class-wise totals: ' + str(class_wise_totals))

    widgets = [
        ETA(),
        progressbar.Bar('>', '[', ']'),
        Percentage(),
        RotatingMarker()
    ]
    bar = progressbar.ProgressBar(maxval=num_seq_dirs, widgets=widgets)

    bar.start()
    i = 0
    bar.update(i)

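    # second pass: walk the tree again, this time loading each sequence into memory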
    data_set_type_dir = os.path.join(dataset_top_dir, type_name)
    class_names = os.listdir(data_set_type_dir)
    for class_name in class_names:
        class_dir = os.path.join(data_set_type_dir, class_name)
        data_set_names = os.listdir(class_dir)
        for data_set_name in data_set_names:
            data_set_dir = os.path.join(class_dir, data_set_name)
            person_dir_names = os.listdir(data_set_dir)
            for person_dir_name in person_dir_names:
                person_dir = os.path.join(data_set_dir, person_dir_name)

                sequence_dir_names = os.listdir(person_dir)

                for sequence_dir_name in sequence_dir_names:
                    sequence_dir = os.path.join(person_dir, sequence_dir_name)
                    facial_landmark_file_names = sorted(
                        os.listdir(sequence_dir))
                    facial_landmark_file_names = facial_landmark_file_names[
                        25:50]
                    # this should not happen if the data preparation has happened correctly
                    if len(facial_landmark_file_names) != FRAME_SEQ_LEN:
                        print('WARNING: Ignoring sequence dir ' +
                              sequence_dir + ' with sequence len ' +
                              str(len(facial_landmark_file_names)))
                        continue

                    lip_separation_sequence = []
                    for facial_landmark_file_name in facial_landmark_file_names:
                        facial_landmark_file_path = os.path.join(
                            sequence_dir, facial_landmark_file_name)
                        with open(facial_landmark_file_path, 'r') as f_obj:
                            reader = csv.reader(f_obj)
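                            # each row holds flattened x,y landmark coordinates; only the first row is used (note the break below)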
                            for coords in reader:
                                part_61 = (int(coords[2 * 61]),
                                           int(coords[2 * 61 + 1]))
                                part_67 = (int(coords[2 * 67]),
                                           int(coords[2 * 67 + 1]))
                                part_62 = (int(coords[2 * 62]),
                                           int(coords[2 * 62 + 1]))
                                part_66 = (int(coords[2 * 66]),
                                           int(coords[2 * 66 + 1]))
                                part_63 = (int(coords[2 * 63]),
                                           int(coords[2 * 63 + 1]))
                                part_65 = (int(coords[2 * 65]),
                                           int(coords[2 * 65 + 1]))

                                A = dist(part_61, part_67)
                                B = dist(part_62, part_66)
                                C = dist(part_63, part_65)

                                avg_gap = (A + B + C) / 3.0

                                break

                            # note that [avg_gap] is a feature vector of length 1. hence the square brackets
                            lip_separation_sequence.append([avg_gap])

                    scaler = MinMaxScaler()
                    arr = scaler.fit_transform(lip_separation_sequence)
                    X_data.append(arr)
                    y_data.append(CLASS_HASH[class_name])

                    i += 1
                    bar.update(i)

    bar.finish()

    X_data = np.array(X_data)
    y_data = np.array(y_data)
    print('\nData loading completed. X_data.shape=' + str(X_data.shape) +
          ' y_data.shape=' + str(y_data.shape))

    return (X_data, y_data)
Example #3
def generate_subtitles(  # pylint: disable=too-many-locals,too-many-arguments
    source_path,
    output=None,
    concurrency=DEFAULT_CONCURRENCY,
    src_language=DEFAULT_SRC_LANGUAGE,
    dst_language=DEFAULT_DST_LANGUAGE,
    subtitle_file_format=DEFAULT_SUBTITLE_FORMAT,
    api_key=None,
):
    """
    Given an input audio/video file, generate subtitles in the specified language and format.
    """
    audio_filename, audio_rate = extract_audio(source_path)

    regions = find_speech_regions(audio_filename)

    pool = multiprocessing.Pool(concurrency)
    converter = FLACConverter(source_path=audio_filename)
    recognizer = SpeechRecognizer(language=src_language,
                                  rate=audio_rate,
                                  api_key=GOOGLE_SPEECH_API_KEY)

    transcripts = []
    if regions:
        try:
            widgets = [
                "Converting speech regions to FLAC files: ",
                Percentage(), ' ',
                Bar(), ' ',
                ETA()
            ]
            pbar = ProgressBar(widgets=widgets, maxval=len(regions)).start()
            extracted_regions = []
            for i, extracted_region in enumerate(pool.imap(converter,
                                                           regions)):
                extracted_regions.append(extracted_region)
                pbar.update(i)
            pbar.finish()

            widgets = [
                "Performing speech recognition: ",
                Percentage(), ' ',
                Bar(), ' ',
                ETA()
            ]
            pbar = ProgressBar(widgets=widgets, maxval=len(regions)).start()

            for i, transcript in enumerate(
                    pool.imap(recognizer, extracted_regions)):
                transcripts.append(transcript)
                pbar.update(i)
            pbar.finish()

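            # translate only when the source and destination languages differ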
            if src_language.split("-")[0] != dst_language.split("-")[0]:
                if api_key:
                    google_translate_api_key = api_key
                    translator = Translator(dst_language,
                                            google_translate_api_key,
                                            dst=dst_language,
                                            src=src_language)
                    prompt = "Translating from {0} to {1}: ".format(
                        src_language, dst_language)
                    widgets = [prompt, Percentage(), ' ', Bar(), ' ', ETA()]
                    pbar = ProgressBar(widgets=widgets,
                                       maxval=len(regions)).start()
                    translated_transcripts = []
                    for i, transcript in enumerate(
                            pool.imap(translator, transcripts)):
                        translated_transcripts.append(transcript)
                        pbar.update(i)
                    pbar.finish()
                    transcripts = translated_transcripts
                else:
                    print(
                        "Error: Subtitle translation requires specified Google Translate API key. "
                        "See --help for further information.")
                    return 1

        except KeyboardInterrupt:
            pbar.finish()
            pool.terminate()
            pool.join()
            print("Cancelling transcription")
            raise

    timed_subtitles = [(r, t) for r, t in zip(regions, transcripts) if t]
    formatter = FORMATTERS.get(subtitle_file_format)
    formatted_subtitles = formatter(timed_subtitles)

    dest = output

    if not dest:
        base = os.path.splitext(source_path)[0]
        dest = "{base}.{format}".format(base=base, format=subtitle_file_format)

    with open(dest, 'wb') as output_file:
        output_file.write(formatted_subtitles.encode("utf-8"))

    os.remove(audio_filename)

    return dest
Example #4
def main():
    ##################
    #These change a lot
    numWaveforms = 2
    numThreads = 8

    ndim = 6 * numWaveforms + 13
    nwalkers = 100 * ndim

    iter = 50000
    burnIn = 49000
    wfPlotNumber = 100

    ######################

    doPlots = 1

    #  plt.ion()

    fitSamples = 200
    timeStepSize = 1.  #ns

    #Prepare detector
    tempGuess = 79.310080
    gradGuess = 0.04
    pcRadGuess = 2.5
    pcLenGuess = 1.6

    #Create a detector model
    detName = "conf/P42574A_grad%0.2f_pcrad%0.2f_pclen%0.2f.conf" % (0.05, 2.5,
                                                                     1.65)
    det = Detector(detName,
                   temperature=tempGuess,
                   timeStep=timeStepSize,
                   numSteps=fitSamples * 10)
    det.LoadFields("P42574A_fields_v3.npz")
    det.SetFields(pcRadGuess, pcLenGuess, gradGuess)

    b_over_a = 0.107213
    c = -0.815152
    d = 0.822696
    rc1 = 74.4
    rc2 = 1.79
    rcfrac = 0.992
    trapping_rc = 120  #us
    det.SetTransferFunction(b_over_a, c, d, rc1, rc2, rcfrac)
    det.trapping_rc = trapping_rc

    det.siggenInst.set_velocity_type(1)
    h_100_mu0, h_100_beta, h_100_e0, h_111_mu0, h_111_beta, h_111_e0 = 66333., 0.744, 181., 107270., 0.580, 100.

    mlw.initializeDetector(det, )

    #and the remaining 6 are for the transfer function
    fig_size = (20, 10)

    #Create a decent start guess by fitting waveform-by-waveform
    wfFileName = "P42574A_12_fastandslow_oldwfs.npz"
    #  wfFileName =  "P42574A_24_spread.npz"
    #  wfFileName =  "P42574A_5_fast.npz"

    if os.path.isfile(wfFileName):
        data = np.load(wfFileName)
        results = data['results']
        wfs = data['wfs']

        #    wfs = np.delete(wfs, [2])
        #    results = np.delete(results, [2])
        #    results = results[::3]

        idxs = [4, 5]

        wfs = wfs[idxs]
        results = results[idxs]

        numWaveforms = wfs.size
    else:
        print "No saved waveforms available.  Exiting."
        exit(0)

    #prep holders for each wf-specific param
    r_arr = np.empty(numWaveforms)
    phi_arr = np.empty(numWaveforms)
    z_arr = np.empty(numWaveforms)
    scale_arr = np.empty(numWaveforms)
    t0_arr = np.empty(numWaveforms)
    smooth_arr = np.ones(numWaveforms) * 7.
    simWfArr = np.empty((1, numWaveforms, fitSamples))

    #Prepare the initial value arrays
    #  plt.ion()
    #  fig = plt.figure()
    for (idx, wf) in enumerate(wfs):
        wf.WindowWaveformTimepoint(
            fallPercentage=.99,
            rmsMult=2,
        )
        r_arr[idx], phi_arr[idx], z_arr[idx], scale_arr[idx], t0_arr[
            idx], smooth_arr[idx] = results[idx]['x']
#    plt.plot(wf.windowedWf)
#    value = raw_input('  --> Press q to quit, any other key to continue\n')

#    t0_arr[idx] -= 15
#Plot the waveforms to take a look at the initial guesses
    if True:
        plt.ion()

        fig1 = plt.figure(1)
        plt.clf()
        gs = gridspec.GridSpec(2, 1, height_ratios=[4, 1])
        ax0 = plt.subplot(gs[0])
        ax1 = plt.subplot(gs[1], sharex=ax0)
        ax1.set_xlabel("Digitizer Time [ns]")
        ax0.set_ylabel("Voltage [Arb.]")
        ax1.set_ylabel("Residual")

        for (idx, wf) in enumerate(wfs):

            print "WF number %d:" % idx
            mlw.initializeWaveform(wf)
            dataLen = wf.wfLength
            t_data = np.arange(dataLen) * 10
            ax0.plot(t_data, wf.windowedWf, color="r")

            #      minresult = None
            #      minlike = np.inf
            #
            #      for r in np.linspace(4, np.floor(det.detector_radius)-3, 6):
            #        for z in np.linspace(4, np.floor(det.detector_length)-3, 6):
            #  #        for t0_guess in np.linspace(wf.t0Guess-10, wf.t0Guess+5, 3):
            #            if not det.IsInDetector(r,0,z): continue
            #            startGuess = [r, np.pi/8, z, wf.wfMax, wf.t0Guess-5, 10]
            #            result = op.minimize(nll, startGuess,   method="Nelder-Mead")
            #            r, phi, z, scale, t0, smooth, = result["x"]
            #            ml_wf = np.copy(det.MakeSimWaveform(r, phi, z, scale, t0, fitSamples, h_smoothing=smooth, ))
            #            if ml_wf is None:
            #              print r, z
            #              continue
            #            if result['fun'] < minlike:
            #              minlike = result['fun']
            #              minresult = result
            #      r_arr[idx], phi_arr[idx], z_arr[idx], scale_arr[idx], t0_arr[idx], smooth_arr[idx]  = minresult['x']
            r, phi, z, scale, t0, smooth = results[idx]['x']
            startGuess = [r, phi, z, scale, t0, smooth]
            result = op.minimize(nll, startGuess, method="Powell")
            r_arr[idx], phi_arr[idx], z_arr[idx], scale_arr[idx], t0_arr[
                idx], smooth_arr[idx] = result['x']

            print "  >>r: %f\n  >>phi %f\n  >>z %f\n  >>e %f\n  >>t0 %f\n >>smooth %f" % (
                r_arr[idx], phi_arr[idx], z_arr[idx], scale_arr[idx],
                t0_arr[idx], smooth_arr[idx])
            ml_wf = det.MakeSimWaveform(r_arr[idx],
                                        phi_arr[idx],
                                        z_arr[idx],
                                        scale_arr[idx],
                                        t0_arr[idx],
                                        fitSamples,
                                        h_smoothing=smooth_arr[idx])
            ax0.plot(t_data, ml_wf[:dataLen], color="g")
            ax1.plot(
                t_data,
                ml_wf[:dataLen] - wf.windowedWf,
                color="g",
            )
        value = raw_input('  --> Press q to quit, any other key to continue\n')
        plt.ioff()
        if value == 'q': exit(0)

    #Initialize the multithreading
    p = Pool(numThreads,
             initializer=initializeDetectorAndWaveforms,
             initargs=[det, wfs])
    initializeDetectorAndWaveforms(det, wfs)

    #Do the MCMC
    mcmc_startguess = np.hstack((
        r_arr[:],
        phi_arr[:],
        z_arr[:],
        scale_arr[:],
        t0_arr[:],
        smooth_arr[:],  # waveform-specific params
        trapping_rc,
        b_over_a,
        c,
        d,
        rc1,
        rc2,
        rcfrac,
        h_100_mu0,
        h_100_beta,
        h_100_e0,
        h_111_mu0,
        h_111_beta,
        h_111_e0))  # detector-specific

    #number of walkers _must_ be even
    if nwalkers % 2:
        nwalkers += 1

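    # give each walker a small relative perturbation of the start guess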
    pos0 = [
        mcmc_startguess + 1e-2 * np.random.randn(ndim) * mcmc_startguess
        for i in range(nwalkers)
    ]

    trapIdx = -13
    rc1idx = -9
    rc2idx = -8
    rcfracidx = -7

    #Make sure everything in the initial guess is within bounds
    for pos in pos0:
        pos[:numWaveforms] = np.clip(pos[:numWaveforms], 0,
                                     np.floor(det.detector_radius * 10.) / 10.)
        pos[numWaveforms:2 * numWaveforms] = np.clip(
            pos[numWaveforms:2 * numWaveforms], 0, np.pi / 4)
        pos[2 * numWaveforms:3 * numWaveforms] = np.clip(
            pos[2 * numWaveforms:3 * numWaveforms], 0,
            np.floor(det.detector_length * 10.) / 10.)
        pos[4 * numWaveforms:5 * numWaveforms] = np.clip(
            pos[4 * numWaveforms:5 * numWaveforms], 0, fitSamples)
        pos[5 * numWaveforms:6 * numWaveforms] = np.clip(
            pos[5 * numWaveforms:6 * numWaveforms], 0, 20.)

        #    pos[tempIdx] = np.clip(pos[tempIdx], 40, 120)
        pos[trapIdx] = np.clip(pos[trapIdx], 0, np.inf)
        pos[rcfracidx] = np.clip(pos[rcfracidx], 0, 1)
        pos[rc2idx] = np.clip(pos[rc2idx], 0, np.inf)
        pos[rc1idx] = np.clip(pos[rc1idx], 0, np.inf)

        prior = lnprior(pos, )
        if not np.isfinite(prior):
            print "BAD PRIOR WITH START GUESS YOURE KILLING ME SMALLS"
            print pos
            exit(0)

    #Initialize, run the MCMC
    sampler = emcee.EnsembleSampler(nwalkers, ndim, lnprob, pool=p)

    #w/ progress bar, & time the thing
    bar = ProgressBar(widgets=[Percentage(), Bar(), ETA()],
                      maxval=iter).start()
    for (idx, result) in enumerate(
            sampler.sample(pos0, iterations=iter, storechain=True)):
        bar.update(idx + 1)
    bar.finish()

    if not doPlots:
        exit(0)

    print "Dumping chain to file..."
    np.save("sampler_%dwfs.npy" % numWaveforms, sampler.chain)

    print "Making MCMC steps figure..."

    #  #########  Plots for Waveform params
    #  stepsFig = plt.figure(2, figsize=fig_size)
    #  plt.clf()
    #  ax0 = stepsFig.add_subplot(611)
    #  ax1 = stepsFig.add_subplot(612, sharex=ax0)
    #  ax2 = stepsFig.add_subplot(613, sharex=ax0)
    #  ax3 = stepsFig.add_subplot(614, sharex=ax0)
    #  ax4 = stepsFig.add_subplot(615, sharex=ax0)
    #  ax5 = stepsFig.add_subplot(616, sharex=ax0)
    #
    #  ax0.set_ylabel('r')
    #  ax1.set_ylabel('phi')
    #  ax2.set_ylabel('z')
    #  ax3.set_ylabel('scale')
    #  ax4.set_ylabel('t0')
    #  ax5.set_ylabel('smoothing')
    #
    #  for i in range(nwalkers):
    #    for j in range(wfs.size):
    #      ax0.plot(sampler.chain[i,:,0+j], alpha=0.3)                 # r
    #      ax1.plot(sampler.chain[i,:,numWaveforms + j], alpha=0.3)    # phi
    #      ax2.plot(sampler.chain[i,:,2*numWaveforms + j], alpha=0.3)  #z
    #      ax3.plot(sampler.chain[i,:,3*numWaveforms + j],  alpha=0.3) #energy
    #      ax4.plot(sampler.chain[i,:,4*numWaveforms + j],  alpha=0.3) #t0
    #      ax5.plot(sampler.chain[i,:,5*numWaveforms + j],  alpha=0.3) #smoothing
    #
    #  plt.savefig("emcee_wfchain_%dwfs.png" % numWaveforms)
    #
    #
    #  #and for the transfer function
    stepsFigTF = plt.figure(7, figsize=fig_size)
    plt.clf()
    tf0 = stepsFigTF.add_subplot(711)
    tf1 = stepsFigTF.add_subplot(712, sharex=tf0)
    tf2 = stepsFigTF.add_subplot(713, sharex=tf0)
    tf3 = stepsFigTF.add_subplot(714, sharex=tf0)
    tf4 = stepsFigTF.add_subplot(715, sharex=tf0)
    tf5 = stepsFigTF.add_subplot(716, sharex=tf0)
    tf6 = stepsFigTF.add_subplot(717, sharex=tf0)
    #  tf7 = stepsFigTF.add_subplot(818, sharex=tf0)

    tf0.set_ylabel('b_over_a')
    tf1.set_ylabel('c')
    tf2.set_ylabel('d')
    tf3.set_ylabel('rc1')
    tf4.set_ylabel('rc2')
    tf5.set_ylabel('rcfrac')
    tf6.set_ylabel('temp')
    #  tf7.set_ylabel('trapping')

    for i in range(nwalkers):
        tf0.plot(sampler.chain[i, :, trapIdx + 1], "b", alpha=0.3)  #2
        tf1.plot(sampler.chain[i, :, trapIdx + 2], "b", alpha=0.3)  #den1
        tf2.plot(sampler.chain[i, :, trapIdx + 3], "b", alpha=0.3)  #2
        tf3.plot(sampler.chain[i, :, trapIdx + 4], "b", alpha=0.3)  #3
        tf4.plot(sampler.chain[i, :, trapIdx + 5], "b", alpha=0.3)  #3
        tf5.plot(sampler.chain[i, :, trapIdx + 6], "b", alpha=0.3)  #3
        tf6.plot(sampler.chain[i, :, trapIdx], "b", alpha=0.3)  #3
#    tf6.plot(sampler.chain[i,:,trapIdx], "b", alpha=0.3) #3

    plt.savefig("emcee_tfchain_%dwfs.png" % numWaveforms)

    stepsFigTF = plt.figure(6, figsize=fig_size)
    plt.clf()
    tf0 = stepsFigTF.add_subplot(611)
    tf1 = stepsFigTF.add_subplot(612, sharex=tf0)
    tf2 = stepsFigTF.add_subplot(613, sharex=tf0)
    tf3 = stepsFigTF.add_subplot(614, sharex=tf0)
    tf4 = stepsFigTF.add_subplot(615, sharex=tf0)
    tf5 = stepsFigTF.add_subplot(616, sharex=tf0)
    #  tf6 = stepsFigTF.add_subplot(717, sharex=tf0)
    #  tf7 = stepsFigTF.add_subplot(818, sharex=tf0)

    tf0.set_ylabel('1')
    tf1.set_ylabel('2')
    tf2.set_ylabel('3')
    tf3.set_ylabel('4')
    tf4.set_ylabel('5')
    tf5.set_ylabel('6')
    #  tf6.set_ylabel('temp')
    #  tf7.set_ylabel('trapping')

    for i in range(nwalkers):
        tf0.plot(sampler.chain[i, :, trapIdx + 7], "b", alpha=0.3)  #2
        tf1.plot(sampler.chain[i, :, trapIdx + 8], "b", alpha=0.3)  #den1
        tf2.plot(sampler.chain[i, :, trapIdx + 9], "b", alpha=0.3)  #2
        tf3.plot(sampler.chain[i, :, trapIdx + 10], "b", alpha=0.3)  #3
        tf4.plot(sampler.chain[i, :, trapIdx + 11], "b", alpha=0.3)  #3
        tf5.plot(sampler.chain[i, :, trapIdx + 12], "b", alpha=0.3)  #3


#    tf6.plot(sampler.chain[i,:,trapIdx], "b", alpha=0.3) #3
#    tf6.plot(sampler.chain[i,:,trapIdx], "b", alpha=0.3) #3

    plt.savefig("emcee_velochain_%dwfs.png" % numWaveforms)

    samples = sampler.chain[:, burnIn:, :].reshape((-1, ndim))

    stepsFigTF = plt.figure(5, figsize=(20, 10))
    plotnum = 600
    tf0 = stepsFigTF.add_subplot(plotnum + 11)
    tf1 = stepsFigTF.add_subplot(plotnum + 12, )
    tf2 = stepsFigTF.add_subplot(plotnum + 13, )
    tf3 = stepsFigTF.add_subplot(plotnum + 14, )
    tf4 = stepsFigTF.add_subplot(plotnum + 15, )
    tf5 = stepsFigTF.add_subplot(plotnum + 16, )

    tf0.set_ylabel('h_100_mu0')
    tf1.set_ylabel('h_100_beta')
    tf2.set_ylabel('h_100_e0')
    tf3.set_ylabel('h_111_mu0')
    tf4.set_ylabel('h_111_beta')
    tf5.set_ylabel('h_111_e0')

    num_bins = 300
    [n, b, p] = tf0.hist(samples[:, -6], bins=num_bins)
    print "h_100_mu0 mode is %f" % b[np.argmax(n)]

    [n, b, p] = tf1.hist(samples[:, -5], bins=num_bins)
    print "h_100_beta mode is %f" % b[np.argmax(n)]

    [n, b, p] = tf2.hist(samples[:, -4], bins=num_bins)
    print "h_100_e0 mode is %f" % b[np.argmax(n)]

    [n, b, p] = tf3.hist(samples[:, -3], bins=num_bins)
    print "h_111_mu0 mode is %f" % b[np.argmax(n)]

    [n, b, p] = tf4.hist(samples[:, -2], bins=num_bins)
    print "h_111_beta mode is %f" % b[np.argmax(n)]

    [n, b, p] = tf5.hist(samples[:, -1], bins=num_bins)
    print "h_111_e0 mode is %f" % b[np.argmax(n)]

    #  print "temp is %f" % np.median(samples[:,tempIdx])
    print "trapping is %f" % np.median(samples[:, trapIdx])
    # transfer-function parameters occupy columns -12..-7; the last six columns are the hole-mobility parameters
    print "b_over_a is %f" % np.median(samples[:, -12])
    print "c is %f" % np.median(samples[:, -11])
    print "d is %f" % np.median(samples[:, -10])
    print "rc_decay1 is %f" % np.median(samples[:, -9])
    print "rc_decay2 is %f" % np.median(samples[:, -8])
    print "rc_frac   is %f" % np.median(samples[:, -7])

    #TODO: Aaaaaaand plot some waveforms..
    simWfs = np.empty((wfPlotNumber, numWaveforms, fitSamples))

    for idx, (theta) in enumerate(samples[np.random.randint(
            len(samples), size=wfPlotNumber)]):
        #    temp  = theta[tempIdx]
        #    trapping_rc  = theta[trapIdx]
        trapping_rc, b_over_a, c, d, rc1, rc2, rcfrac, h_100_mu0, h_100_beta, h_100_e0, h_111_mu0, h_111_beta, h_111_e0, = theta[
            -13:]
        r_arr, phi_arr, z_arr, scale_arr, t0_arr, smooth_arr = theta[:-13].reshape(
            (6, numWaveforms))

        det.siggenInst.set_hole_params(h_100_mu0, h_100_beta, h_100_e0,
                                       h_111_mu0, h_111_beta, h_111_e0)
        #    det.SetTemperature(temp)
        det.trapping_rc = trapping_rc

        det.SetTransferFunction(b_over_a, c, d, rc1, rc2, rcfrac)

        for wf_idx in range(numWaveforms):
            wf_i = det.MakeSimWaveform(r_arr[wf_idx],
                                       phi_arr[wf_idx],
                                       z_arr[wf_idx],
                                       scale_arr[wf_idx],
                                       t0_arr[wf_idx],
                                       fitSamples,
                                       h_smoothing=smooth_arr[wf_idx])
            if wf_i is None:
                print "Waveform %d, %d is None" % (idx, wf_idx)
                continue
            simWfs[idx, wf_idx, :] = wf_i

    residFig = plt.figure(4, figsize=(20, 15))
    helpers.plotManyResidual(simWfs, wfs, figure=residFig)
    plt.savefig("emcee_waveforms_%dwfs.png" % numWaveforms)

    value = raw_input('  --> Press q to quit, any other key to continue\n')
Example #5
def go():

    # check for initial launch
    if sdk_vars["loop_counter"] == 0:

        sdk_vars['load_list_a'] = args['load_list_a']
        sdk_vars['load_list_b'] = args['load_list_b']
        sdk_vars['load_wn_list_a'] = args['load_wn_list_a']
        sdk_vars['load_wn_list_b'] = args['load_wn_list_b']

        if args["debug"] == 1:
            logging.basicConfig(
                level=logging.INFO,
                format=
                "%(levelname)s [%(name)s.%(funcName)s:%(lineno)d] %(message)s")
            clilogger = logging.getLogger()
            clilogger.setLevel(logging.INFO, )
        elif args["debug"] == 2:
            logging.basicConfig(
                level=logging.DEBUG,
                format=
                "%(levelname)s [%(name)s.%(funcName)s:%(lineno)d] %(message)s")
            clilogger = logging.getLogger()
            clilogger.setLevel(logging.DEBUG)
        else:
            # set logging off unless asked for
            pass

        # Log
        logger.debug("LOOP_COUNTER: {0}".format(sdk_vars["loop_counter"]))

        logger.info("Initial Launch:")

        # create file-system friendly tenant str.
        #        sdk_vars["tenant_str"] = "".join(x for x in cgx_session.tenant_name if x.isalnum()).lower()
        sdk_vars["tenant_str"] = "tenant"

        # load site lists for first run.
        if sdk_vars['load_list_a']:
            try:
                with open(sdk_vars['load_list_a']) as data_file:
                    data = json.load(data_file)
                site_list_a = data[:]
                print(
                    "\n Site List A:\n\tSuccessfully loaded {0} entries from {1}."
                    .format(len(data), sdk_vars['load_list_a']))
            except (ValueError, IOError) as e:
                print("ERROR, Site List A: Could not load {0}: {1}.".format(
                    sdk_vars['load_list_a'], e))
                site_list_a = []
        else:
            site_list_a = []

        if sdk_vars['load_list_b']:
            try:
                with open(sdk_vars['load_list_b']) as data_file:
                    data = json.load(data_file)
                site_list_b = data[:]
                print(
                    "\n Site List B:\n\tSuccessfully loaded {0} entries from {1}."
                    .format(len(data), sdk_vars['load_list_b']))
            except (ValueError, IOError) as e:
                print("ERROR, Site List B: Could not load {0}: {1}.".format(
                    sdk_vars['load_list_b'], e))
                site_list_b = []
        else:
            site_list_b = []

        # load wan network for first run.
        if sdk_vars['load_wn_list_a']:
            try:
                with open(sdk_vars['load_wn_list_a']) as data_file:
                    data = json.load(data_file)
                sdk_vars['reload_wn_list_a'] = data[:]
                print(
                    "\n Site WAN Network List A:\n\tSuccessfully loaded {0} entries from {1}."
                    .format(len(data), sdk_vars['load_wn_list_a']))
            except (ValueError, IOError) as e:
                print(
                    "ERROR, Site WAN Network List A: Could not load {0}: {1}.".
                    format(sdk_vars['load_wn_list_a'], e))
                sdk_vars['reload_wn_list_a'] = []
        else:
            sdk_vars['reload_wn_list_a'] = []

        if sdk_vars['load_wn_list_b']:
            try:
                with open(sdk_vars['load_wn_list_b']) as data_file:
                    data = json.load(data_file)
                sdk_vars['reload_wn_list_b'] = data[:]
                print(
                    "\n Site WAN Network List B:\n\t Successfully loaded {0} entries from {1}."
                    .format(len(data), sdk_vars['load_wn_list_b']))
            except (ValueError, IOError) as e:
                print(
                    "ERROR, Site WAN Network List B: Could not load {0}: {1}.".
                    format(sdk_vars['load_wn_list_b'], e))
                sdk_vars['reload_wn_list_b'] = []
        else:
            sdk_vars['reload_wn_list_b'] = []

    else:
        # re-loop, pull previous list out of sdk_vars dict.
        site_list_a = sdk_vars["reload_list_a"]
        site_list_b = sdk_vars["reload_list_b"]

    # Get/update list of sites, create python dictionary to map site ID to name.
    print("Caching all site information, please wait...")
    id_sitename_dict, sitename_id_dict, site_id_list, site_name_list, site_id_to_role_dict, site_tags \
        = siteid_to_name_dict(sdk_vars, cgx_session)
    id_wan_network_name_dict, wan_network_name_id_dict, wan_network_id_list, wan_network_name_list, \
        wan_network_to_type_dict = wannetworkid_to_name_dict(sdk_vars, cgx_session)

    logger.debug("SITE -> ROLE ({0}): {1}".format(
        len(site_id_to_role_dict), json.dumps(site_id_to_role_dict, indent=4)))

    # Begin Site Selection Loop
    loop = True
    while loop:

        # Print header
        print("")
        sites.print_selection_overview(site_list_a, site_list_b,
                                       sitename_id_dict, site_id_to_role_dict)
        print("")

        action = [("Edit Site List A", 'edit_sitelista'),
                  ("Edit Site List B", 'edit_sitelistb'),
                  ("Continue", 'continue')]

        banner = "Select Action:"
        line_fmt = "{0}: {1}"

        # just pull 2nd value
        list_name, selected_action = menus.quick_menu(banner, line_fmt, action)

        if selected_action == 'edit_sitelista':
            site_list_a = sites.edit_site_list(site_list_a, "Site List A",
                                               site_name_list,
                                               sdk_vars["tenant_str"],
                                               site_tags)
        elif selected_action == 'edit_sitelistb':
            site_list_b = sites.edit_site_list(site_list_b, "Site List B",
                                               site_name_list,
                                               sdk_vars["tenant_str"],
                                               site_tags)
        elif selected_action == "continue":
            if (len(site_list_a) < 1) or (len(site_list_b) < 1):
                print("\nERROR, must select at least one site in each list.")
            else:
                # Good to go, continue.
                loop = False
        else:
            cgx_session.interactive.logout()
            sys.exit()

    # save lists for re-use next loop.
    sdk_vars["reload_list_a"] = site_list_a
    sdk_vars["reload_list_b"] = site_list_b

    # sites selected, determine if this will be Internet or VPNoMPLS mesh
    action = [
        ("Internet VPN (Public)", 'publicwan'),
        ("Private WAN VPN (Private, VPN over MPLS, release 4.4.1+ only)",
         'privatewan'),
    ]

    banner = "Managing which type of VPN mesh:"
    line_fmt = "{0}: {1}"

    mesh_type = menus.quick_menu(banner, line_fmt, action)[1]

    # map type-specific anynet based on choice above
    anynet_specific_type = 'anynet'
    if mesh_type in ['privatewan']:
        anynet_specific_type = "private-anynet"
    elif mesh_type in ['publicwan']:
        anynet_specific_type = "public-anynet"

    # convert site lists (by name) to ID lists. Look up ID in previous sitename_id dict. if exists, enter.
    site_id_list_a = []
    for site in site_list_a:
        site_id = sitename_id_dict.get(site, None)
        if site_id:
            site_id_list_a.append(site_id)

    site_id_list_b = []
    for site in site_list_b:
        site_id = sitename_id_dict.get(site, None)
        if site_id:
            site_id_list_b.append(site_id)

    # combine site lists and remove duplicates so we can pull topology info from API once per site.
    combined_site_id_list = list(site_id_list_a)
    combined_site_id_list.extend(x for x in site_id_list_b
                                 if x not in site_id_list_a)

    # print json.dumps(combined_site_id_list, indent=4)

    # get/update topology

    print("Loading VPN topology information for {0} sites, please wait...".
          format(len(combined_site_id_list)))

    logger.debug('COMBINED_SITE_ID_LIST ({0}): {1}'.format(
        len(combined_site_id_list), json.dumps(combined_site_id_list,
                                               indent=4)))

    swi_to_wan_network_dict = {}
    swi_to_site_dict = {}
    wan_network_to_swi_dict = {}
    all_anynets = {}
    site_swi_dict = {}

    # could be a long query - start a progress bar.
    pbar = ProgressBar(widgets=[Percentage(), Bar(),
                                ETA()],
                       max_value=len(combined_site_id_list) + 1).start()
    site_processed = 1

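    # for each site: query VPN topology (anynet links) and Site WAN Interfaces, retrying failed API calls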
    for site in combined_site_id_list:
        site_swi_list = []

        query = {"type": "basenet", "nodes": [site]}

        status = False
        rest_call_retry = 0

        while not status:
            resp = cgx_session.post.topology(query)
            status = resp.cgx_status
            topology = resp.cgx_content

            if not status:
                print(
                    "API request for topology for site ID {0} failed/timed out. Retrying."
                    .format(site))
                rest_call_retry += 1
                # have we hit retry limit?
                if rest_call_retry >= sdk_vars['rest_call_max_retry']:
                    # Bail out
                    print("ERROR: could not query site ID {0}. Continuing.".
                          format(site))
                    status = True
                    topology = False
                else:
                    # wait and keep going.
                    time.sleep(1)

        if status and topology:
            # iterate topology. We need to iterate all of the matching SWIs, and existing anynet connections (sorted).
            logger.debug("TOPOLOGY: {0}".format(json.dumps(topology,
                                                           indent=4)))

            for link in topology.get('links', []):
                link_type = link.get('type', "")

                # if an anynet link (SWI to SWI)
                if link_type in ["anynet", anynet_specific_type]:
                    # vpn record, check for uniqueness.
                    # 4.4.1
                    source_swi = link.get('source_wan_if_id')
                    if not source_swi:
                        # 4.3.x compatibility
                        source_swi = link.get('source_wan_path_id')
                        if source_swi:
                            link['source_wan_if_id'] = source_swi
                    # 4.4.1
                    dest_swi = link.get('target_wan_if_id')
                    if not dest_swi:
                        # 4.3.x compatibility
                        dest_swi = link.get('target_wan_path_id')
                        if dest_swi:
                            link['target_wan_if_id'] = dest_swi
                    # create anynet lookup key
                    anynet_lookup_key = "_".join(sorted([source_swi,
                                                         dest_swi]))
                    if not all_anynets.get(anynet_lookup_key, None):
                        # path is not in current anynets, add
                        all_anynets[anynet_lookup_key] = link

        # Query 2 - now need to query SWI for site, since stub-topology may not be in topology info.
        status = False

        while not status:
            resp = cgx_session.get.waninterfaces(site)
            status = resp.cgx_status
            site_wan_if_result = resp.cgx_content

            if not status:
                print(
                    "API request for Site WAN Interfaces for site ID {0} failed/timed out. Retrying."
                    .format(site))
                time.sleep(1)

        if status and site_wan_if_result:
            site_wan_if_items = site_wan_if_result.get('items', [])
            logger.debug('SITE WAN IF ITEMS ({0}): {1}'.format(
                len(site_wan_if_items), json.dumps(site_wan_if_items,
                                                   indent=4)))

            # iterate all the site wan interfaces
            for current_swi in site_wan_if_items:
                # get the WN bound to the SWI.
                wan_network_id = current_swi.get('network_id', "")
                swi_id = current_swi.get('id', "")

                if swi_id:
                    # update SWI -> Site xlation dict
                    swi_to_site_dict[swi_id] = site

                # get the SWIs that match the mesh_type
                if wan_network_id and swi_id and wan_network_to_type_dict.get(
                        wan_network_id, "") == mesh_type:
                    logger.debug('SWI_ID = SITE: {0} = {1}'.format(
                        swi_id, site))

                    # query existing wan_network_to_swi dict if entry exists.
                    existing_swi_list = wan_network_to_swi_dict.get(
                        wan_network_id, [])

                    # update swi -> WN xlate dict
                    swi_to_wan_network_dict[swi_id] = wan_network_id

                    # update site-level SWI list.
                    site_swi_list.append(swi_id)

                    # update WN -> swi xlate dict
                    existing_swi_list.append(swi_id)
                    wan_network_to_swi_dict[wan_network_id] = existing_swi_list

        # add all matching mesh_type stubs to site_swi_dict
        site_swi_dict[site] = site_swi_list

        # iterate bar and counter.
        site_processed += 1
        pbar.update(site_processed)

    # finish after iteration.
    pbar.finish()

    # update all_anynets with site info. Can't do this above, because xlation table not finished when needed.
    for anynet_key, link in all_anynets.items():
        # 4.4.1
        source_swi = link.get('source_wan_if_id')
        if not source_swi:
            # 4.3.x compatibility
            source_swi = link.get('source_wan_path_id')
        # 4.4.1
        dest_swi = link.get('target_wan_if_id')
        if not dest_swi:
            # 4.3.x compatibility
            dest_swi = link.get('target_wan_path_id')
        link['source_site_id'] = swi_to_site_dict.get(
            source_swi, 'UNKNOWN (Unable to map SWI to Site ID)')
        link['target_site_id'] = swi_to_site_dict.get(
            dest_swi, 'UNKNOWN (Unable to map SWI to Site ID)')

    logger.debug("SWI -> WN xlate ({0}): {1}".format(
        len(swi_to_wan_network_dict),
        json.dumps(swi_to_wan_network_dict, indent=4)))
    logger.debug("All Anynets ({0}): {1}".format(
        len(all_anynets), json.dumps(all_anynets, indent=4)))
    logger.debug("SWI construct ({0}): {1}".format(
        len(site_swi_dict), json.dumps(site_swi_dict, indent=4)))
    logger.debug("WN xlate ({0}): {1}".format(
        len(wan_network_to_swi_dict),
        json.dumps(wan_network_to_swi_dict, indent=4)))
    logger.debug("SWI -> SITE xlate ({0}): {1}".format(
        len(swi_to_site_dict), json.dumps(swi_to_site_dict, indent=4)))

    new_anynets, current_anynets = vpn.main_vpn_menu(site_id_list_a,
                                                     site_id_list_b,
                                                     all_anynets,
                                                     site_swi_dict,
                                                     swi_to_wan_network_dict,
                                                     wan_network_to_swi_dict,
                                                     id_wan_network_name_dict,
                                                     wan_network_name_id_dict,
                                                     swi_to_site_dict,
                                                     id_sitename_dict,
                                                     mesh_type,
                                                     site_id_to_role_dict,
                                                     sdk_vars=sdk_vars,
                                                     sdk_session=cgx_session)

    reload_or_exit = anynets.main_anynet_menu(
        new_anynets, current_anynets, site_swi_dict, swi_to_wan_network_dict,
        wan_network_to_swi_dict, id_wan_network_name_dict,
        wan_network_name_id_dict, swi_to_site_dict, id_sitename_dict,
        mesh_type, site_id_to_role_dict, sdk_vars, cgx_session)

    # Increment global loop counter
    sdk_vars["loop_counter"] += 1

    return reload_or_exit
Example #6
    def __init__(self, maxval=0):
        widgets = [Percentage(), ' ', Bar(marker='=', left='[', right=']'), ' ', ETA()]
        super(ProgressBarContext, self).__init__(widgets=widgets, maxval=maxval, fd=sys.stdout)
Example #7
def _progress(iterable):
    if ProgressBar:
        pbar = ProgressBar(widgets=[SimpleProgress(), Bar(), ETA()])
    else:
        pbar = iter
    return pbar(iterable)
Example #8
    print("\nGoing to create subset of the corpus, %d lines to file '%s'" %
          (len(lines), subcorpus_txt))
    subcorpus_file = open(subcorpus_txt, 'w')
    for l in lines:
        subcorpus_file.write(l + '\n')
        subcorpus_file.flush()
    subcorpus_file.close()

    print("\nGoing to print %d random lines to file '%s'" %
          (len(lines), random_txt))
    widgets = [
        'Progress: ',
        Percentage(), ' ',
        Bar(marker='=', left='[', right=']'), ' ',
        ETA(), ' ',
        FileTransferSpeed()
    ]
    pbar = ProgressBar(widgets=widgets, maxval=len(lines))
    pbar.start()

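    # for each output line, draw a random length and random words from the given probability distributions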
    result_file = open(random_txt, 'w')
    done = 0
    for i in range(len(lines)):
        length = np.random.choice(sizes, p=sizes_probabilities)
        chosen_words = np.random.choice(words, length, p=word_probabilities)
        result_file.write(' '.join(chosen_words) + '\n')
        result_file.flush()
        done = done + 1
        pbar.update(done)
    result_file.close()
Example #9
#		print ("{} objects done...\r".format(counter))

parser = argparse.ArgumentParser(
    description="Parses title of pages (remove text[0:-60] for full title)")
parser.add_argument("-o",
                    "--output",
                    type=str,
                    help="Write to file (if empty - write to console")
args = parser.parse_args()
# is file specified?
if args.output is not None:
    outputFile = open(args.output, 'w')
else:
    # per the --output help text, fall back to the console when no file is given
    print("Output file not specified, writing to console")
    import sys
    outputFile = sys.stdout

#progress bar init
pbar_widgets = ['Progress: ', Percentage(), ' ', Bar(), ' ', ETA(), ' ']
pbar = ProgressBar(widgets=pbar_widgets)

# http://www.edudic.ru/fam/1/ # last 15181
lastitem = 500
for item in pbar(range(1, lastitem + 1)):
    page = html.parse('http://www.edudic.ru/fam/{}/'.format(item))
    outputFile.write(
        page.find(".//title").text[0:-60].encode('utf-8') + "\r\n")
    #progress(item)

print("FIN")
if args.output != None:
    outputFile.close()
Example #10
def main():

    global args
    args = parser.parse_args()

    cuda = args.cuda
    if cuda == 'true':
        cuda = True
    else:
        cuda = False

    result_path = os.path.join(args.result_path, args.task_name)
    if args.style_A:
        result_path = os.path.join(result_path, args.style_A)
    result_path = os.path.join(result_path, args.model_arch)

    model_path = os.path.join(args.model_path, args.task_name)
    if args.style_A:
        model_path = os.path.join(model_path, args.style_A)
    model_path = os.path.join(model_path, args.model_arch)

    if not os.path.exists(result_path):
        os.makedirs(result_path)
    if not os.path.exists(model_path):
        os.makedirs(model_path)

    epoch_size = args.epoch_size
    batch_size = args.batch_size

    #     generator_A = Generator()
    encoder_Img = ImageEncoder()
    decoder_Txt = TextDecoder()

    #     generator_B = Generator()
    encoder_Txt = TextEncoder()
    decoder_Img = ImageDecoder()

    #     discriminator_A = Discriminator()
    discriminator_Img = ImageDiscriminator()

    #     discriminator_B = Discriminator()
    discriminator_Txt = TextDiscriminator()

    if cuda:
        # test_I = test_I.cuda()
        # test_T = test_T.cuda()

        #          generator_A = generator_A.cuda()
        #        generator_A = generator_A.cuda()
        encoder_Txt = encoder_Txt.cuda()
        decoder_Img = decoder_Img.cuda()

        #         generator_B = generator_B.cuda()
        encoder_Img = encoder_Img.cuda()
        decoder_Txt = decoder_Txt.cuda()

        #         discriminator_A = discriminator_A.cuda()
        discriminator_Txt = discriminator_Txt.cuda()

        #         discriminator_B = discriminator_B.cuda()
        discriminator_Img = discriminator_Img.cuda()

    data_size = min(len(data_style_A), len(data_style_B))
    n_batches = (data_size // batch_size)

    recon_criterion = nn.MSELoss()
    gan_criterion = nn.BCELoss()
    feat_criterion = nn.HingeEmbeddingLoss()

    #     gen_params = chain(generator_A.parameters(), generator_B.parameters())
    enc_params = chain(encoder_Txt.parameters(), encoder_Img.parameters())
    dec_params = chain(decoder_Txt.parameters(), decoder_Img.parameters())

    #     dis_params = chain(discriminator_A.parameters(), discriminator_B.parameters())
    dis_params = chain(discriminator_Img.parameters(),
                       discriminator_Txt.parameters())

    # recreate the optimizers from the commented-out originals; gen_params is replaced
    # here by the encoder/decoder parameters actually defined above
    optim_gen = optim.Adam(chain(enc_params, dec_params),
                           lr=args.learning_rate,
                           betas=(0.5, 0.999),
                           weight_decay=0.00001)
    optim_dis = optim.Adam(dis_params,
                           lr=args.learning_rate,
                           betas=(0.5, 0.999),
                           weight_decay=0.00001)

    iters = 0

    gen_loss_total = []
    dis_loss_total = []

    for epoch in range(epoch_size):
        #         We don't want our data to be shuffled
        #         data_style_A, data_style_B = shuffle_data( data_style_A, data_style_B)

        widgets = ['epoch #%d|' % epoch, Percentage(), Bar(), ETA()]
        pbar = ProgressBar(maxval=n_batches, widgets=widgets)
        pbar.start()

        for i in range(n_batches):

            pbar.update(i)

            #             generator_A.zero_grad()
            #             generator_B.zero_grad()
            encoder_Img.zero_grad()
            decoder_Txt.zero_grad()

            encoder_Txt.zero_grad()
            decoder_Img.zero_grad()

            #             discriminator_A.zero_grad()
            discriminator_Txt.zero_grad()

            #             discriminator_B.zero_grad()
            discriminator_Img.zero_grad()

            #           Check what's going on here!!!!
            A_path = data_style_A[i * batch_size:(i + 1) * batch_size]
            B_path = data_style_B[i * batch_size:(i + 1) * batch_size]

            #           Parse Input Correctly
            if args.task_name.startswith('edges2'):
                A = read_images(A_path, 'A', args.image_size)
                B = read_images(B_path, 'B', args.image_size)
            elif args.task_name == 'handbags2shoes' or args.task_name == 'shoes2handbags':
                A = read_images(A_path, 'B', args.image_size)
                B = read_images(B_path, 'B', args.image_size)
            else:
                A = read_images(A_path, None, args.image_size)
                B = read_images(B_path, None, args.image_size)

#           Suppose we have our input sorted out
#             A = Variable( torch.FloatTensor( A ) )
            I = Variable(torch.FloatTensor(A))  # assumes the image batch is A read above
            #             B = Variable( torch.FloatTensor( B ) )
            T = Variable(torch.FloatTensor(B))  # assumes the paired batch is B read above

            if cuda:
                I = I.cuda()
                T = T.cuda()

#             AB = generator_B(A)
#             BA = generator_A(B)

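            # encode each modality, decode it into the other, then re-encode the decoded outputs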
            Iz = encoder_Img(I)
            Tz = encoder_Txt(T)

            IzT = decoder_Txt(Iz)
            TzI = decoder_Img(Tz)

            #             ABA = generator_A(AB)
            IzTz = encoder_Img(IzT)

            #             BAB = generator_B(BA)
            TzIz = encoder_Txt(TzI)

            # Reconstruction Loss
            #             recon_loss_A = recon_criterion( ABA, A )
            recon_loss_IT = recon_criterion(IzTz, Iz)

            #             recon_loss_B = recon_criterion( BAB, B )
            recon_loss_TI = recon_criterion(TzIz, Tz)

            recon_loss_All = recon_criterion(TzIz, IzTz)

            # Real/Fake GAN Loss (A)
            #             A_dis_real, A_feats_real = discriminator_A( A )
            #             A_dis_fake, A_feats_fake = discriminator_A( BA )

            #             dis_loss_A, gen_loss_A = get_gan_loss( A_dis_real, A_dis_fake, gan_criterion, cuda )
            #             fm_loss_A = get_fm_loss(A_feats_real, A_feats_fake, feat_criterion)

            #Real/Fake GImgN Loss (Image)
            Img_dis_real, Img_feats_real = discriminator_Img(I)
            Img_dis_fake, Img_feats_fake = discriminator_Img(TzI)  # fake image decoded from the text latent

            dis_loss_Img, gen_loss_Img = get_gan_loss(Img_dis_real,
                                                      Img_dis_fake,
                                                      gan_criterion, cuda)
            fm_loss_Img = get_fm_loss(Img_feats_real, Img_feats_fake,
                                      feat_criterion)

            #             # Real/Fake GAN Loss (B)
            #             B_dis_real, B_feats_real = discriminator_B( B )
            #             B_dis_fake, B_feats_fake = discriminator_B( AB )
            #
            #             dis_loss_B, gen_loss_B = get_gan_loss( B_dis_real, B_dis_fake, gan_criterion, cuda )
            #             fm_loss_B = get_fm_loss( B_feats_real, B_feats_fake, feat_criterion )

            # Real/Fake GAN Loss (Txt)
            Txt_dis_real, Txt_feats_real = discriminator_Txt(T)
            Txt_dis_fake, Txt_feats_fake = discriminator_Txt(IzT)

            dis_loss_Txt, gen_loss_Txt = get_gan_loss(Txt_dis_real,
                                                      Txt_dis_fake,
                                                      gan_criterion, cuda)
            fm_loss_Txt = get_fm_loss(Txt_feats_real, Txt_feats_fake,
                                      feat_criterion)

            # Total Loss

            if iters < args.gan_curriculum:
                rate = args.starting_rate
            else:
                rate = args.default_rate

#             gen_loss_A_total = (gen_loss_B*0.1 + fm_loss_B*0.9) * (1.-rate) + recon_loss_A * rate
#             gen_loss_B_total = (gen_loss_A*0.1 + fm_loss_A*0.9) * (1.-rate) + recon_loss_B * rate

#             if args.model_arch == 'discogan':
#                 gen_loss = gen_loss_A_total + gen_loss_B_total
#                 dis_loss = dis_loss_A + dis_loss_B
#             elif args.model_arch == 'recongan':
#                 gen_loss = gen_loss_A_total
#                 dis_loss = dis_loss_B
#             elif args.model_arch == 'gan':
#                 gen_loss = (gen_loss_B*0.1 + fm_loss_B*0.9)
#                 dis_loss = dis_loss_B

# We do things a little bit differently
            gen_loss_Img_total = (gen_loss_Txt * 0.1 + fm_loss_Txt * 0.9) * (
                1. - rate)  # + recon_loss_Img * rate
            gen_loss_Txt_total = (gen_loss_Img * 0.1 + fm_loss_Img * 0.9) * (
                1. - rate)  # + recon_loss_Txt * rate

            if args.model_arch == 'discogan':
                gen_loss = gen_loss_Img_total + gen_loss_Txt_total + (
                    recon_loss_All + recon_loss_IT + recon_loss_TI) * rate
                dis_loss = dis_loss_Img + dis_loss_Txt
            elif args.model_arch == 'recongan':
                gen_loss = gen_loss_Img_total + gen_loss_Txt_total + (
                    recon_loss_IT + recon_loss_TI) * rate
                dis_loss = dis_loss_Txt
            elif args.model_arch == 'gan':
                gen_loss = (gen_loss_Txt * 0.1 + fm_loss_Txt * 0.9)
                dis_loss = dis_loss_Txt

            if iters % args.update_interval == 0:
                dis_loss.backward()
                optim_dis.step()
            else:
                gen_loss.backward()
                optim_gen.step()

#             if iters % args.log_interval == 0:
#                 print "---------------------"
#                 print "GEN Loss:", as_np(gen_loss_A.mean()), as_np(gen_loss_B.mean())
#                 print "Feature Matching Loss:", as_np(fm_loss_A.mean()), as_np(fm_loss_B.mean())
#                 print "RECON Loss:", as_np(recon_loss_A.mean()), as_np(recon_loss_B.mean())
#                 print "DIS Loss:", as_np(dis_loss_A.mean()), as_np(dis_loss_B.mean())

            if iters % args.log_interval == 0:
                print "---------------------"
                print "GEN Loss:", as_np(gen_loss_Img.mean()), as_np(
                    gen_loss_Txt.mean())
                print "Feature Matching Loss:", as_np(
                    fm_loss_Img.mean()), as_np(fm_loss_Txt.mean())
                print "RECON Loss:", as_np(recon_loss_IT.mean()), as_np(
                    recon_loss_TI.mean()), as_np(recon_loss_All.mean())
                print "DIS Loss:", as_np(dis_loss_Img.mean()), as_np(
                    dis_loss_Txt.mean())

            if iters % args.image_save_interval == 0:

                #                 AB = generator_B( test_A )
                #                 BA = generator_A( test_B )
                #                 ABA = generator_A( AB )
                #                 BAB = generator_B( BA )

                Iz = encoder_Img(test_I)
                Tz = encoder_Txt(test_T)

                IzT = decoder_Txt(Iz)
                TzI = decoder_Img(Tz)

                IzTz = encoder_Img(IzT)
                TzIz = encoder_Txt(TzI)

                n_testset = min(test_I.size()[0], test_T.size()[0])

                subdir_path = os.path.join(
                    result_path, str(iters / args.image_save_interval))

                if os.path.exists(subdir_path):
                    pass
                else:
                    os.makedirs(subdir_path)

                for im_idx in range(n_testset):
                    I_val = test_I[im_idx].cpu().data.numpy().transpose(
                        1, 2, 0) * 255.
                    T_val = test_T[im_idx].cpu().data.numpy().transpose(
                        1, 2, 0) * 255.
                    TzI_val = TzI[im_idx].cpu().data.numpy().transpose(
                        1, 2, 0) * 255.
                    TzIzT_val = TzIzT[im_idx].cpu().data.numpy().transpose(
                        1, 2, 0) * 255.
                    IzT_val = IzT[im_idx].cpu().data.numpy().transpose(
                        1, 2, 0) * 255.
                    IzTzI_val = IzTzI[im_idx].cpu().data.numpy().transpose(
                        1, 2, 0) * 255.

#                     Got to handle the Output


#                     filename_prefix = os.path.join (subdir_path, str(im_idx))
#                     scipy.misc.imsave( filename_prefix + '.A.jpg', A_val.astype(np.uint8)[:,:,::-1])
#                     scipy.misc.imsave( filename_prefix + '.B.jpg', B_val.astype(np.uint8)[:,:,::-1])
#                     scipy.misc.imsave( filename_prefix + '.BA.jpg', BA_val.astype(np.uint8)[:,:,::-1])
#                     scipy.misc.imsave( filename_prefix + '.AB.jpg', AB_val.astype(np.uint8)[:,:,::-1])
#                     scipy.misc.imsave( filename_prefix + '.ABA.jpg', ABA_val.astype(np.uint8)[:,:,::-1])
#                     scipy.misc.imsave( filename_prefix + '.BAB.jpg', BAB_val.astype(np.uint8)[:,:,::-1])
#
            if iters % args.model_save_interval == 0:
                #                 torch.save( generator_A, os.path.join(model_path, 'model_gen_A-' + str( iters / args.model_save_interval )))
                torch.save(
                    encoder_Img,
                    os.path.join(
                        model_path, 'model_enc_Img-' +
                        str(iters / args.model_save_interval)))
                torch.save(
                    encoder_Txt,
                    os.path.join(
                        model_path, 'model_enc_Txt-' +
                        str(iters / args.model_save_interval)))

                #                 torch.save( generator_B, os.path.join(model_path, 'model_gen_B-' + str( iters / args.model_save_interval )))
                torch.save(
                    decoder_Txt,
                    os.path.join(
                        model_path, 'model_dec_Txt-' +
                        str(iters / args.model_save_interval)))
                torch.save(
                    decoder_Img,
                    os.path.join(
                        model_path, 'model_dec_Img-' +
                        str(iters / args.model_save_interval)))

                #                 torch.save( discriminator_A, os.path.join(model_path, 'model_dis_A-' + str( iters / args.model_save_interval )))
                torch.save(
                    discriminator_Img,
                    os.path.join(
                        model_path, 'model_dis_Img-' +
                        str(iters / args.model_save_interval)))

                #                 torch.save( discriminator_B, os.path.join(model_path, 'model_dis_B-' + str( iters / args.model_save_interval )))
                torch.save(
                    discriminator_Txt,
                    os.path.join(
                        model_path, 'model_dis_Txt-' +
                        str(iters / args.model_save_interval)))

            iters += 1
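# A minimal, hedged sketch of the alternating schedule above: discriminator
# updates run on iterations that are multiples of update_interval, generator
# updates on all the others. The interval value here is an illustrative
# placeholder, not the script's actual default.
update_interval = 3
for iters in range(8):
    if iters % update_interval == 0:
        step = 'discriminator'  # dis_loss.backward(); optim_dis.step()
    else:
        step = 'generator'      # gen_loss.backward(); optim_gen.step()
    print(iters, step)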
示例#11
0
        i = i + 1
        for word in initial_file[y]["processed_text"]:
            if word not in only_lsd:
                only_lsd[word] = {"frequency": 1, "doc_num" : 0}
            else:
                only_lsd[word]["frequency"] = only_lsd[word]["frequency"] + 1
    elif "mdma" in initial_file[y]["drugs"]:
        i = i + 1
        for word in initial_file[y]["processed_text"]:
            if word not in only_mdma:
                only_mdma[word] = {"frequency": 1, "doc_num" : 0}
            else:
                only_mdma[word]["frequency"] = only_mdma[word]["frequency"] + 1

widgets = ['Something: ', Percentage(), ' ', Bar(marker=RotatingMarker()),
           ' ', ETA(), ' ', FileTransferSpeed()]
pbar = ProgressBar(widgets=widgets, maxval = i).start()
a = 0
b = 0
c = 0
for x,y in enumerate(initial_file):
    if "mdma" in initial_file[y]["drugs"] and "lsd" in initial_file[y]["drugs"]:
        a = a + 1
        for word in both_drugs:
            if word in initial_file[y]["processed_text"]:
                both_drugs[word]["doc_num"] = both_drugs[word]["doc_num"] + 1
    elif "lsd" in initial_file[y]["drugs"]:
        b = b + 1
        for word in only_lsd:
            if word in initial_file[y]["processed_text"]:
                only_lsd[word]["doc_num"] = only_lsd[word]["doc_num"] + 1
示例#12
0
def main():

    global args
    args = parser.parse_args()

    cuda = args.cuda
    if cuda == 'true':
        cuda = True
    else:
        cuda = False

    task_name = args.task_name

    epoch_size = args.epoch_size
    batch_size = args.batch_size

    result_path = os.path.join(args.result_path, args.task_name)
    if args.style_A:
        result_path = os.path.join(result_path, args.style_A)
    result_path = os.path.join(result_path, args.model_arch)

    model_path = os.path.join(args.model_path, args.task_name)
    if args.style_A:
        model_path = os.path.join(model_path, args.style_A)
    model_path = os.path.join(model_path, args.model_arch)

    data_style_A, data_style_B, test_style_A, test_style_B = get_data()

    if args.task_name.startswith('edges2'):
        test_A = read_images(test_style_A, 'A', args.image_size)
        test_B = read_images(test_style_B, 'B', args.image_size)

    elif args.task_name == 'handbags2shoes' or args.task_name == 'shoes2handbags':
        test_A = read_images(test_style_A, 'B', args.image_size)
        test_B = read_images(test_style_B, 'B', args.image_size)

    else:
        test_A = read_images(test_style_A, None, args.image_size)
        test_B = read_images(test_style_B, None, args.image_size)

    device = torch.device('cuda:{}'.format(args.gpu_ids[0]))  # rui

    test_A = Variable(torch.FloatTensor(test_A), volatile=True)
    test_B = Variable(torch.FloatTensor(test_B), volatile=True)

    if not os.path.exists(result_path):
        os.makedirs(result_path)
    if not os.path.exists(model_path):
        os.makedirs(model_path)

    generator_A = Generator()
    generator_B = Generator()
    discriminator_A = Discriminator()
    discriminator_B = Discriminator()

    # rui edit for gpu
    #if cuda:
    #    test_A = test_A.cuda()
    #    test_B = test_B.cuda()
    #    generator_A = generator_A.cuda()
    #    generator_B = generator_B.cuda()
    #    discriminator_A = discriminator_A.cuda()
    #    discriminator_B = discriminator_B.cuda()
    if cuda:
        test_A = test_A.to(device)
        test_B = test_B.to(device)
        generator_A = generator_A.to(device)
        generator_B = generator_B.to(device)
        discriminator_A = discriminator_A.to(device)
        discriminator_B = discriminator_B.to(device)
    # rui

    data_size = min(len(data_style_A), len(data_style_B))
    n_batches = (data_size // batch_size)

    recon_criterion = nn.MSELoss()
    gan_criterion = nn.BCELoss()
    feat_criterion = nn.HingeEmbeddingLoss()

    gen_params = chain(generator_A.parameters(), generator_B.parameters())
    dis_params = chain(discriminator_A.parameters(),
                       discriminator_B.parameters())

    optim_gen = optim.Adam(gen_params,
                           lr=args.learning_rate,
                           betas=(0.5, 0.999),
                           weight_decay=0.00001)
    optim_dis = optim.Adam(dis_params,
                           lr=args.learning_rate,
                           betas=(0.5, 0.999),
                           weight_decay=0.00001)

    iters = 0

    gen_loss_total = []
    dis_loss_total = []

    for epoch in range(epoch_size):
        data_style_A, data_style_B = shuffle_data(data_style_A, data_style_B)

        widgets = ['epoch #%d|' % epoch, Percentage(), Bar(), ETA()]
        pbar = ProgressBar(maxval=n_batches, widgets=widgets)
        pbar.start()

        for i in range(n_batches):

            pbar.update(i)

            generator_A.zero_grad()
            generator_B.zero_grad()
            discriminator_A.zero_grad()
            discriminator_B.zero_grad()

            A_path = data_style_A[i * batch_size:(i + 1) * batch_size]
            B_path = data_style_B[i * batch_size:(i + 1) * batch_size]

            if args.task_name.startswith('edges2'):
                A = read_images(A_path, 'A', args.image_size)
                B = read_images(B_path, 'B', args.image_size)
            elif args.task_name == 'handbags2shoes' or args.task_name == 'shoes2handbags':
                A = read_images(A_path, 'B', args.image_size)
                B = read_images(B_path, 'B', args.image_size)
            else:
                A = read_images(A_path, None, args.image_size)
                B = read_images(B_path, None, args.image_size)

            A = Variable(torch.FloatTensor(A))
            B = Variable(torch.FloatTensor(B))

            # rui edit for gpu
            #if cuda:
            #    A = A.cuda()
            #    B = B.cuda()
            if cuda:
                A = A.to(device)
                B = B.to(device)
            # rui

            AB = generator_B(A)
            BA = generator_A(B)

            ABA = generator_A(AB)
            BAB = generator_B(BA)

            # Reconstruction Loss
            recon_loss_A = recon_criterion(ABA, A)
            recon_loss_B = recon_criterion(BAB, B)

            # Real/Fake GAN Loss (A)
            A_dis_real, A_feats_real = discriminator_A(A)
            A_dis_fake, A_feats_fake = discriminator_A(BA)

            #dis_loss_A, gen_loss_A = get_gan_loss( A_dis_real, A_dis_fake, gan_criterion, cuda )
            #fm_loss_A = get_fm_loss(A_feats_real, A_feats_fake, feat_criterion)
            dis_loss_A, gen_loss_A = get_gan_loss(A_dis_real, A_dis_fake,
                                                  gan_criterion, cuda,
                                                  device)  # rui
            fm_loss_A = get_fm_loss(A_feats_real, A_feats_fake, feat_criterion,
                                    device)  # rui

            # Real/Fake GAN Loss (B)
            B_dis_real, B_feats_real = discriminator_B(B)
            B_dis_fake, B_feats_fake = discriminator_B(AB)

            #dis_loss_B, gen_loss_B = get_gan_loss( B_dis_real, B_dis_fake, gan_criterion, cuda )
            #fm_loss_B = get_fm_loss( B_feats_real, B_feats_fake, feat_criterion )
            dis_loss_B, gen_loss_B = get_gan_loss(B_dis_real, B_dis_fake,
                                                  gan_criterion, cuda,
                                                  device)  # rui
            fm_loss_B = get_fm_loss(B_feats_real, B_feats_fake, feat_criterion,
                                    device)  # rui

            # Total Loss

            if iters < args.gan_curriculum:
                rate = args.starting_rate
            else:
                rate = args.default_rate

            gen_loss_A_total = (gen_loss_B * 0.1 + fm_loss_B * 0.9) * (
                1. - rate) + recon_loss_A * rate
            gen_loss_B_total = (gen_loss_A * 0.1 + fm_loss_A * 0.9) * (
                1. - rate) + recon_loss_B * rate

            if args.model_arch == 'discogan':
                gen_loss = gen_loss_A_total + gen_loss_B_total
                dis_loss = dis_loss_A + dis_loss_B
            elif args.model_arch == 'recongan':
                gen_loss = gen_loss_A_total
                dis_loss = dis_loss_B
            elif args.model_arch == 'gan':
                gen_loss = (gen_loss_B * 0.1 + fm_loss_B * 0.9)
                dis_loss = dis_loss_B

            if iters % args.update_interval == 0:
                dis_loss.backward()
                optim_dis.step()
            else:
                gen_loss.backward()
                optim_gen.step()

            if iters % args.log_interval == 0:
                print "---------------------"
                print "GEN Loss:", as_np(gen_loss_A.mean()), as_np(
                    gen_loss_B.mean())
                print "Feature Matching Loss:", as_np(fm_loss_A.mean()), as_np(
                    fm_loss_B.mean())
                print "RECON Loss:", as_np(recon_loss_A.mean()), as_np(
                    recon_loss_B.mean())
                print "DIS Loss:", as_np(dis_loss_A.mean()), as_np(
                    dis_loss_B.mean())

            if iters % args.image_save_interval == 0:
                AB = generator_B(test_A)
                BA = generator_A(test_B)
                ABA = generator_A(AB)
                BAB = generator_B(BA)

                n_testset = min(test_A.size()[0], test_B.size()[0])

                subdir_path = os.path.join(
                    result_path, str(iters / args.image_save_interval))

                if os.path.exists(subdir_path):
                    pass
                else:
                    os.makedirs(subdir_path)

                for im_idx in range(n_testset):
                    A_val = test_A[im_idx].cpu().data.numpy().transpose(
                        1, 2, 0) * 255.
                    B_val = test_B[im_idx].cpu().data.numpy().transpose(
                        1, 2, 0) * 255.
                    BA_val = BA[im_idx].cpu().data.numpy().transpose(1, 2,
                                                                     0) * 255.
                    ABA_val = ABA[im_idx].cpu().data.numpy().transpose(
                        1, 2, 0) * 255.
                    AB_val = AB[im_idx].cpu().data.numpy().transpose(1, 2,
                                                                     0) * 255.
                    BAB_val = BAB[im_idx].cpu().data.numpy().transpose(
                        1, 2, 0) * 255.

                    filename_prefix = os.path.join(subdir_path, str(im_idx))
                    scipy.misc.imsave(filename_prefix + '.A.jpg',
                                      A_val.astype(np.uint8)[:, :, ::-1])
                    scipy.misc.imsave(filename_prefix + '.B.jpg',
                                      B_val.astype(np.uint8)[:, :, ::-1])
                    scipy.misc.imsave(filename_prefix + '.BA.jpg',
                                      BA_val.astype(np.uint8)[:, :, ::-1])
                    scipy.misc.imsave(filename_prefix + '.AB.jpg',
                                      AB_val.astype(np.uint8)[:, :, ::-1])
                    scipy.misc.imsave(filename_prefix + '.ABA.jpg',
                                      ABA_val.astype(np.uint8)[:, :, ::-1])
                    scipy.misc.imsave(filename_prefix + '.BAB.jpg',
                                      BAB_val.astype(np.uint8)[:, :, ::-1])

            if iters % args.model_save_interval == 0:
                torch.save(
                    generator_A,
                    os.path.join(
                        model_path, 'model_gen_A-' +
                        str(iters / args.model_save_interval)))
                torch.save(
                    generator_B,
                    os.path.join(
                        model_path, 'model_gen_B-' +
                        str(iters / args.model_save_interval)))
                torch.save(
                    discriminator_A,
                    os.path.join(
                        model_path, 'model_dis_A-' +
                        str(iters / args.model_save_interval)))
                torch.save(
                    discriminator_B,
                    os.path.join(
                        model_path, 'model_dis_B-' +
                        str(iters / args.model_save_interval)))

            iters += 1
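# A hedged sketch of the curriculum weighting above: for the first
# args.gan_curriculum iterations the reconstruction losses get the small
# starting_rate, afterwards the larger default_rate. The numbers below are
# illustrative placeholders, not the script's actual defaults.
gan_curriculum = 10000
starting_rate = 0.01
default_rate = 0.5

def recon_rate(iters):
    return starting_rate if iters < gan_curriculum else default_rate

print(recon_rate(0), recon_rate(20000))  # 0.01 0.5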
示例#13
0
def despotify(pcube,
              vcube,
              vgrid,
              voxel_size=3.08e18,
              species='o-h2co',
              cloud=None,
              cloudfile='MilkyWayGMC.desp',
              cloudfile_path=None,
              output_linenumbers=[0, 2],
              output_properties=['tau', 'Tex', 'intTB']):
    """
    Turn a simulated ppp cube into a ppv cube using despotic for the radiative
    transfer

    Note that it is "despot-ify", not "de-spotify".

    Parameters
    ----------
    pcube : np.ndarray
        3-dimensional array containing values with units of density in n(H2) cm^-3
    vcube : np.ndarray
        3-dimensional array containing Z-velocity values, i.e. the velocity
        should be in the direction of the 0'th axis (because python arrays are
        inverted).  Expected unit is km/s, but it doesn't matter as long as the
        velocity units match the vgrid units
    vgrid : np.ndarray
        1-dimensional array containing the output velocity grid.  Must have
        same units as vcube.
    voxel_size : float
        1-dimensional size of a voxel in cm.  Used to convert from density to
        column
    species : str
        A string identifying the LAMDA species name, e.g. 'o-h2co', 'co', etc.
    cloud : None or despotic.cloud
        Can pass in a despotic cloud instance that will be modified by the
        specified cube density.  Otherwise, will be read from file.
    cloudfile : str
        The filename specifying the default cloud file to use
    cloudfile_path : str or None
        If none, defaults to despotic.__path__/cloudfiles/
    output_linenumbers : iterable
        A list of integer indices for which line numbers should be output as
        cubes
    output_properties : iterable
        A list of strings identifying the line properties to output as cubes

    Returns
    -------
    A data cube of dimensions [velocity,position,position] for each line in
    output_linenumbers for each property in output_properties
    """

    if pcube.shape != vcube.shape:
        raise ValueError('Cube Size mismatch: {0},{1}'.format(
            str(pcube.shape), str(vcube.shape)))
    if vgrid.ndim > 1:
        raise ValueError('Velocity grid must be 1-dimensional')

    imshape = pcube.shape[1:]
    outcubeshape = (vgrid.size, ) + imshape
    nelts = vgrid.size

    vinds = np.empty(vcube.shape, dtype='int64')
    # not needed
    # volume_spectra = np.empty(outcubeshape)
    # dens_spectra = np.empty(outcubeshape)
    for jj, kk in np.ndindex(imshape):
        vinds[:, jj, kk] = np.digitize(vcube[:, jj, kk], vgrid)
    #     volume_spectra[:,jj,kk] = np.bincount(vinds[:,jj,kk], minlength=nelts)
    #     dens_spectra[:,jj,kk] = np.bincount(vinds[:,jj,kk],
    #             weights=pcube[:,jj,kk],
    #             minlength=nelts)

    cloudfile_path = cloudfile_path or despotic.__path__[0] + "/cloudfiles/"

    if cloud is None:
        cloud = despotic.cloud(
            fileName="{0}/{1}".format(cloudfile_path, cloudfile))

    try:
        from progressbar import ProgressBar, Percentage, Bar
        from progressbar import AdaptiveETA as ETA
    except ImportError:
        from progressbar import ProgressBar, Percentage, Bar
        from progressbar import ETA
    pb = ProgressBar(widgets=[Percentage(), ETA(), Bar()],
                     maxval=pcube.size).start()

    # property cubes prior to gridding have same shape as input cubes
    # use dict() instead of {} for python2.6 compatibility
    prop_cubes = dict([
        ("{0}{1}".format(pr, ln), np.empty(pcube.shape))
        for ln, pr in itertools.product(output_linenumbers, output_properties)
    ])

    for (zi, yi, xi), nH in np.ndenumerate(pcube):
        cloud.nH = pcube[zi, yi, xi]
        cloud.colDen = cloud.nH * voxel_size
        line = cloud.lineLum(species)

        for ln, pr in itertools.product(output_linenumbers, output_properties):
            key = "{0}{1}".format(pr, ln)
            prop_cubes[key][zi, yi, xi] = line[ln][pr]

        pb.update(pb.currval + 1)
    pb.finish()

    # spectral cubes have outcubeshape
    spectra_cubes = {}
    spectra_cubes = dict([
        ("{0}{1}".format(pr, ln), np.empty(outcubeshape))
        for ln, pr in itertools.product(output_linenumbers, output_properties)
    ])

    for key in prop_cubes:
        for jj, kk in itertools.product(*map(xrange, imshape)):
            spectra_cubes[key][:,jj,kk] = \
                np.bincount(vinds[:,jj,kk],
                            weights=prop_cubes[key][:,jj,kk],
                            minlength=nelts)

    return spectra_cubes, prop_cubes
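# A hedged usage sketch for despotify(): the cubes are tiny synthetic
# placeholders (uniform density, Gaussian velocities), not real simulation
# output, and the call assumes despotic's bundled MilkyWayGMC.desp cloud file
# is available.
import numpy as np

pcube = np.full((8, 4, 4), 1e3)                # n(H2) in cm^-3
vcube = np.random.normal(0., 1., pcube.shape)  # km/s along the 0'th axis
vgrid = np.linspace(-5., 5., 21)               # output velocity grid, km/s

spectra, props = despotify(pcube, vcube, vgrid,
                           output_linenumbers=[0],
                           output_properties=['intTB'])
print(spectra['intTB0'].shape)  # (21, 4, 4)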
示例#14
0
    def train(self):
        config = tf.ConfigProto(allow_soft_placement=True)
        with tf.Session(config=config) as sess:
            with tf.device("/gpu:%d" % cfg.GPU_ID):
                counter = self.build_model(sess)
                saver = tf.train.Saver(tf.all_variables(),
                                       keep_checkpoint_every_n_hours=5)

                # summary_op = tf.merge_all_summaries()
                summary_writer = tf.summary.FileWriter(self.log_dir,
                                                       sess.graph)

                if cfg.TRAIN.FINETUNE_LR:
                    keys = ["hr_d_loss", "hr_g_loss", "d_loss", "g_loss"]
                else:
                    keys = ["d_loss", "g_loss"]
                log_vars = []
                log_keys = []
                for k, v in self.log_vars:
                    if k in keys:
                        log_vars.append(v)
                        log_keys.append(k)
                generator_lr = cfg.TRAIN.GENERATOR_LR
                discriminator_lr = cfg.TRAIN.DISCRIMINATOR_LR
                lr_decay_step = cfg.TRAIN.LR_DECAY_EPOCH
                number_example = self.dataset.train._num_examples
                updates_per_epoch = int(number_example / self.batch_size)
                # int((counter + lr_decay_step/2) / lr_decay_step)
                decay_start = cfg.TRAIN.PRETRAINED_EPOCH
                epoch_start = int(counter / updates_per_epoch)
                for epoch in range(epoch_start, self.max_epoch):
                    widgets = [
                        "epoch #%d|" % epoch,
                        Percentage(),
                        Bar(),
                        ETA()
                    ]
                    pbar = ProgressBar(maxval=updates_per_epoch,
                                       widgets=widgets)
                    pbar.start()

                    if epoch % lr_decay_step == 0 and epoch > decay_start:
                        generator_lr *= 0.5
                        discriminator_lr *= 0.5

                    all_log_vals = []
                    for i in range(updates_per_epoch):
                        pbar.update(i)
                        log_vals = self.train_one_step(generator_lr,
                                                       discriminator_lr,
                                                       counter, summary_writer,
                                                       log_vars, sess)
                        all_log_vals.append(log_vals)
                        # save checkpoint
                        counter += 1
                        if counter % self.snapshot_interval == 0:
                            snapshot_path = "%s/%s_%s.ckpt" %\
                                             (self.checkpoint_dir,
                                              self.exp_name,
                                              str(counter))
                            fn = saver.save(sess, snapshot_path)
                            print("Model saved in file: %s" % fn)

                    img_summary, img_summary2 =\
                        self.epoch_sum_images(sess, cfg.TRAIN.NUM_COPY)
                    summary_writer.add_summary(img_summary, counter)
                    summary_writer.add_summary(img_summary2, counter)

                    avg_log_vals = np.mean(np.array(all_log_vals), axis=0)
                    dic_logs = {}
                    for k, v in zip(log_keys, avg_log_vals):
                        dic_logs[k] = v
                        # print(k, v)

                    log_line = "; ".join("%s: %s" % (str(k), str(dic_logs[k]))
                                         for k in dic_logs)
                    print("Epoch %d | " % (epoch) + log_line)
                    sys.stdout.flush()
                    if np.any(np.isnan(avg_log_vals)):
                        raise ValueError("NaN detected!")
示例#15
0
def download_coincidences(file,
                          group='',
                          cluster=None,
                          stations=None,
                          start=None,
                          end=None,
                          n=2,
                          progress=True):
    """Download event summary data coincidences

    :param file: PyTables datafile handler.
    :param group: path of destination group, which need not exist yet.
    :param cluster: HiSPARC cluster name for which to get data.
    :param stations: a list of HiSPARC station numbers for which to get data.
    :param start: a datetime instance defining the start of the search interval.
    :param end: a datetime instance defining the end of the search interval.
    :param n: the minimum number of events in the coincidence.

    The start and end parameters may both be None.  In that case,
    yesterday's data is downloaded.  If only end is None, a single day's
    worth of data is downloaded, starting at the datetime specified with
    start.

    Optionally either a cluster or stations can be defined to limit the
    results to include only events from those stations.

    Example::

        >>> import tables
        >>> import datetime
        >>> import sapphire.esd
        >>> data = tables.open_file('data_coincidences.h5', 'w')
        >>> sapphire.esd.download_coincidences(data, cluster='Aarhus',
        ...     start=datetime.datetime(2013, 9, 1),
        ...     end=datetime.datetime(2013, 9, 2), n=3)

    """
    # sensible defaults for start and end
    if start is None:
        if end is not None:
            raise RuntimeError(
                "Start is None, but end is not. I can't go on like this.")
        else:
            yesterday = datetime.date.today() - datetime.timedelta(days=1)
            start = datetime.datetime.combine(yesterday, datetime.time(0, 0))
    if end is None:
        end = start + datetime.timedelta(days=1)

    if stations is not None and len(stations) < n:
        raise Exception('Too few stations in query; give at least n.')

    # build and open url, create tables and set read function
    query = urlencode({
        'cluster': cluster,
        'stations': stations,
        'start': start,
        'end': end,
        'n': n
    })
    url = get_coincidences_url().format(query=query)
    station_groups = _read_or_get_station_groups(file, group)
    c_group = _get_or_create_coincidences_tables(file, group, station_groups)

    try:
        data = urlopen(url, timeout=1800)
    except BadStatusLine:
        # Unexplained transient error, retry once
        data = urlopen(url, timeout=1800)

    # keep track of event timestamp within [start, end] interval for
    # progressbar
    t_start = calendar.timegm(start.utctimetuple())
    t_end = calendar.timegm(end.utctimetuple())
    t_delta = t_end - t_start
    if progress:
        pbar = ProgressBar(max_value=1., widgets=[Percentage(),
                                                  Bar(),
                                                  ETA()]).start()

    # loop over lines in tsv as they come streaming in, keep temporary
    # lists until a full coincidence is in.
    prev_update = time.time()
    reader = csv.reader(iterdecode(data, 'utf-8'), delimiter='\t')
    current_coincidence = 0
    coincidence = []
    for line in reader:
        if line[0][0] == '#':
            continue
        elif int(line[0]) == current_coincidence:
            coincidence.append(line)
        else:
            # Full coincidence has been received, store it.
            timestamp = _read_lines_and_store_coincidence(
                file, c_group, coincidence, station_groups)
            # update progressbar every 0.5 seconds
            if progress and time.time() - prev_update > 0.5 and timestamp != 0.:
                pbar.update((1. * timestamp - t_start) / t_delta)
                prev_update = time.time()
            coincidence = [line]
            current_coincidence = int(line[0])
            file.flush()

    if len(coincidence):
        # Store last coincidence
        _read_lines_and_store_coincidence(file, c_group, coincidence,
                                          station_groups)
    if progress:
        pbar.finish()

    if line[0][0] == '#':
        if len(line[0]) == 1:
            # No events received, and no success line
            raise Exception('Failed to download data, no data received.')
        else:
            # Successful download because last line is a non-empty comment
            pass
    else:
        # Last line is data, report failed download and date/time of last line
        raise Exception('Failed to complete download, last received data '
                        'from: %s %s.' % tuple(line[2:4]))

    file.flush()
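# A hedged usage sketch: with start and end both omitted, the download falls
# back to yesterday's data, as described in the docstring. The file name and
# station numbers are illustrative placeholders.
import tables

data = tables.open_file('yesterday_coincidences.h5', 'w')
download_coincidences(data, stations=[501, 502, 503], n=2)
data.close()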
示例#16
0
    def train(self):
        config = tf.ConfigProto(allow_soft_placement=True)
        with tf.Session(config=config) as sess:
            with tf.device("/gpu:%d" % cfg.GPU_ID):
                counter = self.build_model(sess)
                saver = tf.train.Saver(tf.global_variables(),
                                       keep_checkpoint_every_n_hours=2)

                # summary_op = tf.merge_all_summaries()
                summary_writer = tf.summary.FileWriter(self.log_dir,
                                                        sess.graph)

                keys = ["d_loss", "g_loss", "e_loss"]
                log_vars = []
                log_keys = []
                for k, v in self.log_vars:
                    if k in keys:
                        log_vars.append(v)
                        log_keys.append(k)
                        # print(k, v)
                generator_lr = cfg.TRAIN.GENERATOR_LR
                encoder_lr = cfg.TRAIN.ENCODER_LR
                discriminator_lr = cfg.TRAIN.DISCRIMINATOR_LR
                num_embedding = cfg.TRAIN.NUM_EMBEDDING
                lr_decay_step = cfg.TRAIN.LR_DECAY_EPOCH
                number_example = self.dataset.train._num_examples
                updates_per_epoch = int(number_example / self.batch_size)
                epoch_start = int(counter / updates_per_epoch)
                for epoch in range(epoch_start, self.max_epoch):
                    widgets = ["epoch #%d|" % epoch,
                               Percentage(), Bar(), ETA()]
                    pbar = ProgressBar(maxval=updates_per_epoch,
                                       widgets=widgets)
                    pbar.start()

                    #Decay Learning Rate by 1/2 every 'x' steps(Change to exp?)
                    if epoch % lr_decay_step == 0 and epoch != 0:
                        generator_lr *= 0.5
                        encoder_lr *= 0.5
                        discriminator_lr *= 0.5

                    #Exponential Decay for all LRs

                    #generator_lr = tf.train.exponential_decay(generator_lr,counter, 100000,
                    #        0.96, staircase=True)
                    #discriminator_lr = tf.train.exponential_decay(discriminator_lr,counter,
                    #        100000, 0.96, staircase=True)
                    #encoder_lr = tf.train.exponential_decay(encoder_lr,counter, 100000,
                    #        0.96, staircase=True)

                    all_log_vals = []
                    for i in range(updates_per_epoch):
                        pbar.update(i)
                        
                        images, wrong_images, embeddings, _, _ =\
                            self.dataset.train.next_batch(self.batch_size,
                                                          num_embedding)
                        feed_dict = {self.images: images,
                                     self.wrong_images: wrong_images,
                                     self.embeddings: embeddings,
                                     self.generator_lr: generator_lr,
                                     self.encoder_lr: encoder_lr,
                                     self.discriminator_lr: discriminator_lr
                                     }
                        if cfg.TRAIN.DISCRIMINATOR:
                            # Train the discriminator
                            feed_out = [self.discriminator_trainer,
                                        self.d_sum,
                                        self.hist_sum,
                                        log_vars]
                            _, d_sum, hist_sum, log_vals = sess.run(feed_out,
                                                                    feed_dict)
                            summary_writer.add_summary(d_sum, counter)
                            summary_writer.add_summary(hist_sum, counter)
                            all_log_vals.append(log_vals)
                        if cfg.TRAIN.SUPERVISED and cfg.TRAIN.ENCODER and i % cfg.TRAIN.ENCODER_PERIOD == 0:
                            # Train the encoder
                            feed_out = [self.encoder_trainer,
                                        self.e_sum]
                            _, e_sum = sess.run(feed_out,
                                                feed_dict)
                            summary_writer.add_summary(e_sum, counter)
                        if cfg.TRAIN.GENERATOR:
                            # Train the generator
                            feed_out = [self.generator_trainer,
                                        self.g_sum]
                            _, g_sum = sess.run(feed_out,
                                                feed_dict)
                            summary_writer.add_summary(g_sum, counter)
                        # save checkpoint
                        counter += 1
                        if counter % self.snapshot_interval == 0:
                            snapshot_path = "%s/%s_%s.ckpt" %\
                                             (self.checkpoint_dir,
                                              self.exp_name,
                                              str(counter))
                            fn = saver.save(sess, snapshot_path)
                            print("Model saved in file: %s" % fn)

                    img_sum = self.epoch_sum_images(sess, cfg.TRAIN.NUM_COPY)
                    summary_writer.add_summary(img_sum, counter)

                    avg_log_vals = np.mean(np.array(all_log_vals), axis=0)
                    dic_logs = {}
                    #for k, v in zip(log_keys, avg_log_vals):
                        #dic_logs[k] = v
                        # print(k, v)

                    #log_line = "; ".join("%s: %s" %
                    #                     (str(k), str(dic_logs[k]))
                    #                     for k in dic_logs)
                    #print("Epoch %d | " % (epoch) + log_line)
                    sys.stdout.flush()
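# A hedged sketch of the step decay above: every lr_decay_step epochs each
# learning rate is halved. The numbers are illustrative placeholders, not the
# config defaults.
lr_decay_step = 20
generator_lr = 2e-4
for epoch in range(1, 101):
    if epoch % lr_decay_step == 0 and epoch != 0:
        generator_lr *= 0.5
print(generator_lr)  # 2e-4 * 0.5**5 = 6.25e-06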
示例#17
0
    def get_validation_score(self, is_train=False):

        print('validation started')

        y_predict_text = []
        widgets = [
            'Evaluating ',
            Percentage(), ' ',
            Bar(marker='#', left='[', right=']'), ' ',
            ETA()
        ]
        pbar = ProgressBar(widgets=widgets)
        for qu_batch, an_batch, im_batch in pbar(
                zip(
                    grouper(self.val_ques,
                            self.batch_size,
                            fillvalue=self.val_ques[-1]),
                    grouper(self.val_ans,
                            self.batch_size,
                            fillvalue=self.val_ans[-1]),
                    grouper(self.val_img,
                            self.batch_size,
                            fillvalue=self.val_img[-1]))):
            fdict = {}
            fdict[self.img[0]] = im_batch
            fdict[self.ques[0]] = qu_batch
            if is_train:
                y_proba = self.sess.run(self.out_proba_train, feed_dict=fdict)
            else:
                y_proba = self.sess.run(self.out_proba_test, feed_dict=fdict)
            y_predict = y_proba.argmax(axis=-1)
            y_predict_text.extend(
                self.labelencoder.inverse_transform(y_predict))

        correct_val = 0.0
        total = 0.
        binary_correct_val = 0.0
        binary_total = 0.1
        num_correct_val = 0.0
        num_total = 0.1
        other_correct_val = 0.0
        other_total = 0.1
        f1 = open(self.results_path, 'w')
        for prediction, truth, question, image in zip(y_predict_text,
                                                      self.val_ans,
                                                      self.val_ques,
                                                      self.val_img):
            temp_count = 0
            for _truth in truth.split(';'):
                if prediction == _truth:
                    temp_count += 1
            if temp_count > 2:
                correct_val += 1
            else:
                correct_val += float(temp_count) / 3

            total += 1

            binary_temp_count = 0
            num_temp_count = 0
            other_count = 0
            if prediction == 'yes' or prediction == 'no':
                for _truth in truth.split(';'):
                    if prediction == _truth:
                        binary_temp_count += 1
                if binary_temp_count > 2:
                    binary_correct_val += 1
                else:
                    binary_correct_val += float(binary_temp_count) / 3
                binary_total += 1
            elif np.core.defchararray.isdigit(prediction):
                for _truth in truth.split(';'):
                    if prediction == _truth:
                        num_temp_count += 1
                if num_temp_count > 2:
                    num_correct_val += 1
                else:
                    num_correct_val += float(num_temp_count) / 3
                num_total += 1
            else:
                for _truth in truth.split(';'):
                    if prediction == _truth:
                        other_count += 1
                if other_count > 2:
                    other_correct_val += 1
                else:
                    other_correct_val += float(other_count) / 3
                other_total += 1

            #f1.write(question.encode('utf-8'))
            #f1.write('\n')
            #f1.write(image.encode('utf-8'))
            #f1.write('\n')
            f1.write(prediction)
            f1.write('\n')
            f1.write(truth.encode('utf-8'))
            f1.write('\n')
            f1.write('\n')

        f1.write('Final Accuracy is ' + str(correct_val / total))
        f1.close()
        f2 = open('overall_results.txt', 'a')
        f2.write(str(correct_val / total) + '\n\n')
        f2.write(str(binary_correct_val / binary_total) + '\n\n')
        f2.write(str(num_correct_val / num_total) + '\n\n')
        f2.write(str(other_correct_val / other_total) + '\n\n')
        f2.close()

        print 'Final Accuracy is', correct_val / total
        return correct_val / total
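# A hedged sketch of the per-answer scoring rule used above: a prediction
# matching more than two of the semicolon-separated reference answers scores
# 1.0, otherwise matches / 3. The strings are illustrative.
def consensus_score(prediction, truth):
    matches = sum(prediction == t for t in truth.split(';'))
    return 1.0 if matches > 2 else matches / 3.0

print(consensus_score('yes', 'yes;yes;no;yes'))  # 1.0
print(consensus_score('yes', 'yes;no;no;no'))    # 0.333...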
示例#18
0
def main():

    model = model_from_json(open('baseline_mlp.json').read())
    model.load_weights('weights/MLP_epoch_99.hdf5')
    model.compile(loss='categorical_crossentropy', optimizer='rmsprop')

    print("Model Loaded with Weights")

    val_imgs = open('preprocessed/v2/val_images_coco_id.txt',
                    'rb').read().decode('utf-8').splitlines()
    val_ques = open('preprocessed/v2/ques_val.txt',
                    'rb').read().decode('utf-8').splitlines()
    val_ans = open('preprocessed/v2/answer_val.txt',
                   'rb').read().decode('utf-8').splitlines()
    img_ids = open('preprocessed/v2/coco_vgg_IDMap.txt').read().splitlines()
    vgg_path = "data/coco/vgg_feats.mat"

    label_encoder = pk.load(open('preprocessed/v2/label_encoder.sav', 'rb'))
    vgg_ = scipy.io.loadmat(vgg_path)
    vgg_features = vgg_['feats']
    print("Loaded VGG Features")
    id_map = dict()
    for ids in img_ids:
        id_split = ids.split()
        id_map[id_split[0]] = int(id_split[1])

    print("Loading en_core_web_md")
    nlp = spacy.load("en_core_web_md")
    n_classes = 1500
    y_pred = []
    batch_size = 128

    print("Word2Vec Loaded!")

    widgets = [
        'Evaluating ',
        Percentage(), ' ',
        Bar(marker='#', left='[', right=']'), ' ',
        ETA()
    ]
    pbar = ProgressBar(widgets=widgets)
    #i=1

    for qu_batch, an_batch, im_batch in pbar(
            zip(grouped(val_ques, batch_size, fillvalue=val_ques[0]),
                grouped(val_ans, batch_size, fillvalue=val_ans[0]),
                grouped(val_imgs, batch_size, fillvalue=val_imgs[0]))):
        X_q_batch = get_questions_matrix_sum(qu_batch, nlp)
        X_i_batch = get_images_matrix(im_batch, id_map, vgg_features)
        X_batch = np.hstack((X_q_batch, X_i_batch))
        y_predict = model.predict_classes(X_batch, verbose=0)
        y_pred.extend(label_encoder.inverse_transform(y_predict))
        #print (i,"/",len(val_ques))
        #i+=1
        #print(label_encoder.inverse_transform(y_predict))

    correct_val = 0.0
    total = 0
    f1 = open('res.txt', 'w')

    for pred, truth, ques, img in zip(y_pred, val_ans, val_ques, val_imgs):
        t_count = 0
        for _truth in truth.split(';'):
            if pred == _truth:
                t_count += 1
        if t_count >= 2:
            correct_val += 1
        else:
            correct_val += float(t_count) / 3

        total += 1

        try:
            f1.write(str(ques))
            f1.write('\n')
            f1.write(str(img))
            f1.write('\n')
            f1.write(str(pred))
            f1.write('\n')
            f1.write(str(truth))
            f1.write('\n')
            f1.write('\n')
        except:
            pass

    print("Accuracy: ", correct_val / total)
    f1.write('Final Accuracy is ' + str(correct_val / total))
    f1.close()
示例#19
0
import argparse



if __name__ == '__main__':
    parser = argparse.ArgumentParser(description="Sink for ventilator")
    parser.add_argument('-r', '--receiver', help='port to listen on')
    args = parser.parse_args()


    receiver_ip = "tcp://*:" + args.receiver
    context = zmq.Context()
    # Get receiver
    receiver = context.socket(zmq.PULL)
    receiver.bind(receiver_ip)
    prg_bar = ProgressBar(widgets=[Percentage(), Bar(marker=RotatingMarker()), ETA()])
    i = 0
    if not os.path.exists('data/logs'):
        os.makedirs('data/logs')


    fh_out = open('data/logs/completed_rna_dist.log','w+')
    while True:
        msg = receiver.recv_json()
        if msg['sender'] == 'ventilator':
            total = int(len(msg['body']))
            prg_bar.maxval = total
            prg_bar = prg_bar.start()
        else:
            i += 1
            prg_bar.update(i)
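# A hedged sketch of the sending side this sink expects: a ventilator first
# announces the work list (so the sink can set maxval), and each completion
# then arrives as a message whose 'sender' is not 'ventilator'. In the real
# pipeline those completions would come from the workers; here they are
# simulated from one process. The port and payload shapes are assumptions.
import zmq

context = zmq.Context()
sender = context.socket(zmq.PUSH)
sender.connect('tcp://localhost:5557')

work_items = ['task_%d' % i for i in range(100)]
sender.send_json({'sender': 'ventilator', 'body': work_items})
for item in work_items:
    sender.send_json({'sender': 'worker', 'body': item})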
示例#20
0
    def spider(self):
        #set server when need ssh
        # server=mongo.setServer()
        # server.start()
        # db=mongo.mongoConnection(server)
        widgets = ['Test: ', Percentage(), ' ', Bar(marker=RotatingMarker()),
               ' ', ETA(), ' ', FileTransferSpeed()]
        pbar = ProgressBar(widgets=widgets, maxval=10000000).start()
        db=mongo.localMongo()
        headers = {'User-Agent':'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.63 Safari/537.36'}
        TZ_data = pd.DataFrame()
        HT_data = pd.DataFrame()
        i = self.start
        while i <= self.stop: # number of pages to crawl
            next = ',f_'+str(i)+'.html'# sorted by post time
            href = self.url.replace(',f_1.html',next)# post-list page of the stock forum
            resp=requests.get(href,headers=headers) # fetch the forum page
            html = BeautifulSoup(resp.text,'lxml') # parse the forum page
            Title= html.find_all(class_="articleh")

            for item in Title:
                if str(item)=='<div class="articleh" id="ad_topic"></div>':
                    continue
                if item.find_all('em')==[] or item.find_all('em')[0].text=='':
                    info = item.find_all('span',class_='l3')[0].find_all('a')[0]# details of each post
                    LL= int(item.find_all('span',class_="l1")[0].contents[0])# metric used to judge whether the post is abnormal
                    print(LL)
                    if LL > 50 :
                        hrefT = 'http://guba.eastmoney.com'+ info['href'] # post link
                        respT=requests.get(hrefT,headers=headers) # fetch the post
                        htmlT = BeautifulSoup(respT.text,'lxml') # parse the post's first page
                        fttime_html= htmlT.find_all(class_="zwfbtime")
                        fttime = re.findall('\d+[-]\d+[-]\d+\s\d+[:]\d+[:]\d+',fttime_html[0].contents[0])[0]# post time
                        tag_data=info['href'][1:-5].split(",")
                        art_id=tag_data.pop()
                        # print(art_id)
                        # print(tag_data)

                        biaoti_text = htmlT.find_all(id="zwconttbt")[0].text.strip()# post title
                        biaoti_info = htmlT.find_all(class_="stockcodec")[0].text.strip()# post body (empty strings not stripped)
                       
                        publisher_id=htmlT.find_all('div',id="zwconttbn")[0].find_all('a')[0]["data-popper"]
                        publisher_name=htmlT.find_all('div',id="zwconttbn")[0].find_all('a')[0].text
                        # print(art_id)
                        # print(biaoti_text)
                        tiezi_info = htmlT.find_all(text=re.compile('var num=\d+;var count=\d+'))# look up by tag content
                        ll,xg = re.findall('\d+',tiezi_info[0])
                        pinglun = htmlT.find_all(text=re.compile('var pinglun_num=\d+'))
                        pinglun= re.findall('var pinglun_num=\d+',pinglun[0])
                        pl = re.findall('\d+',pinglun[0])[0]
                        # print(xg)
                        tiezi = {'date':fttime,'title':biaoti_text,'num_visited':ll,'num_comment':pl,'related_arc':xg,'url':hrefT,'contents':biaoti_info}
                        # print(tiezi)
                        # print(biaoti_text)
                        tiezi_data = pd.DataFrame(tiezi,index=['0']) #每次都更新
                        HT_data_sub = pd.DataFrame()
                        j = 1
                        print(biaoti_text)
                        ply = np.ceil(int(pl)/30)
                        pnum=0
                        comment_list=[]
                        while j <= ply:  # crawl all comment pages
                            nextT = '_'+str(j)+'.html'
                            hrefTJ = hrefT.replace('.html',nextT)# comment page of the post
                            respT = requests.get(hrefTJ,headers=headers) # fetch the comment page
                            htmlT = BeautifulSoup(respT.text,'lxml') # parse the comment page
                            commentid_list = htmlT.find_all('div',class_="zwli clearfix")
                            huitie_time = htmlT.find_all('div',class_="zwlitime")
                            huitie_time = list(huitie_time)
                            huitie_time = pd.Series(huitie_time)
                            huitie_time = huitie_time.apply(self.time_re)
                            # huitie_info = htmlT.find_all('div',class_="zwlitext stockcodec")
                            # huitie_info = pd.Series(list(huitie_info))
                            # huitie_info = huitie_info.apply(f).tolist()
                            pnum=0
                            for k in huitie_time:
                                commentid_id=commentid_list[pnum]["data-huifuid"]
                                userid=htmlT.find_all('span',class_="zwnick")[pnum].find_all('a')[0]["data-popper"]
                                username=htmlT.find_all('span',class_="zwnick")[pnum].find_all('a')[0].text

                                if htmlT.find_all('div',class_="zwlitext stockcodec") and pnum<len(htmlT.find_all('div',class_="zwlitext stockcodec")):
                                    huitie_info = htmlT.find_all('div',class_="zwlitext stockcodec")[pnum].text.strip()
                                else:
                                    continue
                                username=htmlT.find_all('span',class_="zwnick")[pnum].find_all('a')[0].text
                                # print(k)
                                # print(commentid_id)
                                # print(userid)
                                # print(username)
                                # print(huitie_info[pnum])
                                if huitie_info=="":
                                    pnum+=1 
                                    continue
                                # print(username)
                                # print(huitie_info)
                                # print(pnum)
                                # print("-------------------")
                                comment_data={
                                "comment_id":commentid_id,
                                "userid":userid,
                                "username":username,
                                "data":k,
                                "comment_content":huitie_info
                                }
                                pnum+=1   
                                
                                comment_list.append(comment_data)
                            j += 1
                        db.article_data.insert({
                            "arc_id":art_id,
                            "tags":tag_data,
                            "title":biaoti_text,
                            "data":fttime,
                            "publisher_id":publisher_id,
                            "publisher_name":publisher_name,
                            "num_visited":ll,
                            "num_comment":pl,
                            "related_arc":xg,
                            "url":hrefT,
                            "contents":biaoti_info,
                            "comments":comment_list
                            })
                
            print('Finished crawling page %s!' % i)
            i += 1
            pbar.update(10*i+1)
        pbar.finish()
        print('Crawling finished: fetched post data for %s from page %s to page %s' % (self.name, self.start, self.stop))
        # server.stop()  # only needed when the SSH tunnel at the top is enabled
        sys.stdout.write("\n")
        return(TZ_data,HT_data)
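# A hedged sketch of the comment pagination above: with roughly 30 comments
# per page, a post with pl comments needs ceil(pl / 30) pages, fetched by
# appending '_<j>.html' for j = 1..ply. The count and URL are illustrative
# placeholders.
import numpy as np

pl = 95
ply = int(np.ceil(pl / 30))
hrefT = 'http://example.com/post.html'
pages = [hrefT.replace('.html', '_%d.html' % j) for j in range(1, ply + 1)]
print(ply, pages[0])  # 4 http://example.com/post_1.html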
示例#21
0
    def run(self):
        from progressbar import ProgressBar, Bar, Counter, Timer, ETA, Percentage, RotatingMarker
        try:
            widgets = [Percentage(), Bar(left='[', right=']'), ' Processed: ', Counter(), '/', "%s" % self.task_count, ' total files (', Timer(), ') ', ETA()]
            pb = ProgressBar(widgets=widgets, maxval=self.task_count).start()
            while self.task_queue.qsize():
                pb.update(self.task_count - self.task_queue.qsize())
                
                time.sleep(0.5)
            pb.finish()

        except KeyboardInterrupt:
            print("\n")
            return 1

        return 0
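# A hedged usage sketch for the progress monitor above: it only needs a queue
# whose size shrinks as work completes, plus the total task count. The queue,
# worker, and the TaskMonitor name are illustrative stand-ins for whatever
# class run() actually lives on.
import multiprocessing
import time

task_queue = multiprocessing.Queue()
for path in ('a.txt', 'b.txt', 'c.txt'):
    task_queue.put(path)

def worker(queue):
    while not queue.empty():
        queue.get()
        time.sleep(1.0)

multiprocessing.Process(target=worker, args=(task_queue,)).start()
# monitor = TaskMonitor(task_queue=task_queue, task_count=3); monitor.run()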
示例#22
0
File: heppi.py  Project: Python3pkg/Heppi
def draw_instack(variable, label='VBF', select=''):
    histos = []
    varname = variable
    formula = variables[varname].get('formula',varname)
    if ':=' in variable:
        varname  = variable.split(':=')[0]
        formula  = variable.split(':=')[1]
        
    histfilename = ('histogram_stack_' +
                    varname + '_' + label+ '_'
                    + selection['name'])
    
    legend  = None
    if settings.two_colomn_legend:
        legend  = ROOT.TLegend(0.45, 0.72,
                               (1 - ROOT.gStyle.GetPadRightMargin()),
                               (0.96 - ROOT.gStyle.GetPadTopMargin()))
        legend.SetNColumns(2)
        legend.SetColumnSeparation(0)
    else:
        legend  = ROOT.TLegend(0.6, 0.62,
                               (1 - ROOT.gStyle.GetPadRightMargin()),
                               (0.96 - ROOT.gStyle.GetPadTopMargin()))

    cutflow = variable_cutflow(variable,select)

    hstack = ROOT.THStack('hs_' + varname,'')
    
    hstack.SetName('hs_'+ varname)
    hstack.SetTitle(";" + variables[varname]['title']+";entries")

    histUpSys = {}
    histDwSys = {}

    for sys in treesUpSys:
        sysname   = sys.split('*')[1]
        histUpSys.update({sysname : None })
    for sys in treesDwSys:
        sysname   = sys.split('*')[1]
        histDwSys.update({sysname : None })
    for sys in branchUpSys:
        histUpSys.update({sys : None })
    for sys in branchDwSys:
        histDwSys.update({sys : None })
        

        
    if len(cutflow)!=0 and options.nocuts==False:
        cutflow = 'weight*(' + cutflow + ')'
    else:
        cutflow = 'weight*(1)'
    if  options.nocuts:
        histfilename = histfilename + '_nocuts'
    # loop over the samples
    bar    = ProgressBar(widgets=[colored('-- variables:: {0!s:20}   '.format(variable), 'green'),
                                  Percentage(),'  ' ,Bar('>'), ' ', ETA()], term_width=100)
    ordsam = OrderedDict(sorted(samples.items(), key=lambda x: x[1]['order']))
    for proc in bar(ordsam):
        logger.debug(' -- {0!s:17}  {1!s:12} '.format(proc, samples[proc].get('name')))
        
        tree       = samples[proc].get('_root_tree_')
        sample_cut = samples[proc].get('cut','')
        _cutflow_  = cutflow
        _sample_weight_ = samples[proc].get('weight','1')

        if samples[proc].get('cut','') != '':
            _cutflow_ = cutflow[:-1] + '&&' +  samples[proc].get('cut','') + ')'
        if variables[variable]['blind'] != '' and proc == 'Data':
            _cutflow_ = cutflow[:-1] + '&&' +  variables[variable]['blind']+ ')'
            
        if proc != 'Data':        
            tree.Project(
                'h_' + varname + variables[variable]['hist'],
                formula,
                _cutflow_.replace('weight','weight*{0:f}*{1:f}*{2:f}*{3!s}'.format(treeinfo.get('kfactor',1.0),
                                                                 treeinfo.get('lumi'   ,1.0),
                                                                 samples[proc].get('kfactor',1.0),
                                                                 _sample_weight_))
                )
        else:
            tree.Project(
                'h_' + varname + variables[variable]['hist'],
                formula,
                _cutflow_
            )
        #=== systematics 
        for sys in treesUpSys:
            if proc != 'Data' and samples[proc].get('dosysts',True):
                sysname = sys.split('*')[1]
                treeUp  = [x for x in samples[proc].get('_root_tree_sysUp_') if sysname in x.GetName()][0]
                print 'sys ::', sys, ' :: treeUp ::', treeUp, ' :: ', treeUp.GetEntries()
                treeUp.Project(
                    'h_UpSys_' + sysname +'_'+ varname + variables[variable]['hist'],
                    formula,
                    _cutflow_.replace('weight','weight*{0:f}*{1:f}*{2:f}*{3!s}'.format(treeinfo.get('kfactor',1.0),
                                                                     treeinfo.get('lumi'   ,1.0),
                                                                     samples[proc].get('kfactor',1.0),
                                                                     _sample_weight_))
                )
                histUp = ROOT.gDirectory.Get('h_UpSys_' + sysname +'_'+ varname )
                histUp.SetDirectory(0)
                if histUpSys[sysname] is None:
                    histUpSys[sysname] = histUp
                else:
                    histUpSys[sysname].Add(histUp)
        for sys in treesDwSys:
            if proc != 'Data' and samples[proc].get('dosysts',True):
                sysname   = sys.split('*')[1]
                treeDw  = [x for x in samples[proc].get('_root_tree_sysDw_') if sysname in x.GetName()][0]
                print 'sys ::', sys, ' :: treeUp ::', treeDw, ' :: ', treeDw.GetEntries()
                treeDw.Project(
                    'h_DwSys_' + sysname +'_'+ varname + variables[variable]['hist'],
                    formula,
                    _cutflow_.replace('weight','weight*{0:f}*{1:f}*{2:f}*{3!s}'.format(treeinfo.get('kfactor',1.0),
                                                                     treeinfo.get('lumi'   ,1.0),
                                                                     samples[proc].get('kfactor',1.0),
                                                                     _sample_weight_))
                )
                histDw = ROOT.gDirectory.Get('h_DwSys_' + sysname +'_'+ varname )
                histDw.SetDirectory(0)
                if histDwSys[sysname] is None:
                    histDwSys[sysname] = histDw
                else:
                    histDwSys[sysname].Add(histDw)
        # ======= weight systematics
        for sys in branchUpSys:
            if proc != 'Data' and samples[proc].get('dosysts',True):
                print 'sys ::', sys, ' :: treeUp ::', tree, ' :: ', tree.GetEntries(), ' :: ', 'weight*{0:f}*{1:f}*{2!s}*{3:f}*{4!s}'.format(treeinfo.get('kfactor',1.0),
                                                                                                                        treeinfo.get('lumi'   ,1.0),
                                                                                                                        sys,
                                                                                                                        samples[proc].get('kfactor',1.0),
                                                                                                                        _sample_weight_)
                tree.Project(
                    'h_weight_UpSys_' + sysname +'_'+ varname + variables[variable]['hist'],
                    formula,
                    _cutflow_.replace('weight','weight*{0:f}*{1:f}*{2!s}*{3:f}*{4!s}'.format(treeinfo.get('kfactor',1.0),
                                                                        treeinfo.get('lumi'   ,1.0),
                                                                        sys,
                                                                        samples[proc].get('kfactor',1.0),
                                                                        _sample_weight_))
                )
                histUp = ROOT.gDirectory.Get('h_weight_UpSys_' + sysname +'_'+ varname)
                histUp.SetDirectory(0)
                if histUpSys[sys]    is None:
                    histUpSys[sys] = histUp
                else:
                    histUpSys[sys].Add(histUp)

        for sys in branchDwSys:
            if proc != 'Data' and samples[proc].get('dosysts',True):
                print 'sys ::', sys, ' :: treeDw ::', tree, ' :: ', tree.GetEntries(), ' :: ', 'weight*{0:f}*{1:f}*{2!s}*{3:f}*{4!s}'.format(treeinfo.get('kfactor',1.0),
                                                                                                                        treeinfo.get('lumi'   ,1.0),
                                                                                                                        sys,
                                                                                                                        samples[proc].get('kfactor',1.0),
                                                                                                                        _sample_weight_)
                tree.Project(
                    'h_weight_DwSys_' + sysname +'_'+ varname + variables[variable]['hist'],
                    formula,
                    _cutflow_.replace('weight','weight*{0:f}*{1:f}*{2!s}*{3:f}*{4!s}'.format(treeinfo.get('kfactor',1.0),
                                                                        treeinfo.get('lumi'   ,1.0),
                                                                        sys,
                                                                        samples[proc].get('kfactor',1.0),
                                                                        _sample_weight_))
                )
                histDw = ROOT.gDirectory.Get('h_weight_DwSys_' + sysname +'_'+ varname)
                histDw.SetDirectory(0)
                if histDwSys[sys]    is None:
                    histDwSys[sys] = histDw
                else:
                    histDwSys[sys].Add(histDw)
        # ----------------------------------------------    
        hist = ROOT.gDirectory.Get('h_' + varname )
        hist.SetDirectory(0)

        hist.SetTitle(";" + variables[variable]['title']+";entries")
        hcolor = 1

        hcolor = samples[proc]['color']
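        # Style each histogram according to its sample label: signals/spectators
        # are drawn as lines, data as points with Poisson errors, and backgrounds
        # are filled and added to the stack.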
        if ('signal'==samples[proc]['label']) or ('spectator'==samples[proc]['label']):
            hist.SetLineColor(hcolor)
            hist.SetLineStyle(1)
            hist.SetLineWidth(2)
            hist.SetFillStyle(0)
            histos.append(hist)
            if samples[proc].get('kfactor',1) !=1:
                legend.AddEntry(hist,
                                samples[proc]["title"] + ("#times{0:d}".format(samples[proc].get('kfactor',1))),
                                "l" );
            else:
                legend.AddEntry( hist, samples[proc]["title"], "l" );
        if 'data' in samples[proc]['label']:
            hist.SetMarkerColor(ROOT.kBlack)
            hist.SetLineColor  (ROOT.kBlack)
            hist.SetMarkerStyle(20)
            hist.SetMarkerSize (0.8) # fixme
            hist.SetFillColorAlpha(0,0)
            hist.SetLineWidth(2)
            hist.SetBinErrorOption(ROOT.TH1.kPoisson)
            hist.SetName(hist.GetName() + 'data')
            legend.AddEntry( hist, samples[proc]["title"], "lep" );
            histos.append(hist)
        if 'background' in samples[proc]['label']:
            hist.SetLineColor(ROOT.kBlack)
            hist.SetFillColor(hcolor)
            hist.SetLineWidth(2)
            hstack.Add(hist)
            legend.AddEntry( hist, samples[proc]["title"], "f" );
    # drawing
    c = makeRatioPlotCanvas(name = varname)
    c.cd(1)
    htmp   = histos[0].Clone('__htmp__')
    bounds = [float(s) for s in re.findall('[-+]?\d*\.\d+|\d+',variables[variable]['hist'])]
    htmp.SetTitle(';' + variables[variable]['title']
                  + (';events {0!s} {1!s} '.format(fformat((bounds[2]-bounds[1])/bounds[0]),
                                            variables[variable].get('unit',''))    ))
    htmp.Reset()
    if  options.allloghist or variables[variable]['log']:
        ymin = 0.01 - 0.003
        ymax = hstack.GetMaximum()*1000
        htmp.GetYaxis().SetRangeUser(ymin,ymax)
        histfilename = histfilename + '_log'
        ROOT.gPad.SetLogy()
    else:
        ymin = 0
        ymax = hstack.GetMaximum() + hstack.GetMaximum()*0.5
        htmp.GetYaxis().SetRangeUser(ymin,ymax)
    customizeHisto(htmp)
    htmp.Draw('')
    hstack.Draw('hist,same')
    herrstat = drawStatErrorBand(hstack.GetStack().Last(), histDwSys, histUpSys)
    herrstat.Draw('E2,same')
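    # Overlay order: background stack first, then the stat(+syst) error band,
    # then the individual histograms in 'histos' (signals as lines, data as points).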
    hdata = None
    for h in histos:
        if 'data' not in h.GetName():
            h.Draw('hist,same')
        else:
            h.Draw('E,same')
            hdata = h

    if len(histUpSys)>0 and len(histDwSys)>0:
        legend.AddEntry(herrstat, "Stat #oplus Syst", "f" )
    else:
        legend.AddEntry(herrstat, "Stat Uncert", "f" )
    # cosmetics
    draw_cut_line(htmp,variable)
    draw_categories(variables[varname].get('boudaries',[]),
                    miny=htmp.GetMinimum(),
                    maxy=htmp.GetMaximum())
    ROOT.gPad.RedrawAxis();
    # this is for the legend
    legend.SetTextAlign( 12 )
    legend.SetTextFont ( 43 )
    legend.SetTextSize ( 18 )
    legend.SetLineColor( 0 )
    legend.SetFillColor( 0 )
    legend.SetFillStyle( 0 )
    legend.SetLineColorAlpha(0,0)
    legend.SetShadowColor(0)
    legend.Draw()
    # draw labels
    if  options.nocuts:
        draw_labels('w/o cuts')
    else:
        draw_labels(plotlabels['name'])
    draw_cms_headlabel(label_right='#sqrt{{s}} = 13 TeV, L = {0:1.2f} fb^{{-1}}'.format(treeinfo.get('lumi',2.63)))
    
    c.cd()
    c.cd(2)
    errorHist = MakeStatProgression(hstack.GetStack().Last(),histDwSys, histUpSys)
    ROOT.SetOwnership(errorHist,0)
    errorHist.GetXaxis().SetTitle(htmp.GetXaxis().GetTitle())
    errorHist.GetYaxis().SetTitle('Data/MC')
    errorHist.GetYaxis().CenterTitle(True)
    customizeHisto(errorHist)
    errorHist.Draw('E2')
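    # Lower pad: if there is no data histogram, draw an invisible placeholder ratio
    # so the axes are set; otherwise draw the Data/MC ratio. Optionally overlay
    # (h+B)/B ratios for the histograms kept in 'histos'.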
    ratioHist = None
    sig_and_bkg_ratio = []
    if hdata is None:
        ratioHist = hstack.GetStack().Last().Clone('_temp_')
        ratioHist.Clear()
        ratioHist.SetLineColorAlpha(0,0)
        ratioHist.SetMarkerColorAlpha(0,0)
        ROOT.SetOwnership(ratioHist,0)
        ratioHist.GetXaxis().SetTitle(htmp.GetXaxis().GetTitle())
        ratioHist.GetYaxis().SetTitle(htmp.GetYaxis().GetTitle())
        if settings.ratio_draw_signal:
            for sig in histos:
                sig_and_bkg = hstack.GetStack().Last().Clone('_temp_bkg_' + sig.GetName())
                sig_and_bkg.Add(sig)
                sig_and_bkg_ratio_ = makeRatio(sig_and_bkg,hstack.GetStack().Last())
                ROOT.SetOwnership(sig_and_bkg_ratio_,0)
                sig_and_bkg_ratio_.GetXaxis().SetTitle(htmp.GetXaxis().GetTitle())
                sig_and_bkg_ratio_.GetYaxis().SetTitle(htmp.GetYaxis().GetTitle())
                sig_and_bkg_ratio_.SetFillColorAlpha(0,0)
                sig_and_bkg_ratio_.SetLineColor(sig.GetLineColor())
                sig_and_bkg_ratio.append(sig_and_bkg_ratio_)
    else:
        ratioHist = makeRatio(hist1 = hdata, hist2 = hstack.GetStack().Last(), isdata = True)
        ROOT.SetOwnership(ratioHist,0)
        ratioHist.GetXaxis().SetTitle(htmp.GetXaxis().GetTitle())
        ratioHist.GetYaxis().SetTitle(htmp.GetYaxis().GetTitle())
        if settings.ratio_draw_signal:
            for sig in histos:
                sig_and_bkg = hstack.GetStack().Last().Clone('_temp_bkg_' + sig.GetName())
                sig_and_bkg.Add(sig)
                sig_and_bkg_ratio_ = makeRatio(sig_and_bkg,hstack.GetStack().Last())
                ROOT.SetOwnership(sig_and_bkg_ratio_,0)
                sig_and_bkg_ratio_.GetXaxis().SetTitle(htmp.GetXaxis().GetTitle())
                sig_and_bkg_ratio_.GetYaxis().SetTitle(htmp.GetYaxis().GetTitle())
                sig_and_bkg_ratio_.SetFillColorAlpha(0,0)
                sig_and_bkg_ratio_.SetLineColor(sig.GetLineColor())
                sig_and_bkg_ratio.append(sig_and_bkg_ratio_)
                
    for o in sig_and_bkg_ratio:
        o.Draw('same,hist')
    draw_cut_line(errorHist,variable)
    line = ROOT.TLine(ratioHist.GetXaxis().GetXmin(),1,ratioHist.GetXaxis().GetXmax(),1)
    line.SetLineColor(4)
    line.SetLineStyle(7)
    line.Draw()
    draw_categories(variables[varname].get('boudaries',[]),
                    miny=htmp.GetMinimum(),
                    maxy=htmp.GetMaximum())
    ROOT.SetOwnership(line,0)
    ratioHist.Draw('same')

    c.cd()
    
    if variables[variable]['norm']==True or allnormhist==True:
        histfilename = histfilename + '_norm'
    c.SaveAs( 'plots/' + histfilename + '.png')
    c.SaveAs( 'plots/' + histfilename + '.pdf')
示例#23
0
def flatten(analysis,sample,**kwargs):
    histParams = kwargs.pop('histParams',{})
    histSelections = kwargs.pop('histSelections',{})
    inputFileList = kwargs.pop('inputFileList','')
    outputFile = kwargs.pop('outputFile','')
    shift = kwargs.pop('shift','')
    countOnly = kwargs.pop('countOnly',False)
    njobs = kwargs.pop('njobs',1)
    job = kwargs.pop('job',0)
    multi = kwargs.pop('multi',False)
    useProof = kwargs.pop('useProof',False)
    intLumi = kwargs.pop('intLumi',float(getLumi()))
    if hasProgress and multi:
        pbar = kwargs.pop('progressbar',ProgressBar(widgets=['{0}: '.format(sample),' ',SimpleProgress(),' histograms ',Percentage(),' ',Bar(),' ',ETA()]))
    else:
        pbar = None

    if outputFile:
        flat = outputFile
        proj = outputFile.replace('.root','_projection.root')
        flattener = FlattenTree(analysis,sample,inputFileList=inputFileList,flat=flat,proj=proj,shift=shift,countOnly=countOnly,useProof=useProof,intLumi=intLumi)
    else:
        flattener = FlattenTree(analysis,sample,inputFileList=inputFileList,shift=shift,countOnly=countOnly,useProof=useProof,intLumi=intLumi)

    for histName, params in histParams.iteritems():
        flattener.addHistogram(histName,**params)

    for selName, sel in histSelections.iteritems():
        if sel: flattener.addSelection(selName,**sel['kwargs'])

    flattener.flattenAll(progressbar=pbar,njobs=njobs,job=job,multi=multi)
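
# A minimal usage sketch (hypothetical histogram and selection definitions; the
# real histParams/histSelections schema comes from the analysis configuration):
#   flatten('MyAnalysis', 'MySample',
#           histParams={'zMass': {'variable': 'z_mass', 'binning': [60, 60, 120]}},
#           histSelections={'default': {'kwargs': {'args': ['z_mass > 0']}}},
#           outputFile='flat_MySample.root')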
示例#24
0
    def _trainer(self,
                 dataset,
                 n_epoch=100,
                 z_std=5,
                 checkpoint_interval=50,
                 report_flag=True):
        """
        train the adversarial autoencoder
        Arguments:
            dataset: tensorflow dataset object
            n_epoch: int, number of epochs
            z_std: float, standard deviation of z_prior
            checkpoint_interval: int, interval in number of batches to store the network parameters
            report_flag: bool, a flag to print the log values
        """
        step = 0
        self.n_epoch = n_epoch
        self.sess.run(tf.global_variables_initializer())
        path = os.path.join(self.tensorboar_dir, self.run_id)
        if not os.path.exists(path):
            os.mkdir(path)
        writer = tf.summary.FileWriter(logdir=path, graph=self.graph)
        self._log_setup(path)

        with self.sess.as_default() as sess:
            for epoch_num in range(self.n_epoch):

                n_batches = int(dataset.train.num_examples / self.batch_size)
                widgets = [
                    'epoch {}|'.format(epoch_num),
                    Percentage(),
                    Bar(),
                    ETA(),
                    Timer()
                ]
                pbar = ProgressBar(maxval=n_batches, widgets=widgets)
                pbar.start()

                for batch_num in range(1, n_batches + 1):
                    pbar.update(batch_num)
                    # getting the batch data
                    z_prior = np.random.randn(self.batch_size,
                                              self.reduced_dim) * z_std
                    batch_x, batch_label = dataset.train.next_batch(
                        self.batch_size)
                    batch_x = batch_x.reshape((-1, 28, 28))
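                    # AAE training order per batch: (1) reconstruction step on the
                    # autoencoder, (2) discriminator step against samples from the
                    # z prior, (3) generator/encoder step to fool the discriminator.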

                    if self.aae_type == 'unsupervised':
                        sess.run(self.rec_opt,
                                 feed_dict={
                                     self.X: batch_x,
                                     self.Y: batch_x
                                 })
                    elif self.aae_type == 'supervised':
                        sess.run(self.rec_opt,
                                 feed_dict={
                                     self.X: batch_x,
                                     self.Y: batch_x,
                                     self.Label: batch_label
                                 })
                    sess.run(self.dis_opt,
                             feed_dict={
                                 self.X: batch_x,
                                 self.Y: batch_x,
                                 self.Z_prior: z_prior
                             })
                    sess.run(self.gen_opt,
                             feed_dict={
                                 self.X: batch_x,
                                 self.Y: batch_x
                             })

                    if batch_num % checkpoint_interval == 0:
                        if self.aae_type == 'unsupervised':
                            a_loss, d_loss, g_loss, summary = sess.run(
                                [
                                    self.rec_loss, self.dis_loss,
                                    self.gen_loss, self.summary_op
                                ],
                                feed_dict={
                                    self.X: batch_x,
                                    self.Y: batch_x,
                                    self.Z_prior: z_prior
                                })
                        elif self.aae_type == 'supervised':
                            a_loss, d_loss, g_loss, summary = sess.run(
                                [
                                    self.rec_loss, self.dis_loss,
                                    self.gen_loss, self.summary_op
                                ],
                                feed_dict={
                                    self.X: batch_x,
                                    self.Y: batch_x,
                                    self.Label: batch_label,
                                    self.Z_prior: z_prior
                                })
                        writer.add_summary(summary, global_step=step)
                        if report_flag:
                            print('Epoch: {}, iteration: {}'.format(
                                epoch_num, batch_num))
                            print('Autoencoder Loss: {}'.format(a_loss))
                            print('Discriminator Loss: {}'.format(d_loss))
                            print('Generator Loss: {}'.format(g_loss))
                        with open(path + '/log.txt', 'a') as log:
                            log.write('Epoch: {}, batch number: {}\n'.format(
                                epoch_num, batch_num))
                            log.write('Autoencoder Loss: {}\n'.format(a_loss))
                            log.write(
                                'Discriminator Loss: {}\n'.format(d_loss))
                            log.write('Generator Loss: {}\n'.format(g_loss))

                    step += 1
        writer.flush()
        writer.close()
示例#25
0
def runToys(w, options, cfg, seed):

    setStyle()
    if seed > -1:
        rt.RooRandom.randomGenerator().SetSeed(seed)

    extRazorPdf = w.pdf('extRazorPdf')
    dataHist = w.data("data_obs")
    if w.obj("fitresult_extRazorPdf_data_obs") != None:
        fr = w.obj("fitresult_extRazorPdf_data_obs")
    elif w.obj("nll_extRazorPdf_data_obs") != None:
        fr = w.obj("nll_extRazorPdf_data_obs")
    elif w.obj("fitresult_extRazorPdf_data_obs_with_constr") != None:
        fr = w.obj("fitresult_extRazorPdf_data_obs_with_constr")
    elif w.obj("nll_extRazorPdf_data_obs_with_constr") != None:
        fr = w.obj("nll_extRazorPdf_data_obs_with_constr")

    fr.Print("V")
    if options.r > -1:
        extSpBPdf = w.pdf('extSpBPdf')

    th1x = w.var("th1x")

    params = extRazorPdf.getParameters(dataHist)
    paramsToRemove = []
    for p in rootTools.RootIterator.RootIterator(params):
        if p.isConstant(): paramsToRemove.append(p)

    [params.remove(p) for p in paramsToRemove]
    paramNames = [
        p.GetName() for p in rootTools.RootIterator.RootIterator(params)
    ]
    paramNames.sort()
    if options.r > -1: paramNames.append('r')

    x = array('d', cfg.getBinning(options.box)[0])  # MR binning
    y = array('d', cfg.getBinning(options.box)[1])  # Rsq binning
    z = array('d', cfg.getBinning(options.box)[2])  # nBtag binning
    nBins = (len(x) - 1) * (len(y) - 1) * (len(z) - 1)

    th1x.setBins(nBins)

    fitband = convertSideband(options.fitRegion, w, x, y, z)
    sideband = convertSideband('LowMR,LowRsq', w, x, y, z)
    ixMin = 3
    iyMin = 3
    if options.box in ['MuMultiJet', 'EleMultiJet']:
        if x[2] == 500:
            ixMin = 3
        else:
            ixMin = 2
        iyMin = 3

    unc = 'Bayes'
    if options.varyN and options.noStat: unc = "Bayes_varyN_noStat"
    elif options.varyN: unc = "Bayes_varyN"
    elif options.noStat: unc = "Bayes_noStat"
    elif options.noSys: unc = "Bayes_noSys"
    elif options.oneSigma: unc = 'oneSigma'

    if options.freq and options.noStat: unc = 'Freq_noStat'
    elif options.freq: unc = 'Freq_varyN'

    if options.r > -1:
        rString = str('%.3f' % options.r).replace(".", "p")
        if seed > -1:
            output = rt.TFile.Open(
                options.outDir + '/toys_%s_r%s_s%i_%s.root' %
                (unc, rString, seed, options.box), 'recreate')
        else:
            output = rt.TFile.Open(
                options.outDir + '/toys_%s_r%s_%s.root' %
                (unc, rString, options.box), 'recreate')
    else:
        if seed > -1:
            output = rt.TFile.Open(
                options.outDir + '/toys_%s_s%i_%s.root' %
                (unc, seed, options.box), 'recreate')
        else:
            output = rt.TFile.Open(
                options.outDir + '/toys_%s_%s.root' % (unc, options.box),
                'recreate')

    output.cd()
    myTree = rt.TTree("myTree", "myTree")

    s1 = getTree(myTree, paramNames, nBins, options.box, z)
    value = setattr(s1, 'toy_num', -1)  # set toy number to -1

    for p in rootTools.RootIterator.RootIterator(fr.floatParsFinal()):
        w.var(p.GetName()).setVal(p.getVal())
        w.var(p.GetName()).setError(p.getError())
        value = setattr(s1, p.GetName(), p.getVal())
        value = setattr(s1, p.GetName() + '_error', p.getError())
    if options.r > -1:
        value = setattr(s1, 'r', 0)
        value = setattr(s1, 'r_error', 0)
        value = setattr(s1, 'r_errorlo', 0)
        value = setattr(s1, 'r_errorhi', 0)

    asimov = extRazorPdf.generateBinned(rt.RooArgSet(th1x),
                                        rt.RooFit.Name('central'),
                                        rt.RooFit.Asimov())

    chi2_data = 0
    n2llr_data = 0
    nll_data = 0
    chi2_data_btag = [0 for k in range(1, len(z))]
    n2llr_data_btag = [0 for k in range(1, len(z))]
    nll_data_btag = [0 for k in range(1, len(z))]
    bestFitByBin = []
    iBinX = -1
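    # Per-bin goodness-of-fit accumulators (obs = data yield, exp = best-fit yield):
    #   chi2  += (obs - exp)^2 / exp
    #   n2llr += 2*(exp - obs + obs*ln(obs/exp))   (saturated-model -2 ln lambda)
    #   nll   += exp - obs*ln(exp)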
    for i in range(1, len(x)):
        for j in range(1, len(y)):
            for k in range(1, len(z)):
                iBinX += 1
                th1x.setVal(iBinX + 0.5)
                #expected = extRazorPdf.getValV(rt.RooArgSet(th1x)) * extRazorPdf.expectedEvents(rt.RooArgSet(th1x))
                expected = float(asimov.weight(rt.RooArgSet(th1x)))
                bestFitByBin.append(expected)
                observed = float(dataHist.weight(rt.RooArgSet(th1x)))
                value = setattr(s1, 'b%i' % iBinX, expected)
                if expected > 0:
                    chi2_data += (observed - expected) * (
                        observed - expected) / (expected)
                    chi2_data_btag[k - 1] += (observed - expected) * (
                        observed - expected) / (expected)
                    #if k==1:
                    #    print "bin", i, j, k, "observed:          ", observed
                    #    print "bin", i, j, k, "expected:          ", expected
                    #    print "bin", i, j, k, "chi2 contribution: ", ( observed - expected ) * ( observed - expected ) / ( expected )
                    #    print "bin", k, "chi2 sum so far:       ", chi2_data_btag[k-1]
                if expected > 0:
                    nll_data -= observed * rt.TMath.Log(expected) - expected
                    nll_data_btag[
                        k - 1] -= observed * rt.TMath.Log(expected) - expected
                if observed > 0 and expected > 0:
                    n2llr_data += 2 * (
                        observed * rt.TMath.Log(observed / expected) -
                        observed)
                    n2llr_data_btag[k - 1] += 2 * (
                        observed * rt.TMath.Log(observed / expected) -
                        observed)
                if expected > 0:
                    n2llr_data += 2 * (expected)
                    n2llr_data_btag[k - 1] += 2 * (expected)
                    #if k==1:
                    #    print "bin", i, j, k, "n2llr contribution:", 2 * ( observed*rt.TMath.Log(observed/expected) - observed ) + 2 * ( expected )
                    #    print "bin", k, "n2llr sum so far:      ", n2llr_data_btag[k-1]

    value = setattr(s1, 'nll_%s' % options.box, nll_data)
    value = setattr(s1, 'n2llr_%s' % options.box, n2llr_data)
    value = setattr(s1, 'chi2_%s' % options.box, chi2_data)

    for k in range(1, len(z)):
        ibtag = z[k - 1]
        value = setattr(s1, 'nll_%ibtag_%s' % (ibtag, options.box),
                        nll_data_btag[k - 1])
        value = setattr(s1, 'n2llr_%ibtag_%s' % (ibtag, options.box),
                        n2llr_data_btag[k - 1])
        value = setattr(s1, 'chi2_%ibtag_%s' % (ibtag, options.box),
                        chi2_data_btag[k - 1])

    myTree.Fill()

    iToy = 0

    nBadPars = 0
    pBest = fr.floatParsFinal()
    pBestVal = {}
    pBestErr = {}
    #mu = rt.RooArgList()
    #hesseParams = rt.RooArgList()
    #iArray = 0
    #nIndexArray = []
    #xFactor = {}
    for p in rootTools.RootIterator.RootIterator(pBest):
        pBestVal[p.GetName()] = p.getVal()
        pBestErr[p.GetName()] = p.getError()
        #p.setConstant(True)
        #mu.add(p)
        #hesseParams.add(w.var(p.GetName()))
        #if 'n_TTj' in p.GetName() or 'b_TTj' in p.GetName():
        #if 'MultiJet' in p.GetName():
        #    nIndexArray.append(iArray)
        #    if 'TTj0b' in p.GetName():
        #        xFactor[iArray] = 1.7
        #    elif 'TTj1b' in p.GetName():
        #        xFactor[iArray] = 1.2
        #    elif 'TTj2b' in p.GetName():
        #        xFactor[iArray] = 1.15
        #    elif 'TTj3b' in p.GetName():
        #        xFactor[iArray] = 1.15
        #iArray+=1
    #maxArray = iArray

    #covMatrix = fr.covarianceMatrix()
    #corrMatrix = fr.correlationMatrix()
    #covMatrixClone = covMatrix.Clone(covMatrix.GetName()+"_varyN")

    # double the uncertainty for each n parameter
    #print nIndexArray
    #print xFactor
    #for nIndex in nIndexArray:
    #    for otherIndex in nIndexArray:
    #        covMatrixClone[nIndex][otherIndex] = xFactor[nIndex]*xFactor[otherIndex]*covMatrix[nIndex][otherIndex]
    #        covMatrixClone[otherIndex][nIndex] = xFactor[otherIndex]*xFactor[nIndex]*covMatrix[otherIndex][nIndex]

    #hesseParams.Print('v')
    #mu.Print('v')
    #covMatrix.Print('v')
    #covMatrixClone.Print("V")
    #hessePdf = rt.RooMultiVarGaussian('hessePdf','hessePdf',hesseParams,mu,covMatrixClone)
    #hessePdf.Print('v')
    #hesseDs = hessePdf.generate(params,int(100*options.nToys))
    #corrMatrix.Print('v')
    #c = rt.TCanvas('c','c',500,400)
    #varName = 'n_TTj1b_MultiJet'
    #varName2 = 'b_TTj1b_MultiJet'
    #frame = w.var(varName).frame(rt.RooFit.Range(0,10))
    #frame.Print('v')
    #hesseDs.plotOn(frame)
    #hessePdf.plotOn(frame)
    #frame.Draw()
    #c.Print(varName+'.2pdf')
    #w.var(varName).setMin(0)
    #w.var(varName2).setMin(0)
    #w.var(varName).setMax(10)
    #w.var(varName2).setMax(2.5)
    #hist2d = hesseDs.createHistogram(w.var(varName),w.var(varName2),100,100)
    #hist2d.GetXaxis().SetTitle(varName)
    #hist2d.GetYaxis().SetTitle(varName2)
    #hist2d.Draw("colz")
    #c.Print(varName+varName2+'.2pdf')
    #sys.exit()

    if options.box in [
            'MultiJet', 'DiJet', 'MultiJet_0b', 'MultiJet_1b', 'MultiJet_2b',
            'DiJet_0b', 'DiJet_1b', 'DiJet_2b'
    ]:
        xFactor = [1.8, 1.4, 1.4, 1.4]  #xFactor for each b-tag bin
    elif options.box in [
            'MuMultiJet', 'LeptonMultiJet', 'LeptonJet', 'LeptonJet_0b',
            'LeptonJet_1b', 'LeptonJet_2b', 'LeptonMultiJet_0b',
            'LeptonMultiJet_1b', 'LeptonMultiJet_2b'
    ]:
        xFactor = [2.0, 2.0, 2.0, 2.0]  #xFactor for each b-tag bin
    elif options.box == 'EleMultiJet':
        xFactor = [2.0, 1.8, 1.2, 1.2]  #xFactor for each b-tag bin
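    # xFactor is a per-b-tag-bin multiplicative factor; in the varyN toys below the
    # expected yield in each bin is scaled by xFactor**Gaus(0,1) (a log-normal
    # variation) before optionally applying Poisson fluctuations.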
    widgets = [
        'Running %s toys ' % unc,
        Percentage(), ' ',
        Bar(marker=RotatingMarker()), ' ',
        ETA(), ' ',
        FileTransferSpeed()
    ]
    pbar = ProgressBar(widgets=widgets, maxval=options.nToys).start()
    iAttempt = -1
    while iToy < options.nToys:
        iAttempt += 1
        if options.freq:
            pSet = fr.floatParsFinal()
        elif options.oneSigma:
            pSet = fr.randomizePars()
        else:
            if options.noSys:
                pSet = fr.floatParsFinal()
            elif options.varyN:
                #pSet = hesseDs.get(iAttempt)
                pSet = fr.randomizePars()
            else:
                pSet = fr.randomizePars()
        for p in rootTools.RootIterator.RootIterator(pSet):
            w.var(p.GetName()).setVal(p.getVal())
            w.var(p.GetName()).setError(p.getError())

        badPars = []
        for bkgd in ['TTj0b', 'TTj1b', 'TTj2b', 'TTj3b']:
            if w.var('n_%s_%s' % (bkgd, options.box)) != None:
                badPars.append(
                    w.var('n_%s_%s' % (bkgd, options.box)).getVal() <= 0)
            if w.var('b_%s_%s' % (bkgd, options.box)) != None:
                badPars.append(
                    w.var('b_%s_%s' % (bkgd, options.box)).getVal() <= 0)
            if w.var('MR0_%s_%s' % (bkgd, options.box)) != None:
                badPars.append(
                    w.var('MR0_%s_%s' % (bkgd, options.box)).getVal() <= 0)
                #badPars.append(w.var('MR0_%s_%s'%(bkgd,options.box)).getVal() >= w.var('MR').getMin())
            if w.var('R0_%s_%s' % (bkgd, options.box)) != None:
                badPars.append(
                    w.var('R0_%s_%s' % (bkgd, options.box)).getVal() <= 0)
                #badPars.append(w.var('R0_%s_%s'%(bkgd,options.box)).getVal() >= w.var('Rsq').getMin())
        if any(badPars):
            nBadPars += 1
            #print "bad pars toy=%i"%iToy
            #print badPars
            continue

        #print "good pars"
        errorCountBefore = rt.RooMsgService.instance().errorCount()

        badVal = False
        for iBinX in range(0, nBins):
            th1x.setVal(iBinX + 0.5)  # check number of events in each bin
            pdfValV = extRazorPdf.getValV(
                rt.RooArgSet(th1x)) * extRazorPdf.expectedEvents(
                    rt.RooArgSet(th1x))
            pdfVal0 = extRazorPdf.getValV(0) * extRazorPdf.expectedEvents(
                rt.RooArgSet(th1x))
            if bestFitByBin[
                    iBinX] > 0 and pdfValV / bestFitByBin[iBinX] <= 1e-12:
                #if bestFitByBin[iBinX] > 0 and pdfValV <= 0:
                #print "bin = %i"%iBinX
                #print "best fit = %e"%(bestFitByBin[iBinX])
                #print "pdf valv = %e"%(pdfValV)
                #print "pdf val0 = %e"%(pdfVal0)
                badVal = True
        if badVal:
            #print "bad val"
            continue

        errorCountAfter = rt.RooMsgService.instance().errorCount()
        if errorCountAfter > errorCountBefore:
            print "can't evaulate pdf toy=%i" % iToy
            continue

        errorCountBefore = rt.RooMsgService.instance().errorCount()
        #print "start generating toy=%i"%iToy
        if options.freq:
            if options.r > -1:
                w.var('r').setVal(options.r)
                asimov = extSpBPdf.generateBinned(rt.RooArgSet(th1x),
                                                  rt.RooFit.Name('toy'),
                                                  rt.RooFit.Extended(True))
            else:
                asimov = extRazorPdf.generateBinned(rt.RooArgSet(th1x),
                                                    rt.RooFit.Name('toy'),
                                                    rt.RooFit.Extended(True))
        elif options.noStat:
            if options.r > -1:
                w.var('r').setVal(options.r)
                asimov = extSpBPdf.generateBinned(rt.RooArgSet(th1x),
                                                  rt.RooFit.Name('toy'),
                                                  rt.RooFit.Asimov())
            else:
                asimov = extRazorPdf.generateBinned(rt.RooArgSet(th1x),
                                                    rt.RooFit.Name('toy'),
                                                    rt.RooFit.Asimov())
        else:
            if options.r > -1:
                w.var('r').setVal(options.r)
                asimov = extSpBPdf.generateBinned(rt.RooArgSet(th1x),
                                                  rt.RooFit.Name('toy'),
                                                  rt.RooFit.Extended(True))
            else:
                asimov = extRazorPdf.generateBinned(rt.RooArgSet(th1x),
                                                    rt.RooFit.Name('toy'),
                                                    rt.RooFit.Extended(True))

        #print "toy entries = %.2f"%asimov.sumEntries()
        errorCountAfter = rt.RooMsgService.instance().errorCount()
        if errorCountAfter > errorCountBefore:
            print "can't generate toy=%i" % iToy
            continue

        #print "SUCCESS: generated toy=%i"%iToy

        pSetSave = pSet
        migrad_status = -1
        hesse_status = -1
        minos_status = -1
        if options.freq:
            if options.r > -1:
                nll_func_toy = extSpBPdf.createNLL(asimov,
                                                   rt.RooFit.Extended(True))
                m = rt.RooMinimizer(nll_func_toy)
                m.setStrategy(0)
                m.setPrintLevel(-1)
                m.setPrintEvalErrors(-1)
                rSet = rt.RooArgSet(w.var('r'))
                migrad_status = m.minimize('Minuit2', 'migrad')
                #hesse_status = m.minimize('Minuit2','hesse')
                #minos_status = m.minos(rSet)
                fr_toy = m.save()
                value = setattr(s1, 'migrad_%s' % options.box, migrad_status)
                value = setattr(s1, 'hesse_%s' % options.box, hesse_status)
                value = setattr(s1, 'minos_%s' % options.box, minos_status)
            else:
                #print "yes"
                nll_func_toy = extRazorPdf.createNLL(asimov,
                                                     rt.RooFit.Extended(True),
                                                     rt.RooFit.Range(fitband))
                m = rt.RooMinimizer(nll_func_toy)
                m.setStrategy(0)
                m.setPrintLevel(-1)
                m.setPrintEvalErrors(-1)
                migrad_status = m.minimize('Minuit2', 'migrad')
                #migrad_status = m.minimize('Minuit2','migrad')
                #migrad_status = m.minimize('Minuit2','migrad')
                fr_toy = m.save()
            value = setattr(s1, 'covQual_%s' % options.box, fr_toy.covQual())
            value = setattr(s1, 'migrad_%s' % options.box, migrad_status)
            value = setattr(s1, 'hesse_%s' % options.box, hesse_status)
            value = setattr(s1, 'minos_%s' % options.box, minos_status)
            pSetSave = fr_toy.floatParsFinal()

        for p in rootTools.RootIterator.RootIterator(pSetSave):
            value = setattr(s1, p.GetName(), p.getVal())
            value = setattr(s1, p.GetName() + "_error", p.getError())
            if p.GetName() == 'r':
                value = setattr(s1, "r_errorlo", p.getAsymErrorLo())
                value = setattr(s1, "r_errorhi", p.getAsymErrorHi())

        chi2_toy = 0
        n2llr_toy = 0
        nll_toy = 0
        chi2_toy_btag = [0 for k in range(1, len(z))]
        n2llr_toy_btag = [0 for k in range(1, len(z))]
        nll_toy_btag = [0 for k in range(1, len(z))]
        # restore best-fit to calculate expected values
        for p in rootTools.RootIterator.RootIterator(pSetSave):
            w.var(p.GetName()).setVal(p.getVal())
            w.var(p.GetName()).setError(p.getError())

        #if options.varyN:
        #    xGaus = rt.RooRandom.randomGenerator().Gaus(0,1)

        iBinX = -1
        for i in range(1, len(x)):
            for j in range(1, len(y)):
                for k in range(1, len(z)):
                    iBinX += 1
                    th1x.setVal(iBinX + 0.5)
                    inSideband = False
                    #print "x, y = ", x[i-1], y[j-1]
                    #print "xMin, yMin = ", x[ixMin-1], y[iyMin-1]
                    if x[i - 1] < x[ixMin - 1]:
                        inSideband = True
                    if y[j - 1] < y[iyMin - 1]:
                        inSideband = True
                    #print "inSideband = %s"%inSideband
                    expected = extRazorPdf.getValV(
                        rt.RooArgSet(th1x)) * extRazorPdf.expectedEvents(
                            rt.RooArgSet(th1x))
                    if options.noStat and options.varyN:
                        if inSideband:
                            toy = float(asimov.weight(rt.RooArgSet(th1x)))
                        else:
                            xGaus = rt.RooRandom.randomGenerator().Gaus(0, 1)
                            toy = float(asimov.weight(
                                rt.RooArgSet(th1x))) * rt.TMath.Power(
                                    xFactor[k - 1], xGaus)
                    elif options.varyN:
                        if inSideband:
                            toy = float(asimov.weight(rt.RooArgSet(th1x)))
                        else:
                            xGaus = rt.RooRandom.randomGenerator().Gaus(0, 1)
                            central = float(
                                expected *
                                rt.TMath.Power(xFactor[k - 1], xGaus))
                            toy = rt.RooRandom.randomGenerator().Poisson(
                                central)
                    elif options.noStat:
                        toy = float(asimov.weight(rt.RooArgSet(th1x)))
                    else:
                        toy = float(asimov.weight(rt.RooArgSet(th1x)))
                    observed = float(dataHist.weight(rt.RooArgSet(th1x)))
                    if options.oneSigma:
                        toy = observed  # to get nll with respect to original dataset
                        value = setattr(s1, 'b%i' % iBinX,
                                        expected)  #save expected yield
                    elif options.freq and options.noStat:
                        #print "expected = ", expected, toy, observed
                        value = setattr(s1, 'b%i' % iBinX, expected)
                    elif options.freq:
                        withStat = rt.RooRandom.randomGenerator().Poisson(
                            expected)
                        #print "with stats = ", withStat
                        value = setattr(s1, 'b%i' % iBinX, withStat)
                    else:
                        value = setattr(s1, 'b%i' % iBinX, toy)

                        #print "observed, expected, toy = ", observed, expected, toy

                    if expected > 0:
                        chi2_toy += (toy - expected) * (toy -
                                                        expected) / (expected)
                        chi2_toy_btag[k - 1] += (toy - expected) * (
                            toy - expected) / (expected)
                    if toy > 0 and expected > 0:
                        n2llr_toy += 2 * (toy * rt.TMath.Log(toy / expected) -
                                          toy)
                        n2llr_toy_btag[k - 1] += 2 * (
                            toy * rt.TMath.Log(toy / expected) - toy)
                    if expected > 0:
                        n2llr_toy += 2 * (expected)
                        n2llr_toy_btag[k - 1] += 2 * (expected)
                    if expected > 0:
                        nll_toy -= toy * rt.TMath.Log(expected) - expected
                        nll_toy_btag[
                            k - 1] -= toy * rt.TMath.Log(expected) - expected

        # to check  nll, chi2 calculation
        #nll_func_toy = extRazorPdf.createNLL(asimov,rt.RooFit.Extended(True))
        #chi2_func_toy = extRazorPdf.createChi2(asimov,rt.RooFit.Extended(True),rt.RooFit.DataError(rt.RooAbsData.Expected))

        #print ''
        #print "chi2 func:   ", chi2_func_toy.getVal()
        #print "chi2 by hand  ", chi2_toy
        #print "nll func:    ", nll_func_toy.getVal()
        #print "nll by hand: ", nll_toy
        if options.oneSigma:
            nsigma = 1.0
            nparam = fr.floatParsFinal().getSize()
            if 2 * (nll_toy - nll_data) > rt.Math.chisquared_quantile(
                    rt.Math.erf(nsigma / rt.TMath.Sqrt(2.)), nparam):
                continue

        value = setattr(s1, 'nll_%s' % options.box, nll_toy)
        value = setattr(s1, 'n2llr_%s' % options.box, n2llr_toy)
        value = setattr(s1, 'chi2_%s' % options.box, chi2_toy)

        for k in range(1, len(z)):
            ibtag = z[k - 1]
            value = setattr(s1, 'nll_%ibtag_%s' % (ibtag, options.box),
                            nll_toy_btag[k - 1])
            value = setattr(s1, 'n2llr_%ibtag_%s' % (ibtag, options.box),
                            n2llr_toy_btag[k - 1])
            value = setattr(s1, 'chi2_%ibtag_%s' % (ibtag, options.box),
                            chi2_toy_btag[k - 1])

        value = setattr(s1, 'toy_num', iToy)  # save toy number
        pbar.update(iToy)
        myTree.Fill()
        iToy += 1
    rt.RooMsgService.instance().reset()
    pbar.finish()

    w.Print('v')
    output.cd()
    myTree.Write()
    w.Write()
    output.Close()
    return output.GetName()
示例#26
0
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-model', type=str, required=True)
    parser.add_argument('-results', type=str, required=True)
    args = parser.parse_args()

    model = load_model(args.model)
    model.compile(loss='categorical_crossentropy', optimizer='rmsprop')

    questions_val = open('./data/questions_val2014.txt',
                         'r').read().decode('utf8').splitlines()
    answers_val = open('./data/answers_val2014_all.txt',
                       'r').read().decode('utf8').splitlines()
    images_val = open('./data/images_val2014_all.txt',
                      'r').read().decode('utf8').splitlines()

    print 'Model compiled, weights loaded...'
    labelencoder = joblib.load('./labelencoder.pkl')

    image_ids = open("./id_map.txt").read().splitlines()
    id_map = {}
    for ids in image_ids:
        id_split = ids.split()
        id_map[int(id_split[0])] = int(id_split[1]) - 1

    sherlock_features = np.load('./sherlock_features_temp.npy')

    nlp = English()
    print 'loaded word2vec features'

    nb_classes = 1000
    y_predict_text = []
    batchSize = 128
    widgets = ['Evaluating ', Percentage(), ' ', Bar(marker='#', left='[', right=']'),
               ' ', ETA()]
    pbar = ProgressBar(widgets=widgets)

    for qu_batch, an_batch, im_batch in pbar(zip(grouper(questions_val, batchSize, fillvalue=questions_val[0]),
                                                 grouper(answers_val, batchSize, fillvalue=answers_val[0]),
                                                 grouper(images_val, batchSize, fillvalue=images_val[0]))):
        X_q_batch = get_questions_matrix_sum(qu_batch, nlp)
        X_i_batch = get_images_matrix(im_batch, id_map, sherlock_features)
        X_batch = np.hstack((X_q_batch, X_i_batch))
        y_predict = model.predict_classes(X_batch, verbose=0)
        y_predict_text.extend(labelencoder.inverse_transform(y_predict))

    correct_val = 0.0
    total = 0
    f1 = open(args.results, 'w')
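    # VQA-style accuracy: each question has multiple reference answers separated
    # by ';'. A prediction matching 3 or more of them counts as fully correct,
    # otherwise it gets partial credit of (#matches)/3.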

    for prediction, truth, question, image in zip(y_predict_text, answers_val, questions_val, images_val):
        temp_count = 0
        for _truth in truth.split(';'):
            if prediction == _truth:
                temp_count += 1

        if temp_count > 2:
            correct_val += 1
        else:
            correct_val += float(temp_count) / 3

        total += 1
        f1.write(question.encode('utf-8'))
        f1.write('\n')
        f1.write(image.encode('utf-8'))
        f1.write('\n')
        f1.write(prediction)
        f1.write('\n')
        f1.write(truth.encode('utf-8'))
        f1.write('\n')
        f1.write('\n')

    f1.write('Final Accuracy is ' + str(correct_val / total))
    f1.close()
    f1 = open('../results/overall_results.txt', 'a')
    f1.write(args.model + '\n')  # the parser defines -model, not -weights
    f1.write(str(correct_val / total) + '\n')
    f1.close()
    print 'Final Accuracy on the validation set is', correct_val / total
示例#27
0
    def run(self):

        self.initialize()
        self.iters = 0

        for epoch in range(self.args.epoch_size):
            data_style_A, data_style_B = shuffle_data(self.data_style_A,
                                                      self.data_style_B)

            widgets = ['epoch #%d|' % epoch, Percentage(), Bar(), ETA()]
            pbar = ProgressBar(maxval=self.n_batches, widgets=widgets)
            pbar.start()

            for i in range(self.n_batches):

                pbar.update(i)

                self.generator_A.zero_grad()
                self.generator_B.zero_grad()
                self.discriminator_A.zero_grad()
                self.discriminator_B.zero_grad()

                self.A_path = data_style_A[i * self.args.batch_size:(i + 1) *
                                           self.args.batch_size]
                self.B_path = data_style_B[i * self.args.batch_size:(i + 1) *
                                           self.args.batch_size]

                A, B = self.get_images(self.A_path, self.B_path)
                A = Variable(torch.FloatTensor(A))
                B = Variable(torch.FloatTensor(B))

                if self.cuda:
                    A = A.cuda()
                    B = B.cuda()

                AB = self.generator_B(A)
                BA = self.generator_A(B)

                ABA = self.generator_A(AB)
                BAB = self.generator_B(BA)

                # Reconstruction Loss
                self.recon_loss_A = self.recon_criterion(ABA, A)
                self.recon_loss_B = self.recon_criterion(BAB, B)

                # Real/Fake GAN Loss (A)
                A_dis_real, A_feats_real = self.discriminator_A(A)
                A_dis_fake, A_feats_fake = self.discriminator_A(BA)
                self.fm_loss_A = self.get_fm_loss(A_feats_real, A_feats_fake)
                self.dis_loss_A, self.gen_loss_A = self.get_gan_loss(
                    A_dis_real, A_dis_fake)

                # Real/Fake GAN Loss (B)
                B_dis_real, B_feats_real = self.discriminator_B(B)
                B_dis_fake, B_feats_fake = self.discriminator_B(AB)
                self.fm_loss_B = self.get_fm_loss(B_feats_real, B_feats_fake)
                self.dis_loss_B, self.gen_loss_B = self.get_gan_loss(
                    B_dis_real, B_dis_fake)

                # Total Loss
                if self.iters < self.args.gan_curriculum:
                    rate = self.args.starting_rate
                else:
                    rate = self.args.default_rate
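                # 'rate' sets the reconstruction-vs-GAN mix in the generator loss;
                # it switches from starting_rate to default_rate once gan_curriculum
                # iterations have passed.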

                self.gan_loss_A = (self.gen_loss_B * 0.1 +
                                   self.fm_loss_B * 0.9)
                self.gan_loss_B = (self.gen_loss_A * 0.1 +
                                   self.fm_loss_A * 0.9)
                self.gen_loss_A_total = self.gan_loss_B * (
                    1. - rate) + self.recon_loss_A * rate
                self.gen_loss_B_total = self.gan_loss_A * (
                    1. - rate) + self.recon_loss_B * rate

                if self.args.model_arch == 'discogan':
                    self.gen_loss = self.gen_loss_A_total + self.gen_loss_B_total
                    self.dis_loss = self.dis_loss_A + self.dis_loss_B
                elif self.args.model_arch == 'recongan':
                    self.gen_loss = self.gen_loss_A_total
                    self.dis_loss = self.dis_loss_B
                elif self.args.model_arch == 'gan':
                    self.gen_loss = (self.gen_loss_B * 0.1 + self.fm_loss_B *
                                     0.9) + (self.gen_loss_A * 0.1 +
                                             self.fm_loss_A * 0.9)
                    self.dis_loss = self.dis_loss_B + self.dis_loss_A

                self.finish_iteration()
                self.iters += 1
示例#28
0
def download_data(file,
                  group,
                  station_number,
                  start=None,
                  end=None,
                  type='events',
                  progress=True):
    """Download event summary data

    :param file: the PyTables datafile handler.
    :param group: the PyTables destination group, which need not exist.
    :param station_number: The HiSPARC station number for which to get data.
    :param start: a datetime instance defining the start of the search interval.
    :param end: a datetime instance defining the end of the search interval.
    :param type: the datatype to download, either 'events', 'weather', or 'singles'.
    :param progress: if True show a progressbar while downloading.

    If group is None, use '/s<station_number>' as a default.

    The start and stop parameters may both be None.  In that case,
    yesterday's data is downloaded.  If only end is None, a single day's
    worth of data is downloaded, starting at the datetime specified with
    start.

    Example::

        >>> import tables
        >>> import datetime
        >>> import sapphire.esd
        >>> data = tables.open_file('data.h5', 'w')
        >>> sapphire.esd.download_data(data, '/s501', 501,
        ...     datetime.datetime(2013, 9, 1), datetime.datetime(2013, 9, 2))

    """
    # sensible default for group name
    if group is None:
        group = '/s%d' % station_number

    # sensible defaults for start and end
    if start is None:
        if end is not None:
            raise RuntimeError(
                "Start is None, but end is not. I can't go on like this.")
        else:
            yesterday = datetime.date.today() - datetime.timedelta(days=1)
            start = datetime.datetime.combine(yesterday, datetime.time(0, 0))
    if end is None:
        end = start + datetime.timedelta(days=1)

    # build and open url, create tables and set read function
    query = urlencode({'start': start, 'end': end})
    if type == 'events':
        url = get_events_url().format(station_number=station_number,
                                      query=query)
        table = _get_or_create_events_table(file, group)
        read_and_store = _read_line_and_store_event_class
    elif type == 'weather':
        url = get_weather_url().format(station_number=station_number,
                                       query=query)
        table = _get_or_create_weather_table(file, group)
        read_and_store = _read_line_and_store_weather_class
    elif type == 'singles':
        url = get_singles_url().format(station_number=station_number,
                                       query=query)
        table = _get_or_create_singles_table(file, group)
        read_and_store = _read_line_and_store_singles_class
    elif type == 'lightning':
        url = get_lightning_url().format(lightning_type=station_number,
                                         query=query)
        table = _get_or_create_lightning_table(file, group)
        read_and_store = _read_line_and_store_lightning_class
    else:
        raise ValueError("Data type not recognized.")

    try:
        data = urlopen(url)
    except BadStatusLine:
        # Unexplained transient error, retry once
        data = urlopen(url)

    # keep track of event timestamp within [start, end] interval for
    # progressbar
    t_start = calendar.timegm(start.utctimetuple())
    t_end = calendar.timegm(end.utctimetuple())
    t_delta = t_end - t_start
    if progress:
        pbar = ProgressBar(max_value=1., widgets=[Percentage(),
                                                  Bar(),
                                                  ETA()]).start()

    # loop over lines in tsv as they come streaming in
    prev_update = time.time()
    reader = csv.reader(iterdecode(data, 'utf-8'), delimiter='\t')
    with read_and_store(table) as writer:
        for line in reader:
            timestamp = writer.store_line(line)
            # update progressbar every 0.5 seconds
            if progress and time.time() - prev_update > 0.5 and timestamp != 0.:
                pbar.update((1. * timestamp - t_start) / t_delta)
                prev_update = time.time()
    if progress:
        pbar.finish()
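    # The ESD stream should end with a comment line: a non-empty '#...' line marks
    # a successful download, a bare '#' means no events were returned, and a
    # trailing data line means the download stopped early.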

    if line[0][0] == '#':
        if len(line[0]) == 1:
            # No events received, and no success line
            raise Exception('Failed to download data, no data received.')
        else:
            # Successful download because last line is a non-empty comment
            return
    else:
        # Last line is data, report failed download and date/time of last line
        raise Exception(
            'Failed to complete download, last received data from: %s %s.' %
            tuple(line[:2]))
示例#29
0
    # Restorer adversarial discriminator
    g_loss = ops.binary_cross_entropy_with_logits(tf.ones_like(D_), D_)
    g_optim = tf.train.AdamOptimizer(FLAGS.r_learning_rate, epsilon=1.0)
    g_train = pt.apply_optimizer(g_optim, losses=[g_loss])

    # General stuff
    init = tf.initialize_all_variables()
    saver = tf.train.Saver()

    # run as session
    with tf.Session() as sess:
        sess.run(init)
        for epoch in range(FLAGS.max_epoch):
            training_loss = 0.0
            widgets = ["epoch #%d|" % epoch, Percentage(), Bar(), ETA()]
            pbar = ProgressBar(FLAGS.updates_per_epoch, widgets=widgets)
            pbar.start()
            for i in range(FLAGS.updates_per_epoch):
                pbar.update(i)
                mask, x_masked, x_ground_truth = celebACropped.train.next_batch(
                    FLAGS.batch_size)
                # print (mask.shape)
                # print ('reconstruct')
                # Restorer reconstruct
                _, loss_value = sess.run(fetches=[r_train, r_loss],
                                         feed_dict={
                                             input_tensor: x_masked,
                                             mask_tensor: mask,
                                             ground_truth_tensor:
                                             x_ground_truth
示例#30
0
    for act in muscle_acts:
        act_error += act**2

    deviation_error = math.sqrt(
        sum((px - qx)**2.0 for px, qx in zip(x0, muscle_acts)))

    # return (1000*torque_error) + act_error + (10*deviation_error)
    return (1000 * torque_error) + act_error


# widgets for the progress bar
widgets = [
    'PROGRESS: ',
    Percentage(), ' ',
    Bar(marker='-', left='[', right=']\n'), ' ',
    ETA(), ' \n '
]

# create a progress bar object
pbar = ProgressBar(maxval=len(current_experiment.t), widgets=widgets).start()

# bounds on muscle activations
lb = []
ub = []
for muscle in current_model.musculature.muscles:
    lb.append(muscle.min_act)
    ub.append(muscle.max_act)

# Bounds function in scipy
bounds = Bounds(lb, ub)
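
# A minimal, self-contained sketch (not part of the original code) showing how a
# Bounds object like the one above is passed to scipy.optimize.minimize; the toy
# quadratic objective, limits, and starting point are assumptions for illustration.
from scipy.optimize import Bounds, minimize


def _toy_objective(acts):
    # stand-in for the muscle-activation cost: sum of squared activations
    return sum(a ** 2 for a in acts)


_lb, _ub = [0.0, 0.0], [1.0, 1.0]  # hypothetical activation limits
_bounds = Bounds(_lb, _ub)
_result = minimize(_toy_objective, x0=[0.5, 0.5], method='SLSQP', bounds=_bounds)
print(_result.x)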