Пример #1
0
def preprocess_all():
    """Preprocess every object found in the raw batch bucket.

    Each object key in ``config.S3_BUCKET_BATCH_RAW`` is handed to
    ``preprocess_file``; after each call a confirmation line (passed through
    ``colored`` with ``"green"``) is printed.
    """
    bucket_name = config.S3_BUCKET_BATCH_RAW
    for entry in util.get_bucket(bucket_name).objects.all():
        preprocess_file(bucket_name, entry.key)
        message = "Finished preprocessing file s3a://{0}/{1}".format(
            bucket_name, entry.key)
        print(colored(message, "green"))
Пример #2
0
    def run(self):
        """Replay every object of the stream bucket to Kafka, line by line.

        For each object in ``config.S3_BUCKET_STREAM`` the content is read
        line by line and each line is sent to ``config.KAFKA_TOPIC``. The
        method sleeps ``config.KAFKA_PRODUCER_RATE`` seconds before every
        send, and echoes the line first when ``config.LOG_DEBUG`` is truthy.
        """
        producer = kafka.KafkaProducer(bootstrap_servers=config.KAFKA_SERVERS)
        bucket = util.get_bucket(config.S3_BUCKET_STREAM)

        for json_obj in bucket.objects.all():
            json_file = "s3://{0}/{1}".format(config.S3_BUCKET_STREAM, json_obj.key)
            # The context manager closes the stream even if a send fails,
            # instead of leaving one open handle per object.
            with smart_open.smart_open(json_file) as stream:
                for line in stream:
                    if config.LOG_DEBUG:
                        print(line)
                    time.sleep(config.KAFKA_PRODUCER_RATE)
                    producer.send(config.KAFKA_TOPIC, line)

        # send() only queues the record; flush so nothing still buffered is
        # dropped when this method returns.
        producer.flush()
Пример #3
0
def smart_size(url):
    """Return the size in bytes of a local file or an S3 object.

    Args:
        url: A plain filesystem path, a ``file://`` URL or an ``s3://`` URL.

    Returns:
        The size in bytes. For ``s3://`` URLs this is the ``ContentLength``
        of the object, or ``0`` when the lookup fails for any reason.
        ``None`` is returned for any other URL scheme.

    Raises:
        OSError: If a local file does not exist or is not accessible.
    """
    result = urllib.parse.urlparse(url)
    if result.scheme == '':
        return os.path.getsize(url)
    if result.scheme == 'file':
        # os.path cannot resolve a "file://" URL, so hand it the path part.
        # netloc is prepended because urlparse puts the first segment of
        # "file://relative/path" there.
        return os.path.getsize(result.netloc + result.path)
    if result.scheme == 's3':
        try:
            response = get_s3client_for_bucket(url).head_object(
                Bucket=get_bucket(result), Key=get_key(result))
            # Return the actual size (previously this was collapsed into a
            # "size > 0" boolean).
            return response['ContentLength']
        except Exception:
            # Best effort: a missing or unreachable object counts as empty.
            return 0
    return None
Пример #4
0
def smart_etag(url) -> str:
    """Return a change marker for a local file or an S3 object.

    Args:
        url: A plain filesystem path, a ``file://`` URL or an ``s3://`` URL.

    Returns:
        For ``s3://`` URLs, the object's ``ETag`` with its first and last
        character removed (S3 reports the value wrapped in double quotes),
        or ``False`` when the lookup fails. For local files, the
        modification time as returned by ``os.path.getmtime``. ``None`` is
        returned for any other URL scheme.

    Raises:
        OSError: If a local file does not exist or is not accessible.
    """
    # NOTE(review): the local branch returns a float mtime although the
    # annotation says ``str``; the value is left unchanged so existing
    # callers comparing against it keep working -- confirm intended type.
    result = urllib.parse.urlparse(url)
    if result.scheme == '':
        return os.path.getmtime(url)
    if result.scheme == 'file':
        # os.path cannot resolve a "file://" URL, so hand it the path part.
        # netloc is prepended because urlparse puts the first segment of
        # "file://relative/path" there.
        return os.path.getmtime(result.netloc + result.path)
    if result.scheme == 's3':
        try:
            response = get_s3client_for_bucket(url).head_object(
                Bucket=get_bucket(result), Key=get_key(result))
            # Return the ETag itself. The previous "etag > 0" comparison
            # raised TypeError on a string, so this branch always fell
            # through to the except clause and returned False.
            return response['ETag'][1:-1]
        except Exception:
            # Best effort: a missing or unreachable object has no etag.
            return False
    return None
Пример #5
0
def smart_move(source, destination):
    """Transfer ``source`` to ``destination`` (local path/``file://`` or ``s3://``).

    * ``s3://`` destination: the local ``source`` file is uploaded with a
      ``ProgressPercentage`` callback and then removed from disk.
    * plain path or ``file://`` destination: the content of ``source`` is
      copied to ``destination`` through ``smart_open``.

    Any other destination scheme is silently ignored.
    """
    # NOTE(review): for a local destination the source is copied but never
    # removed, unlike the S3 branch -- confirm whether that is intended.
    target = urllib.parse.urlparse(destination)
    scheme = target.scheme

    if scheme == 's3':
        client = get_s3client_for_bucket(destination)
        bucket_name = get_bucket(target)
        object_key = get_key(target)
        progress = ProgressPercentage(smart_size(source), source)
        client.upload_file(source, bucket_name, object_key, Callback=progress)
        os.remove(source)
        return

    if scheme in ('', 'file'):
        with smart_open.smart_open(source, 'rb') as reader, \
                smart_open.smart_open(destination, 'wb') as writer:
            shutil.copyfileobj(reader, writer)
Пример #6
0
def upload_files(ctx, record_id, fname):
    """Upload files to an existing record.

    Args:
        ctx: Context object whose ``obj`` mapping provides ``'token'``,
            ``'log'`` (a logger) and ``'url'``.
        record_id: Identifier of the record the files are attached to.
        fname: Path of a text file listing one file path per line.
            Blank lines are ignored.
    """
    token = ctx.obj['token']
    zen_log = ctx.obj['log']

    # get bucket_url for record
    bucket_url = get_bucket(ctx.obj['url'], token, record_id)

    # read file paths from file, dropping the trailing newline of each line
    with open(fname) as listing:
        file_paths = [line.rstrip('\n') for line in listing]

    # upload all files to record, one by one
    for path in file_paths:
        if not path.strip():
            # A blank line is not a path; skip it instead of uploading ''.
            continue
        # Log the cleaned path so the message does not contain a newline.
        zen_log.info(f"Uploading {path} ...")
        status = upload_file(bucket_url, token, record_id, path)
        zen_log.info(f"Request status: {status}")
Пример #7
0
def dist_generator(particles, energy, machine, fort13, jobs, factor, emittance_x, emittance_y, alpha_x, alpha_y, beta_x, beta_y, offset_x, offset_xp, offset_y, offset_yp, dispersion_x, dispersion_y, bunch, spread, seed):
    """Generate a particle distribution and write it to a text file.

    Transverse coordinates are drawn from normal distributions whose widths
    come from ``get_sigmas`` (scaled by ``factor``) and are then rotated by
    ``arctan(-alpha/beta)`` in each plane. Longitudinal coordinates are drawn
    one particle at a time and kept only when the value returned by
    ``get_bucket`` is at or below a machine-specific margin.

    Args:
        particles: Number of particles to generate. Must be even when
            ``fort13 == 'True'`` because that format is written in pairs.
        energy: Reference energy (eV according to the inline comments).
        machine: One of ``'HL_coll'``, ``'HL_coll_200'``, ``'HL_coll_tcp'``,
            ``'HL_coll_tcp_200'`` or ``'SPS_inj'``.
        fort13: The *string* ``'True'`` (write ``fort.13``) or ``'False'``
            (write ``init_dist_<jobs>.txt``). Anything else only prints a
            hint after the distribution has been generated.
        jobs: Job identifier; used in the output file name and in seed.txt.
        factor: Multiplier applied to the transverse sigmas.
        emittance_x, emittance_y, alpha_x, alpha_y, beta_x, beta_y,
        dispersion_x, dispersion_y: Forwarded to ``get_sigmas``; alpha and
            beta are also used for the phase-space rotation.
        offset_x, offset_xp, offset_y, offset_yp: Added to the transverse
            coordinates in the ``fort.13`` output only.
        bunch: Scale applied to the unit-Gaussian longitudinal position.
        spread: Relative energy spread applied to the unit-Gaussian energy.
        seed: RNG seed; ``0`` means "pick a random seed".

    Raises:
        ValueError: If ``machine`` is unknown, or if ``fort13 == 'True'`` and
            the particle count is odd.

    Side effects:
        Appends the job id and seed to ``seed.txt``, seeds both ``random``
        and ``numpy.random``, and writes the output file in the current
        working directory.
    """
    # Acceptance margin on the longitudinal contour value, per machine.
    # Resolved once up front so an unknown machine fails with a clear error
    # instead of an unbound-variable error inside the sampling loop.
    hmargins = {
        'HL_coll': -0.01,
        'HL_coll_200': -0.01,
        'HL_coll_tcp': -0.01,
        'HL_coll_tcp_200': -0.01,
        'SPS_inj': -1,
    }
    if machine not in hmargins:
        raise ValueError('Unknown machine %r; expected one of %s'
                         % (machine, ', '.join(sorted(hmargins))))
    Hmargin = hmargins[machine]

    # numpy needs an integer sample count (round() yields a float on Python 2).
    n_particles = int(round(particles))
    if fort13 == 'True' and n_particles % 2:
        # fort.13 is written two particles at a time; an odd count would
        # otherwise fail with IndexError after a partial file was written.
        raise ValueError('fort.13 output needs an even number of particles, got %s'
                         % (particles,))

    job_str = '%s' % jobs
    # Getting the Transverse sigmas (amplitudes of phase space ellipse)
    # --------------------------------------------------------------------------------------------------------------
    gamma_rel, beta_rel, p0, mass = get_rel_params(energy)
    tx_max, txp_max = get_sigmas(alpha_x, beta_x, emittance_x, dispersion_x, spread, beta_rel, gamma_rel)
    ty_max, typ_max = get_sigmas(alpha_y, beta_y, emittance_y, dispersion_y, spread, beta_rel, gamma_rel)

    # Seeding
    # --------------------------------------------------------------------------------------------------------------
    if seed == 0:
        myseed = random.randint(0, 429496729)
    else:
        myseed = seed
    with open('seed.txt', 'a') as g:
        # Same text the former print statement produced (note double spaces).
        g.write('job  %s seed  %s\n' % (job_str, myseed))
    np.random.seed(myseed)
    random.seed(myseed)

    # Generating the Transverse Distribution
    # --------------------------------------------------------------------------------------------------------------
    x_t = np.asarray(np.random.normal(0, factor * tx_max, n_particles))
    xp_t = np.asarray(np.random.normal(0, factor * txp_max, n_particles))
    y_t = np.asarray(np.random.normal(0, factor * ty_max, n_particles))
    yp_t = np.asarray(np.random.normal(0, factor * typ_max, n_particles))

    # Rotating the Transverse Distribution
    # --------------------------------------------------------------------------------------------------------------
    angle_x = np.arctan(-alpha_x / beta_x)
    x = x_t * np.cos(angle_x) - xp_t * np.sin(angle_x)
    xp = x_t * np.sin(angle_x) + xp_t * np.cos(angle_x)

    angle_y = np.arctan(-alpha_y / beta_y)
    y = y_t * np.cos(angle_y) - yp_t * np.sin(angle_y)
    yp = y_t * np.sin(angle_y) + yp_t * np.cos(angle_y)

    # Generating the Longitudinal Distribution
    # --------------------------------------------------------------------------------------------------------------
    z = []
    E = []
    dp = []

    while len(z) < particles:
        # Generate for as long time as is needed
        particle_z = random.gauss(0, 1)
        particle_e = random.gauss(0, 1)
        trial_z = particle_z * bunch
        trial_e = energy * (1 + particle_e * spread)  # eV

        trial_p = np.sqrt((trial_e - mass) * (trial_e + mass))
        dPP = (trial_p - p0) / p0
        h = get_bucket(machine, plot=False, z=trial_z, DELTA=dPP)  # Longitudinal contour

        if h <= Hmargin:
            z.append(float(trial_z))
            E.append(float(trial_e))
            dp.append(dPP)
        else:
            print('Outside margin, trying again, %s' % (h,))

    zz = np.asarray(z)
    EE = np.asarray(E)
    ddp = np.asarray(dp)

    if fort13 == 'False':
        outfile = 'init_dist_' + job_str + '.txt'
        with open(outfile, 'w') as f:
            for e1, e2, e3, e4, e5, e6 in zip(x, xp, y, yp, zz * 1e3, EE * 1e-6):
                f.write('%8.6e %8.6e %8.6e %8.6e %8.6e %8.6e\n' % (e1, e2, e3, e4, e5, e6))
    elif fort13 == 'True':
        outfile = 'fort.13'
        with open(outfile, 'w') as f:
            for i in range(0, n_particles, 2):
                # Six coordinates for each particle of the pair ...
                for j in (i, i + 1):
                    f.write(str((x[j] + offset_x) * 1e3) + "\n")  # mm
                    f.write(str((xp[j] + offset_xp) * 1e3) + "\n")  # mrad
                    f.write(str((y[j] + offset_y) * 1e3) + "\n")  # mm
                    f.write(str((yp[j] + offset_yp) * 1e3) + "\n")  # mrad
                    f.write(str(zz[j] * 1e3) + "\n")  # mm
                    f.write(str(ddp[j]) + "\n")  # -

                # ... followed by the reference energy and both energies.
                f.write(str(energy * 1e-6) + "\n")  # MeV
                f.write(str(EE[i] * 1e-6) + "\n")  # MeV
                f.write(str(EE[i + 1] * 1e-6) + "\n")  # MeV
    else:
        print('Please input True or False in the fourth argument')
Пример #8
0
def dist_generator(particles, energy, machine, fort13, jobs, factor,
                   emittance_x, emittance_y, alpha_x, alpha_y, beta_x, beta_y,
                   offset_x, offset_xp, offset_y, offset_yp, dispersion_x,
                   dispersion_y, bunch, spread, seed):
    """Generate a particle distribution and write it to a text file.

    Transverse coordinates are drawn from normal distributions whose widths
    come from ``get_sigmas`` (scaled by ``factor``) and are then rotated by
    ``arctan(-alpha/beta)`` in each plane. Longitudinal coordinates are drawn
    one particle at a time and kept only when the value returned by
    ``get_bucket`` is at or below a machine-specific margin.

    Args:
        particles: Number of particles to generate. Must be even when
            ``fort13 == 'True'`` because that format is written in pairs.
        energy: Reference energy (eV according to the inline comments).
        machine: One of ``'HL_coll'``, ``'HL_coll_200'``, ``'HL_coll_tcp'``,
            ``'HL_coll_tcp_200'`` or ``'SPS_inj'``.
        fort13: The *string* ``'True'`` (write ``fort.13``) or ``'False'``
            (write ``init_dist_<jobs>.txt``). Anything else only prints a
            hint after the distribution has been generated.
        jobs: Job identifier; used in the output file name and in seed.txt.
        factor: Multiplier applied to the transverse sigmas.
        emittance_x, emittance_y, alpha_x, alpha_y, beta_x, beta_y,
        dispersion_x, dispersion_y: Forwarded to ``get_sigmas``; alpha and
            beta are also used for the phase-space rotation.
        offset_x, offset_xp, offset_y, offset_yp: Added to the transverse
            coordinates in the ``fort.13`` output only.
        bunch: Scale applied to the unit-Gaussian longitudinal position.
        spread: Relative energy spread applied to the unit-Gaussian energy.
        seed: RNG seed; ``0`` means "pick a random seed".

    Raises:
        ValueError: If ``machine`` is unknown, or if ``fort13 == 'True'`` and
            the particle count is odd.

    Side effects:
        Appends the job id and seed to ``seed.txt``, seeds both ``random``
        and ``numpy.random``, and writes the output file in the current
        working directory.
    """
    # Acceptance margin on the longitudinal contour value, per machine.
    # Resolved once up front so an unknown machine fails with a clear error
    # instead of an unbound-variable error inside the sampling loop.
    hmargins = {
        'HL_coll': -0.01,
        'HL_coll_200': -0.01,
        'HL_coll_tcp': -0.01,
        'HL_coll_tcp_200': -0.01,
        'SPS_inj': -1,
    }
    if machine not in hmargins:
        raise ValueError('Unknown machine %r; expected one of %s' %
                         (machine, ', '.join(sorted(hmargins))))
    Hmargin = hmargins[machine]

    # numpy needs an integer sample count (round() yields a float on Python 2).
    n_particles = int(round(particles))
    if fort13 == 'True' and n_particles % 2:
        # fort.13 is written two particles at a time; an odd count would
        # otherwise fail with IndexError after a partial file was written.
        raise ValueError(
            'fort.13 output needs an even number of particles, got %s' %
            (particles, ))

    job_str = '%s' % jobs
    # Getting the Transverse sigmas (amplitudes of phase space ellipse)
    # --------------------------------------------------------------------------------------------------------------
    gamma_rel, beta_rel, p0, mass = get_rel_params(energy)
    tx_max, txp_max = get_sigmas(alpha_x, beta_x, emittance_x, dispersion_x,
                                 spread, beta_rel, gamma_rel)
    ty_max, typ_max = get_sigmas(alpha_y, beta_y, emittance_y, dispersion_y,
                                 spread, beta_rel, gamma_rel)

    # Seeding
    # --------------------------------------------------------------------------------------------------------------
    if seed == 0:
        myseed = random.randint(0, 429496729)
    else:
        myseed = seed
    with open('seed.txt', 'a') as g:
        # Same text the former print statement produced (note double spaces).
        g.write('job  %s seed  %s\n' % (job_str, myseed))
    np.random.seed(myseed)
    random.seed(myseed)

    # Generating the Transverse Distribution
    # --------------------------------------------------------------------------------------------------------------
    x_t = np.asarray(np.random.normal(0, factor * tx_max, n_particles))
    xp_t = np.asarray(np.random.normal(0, factor * txp_max, n_particles))
    y_t = np.asarray(np.random.normal(0, factor * ty_max, n_particles))
    yp_t = np.asarray(np.random.normal(0, factor * typ_max, n_particles))

    # Rotating the Transverse Distribution
    # --------------------------------------------------------------------------------------------------------------
    angle_x = np.arctan(-alpha_x / beta_x)
    x = x_t * np.cos(angle_x) - xp_t * np.sin(angle_x)
    xp = x_t * np.sin(angle_x) + xp_t * np.cos(angle_x)

    angle_y = np.arctan(-alpha_y / beta_y)
    y = y_t * np.cos(angle_y) - yp_t * np.sin(angle_y)
    yp = y_t * np.sin(angle_y) + yp_t * np.cos(angle_y)

    # Generating the Longitudinal Distribution
    # --------------------------------------------------------------------------------------------------------------
    z = []
    E = []
    dp = []

    while len(z) < particles:
        # Generate for as long time as is needed
        particle_z = random.gauss(0, 1)
        particle_e = random.gauss(0, 1)
        trial_z = particle_z * bunch
        trial_e = energy * (1 + particle_e * spread)  # eV

        trial_p = np.sqrt((trial_e - mass) * (trial_e + mass))
        dPP = (trial_p - p0) / p0
        h = get_bucket(machine, plot=False, z=trial_z,
                       DELTA=dPP)  # Longitudinal contour

        if h <= Hmargin:
            z.append(float(trial_z))
            E.append(float(trial_e))
            dp.append(dPP)
        else:
            print('Outside margin, trying again, %s' % (h, ))

    zz = np.asarray(z)
    EE = np.asarray(E)
    ddp = np.asarray(dp)

    if fort13 == 'False':
        outfile = 'init_dist_' + job_str + '.txt'
        with open(outfile, 'w') as f:
            for e1, e2, e3, e4, e5, e6 in zip(x, xp, y, yp, zz * 1e3,
                                              EE * 1e-6):
                f.write('%8.6e %8.6e %8.6e %8.6e %8.6e %8.6e\n' %
                        (e1, e2, e3, e4, e5, e6))
    elif fort13 == 'True':
        outfile = 'fort.13'
        with open(outfile, 'w') as f:
            for i in range(0, n_particles, 2):
                # Six coordinates for each particle of the pair ...
                for j in (i, i + 1):
                    f.write(str((x[j] + offset_x) * 1e3) + "\n")  # mm
                    f.write(str((xp[j] + offset_xp) * 1e3) + "\n")  # mrad
                    f.write(str((y[j] + offset_y) * 1e3) + "\n")  # mm
                    f.write(str((yp[j] + offset_yp) * 1e3) + "\n")  # mrad
                    f.write(str(zz[j] * 1e3) + "\n")  # mm
                    f.write(str(ddp[j]) + "\n")  # -

                # ... followed by the reference energy and both energies.
                f.write(str(energy * 1e-6) + "\n")  # MeV
                f.write(str(EE[i] * 1e-6) + "\n")  # MeV
                f.write(str(EE[i + 1] * 1e-6) + "\n")  # MeV
    else:
        print('Please input True or False in the fourth argument')
Пример #9
0
def smart_delete(url):
    """Delete a local file or an S3 object.

    Args:
        url: A plain filesystem path, a ``file://`` URL or an ``s3://`` URL.
            Any other scheme is ignored.

    Returns:
        None.

    Raises:
        OSError: If a local file does not exist or cannot be removed.
    """
    result = urllib.parse.urlparse(url)
    if result.scheme == '':
        return os.remove(url)
    if result.scheme == 'file':
        # os.remove cannot resolve a "file://" URL, so hand it the path part.
        # netloc is prepended because urlparse puts the first segment of
        # "file://relative/path" there.
        return os.remove(result.netloc + result.path)
    if result.scheme == 's3':
        get_s3client_for_bucket(url).delete_object(
            Bucket=get_bucket(result), Key=get_key(result))
    return None