def run(args):
    ms.context.set_context(mode=ms.context.GRAPH_MODE, device_target=args.device)
    dataset_sink_mode = False
    download_dataset(args.data_dir)
    # define the loss function
    net_loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
    # create the network
    network = LeNet5()
    # define the optimizer
    net_opt = build_optimizer(args, network)
    config_ck = CheckpointConfig(save_checkpoint_steps=1875, keep_checkpoint_max=10)
    # save the network model and parameters for subsequent fine-tuning
    ckpoint_cb = ModelCheckpoint(prefix="checkpoint_lenet", config=config_ck)
    # group layers into an object with training and evaluation features
    model = Model(network, net_loss, net_opt, metrics={"Accuracy": Accuracy()})
    if args.init_ckpt:
        load_ckpt(network, args.init_ckpt)
    train_net(network, model, args, ckpoint_cb, dataset_sink_mode)

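# Usage sketch (illustrative, not from the original source): building the `args`
# namespace that run() reads. Only the attributes visible above (device, data_dir,
# init_ckpt) are wired up here; build_optimizer() and train_net() may expect more.
import argparse

parser = argparse.ArgumentParser(description='LeNet-5 training example')
parser.add_argument('--device', default='CPU', choices=['CPU', 'GPU', 'Ascend'])
parser.add_argument('--data_dir', default='./datasets/MNIST_Data')
parser.add_argument('--init_ckpt', default='', help='optional checkpoint to load before training')

if __name__ == '__main__':
    run(parser.parse_args())
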
def download(self):
    """
    API call to download OpenImages slice
    :return: None
    """
    download_dataset(dest_dir=self.directory + "/train",
                     meta_dir="./data/create",
                     class_labels=self.labels,
                     exclusions_path=None,
                     limit=self.limit)

def download_field(fname, basedir=None, force=False):
    # check database
    if basedir is None:
        print('No basedir supplied, working in current directory...')
        basedir = '.'
    workdir = basedir + '/' + fname
    with SurveysDB() as sdb:
        result = sdb.get_field(fname)
        if result is None:
            print('Field', fname, 'does not exist in the database')
            sys.exit(1)
        if result['status'] != 'Not started' and result['status'] != 'D/L failed':
            print('Field', fname, 'has status', result['status'])
            if not force:
                return False
        # get the ids of the observations
        sdb.cur.execute(
            'select * from observations where field=%s and (status="DI_processed" or status="Archived")',
            (fname, ))
        obs = sdb.cur.fetchall()
        if len(obs) > 0:
            result['status'] = 'Downloading'
            if not os.path.isdir(workdir):
                os.mkdir(workdir)
            tag_field(sdb, result)
            sdb.set_field(result)
        else:
            print('No downloadable observations for this field')
            sys.exit(3)

    # now do the download for each field
    overall_success = True
    for o in obs:
        print('Downloading observation ID L' + str(o['id']))
        for prefix in ['', 'prefactor_v1.0/', 'prefactor_v3.0/']:
            success = download_dataset('https://lofar-webdav.grid.sara.nl',
                                       '/SKSP/' + prefix + 'L' + str(o['id']) + '/',
                                       workdir=workdir)
            if success:
                break
            else:
                print('URL failed, trying alternative')
        if not success:
            print('Download failed')
        overall_success = overall_success and success

    with SurveysDB() as sdb:
        if overall_success:
            result['status'] = 'Downloaded'
            tag_field(sdb, result, workdir=workdir)
        else:
            result['status'] = 'D/L failed'
            tag_field(sdb, result, workdir=workdir)
        sdb.set_field(result)

    return overall_success

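# Usage sketch (illustrative; the field name and base directory below are invented):
# fetching one field into a scratch area, forcing a retry even if a previous attempt
# left the database status as something other than 'Not started'.
if __name__ == '__main__':
    ok = download_field('P123+45', basedir='/data/lofar', force=True)
    print('download succeeded' if ok else 'download failed or skipped')
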
def load_eval_dataset(ds_name, data_path, categorical=False):
    # download dataset if not exists
    if not os.path.exists(data_path):
        parent_path = os.path.dirname(os.path.dirname(data_path))
        parent_path = os.path.join(parent_path, "")
        download.download_dataset(ds_name, parent_path)

    spec = config.SPECS[ds_name]
    input_size = spec["input_size"]
    category_depth = 1 if spec.get("categorical", False) else 0
    has_val = "n_val" in spec
    if not has_val and "val_portion" not in spec:
        raise NotImplementedError(
            'Either "n_val" or "val_portion" must be specified in dataset configuration'
        )
    n_eval = spec["n_val"] if has_val else spec["n_train"]

    eval_imgs_dir = data_path + "stimuli"
    eval_maps_dir = data_path + "maps"
    eval_fixs_dir = data_path + "fixations"
    if has_val:
        eval_imgs_dir += "/val"
        eval_maps_dir += "/val"
        eval_fixs_dir += "/val"

    eval_x = _get_file_list(eval_imgs_dir, category_depth)
    eval_y = _get_file_list(eval_maps_dir, category_depth)
    eval_fixs = _get_file_list(eval_fixs_dir, category_depth)
    _check_consistency(zip(eval_x, eval_y, eval_fixs), n_eval)

    eval_ds = tf.data.Dataset.from_tensor_slices((eval_x, eval_y, eval_fixs))
    eval_ds = _prepare_image_ds(eval_ds, ds_name, input_size, category_depth,
                                one_at_a_time=True)
    return (eval_ds, n_eval)

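# Usage sketch (illustrative; the dataset name "salicon" and the path are assumptions
# and must exist in config.SPECS): loading the evaluation split and peeking at one element.
eval_ds, n_eval = load_eval_dataset("salicon", "data/salicon/")
print("evaluation samples:", n_eval)
for sample in eval_ds.take(1):
    print(sample)  # stimulus / map / fixation element as produced by _prepare_image_ds
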
def download_field(fname,basedir=None,force=False):
    # check database
    if basedir is None:
        print 'No basedir supplied, working in current directory...'
        basedir='.'
    workdir=basedir+'/'+fname
    with SurveysDB() as sdb:
        result=sdb.get_field(fname)
        if result is None:
            print 'Field',fname,'does not exist in the database'
            sys.exit(1)
        if result['status']!='Not started' and result['status']!='D/L failed':
            print 'Field',fname,'has status',result['status']
            if not force:
                return False
        # get the ids of the observations
        sdb.cur.execute('select * from observations where field=%s and status="DI_processed"',(fname,))
        obs=sdb.cur.fetchall()
        if len(obs)>0:
            result['status']='Downloading'
            if not os.path.isdir(workdir):
                os.mkdir(workdir)
            tag_field(sdb,result)
            sdb.set_field(result)
        else:
            print 'No downloadable observations for this field'
            sys.exit(3)

    # now do the download for each field
    overall_success=True
    for o in obs:
        print 'Downloading observation ID L'+str(o['id'])
        success=download_dataset('https://lofar-webdav.grid.sara.nl','/SKSP/L'+str(o['id'])+'/',workdir=workdir)
        if success==False:
            print 'Download failed'
        overall_success=overall_success and success

    with SurveysDB() as sdb:
        if overall_success:
            result['status']='Downloaded'
            tag_field(sdb,result,workdir=workdir)
        else:
            result['status']='D/L failed'
            tag_field(sdb,result,workdir=workdir)
        sdb.set_field(result)

    return overall_success

def old_parse_data():
    """Parse data from the dataset to fit the frontend.

    If the dataset is 'out of date' (more than one hour old) new data is downloaded.
    The parsed data is then saved as a .json file.
    If the dataset is NOT 'out of date' the function returns immediately.

    Returns the filename of the parsed data.
    """
    filename = "data/api/" + get_todays_date() + "-external.json"
    if os.path.exists(filename):
        file_time = datetime.fromtimestamp(os.path.getmtime(filename))
        now = datetime.now()
        file_lifetime = now - file_time
        if (file_lifetime.total_seconds() / 60) / 60 < 1:
            return filename

    data = download_dataset()
    if data is None:
        with open("data/external/latest.json", "r") as f:
            data = json.load(f)

    stats = {
        "confirmed": data['latest']['confirmed'],
        "deaths": data['latest']['deaths'],
        "recovered": data['latest']['recovered'],
        "countries": len(
            list(
                set([
                    country['country']
                    for country in data['confirmed']['locations']
                ]))),
        "updated": data['confirmed']['last_updated'].split("T")[0]
    }

    cities = []
    for c, d, r in zip(data['confirmed']['locations'],
                       data['deaths']['locations'],
                       data['recovered']['locations']):
        if c['country'] == d['country'] == r['country']:
            if not (c['latest'] == d['latest'] == r['latest'] == 0):
                parsed_city = {
                    "type": "Feature",
                    "properties": {
                        "city": c['country'],
                        "count": c['latest'],
                        "deaths": d['latest'],
                        "recovered": r['latest'],
                        "icon": "theatre"
                    },
                    "geometry": {
                        "type": "Point",
                        "coordinates": [
                            float(c['coordinates']['long']),
                            float(c['coordinates']['lat'])
                        ]
                    }
                }
                cities.append(parsed_city)

    with open(filename, "w+") as json_file:
        json.dump({"cities": cities, "stats": stats},
                  json_file,
                  sort_keys=True,
                  indent=2,
                  separators=(",", ":"))

    return filename

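# Usage sketch (illustrative only; how the frontend actually consumes the file is an
# assumption): calling the parser and reading back the cities/stats payload it wrote.
import json

path = old_parse_data()
with open(path) as f:
    payload = json.load(f)
print(payload["stats"])
print(len(payload["cities"]), "locations with non-zero counts")
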
os.chdir(rootdir)

name = sys.argv[1]
try:
    qsubfile = sys.argv[2]
except:
    qsubfile = '/home/mjh/git/ddf-pipeline/pipeline.qsub'

try:
    os.mkdir(name)
except OSError:
    warn('Working directory already exists')
    pass
os.chdir(name)

report('Downloading data')
if not download_dataset('https://lofar-webdav.grid.sara.nl',
                        '/SKSP/' + name + '/'):
    die('Download failed to get the right number of files')

report('Unpacking data')
unpack()

report('Deleting tar files')
os.system('rm *.tar.gz')

report('Making ms lists')
if make_list():
    report('Submit job')
    os.system('qsub -N ddfp-' + name + ' -v WD=' + rootdir + '/' + name +
              ' ' + qsubfile)
else:
    die('make_list could not construct the MS list')

def do_run_pipeline(name, basedir):
    if name[0] != 'P' and name[0] != 'L':
        die('This code should be used only with field or observation names',
            database=False)
    do_field = (name[0] == 'P')
    try:
        qsubfile = sys.argv[2]
    except:
        qsubfile = '/home/mjh/pipeline-master/ddf-pipeline/torque/pipeline.qsub'
    workdir = basedir + '/' + name
    try:
        os.mkdir(workdir)
    except OSError:
        warn('Working directory already exists')

    report('Downloading data')
    if do_field:
        success = download_field(name, basedir=basedir)
    else:
        success = download_dataset('https://lofar-webdav.grid.sara.nl',
                                   '/SKSP/' + name + '/', basedir=basedir)
    if not success:
        die('Download failed, see earlier errors', database=False)

    report('Unpacking data')
    try:
        unpack(workdir=workdir)
    except RuntimeError:
        if do_field:
            update_status(name, 'List failed', workdir=workdir)
        raise
    if do_field:
        update_status(name, 'Unpacked', workdir=workdir)

    report('Deleting tar files')
    os.system('rm ' + workdir + '/*.tar.gz')
    os.system('rm ' + workdir + '/*.tar')

    averaged = False
    report('Checking structure')
    g = glob.glob(workdir + '/*.ms')
    msl = MSList(None, mss=g)
    uobsids = set(msl.obsids)
    for thisobs in uobsids:
        # check one MS with each ID
        for m, ch, o in zip(msl.mss, msl.channels, msl.obsids):
            if o == thisobs:
                channels = len(ch)
                print 'MS', m, 'has', channels, 'channels'
                if channels > 20:
                    update_status(name, 'Averaging', workdir=workdir)
                    print 'Averaging needed for', thisobs, '!'
                    averaged = True
                    average(wildcard=workdir + '/*' + thisobs + '*')
                    os.system('rm -r ' + workdir + '/*' + thisobs + '*pre-cal.ms')
                break

    report('Making ms lists')
    success = make_list(workdir=workdir)
    if do_field:
        list_db_update(success, workdir=workdir)
    if not success:
        die('make_list could not construct the MS list', database=False)

    report('Creating custom config file from template')
    make_custom_config(name, workdir, do_field, averaged)

    # now run the job
    do_run_job(name, basedir=basedir, qsubfile=None, do_field=do_field)

def load_train_dataset(ds_name, data_path):
    # download dataset if not exists
    if not os.path.exists(data_path):
        parent_path = os.path.dirname(os.path.dirname(data_path))
        parent_path = os.path.join(parent_path, "")
        download.download_dataset(ds_name, parent_path)

    spec = config.SPECS[ds_name]
    input_size = spec["input_size"]
    category_depth = 1 if spec.get("categorical", False) else 0
    n_train = spec["n_train"]
    has_val = "n_val" in spec
    if not has_val and "val_portion" not in spec:
        raise NotImplementedError(
            'Either "n_val" or "val_portion" must be specified in dataset configuration'
        )

    train_imgs_dir = data_path + "stimuli"
    train_maps_dir = data_path + "maps"
    if has_val:
        n_val = spec["n_val"]
        train_imgs_dir += "/train"
        train_maps_dir += "/train"
    else:
        n_val = math.floor(n_train * spec["val_portion"])

    # loading dataset
    train_x = _get_file_list(train_imgs_dir, category_depth)
    train_y = _get_file_list(train_maps_dir, category_depth)
    _check_consistency(zip(train_x, train_y), n_train)
    train_ds = tf.data.Dataset.from_tensor_slices((train_x, train_y))

    # if has a separate validation set: load them
    # else: take a certain portion from train dataset
    if has_val:
        train_ds = train_ds.shuffle(n_train, reshuffle_each_iteration=False)
        val_x = _get_file_list(data_path + "stimuli/val", category_depth)
        val_y = _get_file_list(data_path + "maps/val", category_depth)
        _check_consistency(zip(val_x, val_y), n_val)
        val_ds = tf.data.Dataset.from_tensor_slices((val_x, val_y))
        val_ds = val_ds.shuffle(n_val, reshuffle_each_iteration=False)
    else:
        if category_depth > 0:
            n_category = spec["n_category"]
            n_per_cat = n_train // n_category
            n_val_per_cat = math.floor(n_per_cat * spec["val_portion"])
            n_val = n_val_per_cat * n_category
            for i in range(n_category):
                cat_train_ds = train_ds.take(n_per_cat)
                cat_train_ds = cat_train_ds.shuffle(
                    n_per_cat, reshuffle_each_iteration=False)
                if i == 0:
                    val_ds = cat_train_ds.take(n_val_per_cat)
                    new_train_ds = cat_train_ds.skip(n_val_per_cat)
                else:
                    val_ds = val_ds.concatenate(
                        cat_train_ds.take(n_val_per_cat))
                    new_train_ds = new_train_ds.concatenate(
                        cat_train_ds.skip(n_val_per_cat))
                train_ds = train_ds.skip(n_per_cat)
            train_ds = new_train_ds
        else:
            train_ds = train_ds.shuffle(n_train,
                                        reshuffle_each_iteration=False)
            val_ds = train_ds.take(n_val)
            train_ds = train_ds.skip(n_val)
        n_train -= n_val

    train_ds = _prepare_image_ds(train_ds, ds_name, input_size, category_depth)
    val_ds = _prepare_image_ds(val_ds, ds_name, input_size, category_depth)

    return ((train_ds, n_train), (val_ds, n_val))

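# Usage sketch (illustrative; "salicon" and the path are assumptions): obtaining the
# train/validation splits and checking their sizes before wiring them to a model.
(train_ds, n_train), (val_ds, n_val) = load_train_dataset("salicon", "data/salicon/")
print("train/val sizes:", n_train, n_val)
for sample in train_ds.take(1):
    print(sample)  # whatever _prepare_image_ds yields per element
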
if name[0]!='P' and name[0]!='L':
    die('This code should be used only with field or observation names',database=False)
do_field=(name[0]=='P')

try:
    os.mkdir(name)
except OSError:
    warn('Working directory already exists')
    pass
os.chdir(name)

report('Downloading data')
if do_field:
    success=download_field(name)
else:
    success=download_dataset('https://lofar-webdav.grid.sara.nl','/SKSP/'+name+'/')
if not success:
    die('Download failed, see earlier errors',database=False)

report('Unpacking data')
unpack()
if do_field:
    unpack_db_update()

report('Deleting tar files')
os.system('rm *.tar.gz')

report('Making ms lists')
success=make_list()

def do_run_pipeline(name,basedir,qsubfile=None,do_field=True):
    '''
    set do_field False for the now obsolete behaviour of downloading and imaging a particular observation
    '''
    if qsubfile is None:
        qsubfile='/home/mjh/pipeline-master/ddf-pipeline/torque/pipeline.qsub'

    workdir=basedir+'/'+name
    try:
        os.mkdir(workdir)
    except OSError:
        warn('Working directory already exists')

    report('Downloading data')
    if do_field:
        success=download_field(name,basedir=basedir)
    else:
        success=download_dataset('https://lofar-webdav.grid.sara.nl','/SKSP/'+name+'/',basedir=basedir)
    if not success:
        die('Download failed, see earlier errors',database=False)

    report('Unpacking data')
    try:
        unpack(workdir=workdir)
    except RuntimeError:
        if do_field:
            update_status(name,'Unpack failed',workdir=workdir)
        raise
    if do_field:
        update_status(name,'Unpacked',workdir=workdir)

    report('Deleting tar files')
    os.system('rm '+workdir+'/*.tar.gz')
    os.system('rm '+workdir+'/*.tar')

    averaged=False
    report('Checking structure')
    g=glob.glob(workdir+'/*.ms')
    msl=MSList(None,mss=g)
    dysco=np.any(msl.dysco)
    uobsids=set(msl.obsids)
    for thisobs in uobsids:
        # check one MS with each ID
        for m,ch,o,hc in zip(msl.mss,msl.channels,msl.obsids,msl.hascorrected):
            if o==thisobs:
                if not(hc):
                    print('MS',m,'has no corrected_data column, force use of DATA')
                    averaged=True
                channels=len(ch)
                print('MS',m,'has',channels,'channels')
                if channels>20:
                    update_status(name,'Averaging',workdir=workdir)
                    print('Averaging needed for',thisobs,'!')
                    averaged=True
                    average(wildcard=workdir+'/*'+thisobs+'*')
                    os.system('rm -r '+workdir+'/*'+thisobs+'*pre-cal.ms')
                break

    report('Making ms lists')
    success=make_list(workdir=workdir)
    if do_field:
        list_db_update(success,workdir=workdir)
    if not success:
        die('make_list could not construct the MS list',database=False)

    report('Creating custom config file from template')
    make_custom_config(name,workdir,do_field,averaged)

    # now run the job
    do_run_job(name,basedir=basedir,qsubfile=None,do_field=do_field,dysco=dysco)

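# Usage sketch (illustrative; the field name and base directory are invented): running
# the full download/unpack/average/submit sequence for one field with the default qsub file.
if __name__ == '__main__':
    do_run_pipeline('P123+45', '/beegfs/lofar', do_field=True)
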