Example #1
def add_file():
    error, conf = "", ""
    if "create_file_submit" in request.form:
        if request.form["file_name"] == "":
            error = messages.name_file_err
        else:
            util.create_file(request.form["file_name"], request.form["file_content"])
            conf = messages.file_created_conf
    elif "upload_file_submit" in request.form:
        if not request.files.get("file", None):
            error = messages.choose_file_err
        else:
            util.upload_file(request.files["file"])
            conf = messages.file_uploaded_conf
    elif "sync_files_submit" in request.form:
        protocol.epidemic_sync_files()
        conf = messages.sync_files_conf
    elif "listen_sw_submit" in request.form:
        conf = messages.listen_sw_conf
        protocol.sw_listen()
    elif "wlan0_submit" in request.form:
        protocol.interface = "wlan0"
        print(protocol.interface)
    elif "wlan1_submit" in request.form:
        protocol.interface = "wlan1"
        print(protocol.interface)
    return main(error=error, conf=conf)
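
A minimal wiring sketch for the handler above, assuming it lives in a Flask application and that util, protocol, messages, and main are module-level objects as the body implies; the route path is hypothetical:

from flask import Flask

app = Flask(__name__)
# Every branch in add_file keys off a submit-button name in request.form,
# so the view must be reachable via POSTed forms.
app.add_url_rule('/files', view_func=add_file, methods=['GET', 'POST'])
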
Example #2
 def process(self, element):
     from similarity import compare_image
     from util import upload_file
     import os  # needed for os.path below; imported locally like the other dependencies
     import pandas as pd
     res1, res2, res3, res4, res5, w, h = range(7)
     el1, el2 = element.strip().split(",")
     pel1 = "" + el1
     pel2 = "" + el2
     try:
         res1, res2, res3, res4, res5, w, h = compare_image(pel1, pel2)
     except Exception as e:
         print(e)
         # retry the comparison once before giving up and keeping the placeholder values
         try:
             res1, res2, res3, res4, res5, w, h = compare_image(pel1, pel2)
         except Exception as e:
             print(e)
     row = pd.DataFrame([[el1, el2, res1, res2, res3, res4, res5, w, h]], columns=["truth", "scale", "ssim", "nssim", "ssim2", "nssim2", "psnr", "width", "height"])
     print(row)
     path = os.path.basename(el1) + "_" + os.path.basename(el2)
     path = os.path.join("output", path)
     csvpath = path + ".csv"
     row.to_csv(csvpath, index=False, header=False)
     upload_file("gs://"+BUCKET_NAME+"/"+csvpath, csvpath)
     print("Task finished: " + csvpath)
Example #3
def upload_file(config, file_path, key_name, log):
    '''
    upload the file to GCS
    
    parameters:
        config: the configuration map
        file_path: path to the local file
        key_name: GCS object key to use
        log: logger to log any messages
    '''
    bucket_name = config['buckets']['open']
    if config['upload_files']:
        log.info('\tuploading %s' % (key_name))
        util.upload_file(config, file_path, bucket_name, key_name, log)
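
A hypothetical call of the wrapper above, with a config dict shaped the way the function reads it and a standard-library logger:

import logging

log = logging.getLogger('upload')
config = {'buckets': {'open': 'my-open-bucket'}, 'upload_files': True}
# Delegates to util.upload_file, sending /tmp/report.txt to gs://my-open-bucket/reports/report.txt
upload_file(config, '/tmp/report.txt', 'reports/report.txt', log)
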
Example #4
def transformation():
    """Do an inference on a single batch of data. In this sample server, we take data as CSV, convert
    it to a pandas data frame for internal use and then convert the predictions back to CSV (which really
    just means one prediction per line, since there's a single column.
    """
    data = None
    # Parse the posted JSON and convert the referenced video to a numpy array
    if flask.request.content_type == 'application/json':
        form = json.loads(flask.request.data.decode('utf-8'))
        data = preprocess.video_to_npy(form["video_url"])

    else:
        return flask.Response(response='this predictor only accepts a JSON body with a video_url',
                              status=415,
                              mimetype='text/plain')

    print('Invoked with {} records'.format(data.shape[0]))

    # Do the prediction
    predictions = ScoringService.predict(data)
    statistics = np.mean(predictions, axis=0)
    statistics = statistics.tolist()
    np.savetxt('data/emotion.csv', predictions, delimiter=",")
    filename = util.upload_file('data/emotion.csv', form, '.csv',
                                'mind-gitter-diary')
    audio = util.upload_file('data/audio.wav', form, '.wav',
                             'mind-gitter-diary')
    fulltext, text = util.speaking_to_text(audio, form)
    komoran = Komoran()
    keywords = textrank_keyword(fulltext,
                                komoran,
                                2,
                                2,
                                0,
                                df=0.85,
                                max_iter=30,
                                topk=30)
    keysents = textrank_keysentence(fulltext, tokenize=komoran, min_count=2)
    result = {
        "emotions": filename,
        "fulltext": text,
        "tags": keywords,
        "abb": keysents,
        "statistics": statistics
    }

    return flask.Response(response=json.dumps(result),
                          status=200,
                          mimetype='application/json')
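
A hedged client-side sketch for exercising the endpoint above; the /invocations route and port are assumptions in the style of SageMaker-like serving containers, and the video URL is a placeholder:

import requests

resp = requests.post('http://localhost:8080/invocations',
                     json={'video_url': 'https://example.com/sample.mp4'})
print(resp.status_code)
print(resp.json())
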
Example #5
def route_ask_new_question():

    if request.method == 'POST':

        util.upload_file(request.files['image'])
        user_name = session['username']

        new_question = question_manager.add_question(
            request.form, request.files['image'].filename, user_name)
        question_manager.add_question_to_db(new_question)

        return redirect('/')

    return render_template('new_question.html')
Example #6
def route_new_answer(question_id=None):
    if request.method == 'POST':

        answer = request.form.get('answer')
        user_name = session['username']
        util.upload_file(request.files['image'])
        answer_manager.add_answer(question_id, answer,
                                  request.files['image'].filename, user_name)

        return redirect(
            url_for('route_question_with_answer', question_id=question_id))

    question = question_manager.get_question_by_id(question_id=question_id)
    return render_template('add_answer.html',
                           question=question,
                           question_id=question_id)
Example #7
def upload_sdrf_file(config, archive_path, file_name, metadata, log):
    center2platform = config['upload_archives']['mage-tab']
    try:
        if metadata['DataCenterName'] not in center2platform or metadata[
                'Platform'] not in center2platform[metadata['DataCenterName']]:
            log.info('\t\tskipping uploading %s from sdrf archive' %
                     (file_name))
            return
    except Exception as e:
        log.exception('problem checking uploading SDRF file')
        raise e
    bucket_name = config['buckets']['open']
    key_name = getSDRFKeyName(file_name, metadata, log)
    if config['upload_files'] and config['upload_open']:
        util.upload_file(config, archive_path + file_name, bucket_name,
                         key_name, log)
    else:
        log.info('\t\tnot uploading %s from sdrf archive to %s' %
                 (file_name, key_name))
Example #8
def upload_files(config, archive_path, file2metadata, log):
    '''
    uploads the files in the archive_path folder
    
    parameters:
        config: the configuration map
        archive_path: folder archive was downloaded to and the files extracted to
        file2metadata: map of file to its metadata
        log: logger to log any messages
    '''
    files = os.listdir(archive_path)
    if 0 < len(files):
        bucket_name, key_prefix = get_bucket_key_prefix(config, file2metadata[files[0]])
        for file_name in files:
            metadata = file2metadata[file_name]
            key_name = key_prefix + metadata['DataLevel'].replace(' ', '_') + '/'+ file_name
            if config['upload_files']:
                util.upload_file(config, archive_path + file_name, bucket_name, key_name, log)
    else:
        log.warning('\tno files for %s' % (archive_path))
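
A hypothetical invocation of upload_files, assuming get_bucket_key_prefix only needs the config keys shown below; note that archive_path is concatenated directly with file names, so it should end with a slash:

import logging

log = logging.getLogger('upload')
config = {'buckets': {'open': 'my-open-bucket'}, 'upload_files': True}
file2metadata = {'sample.txt': {'DataLevel': 'Level 3'}}
upload_files(config, '/tmp/archive_dir/', file2metadata, log)
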
Example #9
def test_upload_and_download():

    if os.getenv('DATA_DIR', None) is None:
        return

    response = requests.post(uri('auth', '/tokens'),
                             headers=login_header('ralph', 'secret'))
    assert response.status_code == 201
    token = response.json()['token']

    response = requests.get(uri('storage', '/file-types?names=txt'),
                            headers=token_header(token))
    assert response.status_code == 200
    file_type_id = response.json()[0]['id']

    response = requests.get(uri('storage', '/scan-types?name=none'),
                            headers=token_header(token))
    assert response.status_code == 200
    scan_type_id = response.json()[0]['id']

    name = 'repository-{}'.format(generate_string(8))
    response = requests.post(uri('storage', '/repositories'),
                             json={'name': name},
                             headers=token_header(token))
    assert response.status_code == 201
    repository_id = response.json()['id']

    file_name = os.path.join(os.getenv('DATA_DIR'), 'data.nii.gz')
    file_id, _ = upload_file(file_name, file_type_id, scan_type_id,
                             repository_id, token)

    response = requests.get(uri(
        'storage', '/repositories/{}/files/{}'.format(repository_id, file_id)),
                            headers=token_header(token))
    assert response.status_code == 200
    storage_id = response.json()['storage_id']

    response = requests.get(uri('storage', '/downloads/{}'.format(storage_id)),
                            headers=token_header(token))
    assert response.status_code == 200
    assert response.content

    with open('tmp.nii.gz', 'wb') as f:
        for chunk in response.iter_content(1024 * 1024):
            f.write(chunk)

    n = os.path.getsize('tmp.nii.gz')
    m = os.path.getsize(file_name)
    assert n == m

    os.remove('tmp.nii.gz')
Example #10
def execute():
    metrics = generate_metrics()
    util.upload_file("v1/containers.json", util.json_dump(metrics))
    print "Updated metrics file uploaded to S3"
Example #11
def main(configfilename):
    try:
        with open(configfilename) as configFile:
            config = json.load(configFile)

        log_dir = str(date.today()).replace(
            '-', '_') + '_' + config['log_dir_tag'] + '/'
        log_name = create_log(log_dir, 'create_snapshot')
        log = logging.getLogger(log_name)
        log.info('begin create snapshot')
        gcs_wrapper_gcloud.open_connection(config, log)

        latestarchive07jun16path = '/titan/cancerregulome11/TCGA/repositories/dcc-mirror/datareports/resources/latestarchive_07jun16'
        latestarchivepath = 'latestarchive'
        snapshotprefix = '/titan/cancerregulome11/TCGA/repositories/dcc-mirror/public'
        dccprefixlen = len(
            'https://tcga-data.nci.nih.gov/tcgafiles/ftp_auth/distro_ftpusers/anonymous'
        )
        googlebucket = 'dcc_repository'
        googlefolderprefix = '2016_06_07/public'
        googlelinkprefix = 'https://console.cloud.google.com/m/cloudstorage/b/dcc_repository/o/2016_06_07/public'

        count = 1
        with open(latestarchive07jun16path) as latestarchive07jun16, open(
                latestarchivepath, 'w') as latestarchive:
            # copy the header
            latestarchive.write(latestarchive07jun16.readline())
            for line in latestarchive07jun16:
                try:
                    fields = line.strip().split('\t')
                    if 'tcga4yeo' in fields[2]:
                        # skip controlled access archives
                        continue
                    # translate the location in the dcc to the location in our mirror
                    pathsuffix = fields[2][dccprefixlen:]
                    fields[2] = googlelinkprefix + pathsuffix
                    latestarchive.write('\t'.join(fields) + '\n')
                    # upload to GCS
                    snapshotloc = snapshotprefix + pathsuffix
                    uploadpath = googlefolderprefix + pathsuffix
                    try:
                        if os.path.exists(snapshotloc):
                            upload_file(config, snapshotloc, googlebucket,
                                        uploadpath, log)
                    except ValueError as ve:
                        if ('%s' % ve) != ('found %s in %s' %
                                           (uploadpath, googlebucket)):
                            raise ve
                    except:
                        raise
                    if 1 == count % 250:
                        log.info(
                            '\t==================================\n\tgoogle path: %s\n\tgoogle link: %s\n\tsnapshot location: %s\n'
                            % (uploadpath, fields[2], snapshotloc))
                    count += 1
                except:
                    log.exception('problem on row %d: %s(%s)' %
                                  (count, line, fields))
                    raise

        log.info('finished create snapshot, found %s archives' % (count))
    finally:
        gcs_wrapper_gcloud.close_connection()
Example #12
def process_files(config, endpt_type, file2info, outputdir, start, end,
                  program_name, project, data_type, etl_class, log):
    try:
        filepath = outputdir + config['download_output_file_template'] % (
            start, end - 1)
        with tarfile.open(filepath) as tf:
            log.info('\t\textract tar files from %s' % (filepath))
            tf.extractall(outputdir)
            log.info('\t\tdone extract tar files from %s' % (filepath))

        with open(outputdir + 'MANIFEST.txt') as manifest:
            lines = manifest.read().split('\n')
            paths = []
            filenames = set()
            for line in lines[1:]:
                filepath = line.split('\t')[1]
                paths += [filepath]
                filenames.add(filepath.split('/')[1])
        paths.sort(key=lambda path: path.split('/')[1])

        if config['upload_files']:
            for path in paths:
                basefolder = config['buckets']['folders']['base_file_folder']

                metadata = flatten_map(
                    file2info[path], config[program_name]['process_files']
                    ['data_table_mapping'])
                keypath_template = config[program_name]['process_files'][
                    'bucket_path_template']
                key_path_components = []
                for part in config[program_name]['process_files'][
                        'bucket_path']:
                    fields = part.split(':')
                    if 1 == len(fields):
                        if 'endpoint_type' == part:
                            key_path_components += [endpt_type]
                        else:
                            key_path_components += [metadata[0][part]]
                    elif 'alt' == fields[0]:
                        if fields[1] in metadata[0] and metadata[0][fields[1]]:
                            key_path_components += [metadata[0][fields[1]]]
                        else:
                            key_path_components += [metadata[0][fields[2]]]

                key_name = basefolder + (keypath_template %
                                         tuple(key_path_components))
                log.info('\t\tuploading %s' % (key_name))
                upload_file(config, outputdir + path,
                            config['buckets']['open'], key_name, log)
        else:
            log.info('\t\t\tnot uploading files for %s:%s' %
                     (project, data_type))

        etl_uploaded = False
        if config['upload_etl_files'] and data_type in config[program_name][
                'process_files']['datatype2bqscript'] and etl_class is not None:
            etl_uploaded = etl_class.upload_batch_etl(config, outputdir, paths,
                                                      file2info, endpt_type,
                                                      program_name, project,
                                                      data_type, log)
        else:
            log.warning(
                '\t\tnot processing files for ETL for project %s and datatype %s%s'
                % (project, data_type, ' because there is no script specified'
                   if config['upload_etl_files'] else ''))
        return etl_uploaded
    except:
        log.exception(
            'problem process file %s for project %s and data_type %s' %
            (filepath, project, data_type))
        raise
    finally:
        if 'delete_dir_contents' not in config or config['delete_dir_contents']:
            delete_dir_contents(outputdir)
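
A hypothetical config fragment illustrating how the key-path loop above resolves each 'bucket_path' entry: a plain field name is looked up in the flattened metadata, the literal 'endpoint_type' is replaced with endpt_type, and an 'alt:primary:fallback' entry uses the fallback field when the primary one is missing or empty (all names below are made up):

config_fragment = {
    'TCGA': {
        'process_files': {
            'bucket_path_template': '%s/%s/%s',
            'bucket_path': ['endpoint_type', 'project_short_name', 'alt:file_name:file_gdc_id'],
        }
    }
}
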
Example #13
def update():
    api_url = 'https://wiki.factorio.com/api.php'
    session = requests.Session()
    edit_token = get_edit_token(session, api_url)
    with open('C:\\Users\\Win 10\\Documents\\Wiki-data\\moves_and_more.json'
              ) as f:
        moves_and_more_data = json.load(f)

    # move that one really special page - need to update the infobox on the page too + archive the page
    for title in moves_and_more_data['special_move']:
        page = get_page(session, api_url, title)
        page = page.replace('{{:Infobox:Wood}}',
                            '{{:Infobox:Wood (archived)}}')
        if 'Infobox' not in title:
            page = '{{archive}}' + page

        print(
            edit_page(session, api_url, edit_token, title, page,
                      'Archived wood (removed in 0.17)').text)

        print(
            move_page(session, api_url, edit_token, title,
                      title.replace('Wood', 'Wood (archived)'),
                      'Archived wood (removed in 0.17)',
                      False).text)  # no redirect

    # archive pages + files = prepend edit {{archive}} onto them
    for title in moves_and_more_data['archive']:
        print(
            edit_page(session, api_url, edit_token, title, '{{archive}}',
                      'Archived page (removed in 0.17)',
                      True).text)  # prepend edit

    # move pages + files - leave redirects - also do infoboxes on the pages
    for move_data in moves_and_more_data['move']:
        if 'Infobox' not in move_data['from'] and 'File' not in move_data[
                'from']:
            page = get_page(session, api_url, move_data['from'])
            from_title_no_lang_suffix = re.search(r'([^/]+)(/\S+)?',
                                                  move_data['from']).group(1)
            to_title_no_lang_suffix = re.search(r'([^/]+)(/\S+)?',
                                                move_data['to']).group(1)
            page = page.replace(
                '{{:Infobox:' + from_title_no_lang_suffix + '}}',
                '{{:Infobox:' + to_title_no_lang_suffix + '}}')

            print(
                edit_page(session, api_url, edit_token, move_data['from'],
                          page, 'Renamed in 0.17').text)

        print(
            move_page(session, api_url, edit_token, move_data['from'],
                      move_data['to'], 'Renamed in 0.17').text)

    # upload files
    for filename in moves_and_more_data['upload']:
        with open('C:\\Users\\Win 10\\Documents\\Wiki-data\\icons\\' + filename, 'rb') as file:
            print(
                upload_file(session, api_url, edit_token, filename, file,
                            '{{Game image}}').text)

    #create pages
    with open('C:\\Users\\Win 10\\Documents\\Wiki-data\\new_pages.json') as f:
        create_page_data = json.load(f)

    for name, page in create_page_data.items():
        print(
            edit_page(session, api_url, edit_token, name, page,
                      'Added in 0.17').text)

    # infobox update
    InfoboxUpdate([
        InfoboxType.Entity, InfoboxType.Technology, InfoboxType.Item,
        InfoboxType.Recipe, InfoboxType.Prototype
    ], api_url, '0.17.0', False)

    # updating https://wiki.factorio.com/Template:VersionNav
    versionnav = get_page(session, api_url, 'Template:VersionNav')
    versionnav = versionnav.replace(
        '}}\n<noinclude>',
        '|group10 = {{Translation|0.17}}\n|list10 =\n* {{TransLink|Version history/0.17.0#0.17.0|0.17.0}}\n}}\n<noinclude>'
    )
    print(
        edit_page(session, api_url, edit_token, 'Template:VersionNav',
                  versionnav, '0.17').text)

    # updating https://wiki.factorio.com/Main_Page/Latest_versions
    latest_versions = get_page(session, api_url, 'Main_Page/Latest_versions')
    latest_versions = latest_versions.replace(
        '[[File:Space science pack.png|link=]]',
        '[[File:Automation science pack.png|link=]]')
    latest_versions = latest_versions.replace(
        '[[File:Speed module 3.png|link=]]', '[[File:Speed module.png|link=]]')
    latest_versions = latest_versions.replace(
        '{{Translation|The wiki is based on version}} [[Version history/0.16.0|0.16]]',
        '{{Translation|The wiki is based on version}} [[Version history/0.17.0|0.17]]'
    )
    print(
        edit_page(session, api_url, edit_token, 'Main_Page/Latest_versions',
                  latest_versions, 'Experimental 0.17').text)

    # sitenotice https://wiki.factorio.com/MediaWiki:Sitenotice
    sitenotice = "'''This wiki is about [[Tutorial:Cheat_sheet#0.17_change_overview|0.17]], the current [[Install_guide#Downloading_and_installing_experimental_versions|experimental version]] of ''Factorio''.'''\n\nInformation about 0.16, the current stable version of ''Factorio'', can be found on [https://stable.wiki.factorio.com/ stable.wiki.factorio.com]."
    print(
        edit_page(session, api_url, edit_token, 'MediaWiki:Sitenotice',
                  sitenotice, 'Experimental 0.17').text)
Example #14
def test_train_classifier():

    if os.getenv('DATA_DIR', None) is None:
        return

    # Get access token
    response = requests.post(uri('auth', '/tokens'),
                             headers=login_header('ralph', 'secret'))
    assert response.status_code == 201
    token = response.json()['token']

    # Create storage repository
    name = 'repository-{}'.format(generate_string(8))
    response = requests.post(uri('storage', '/repositories'),
                             headers=token_header(token),
                             json={'name': name})
    assert response.status_code == 201
    repository_id = response.json()['id']

    # Get CSV file type ID
    response = requests.get(uri('storage', '/file-types?name=csv'),
                            headers=token_header(token))
    assert response.status_code == 200
    file_type_id = response.json()[0]['id']

    # Get scan type ID
    response = requests.get(uri('storage', '/scan-types?name=none'),
                            headers=token_header(token))
    assert response.status_code == 200
    scan_type_id = response.json()[0]['id']

    # Load the feature data and keep the diagnostic labels for later use
    file_path = os.path.join(os.getenv('DATA_DIR'), 'data.csv')
    features = pd.read_csv(file_path, index_col='MRid')
    subject_labels = list(features['Diagnosis'])

    # Upload CSV file with brain features
    file_id, _ = upload_file(file_path, file_type_id, scan_type_id,
                             repository_id, token)
    assert file_id

    # Train classifier using the uploaded CSV file. As parameters we specify the
    # pipeline name (which in this case is a classifier training pipeline). The 'file_id'
    # refers to the CSV file. The parameter 'subject_labels' contains a list of diagnostic
    # labels. This list is used to pre-calculate training and testing indices which can be
    # passed to the different workers handling the cross-validation folds in parallel.
    response = requests.post(uri('compute', '/tasks'),
                             headers=token_header(token),
                             json={
                                 'pipeline_name': 'svm_train',
                                 'params': {
                                     'repository_id': repository_id,
                                     'file_id': file_id,
                                     'subject_labels': subject_labels,
                                     'nr_folds': 2,
                                     'index_column': 'MRid',
                                     'target_column': 'Diagnosis',
                                     'kernel': 'rbf',
                                 }
                             })

    assert response.status_code == 201
    task_id = response.json()['id']

    # Retrieve task status periodically until it finishes successfully. In practice,
    # this means the task status == SUCCESS and result != None
    classifier_id = 0
    while True:
        response = requests.get(uri('compute', '/tasks/{}'.format(task_id)),
                                headers=token_header(token))
        assert response.status_code == 200
        status = response.json()['status']
        assert status == 'PENDING' or status == 'SUCCESS'
        result = response.json()['result']
        sys.stdout.write('.')
        sys.stdout.flush()
        if status == 'SUCCESS' and result is not None:
            classifier_id = result['classifier_id']
            break
        time.sleep(2)

    # Remove diagnosis column from the feature data. Then select the first subject
    # so we can send it to the classifier for prediction.
    features.drop('Diagnosis', axis=1, inplace=True)
    subject_data = list(features.iloc[0])
    subject_label = subject_labels[0]

    # Send some data to the trained classifier for prediction
    response = requests.post(uri('compute', '/tasks'),
                             headers=token_header(token),
                             json={
                                 'pipeline_name': 'svm_predict',
                                 'params': {
                                     'classifier_id': classifier_id,
                                     'subjects': [
                                         subject_data,
                                     ],
                                 }
                             })

    assert response.status_code == 201
    task_id = response.json()['id']

    while True:
        response = requests.get(uri('compute', '/tasks/{}'.format(task_id)),
                                headers=token_header(token))
        assert response.status_code == 200
        status = response.json()['status']
        assert status == 'PENDING' or status == 'SUCCESS'
        result = response.json()['result']
        sys.stdout.write('.')
        sys.stdout.flush()
        if status == 'SUCCESS' and result is not None:
            assert subject_label == result['predicted_labels'][0]
            break
        time.sleep(2)
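
The two polling loops above are near-duplicates; a sketch of a helper that could replace both, using the same requests/uri/token_header/sys/time names the test already relies on:

def wait_for_task(task_id, token, interval=2):
    # Poll the compute service until the task reports SUCCESS with a non-empty result.
    while True:
        response = requests.get(uri('compute', '/tasks/{}'.format(task_id)),
                                headers=token_header(token))
        assert response.status_code == 200
        status = response.json()['status']
        assert status in ('PENDING', 'SUCCESS')
        result = response.json()['result']
        sys.stdout.write('.')
        sys.stdout.flush()
        if status == 'SUCCESS' and result is not None:
            return result
        time.sleep(interval)

The first loop would then reduce to classifier_id = wait_for_task(task_id, token)['classifier_id'], and the second to a result lookup followed by the label assertion.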