def add_file():
    error, conf = "", ""
    if "create_file_submit" in request.form:
        if request.form["file_name"] == "":
            error = messages.name_file_err
        else:
            util.create_file(request.form["file_name"], request.form["file_content"])
            conf = messages.file_created_conf
    elif "upload_file_submit" in request.form:
        if not request.files.get("file", None):
            error = messages.choose_file_err
        else:
            util.upload_file(request.files["file"])
            conf = messages.file_uploaded_conf
    elif "sync_files_submit" in request.form:
        protocol.epidemic_sync_files()
        conf = messages.sync_files_conf
    elif "listen_sw_submit" in request.form:
        conf = messages.listen_sw_conf
        protocol.sw_listen()
    elif "wlan0_submit" in request.form:
        protocol.interface = "wlan0"
        print(protocol.interface)
    elif "wlan1_submit" in request.form:
        protocol.interface = "wlan1"
        print(protocol.interface)
    return main(error=error, conf=conf)
def process(self, element):
    from similarity import compare_image
    from util import upload_file
    import pandas as pd

    res1, res2, res3, res4, res5, w, h = range(7)
    el1, el2 = element.strip().split(",")
    pel1 = "" + el1
    pel2 = "" + el2
    try:
        res1, res2, res3, res4, res5, w, h = compare_image(pel1, pel2)
    except Exception as e:
        print(e)
        # retry the comparison once before giving up on this pair
        try:
            res1, res2, res3, res4, res5, w, h = compare_image(pel1, pel2)
        except Exception as e:
            print(e)
    row = pd.DataFrame(
        [[el1, el2, res1, res2, res3, res4, res5, w, h]],
        columns=["truth", "scale", "ssim", "nssim", "ssim2", "nssim2", "psnr", "width", "height"])
    print(row)
    path = os.path.basename(el1) + "_" + os.path.basename(el2)
    path = os.path.join("output", path)
    csvpath = path + ".csv"
    row.to_csv(csvpath, index=None, header=None)
    upload_file("gs://" + BUCKET_NAME + "/" + csvpath, csvpath)
    print("Task finished: " + csvpath)
def upload_file(config, file_path, key_name, log):
    '''
    upload the file to GCS

    parameters:
        file_path: path to the local file
        key_name: GCS object key to use
        log: logger to log any messages
    '''
    bucket_name = config['buckets']['open']
    if config['upload_files']:
        log.info('\tuploading %s' % (key_name))
        util.upload_file(config, file_path, bucket_name, key_name, log)
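# Hedged usage sketch for the wrapper above (illustrative only): the bucket
# name, local path, and object key are placeholder assumptions, and the call
# ultimately delegates to the project's util.upload_file GCS helper. Setting
# 'upload_files' to False turns the call into a no-op dry run.
import logging

logging.basicConfig(level=logging.INFO)
example_log = logging.getLogger('upload_example')
example_config = {'buckets': {'open': 'example-open-bucket'}, 'upload_files': True}
upload_file(example_config, '/tmp/example.txt', 'incoming/example.txt', example_log)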
def transformation():
    """Do an inference on a single batch of data.

    This server takes a JSON payload containing a video URL, converts the video
    to a numpy array for internal use, and returns the predictions together with
    derived statistics, keywords, and key sentences as JSON.
    """
    data = None

    if flask.request.content_type == 'application/json':
        form = json.loads(flask.request.data.decode('utf-8'))
        data = preprocess.video_to_npy(form["video_url"])
    else:
        return flask.Response(response='this predictor only receives a url',
                              status=415,
                              mimetype='text/plain')

    print('Invoked with {} records'.format(data.shape[0]))

    # Do the prediction
    predictions = ScoringService.predict(data)
    statistics = np.mean(predictions, axis=0)
    statistics = statistics.tolist()

    np.savetxt('data/emotion.csv', predictions, delimiter=",")
    filename = util.upload_file('data/emotion.csv', form, '.csv', 'mind-gitter-diary')
    audio = util.upload_file('data/audio.wav', form, '.wav', 'mind-gitter-diary')
    fulltext, text = util.speaking_to_text(audio, form)

    komoran = Komoran()
    keywords = textrank_keyword(fulltext, komoran, 2, 2, 0, df=0.85, max_iter=30, topk=30)
    keysents = textrank_keysentence(fulltext, tokenize=komoran, min_count=2)

    result = {
        "emotions": filename,
        "fulltext": text,
        "tags": keywords,
        "abb": keysents,
        "statistics": statistics
    }

    return flask.Response(response=json.dumps(result),
                          status=200,
                          mimetype='application/json')
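# Hedged client-side sketch for the handler above: the host/port and endpoint
# path ('/invocations') are assumptions, since the route decorator is not shown,
# and the video URL is a placeholder. The body carries the 'video_url' key the
# handler reads; the same form dict is also forwarded to util.upload_file.
import requests

resp = requests.post('http://localhost:8080/invocations',
                     json={'video_url': 'https://example.com/clip.mp4'})
print(resp.status_code, resp.json())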
def route_ask_new_question():
    if request.method == 'POST':
        util.upload_file(request.files['image'])
        user_name = session['username']
        new_question = question_manager.add_question(
            request.form, request.files['image'].filename, user_name)
        question_manager.add_question_to_db(new_question)
        return redirect('/')
    return render_template('new_question.html')
def route_new_answer(question_id=None):
    if request.method == 'POST':
        answer = request.form.get('answer')
        user_name = session['username']
        util.upload_file(request.files['image'])
        answer_manager.add_answer(question_id, answer,
                                  request.files['image'].filename, user_name)
        return redirect(
            url_for('route_question_with_answer', question_id=question_id))
    question = question_manager.get_question_by_id(question_id=question_id)
    return render_template('add_answer.html',
                           question=question,
                           question_id=question_id)
def upload_sdrf_file(config, archive_path, file_name, metadata, log):
    center2platform = config['upload_archives']['mage-tab']
    try:
        if metadata['DataCenterName'] not in center2platform or metadata[
                'Platform'] not in center2platform[metadata['DataCenterName']]:
            log.info('\t\tskipping uploading %s from sdrf archive' % (file_name))
            return
    except Exception as e:
        log.exception('problem checking uploading SDRF file')
        raise e

    bucket_name = config['buckets']['open']
    key_name = getSDRFKeyName(file_name, metadata, log)
    if config['upload_files'] and config['upload_open']:
        util.upload_file(config, archive_path + file_name, bucket_name, key_name, log)
    else:
        log.info('\t\tnot uploading %s from sdrf archive to %s' % (file_name, key_name))
def upload_files(config, archive_path, file2metadata, log):
    '''
    uploads the files in the archive_path folder

    parameters:
        config: the configuration map
        archive_path: folder archive was downloaded to and the files extracted to
        file2metadata: map of file to its metadata
        log: logger to log any messages
    '''
    files = os.listdir(archive_path)
    if 0 < len(files):
        bucket_name, key_prefix = get_bucket_key_prefix(config, file2metadata[files[0]])
        for file_name in files:
            metadata = file2metadata[file_name]
            key_name = key_prefix + metadata['DataLevel'].replace(' ', '_') + '/' + file_name
            if config['upload_files']:
                util.upload_file(config, archive_path + file_name, bucket_name, key_name, log)
    else:
        log.warning('\tno files for %s' % (archive_path))
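# Hedged usage sketch for upload_files above: the folder, metadata values, and
# bucket name are illustrative assumptions meant to show the expected shapes of
# archive_path and file2metadata. It assumes the extract folder exists and that
# get_bucket_key_prefix is available; with 'upload_files' set to False the loop
# resolves key names but skips the actual GCS calls (a dry run).
import logging

logging.basicConfig(level=logging.INFO)
example_log = logging.getLogger('archive_upload_example')
example_config = {'buckets': {'open': 'example-open-bucket'}, 'upload_files': False}
example_archive_path = '/tmp/archive_extract/'
example_file2metadata = {
    'sample_data.txt': {'DataLevel': 'Level 3'},
}
upload_files(example_config, example_archive_path, example_file2metadata, example_log)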
def test_upload_and_download():
    if os.getenv('DATA_DIR', None) is None:
        return

    response = requests.post(uri('auth', '/tokens'),
                             headers=login_header('ralph', 'secret'))
    assert response.status_code == 201
    token = response.json()['token']

    response = requests.get(uri('storage', '/file-types?names=txt'),
                            headers=token_header(token))
    assert response.status_code == 200
    file_type_id = response.json()[0]['id']

    response = requests.get(uri('storage', '/scan-types?name=none'),
                            headers=token_header(token))
    assert response.status_code == 200
    scan_type_id = response.json()[0]['id']

    name = 'repository-{}'.format(generate_string(8))
    response = requests.post(uri('storage', '/repositories'),
                             json={'name': name},
                             headers=token_header(token))
    assert response.status_code == 201
    repository_id = response.json()['id']

    file_name = os.path.join(os.getenv('DATA_DIR'), 'data.nii.gz')
    file_id, _ = upload_file(file_name, file_type_id, scan_type_id, repository_id, token)

    response = requests.get(uri(
        'storage', '/repositories/{}/files/{}'.format(repository_id, file_id)),
        headers=token_header(token))
    assert response.status_code == 200
    storage_id = response.json()['storage_id']

    response = requests.get(uri('storage', '/downloads/{}'.format(storage_id)),
                            headers=token_header(token))
    assert response.status_code == 200
    assert response.content

    with open('tmp.nii.gz', 'wb') as f:
        for chunk in response.iter_content(1024 * 1024):
            f.write(chunk)

    n = os.path.getsize('tmp.nii.gz')
    m = os.path.getsize(file_name)
    assert n == m
    os.system('rm -f tmp.nii.gz')
def execute():
    metrics = generate_metrics()
    util.upload_file("v1/containers.json", util.json_dump(metrics))
    print("Updated metrics file uploaded to S3")
def main(configfilename):
    try:
        with open(configfilename) as configFile:
            config = json.load(configFile)

        log_dir = str(date.today()).replace('-', '_') + '_' + config['log_dir_tag'] + '/'
        log_name = create_log(log_dir, 'create_snapshot')
        log = logging.getLogger(log_name)
        log.info('begin create snapshot')

        gcs_wrapper_gcloud.open_connection(config, log)

        latestarchive07jun16path = '/titan/cancerregulome11/TCGA/repositories/dcc-mirror/datareports/resources/latestarchive_07jun16'
        latestarchivepath = 'latestarchive'
        snapshotprefix = '/titan/cancerregulome11/TCGA/repositories/dcc-mirror/public'
        dccprefixlen = len('https://tcga-data.nci.nih.gov/tcgafiles/ftp_auth/distro_ftpusers/anonymous')
        googlebucket = 'dcc_repository'
        googlefolderprefix = '2016_06_07/public'
        googlelinkprefix = 'https://console.cloud.google.com/m/cloudstorage/b/dcc_repository/o/2016_06_07/public'

        count = 1
        with open(latestarchive07jun16path) as latestarchive07jun16, open(latestarchivepath, 'w') as latestarchive:
            # copy the header
            latestarchive.write(latestarchive07jun16.readline())
            for line in latestarchive07jun16:
                try:
                    fields = line.strip().split('\t')
                    if 'tcga4yeo' in fields[2]:
                        # skip controlled access archives
                        continue

                    # translate the location in the dcc to the location in our mirror
                    pathsuffix = fields[2][dccprefixlen:]
                    fields[2] = googlelinkprefix + pathsuffix
                    latestarchive.write('\t'.join(fields) + '\n')

                    # upload to GCS
                    snapshotloc = snapshotprefix + pathsuffix
                    uploadpath = googlefolderprefix + pathsuffix
                    try:
                        if os.path.exists(snapshotloc):
                            upload_file(config, snapshotloc, googlebucket, uploadpath, log)
                    except ValueError as ve:
                        if ('%s' % ve) != ('found %s in %s' % (uploadpath, googlebucket)):
                            raise ve
                    except:
                        raise

                    if 1 == count % 250:
                        log.info('\t==================================\n\tgoogle path: %s\n\tgoogle link: %s\n\tsnapshot location: %s\n'
                                 % (uploadpath, fields[2], snapshotloc))
                    count += 1
                except:
                    log.exception('problem on row %d: %s(%s)' % (count, line, fields))
                    raise

        log.info('finished create snapshot, found %s archives' % (count))
    finally:
        gcs_wrapper_gcloud.close_connection()
def process_files(config, endpt_type, file2info, outputdir, start, end,
                  program_name, project, data_type, etl_class, log):
    try:
        filepath = outputdir + config['download_output_file_template'] % (start, end - 1)
        with tarfile.open(filepath) as tf:
            log.info('\t\textract tar files from %s' % (filepath))
            tf.extractall(outputdir)
            log.info('\t\tdone extract tar files from %s' % (filepath))

        with open(outputdir + 'MANIFEST.txt') as manifest:
            lines = manifest.read().split('\n')
        paths = []
        filenames = set()
        for line in lines[1:]:
            filepath = line.split('\t')[1]
            paths += [filepath]
            filenames.add(filepath.split('/')[1])
        paths.sort(key=lambda path: path.split('/')[1])

        if config['upload_files']:
            for path in paths:
                basefolder = config['buckets']['folders']['base_file_folder']
                metadata = flatten_map(
                    file2info[path],
                    config[program_name]['process_files']['data_table_mapping'])
                keypath_template = config[program_name]['process_files']['bucket_path_template']
                key_path_components = []
                for part in config[program_name]['process_files']['bucket_path']:
                    fields = part.split(':')
                    if 1 == len(fields):
                        if 'endpoint_type' == part:
                            key_path_components += [endpt_type]
                        else:
                            key_path_components += [metadata[0][part]]
                    elif 'alt' == fields[0]:
                        if fields[1] in metadata[0] and metadata[0][fields[1]]:
                            key_path_components += [metadata[0][fields[1]]]
                        else:
                            key_path_components += [metadata[0][fields[2]]]

                key_name = basefolder + (keypath_template % tuple(key_path_components))
                log.info('\t\tuploading %s' % (key_name))
                upload_file(config, outputdir + path, config['buckets']['open'], key_name, log)
        else:
            log.info('\t\t\tnot uploading files for %s:%s' % (project, data_type))

        etl_uploaded = False
        if config['upload_etl_files'] and data_type in config[program_name][
                'process_files']['datatype2bqscript'] and etl_class is not None:
            etl_uploaded = etl_class.upload_batch_etl(config, outputdir, paths, file2info,
                                                      endpt_type, program_name, project,
                                                      data_type, log)
        else:
            log.warning('\t\tnot processing files for ETL for project %s and datatype %s%s'
                        % (project, data_type,
                           ' because there is no script specified' if config['upload_etl_files'] else ''))
        return etl_uploaded
    except:
        log.exception('problem process file %s for project %s and data_type %s'
                      % (filepath, project, data_type))
        raise
    finally:
        if 'delete_dir_contents' not in config or config['delete_dir_contents']:
            delete_dir_contents(outputdir)
def update():
    api_url = 'https://wiki.factorio.com/api.php'
    session = requests.Session()
    edit_token = get_edit_token(session, api_url)

    with open('C:\\Users\\Win 10\\Documents\\Wiki-data\\moves_and_more.json') as f:
        moves_and_more_data = json.load(f)

    # move that one really special page - need to update the infobox on the page too + archive the page
    for title in moves_and_more_data['special_move']:
        page = get_page(session, api_url, title)
        page = page.replace('{{:Infobox:Wood}}', '{{:Infobox:Wood (archived)}}')
        if 'Infobox' not in title:
            page = '{{archive}}' + page
        print(edit_page(session, api_url, edit_token, title, page,
                        'Archived wood (removed in 0.17)').text)
        print(move_page(session, api_url, edit_token, title,
                        title.replace('Wood', 'Wood (archived)'),
                        'Archived wood (removed in 0.17)', False).text)  # no redirect

    # archive pages + files = prepend edit {{archive}} onto them
    for title in moves_and_more_data['archive']:
        print(edit_page(session, api_url, edit_token, title, '{{archive}}',
                        'Archived page (removed in 0.17)', True).text)  # prepend edit

    # move pages + files - leave redirects - also do infoboxes on the pages
    for move_data in moves_and_more_data['move']:
        if 'Infobox' not in move_data['from'] and 'File' not in move_data['from']:
            page = get_page(session, api_url, move_data['from'])
            from_title_no_lang_suffix = re.search(r'([^/]+)(\/\S+)?', move_data['from']).group(1)
            to_title_no_lang_suffix = re.search(r'([^/]+)(\/\S+)?', move_data['to']).group(1)
            page = page.replace('{{:Infobox:' + from_title_no_lang_suffix + '}}',
                                '{{:Infobox:' + to_title_no_lang_suffix + '}}')
            print(edit_page(session, api_url, edit_token, move_data['from'], page,
                            'Renamed in 0.17').text)
        print(move_page(session, api_url, edit_token, move_data['from'], move_data['to'],
                        'Renamed in 0.17').text)

    # upload files
    for filename in moves_and_more_data['upload']:
        file = open('C:\\Users\\Win 10\\Documents\\Wiki-data\\icons\\' + filename, 'rb')
        print(upload_file(session, api_url, edit_token, filename, file,
                          '{{Game image}}').text)

    # create pages
    with open('C:\\Users\\Win 10\\Documents\\Wiki-data\\new_pages.json') as f:
        create_page_data = json.load(f)
    for name, page in create_page_data.items():
        print(edit_page(session, api_url, edit_token, name, page, 'Added in 0.17').text)

    # infobox update
    InfoboxUpdate([InfoboxType.Entity, InfoboxType.Technology, InfoboxType.Item,
                   InfoboxType.Recipe, InfoboxType.Prototype], api_url, '0.17.0', False)

    # updating https://wiki.factorio.com/Template:VersionNav
    versionnav = get_page(session, api_url, 'Template:VersionNav')
    versionnav = versionnav.replace(
        '}}\n<noinclude>',
        '|group10 = {{Translation|0.17}}\n|list10 =\n* {{TransLink|Version history/0.17.0#0.17.0|0.17.0}}\n}}\n<noinclude>')
    print(edit_page(session, api_url, edit_token, 'Template:VersionNav', versionnav, '0.17').text)

    # updating https://wiki.factorio.com/Main_Page/Latest_versions
    latest_versions = get_page(session, api_url, 'Main_Page/Latest_versions')
    latest_versions = latest_versions.replace('[[File:Space science pack.png|link=]]',
                                              '[[File:Automation science pack.png|link=]]')
    latest_versions = latest_versions.replace('[[File:Speed module 3.png|link=]]',
                                              '[[File:Speed module.png|link=]]')
    latest_versions = latest_versions.replace(
        '{{Translation|The wiki is based on version}} [[Version history/0.16.0|0.16]]',
        '{{Translation|The wiki is based on version}} [[Version history/0.17.0|0.17]]')
    print(edit_page(session, api_url, edit_token, 'Main_Page/Latest_versions',
                    latest_versions, 'Experimental 0.17').text)

    # sitenotice https://wiki.factorio.com/MediaWiki:Sitenotice
    sitenotice = "'''This wiki is about [[Tutorial:Cheat_sheet#0.17_change_overview|0.17]], the current [[Install_guide#Downloading_and_installing_experimental_versions|experimental version]] of ''Factorio''.'''\n\nInformation about 0.16, the current stable version of ''Factorio'', can be found on [https://stable.wiki.factorio.com/ stable.wiki.factorio.com]."
    print(edit_page(session, api_url, edit_token, 'MediaWiki:Sitenotice', sitenotice,
                    'Experimental 0.17').text)
def test_train_classifier():
    if os.getenv('DATA_DIR', None) is None:
        return

    # Get access token
    response = requests.post(uri('auth', '/tokens'),
                             headers=login_header('ralph', 'secret'))
    assert response.status_code == 201
    token = response.json()['token']

    # Create storage repository
    name = 'repository-{}'.format(generate_string(8))
    response = requests.post(uri('storage', '/repositories'),
                             headers=token_header(token),
                             json={'name': name})
    assert response.status_code == 201
    repository_id = response.json()['id']

    # Get CSV file type ID
    response = requests.get(uri('storage', '/file-types?name=csv'),
                            headers=token_header(token))
    assert response.status_code == 200
    file_type_id = response.json()[0]['id']

    # Get scan type ID
    response = requests.get(uri('storage', '/scan-types?name=none'),
                            headers=token_header(token))
    assert response.status_code == 200
    scan_type_id = response.json()[0]['id']

    # Load the feature file and keep the diagnostic labels
    file_path = os.path.join(os.getenv('DATA_DIR'), 'data.csv')
    features = pd.read_csv(file_path, index_col='MRid')
    subject_labels = list(features['Diagnosis'])

    # Upload CSV file with brain features
    file_id, _ = upload_file(file_path, file_type_id, scan_type_id, repository_id, token)
    assert file_id

    # Train classifier using the uploaded CSV file. As parameters we specify the
    # pipeline ID (which in this case is a classifier training pipeline). The 'file_id'
    # refers to the CSV file. The parameter 'subject_labels' contains a list of diagnostic
    # labels. This list is used to pre-calculate training and testing indices which can be
    # passed to the different workers handling the cross-validation folds in parallel.
    response = requests.post(uri('compute', '/tasks'),
                             headers=token_header(token),
                             json={
                                 'pipeline_name': 'svm_train',
                                 'params': {
                                     'repository_id': repository_id,
                                     'file_id': file_id,
                                     'subject_labels': subject_labels,
                                     'nr_folds': 2,
                                     'index_column': 'MRid',
                                     'target_column': 'Diagnosis',
                                     'kernel': 'rbf',
                                 }
                             })
    assert response.status_code == 201
    task_id = response.json()['id']

    # Retrieve task status periodically until it finishes successfully. In practice,
    # this means the task status == SUCCESS and result != None
    classifier_id = 0
    while True:
        response = requests.get(uri('compute', '/tasks/{}'.format(task_id)),
                                headers=token_header(token))
        assert response.status_code == 200
        status = response.json()['status']
        assert status == 'PENDING' or status == 'SUCCESS'
        result = response.json()['result']
        sys.stdout.write('.')
        sys.stdout.flush()
        if status == 'SUCCESS' and result is not None:
            classifier_id = result['classifier_id']
            break
        time.sleep(2)

    # Remove diagnosis column from the feature data. Then select the first subject
    # so we can send it to the classifier for prediction.
    features.drop('Diagnosis', axis=1, inplace=True)
    subject_data = list(features.iloc[0])
    subject_label = subject_labels[0]

    # Send some data to the trained classifier for prediction
    response = requests.post(uri('compute', '/tasks'),
                             headers=token_header(token),
                             json={
                                 'pipeline_name': 'svm_predict',
                                 'params': {
                                     'classifier_id': classifier_id,
                                     'subjects': [subject_data],
                                 }
                             })
    assert response.status_code == 201
    task_id = response.json()['id']

    while True:
        response = requests.get(uri('compute', '/tasks/{}'.format(task_id)),
                                headers=token_header(token))
        assert response.status_code == 200
        status = response.json()['status']
        assert status == 'PENDING' or status == 'SUCCESS'
        result = response.json()['result']
        sys.stdout.write('.')
        sys.stdout.flush()
        if status == 'SUCCESS' and result is not None:
            assert subject_label == result['predicted_labels'][0]
            break
        time.sleep(2)