return None if not 'entries' in folder_list['item_collection']: return None for entry in folder_list['item_collection']['entries']: if 'name' in entry and 'type' in entry: if entry['name'] == item_name and entry['type'] == item_type: return entry['id'] return None # Authentication from settings file box_auth = JWTAuth.from_settings_file(BOX_CONFIG) # Get auth client box_client = Client(box_auth) # Loop through the arguments downloading the files for idx in range(1, argc): # Get the filename to download and the path file_path = sys.argv[idx] filefolder, filename = os.path.split(file_path) if not filename: print("An invalid file download was requested: '" + file_path + "'") continue # Find the ID of the folder the file is to be in folder_id = "0" if filefolder: parts = filefolder.split('/') for one_folder in parts:
'EM': 'EM', 'PIPELINE': 'PI' }, inplace=True) #df['folderid'] = '' root_mt = df.dropna(subset=['MT']) root_em = df.dropna(subset=['EM']) root_pi = df.dropna(subset=['PI']) print(root_mt.count()) print(root_em.count()) print(root_pi.count()) #root_dowload = pd.DataFrame(columns = ['Archivo_planilla','Archivo_box','Carpeta']) root_box = pd.DataFrame(columns=['Archivo_box', 'Nombre', 'Id', 'Carpeta']) client = Client(oauth) #root_folder = client.folder(folder_id='0') items = client.folder(folder_id='134563649698').get_items() #for archivo in root_mt["name"]: # largo = len(archivo) for item in items: #print('{0} {1} FOLDER "{2}"'.format(item.type.capitalize(), item.id, item)) sub_items = client.folder(folder_id=item.id).get_items() for sub_item in sub_items: print('{0} {1} SUB "{2}"'.format(sub_item.type.capitalize(), sub_item.id, sub_item.name)) new_row_box = { 'Archivo_box': sub_item.name, 'Nombre': sub_item.name[0:25],
def sync_module(Lochness: 'lochness.config',
                subject: 'subject.metadata',
                module_name: 'box.module_name',
                dry: bool):
    '''Sync box data for the subject.

    For every Box subject ID listed in ``subject.box[module_name]``, the
    subject folder is looked up below the module's base folder in Box. Each
    datatype configured under ``file_patterns`` is then walked and every
    file that matches one of the datatype's products is handed to ``save``.

    Args:
        Lochness: loaded configuration. ``Lochness['box'][<module>]
            ['file_patterns']`` and ``Lochness['BIDS']`` are read here.
        subject: subject metadata; ``box``, ``study``, ``id``,
            ``protected_folder`` and ``general_folder`` are read here.
        module_name: dotted module name; the part after the first ``.`` is
            used as the key into the ``box`` section of the configuration.
        dry: forwarded unchanged to ``save``.
    '''
    # only the module_name string without 'box.'
    module_basename = module_name.split('.')[1]

    # NOTE(review): the delete-on-success setting is read and logged, but
    # ``save`` below is still called with ``delete=False``, so nothing is
    # removed from Box. Kept that way because deletion is destructive.
    delete = delete_on_success(Lochness, module_basename)
    logger.debug(f'delete_on_success for {module_basename} is {delete}')

    for bx_sid in subject.box[module_name]:
        logger.debug(f'exploring {subject.study}/{subject.id}')
        _passphrase = keyring.passphrase(Lochness, subject.study)
        enc_key = enc.kdf(_passphrase)

        client_id, client_secret, api_token = keyring.box_api_token(
            Lochness, module_name)

        # box authentication
        auth = OAuth2(
            client_id=client_id,
            client_secret=client_secret,
            access_token=api_token,
        )
        client = Client(auth)

        bx_base = base(Lochness, module_basename)

        # get the id of the bx_base path in box ('0' is the parent id used
        # for the top-level lookup)
        bx_base_obj = get_box_object_based_on_name(client, bx_base, '0')
        if bx_base_obj is None:
            logger.debug('Root of the box is not found')
            continue

        # The subject folder does not depend on the datatype, so it is
        # resolved once per subject instead of once per datatype.
        subject_obj = get_box_object_based_on_name(
            client, bx_sid, bx_base_obj.id)
        if subject_obj is None:
            logger.debug(f'{bx_sid} is not found under {bx_base_obj}')
            continue

        # loop through the items defined for the BOX data
        file_patterns = Lochness['box'][module_basename]['file_patterns']
        for datatype, products in file_patterns.items():
            datatype_obj = get_box_object_based_on_name(
                client, datatype, subject_obj.id)

            # full path
            bx_head = join(bx_base, datatype, bx_sid)
            logger.debug('walking %s', bx_head)

            # if the directory is empty
            if datatype_obj is None:
                continue

            # walk through the root directory
            for root, dirs, files in walk_from_folder_object(
                    bx_head, datatype_obj):
                for box_file_object in files:
                    bx_tail = join(basename(root), box_file_object.name)
                    product = _find_product(bx_tail, products,
                                            subject=bx_sid)
                    if not product:
                        continue

                    protect = product.get('protect', False)
                    output_base = subject.protected_folder \
                        if protect else subject.general_folder

                    encrypt = product.get('encrypt', False)
                    key = enc_key if encrypt else None

                    processed = product.get('processed', False)

                    # For DPACC, get processed from the config.yml
                    output_base = tree.get(datatype,
                                           output_base,
                                           processed=processed,
                                           BIDS=Lochness['BIDS'])

                    compress = product.get('compress', False)

                    # Honor the caller's ``dry`` flag; it used to be
                    # overridden with a hard-coded False.
                    save(box_file_object,
                         (root, box_file_object.name),
                         output_base,
                         key=key,
                         compress=compress,
                         delete=False,
                         dry=dry)
from boxsdk import OAuth2, Client

# NOTE(review): the client id, client secret and access token are embedded
# directly in the source; consider loading them from configuration instead.
_BOX_CREDENTIALS = {
    'client_id': 'c0hjuh3tjr90sloycc4g0jcdvid1yjs2',
    'client_secret': 'w5oJLroCuN4TQ8bEjmJycwFmOexZXB2g',
    'access_token': 'pQS9qEYWw6aelLvibViRpxk6bigoP7Ji',
}

# Build the OAuth2 credentials object and the API client that uses it.
auth = OAuth2(**_BOX_CREDENTIALS)
client = Client(auth)
def _authenticate(self, developer_token):
    """Create the Box client for this instance and store it on ``self.box``.

    The OAuth2 credentials are built from ``self.box_client_id``,
    ``self.box_client_secret`` and the given ``developer_token`` (used as
    the access token).
    """
    # See https://*.app.box.com/developers/console
    credentials = OAuth2(
        client_id=self.box_client_id,
        client_secret=self.box_client_secret,
        access_token=developer_token,
    )
    self.box = Client(credentials)
print '\x1b[36m{} {} {}\x1b[0m'.format(method, url, pformat(kwargs)) response = super(LoggingNetwork, self).request(method, url, access_token, **kwargs) if response.ok: print '\x1b[32m{}\x1b[0m'.format(response.content) else: print '\x1b[31m{}\n{}\n{}\x1b[0m'.format( response.status_code, response.headers, pformat(response.content), ) return response oauth2 = OAuth2(CLIENT_ID, CLIENT_SECRET, access_token=ACCESS_TOKEN) client = Client(oauth2, LoggingNetwork()) def upload(filepath, typechange=None, headers=True, namefile='output'): """ upload is a function that uploads files directly from local to the Box cloud Inputs: filepath: string path to the filepath typechange: string such as "html", "xml" or "json" denoting the type of file to be saved as headers: boolean if the file has a header or not namefile: string of the file name to be saved as """ if typechange == None: box_file = client.folder('0').upload(filepath) elif headers == False: #defaults to abc... if no headers given
def jwt(cred_fp=DEFAULT_CONFIG):
    """Return a Box ``Client`` authenticated from a JWT settings file.

    ``cred_fp`` is the path to the settings file; a leading ``~`` is
    expanded before the file is loaded. Defaults to ``DEFAULT_CONFIG``.
    """
    settings_path = os.path.expanduser(cred_fp)
    jwt_auth = JWTAuth.from_settings_file(settings_path)
    return Client(jwt_auth)
#!/usr/bin/python3
import threading
import os, random
import time
from boxsdk import JWTAuth
from boxsdk import Client
from queue import Queue
from itertools import cycle

# Authenticate from the JWT settings file and create the API client.
sdk = JWTAuth.from_settings_file('./38487735_v17qtrdu_config.json')
Box = Client(sdk)

# IDs of the Box users that uploads are rotated through.
_API_USER_IDS = (
    '6713645944',
    '6713647144',
    '6713648344',
    '6713649544',
    '6713650744',
    '6713651944',
    '6713653144',
    '6713654344',
    '6713655544',
    '6713656744',
    '6713657944',
    '6713659144',
    '6713660344',
    '6713661544',
    '6713662744',
)

# Endless round-robin iterator over the user objects.
API_users = cycle(tuple(Box.user(user_id=uid) for uid in _API_USER_IDS))

total_files = 10000
max_threads = 60
target_folder = 61251909299


def upload_file(filename):
    """Upload ``/tmp/files/<filename>`` into ``target_folder``.

    The upload is made as the next user taken from ``API_users`` and the
    file keeps ``filename`` as its name in Box.
    """
    acting_user = next(API_users)
    destination = Box.as_user(acting_user).folder(folder_id=target_folder)
    destination.upload("/tmp/files/" + filename, file_name=filename)
def box_client(box_oauth, mock_box_network):  # pylint:disable=redefined-outer-name
    """Return a ``Client`` for ``box_oauth`` that uses ``mock_box_network``
    as its network layer."""
    client_options = {'network_layer': mock_box_network}
    return Client(box_oauth, **client_options)
from boxsdk import JWTAuth, Client

# Get config.json from box.com dev console
config = JWTAuth.from_settings_file('config.json')
client = Client(config)

# User on whose behalf the calls are made, and a client acting as that user.
user_to_impersonate = client.user(user_id='12687310242')
user_client = client.as_user(user_to_impersonate)


def upload_large_file(file, size):
    """Upload ``file`` to a fixed Box folder through a chunked upload session.

    ``file`` must expose ``filename`` and is handed to the chunked uploader;
    ``size`` is passed as the file size when creating the session and the
    uploader. Progress is printed and the string ``"done"`` is returned.
    """
    print("sdk_jwt")

    # Create the upload session in the destination folder, acting as the
    # impersonated user.
    impersonating_client = client.as_user(user_to_impersonate)
    destination = impersonating_client.folder('112221918845')
    session = destination.create_upload_session(size, file.filename)
    print('Created upload session {0} with chunk size of {1} bytes'.format(
        session.id, session.part_size))

    # Run the chunked upload and report the resulting file.
    uploader = session.get_chunked_uploader(file, size)
    stored_file = uploader.start()
    print('File "{0}" uploaded to Box with file ID {1}'.format(
        stored_file.name, stored_file.id))
    print(stored_file)
    return "done"
    # To commit the file - looks like it's not needed
    # print(chunked_uploader)
    # sha1 = hashlib.sha1()
    # file_attributes = {
    #     "description": "File has been uploaded via Chunked Upload"
def create_appuser():
    """Return the Box user object for the fixed app-user ID.

    The user object is obtained from a client built with the auth object
    returned by ``create_service_auth()``.
    """
    client = Client(create_service_auth())
    return client.user(user_id='15143905465')
def handle(self, *args, **options):
    """Import data sources and datasets from two published CSV sheets.

    Steps, in order:

    1. Index every non-folder item below a fixed Box folder by its
       lower-cased name.
    2. Make sure a live ``DataSetListing`` page exists, creating it (and a
       ``DataSectionPage`` under the home page) when missing.
    3. Create a ``DataSource`` for every signed-off source row whose
       ``Source ID`` is not stored yet.
    4. Create and publish a ``DatasetPage`` for every signed-off dataset row
       whose ``Dataset ID`` is not stored yet, link it to matching
       publication pages and data sources, and attach the Excel/CSV files
       downloaded from Box.
    """
    MISSING_VALUES = ["", " ", ".", "NA", "N/A", "N.A."]

    # (stream-block type, CSV column) pairs, in the order the blocks are
    # written into a dataset's meta data.
    META_FIELDS = (
        ("description", 'What is a long description of the data set?'),
        ("geography", 'Geography information'),
        ("geographic_coding", 'Geographic coding'),
        ("unit", 'Unit'),
        ("internal_notes", 'Internal notes'),
        ("licence", 'Licence'),
        ("citation", 'Suggested citation'),
    )

    # (publication page class, link model) pairs; the first class the page
    # is an instance of decides which link model is saved.
    PUBLICATION_LINK_MODELS = (
        (PublicationPage, PublicationPageDataset),
        (PublicationSummaryPage, PublicationSummaryPageDataset),
        (PublicationChapterPage, PublicationChapterPageDataset),
        (PublicationAppendixPage, PublicationAppendixPageDataset),
        (LegacyPublicationPage, LegacyPublicationPageDataset),
        (ShortPublicationPage, ShortPublicationPageDataset),
    )

    def wrap_p(text):
        """Wrap ``text`` in a paragraph tag."""
        return "<p>{}</p>".format(text)

    def parse_date(raw_value):
        """Return ``raw_value`` as a datetime (format dd/mm/YYYY), or None
        when it cannot be parsed."""
        if isinstance(raw_value, datetime):
            return raw_value
        try:
            return datetime.strptime(raw_value, "%d/%m/%Y")
        except (ValueError, TypeError):
            return None

    def build_authors(author_names):
        """Turn a comma-separated list of names into author blocks.

        A name matching a ``TeamMemberPage`` becomes an internal author
        (referenced by primary key); any other name becomes an external
        author carrying only the name.
        """
        authors = []
        if author_names in MISSING_VALUES:
            return authors
        for author_name in (name.strip() for name in author_names.split(",")):
            team_member = TeamMemberPage.objects.filter(
                name=author_name).first()
            if team_member is not None:
                authors.append({
                    "type": "internal_author",
                    "value": team_member.pk
                })
            else:
                authors.append({
                    "type": "external_author",
                    "value": {
                        "name": author_name,
                        "title": "",
                        "photograph": None,
                        "page": ""
                    }
                })
        return authors

    # Box Auth
    config = JWTAuth.from_settings_file(
        os.path.join(settings.BASE_DIR, 'box_config.json'))
    client = Client(config)

    def recurse_items(folder_items, box_items):
        """Add every non-folder item (recursively) to ``box_items`` as
        lower-cased name -> Box id, and return the mapping."""
        for item in folder_items:
            if isinstance(item, Folder):
                sub_folder_items = client.folder(
                    folder_id=item.id).get_items()
                box_items = recurse_items(sub_folder_items, box_items)
            else:
                box_items[item.name.lower()] = item.id
        return box_items

    box_items = {}
    folder_items = client.folder(folder_id="93089112686").get_items()
    box_items = recurse_items(folder_items, box_items)

    def attach_download(page, file_title, extension):
        """Download ``file_title + extension`` from Box and attach it to
        ``page``; write a warning when Box has no item with that name."""
        item_name = file_title.lower() + extension
        # Only the lookup is guarded, so a KeyError raised while saving is
        # not misreported as a missing file.
        try:
            item_id = box_items[item_name]
        except KeyError:
            self.stdout.write(
                self.style.WARNING(item_name + " not found."))
            return
        f = BytesIO()
        client.file(item_id).download_to(f)
        doc = Document(title=file_title)
        doc.file.save(item_name, File(f), save=True)
        doc.save()
        DatasetDownloads(page=page, title=file_title, file=doc).save()

    # Create IA if not already present
    dataset_listing = DataSetListing.objects.live().first()
    if not dataset_listing:
        data_section = DataSectionPage.objects.live().first()
        if not data_section:
            home_page = HomePage.objects.live().first()
            data_section = DataSectionPage(title="Data")
            home_page.add_child(instance=data_section)
            data_section.save_revision().publish()
        dataset_listing = DataSetListing(title="Datasets")
        data_section.add_child(instance=dataset_listing)
        dataset_listing.save_revision().publish()

    # Fetch data and parse
    source_csv_url = "https://docs.google.com/spreadsheets/d/1pDbdncnm1TF41kJJX2WjZ2Wq9juOvUqU/export?format=csv&id=1pDbdncnm1TF41kJJX2WjZ2Wq9juOvUqU&gid=2086173829"
    dataset_csv_url = "https://docs.google.com/spreadsheets/d/1pDbdncnm1TF41kJJX2WjZ2Wq9juOvUqU/export?format=csv&id=1pDbdncnm1TF41kJJX2WjZ2Wq9juOvUqU&gid=1736754230"

    source_response = requests.get(source_csv_url)
    source_response.encoding = 'utf-8'
    source_text = source_response.iter_lines(decode_unicode=True)

    dataset_response = requests.get(dataset_csv_url)
    dataset_response.encoding = 'utf-8'
    dataset_text = dataset_response.iter_lines(decode_unicode=True)

    # Data sources
    # Columns of the source sheet:
    #   'Source ID', 'Source title', 'Organisation ',
    #   'Long description of the data source', 'Date of access',
    #   'Link to the source', 'Geography information', 'Keyword search',
    #   'Internal notes', 'Analyst that worked on the data', 'Licence',
    #   'Check', 'Signed-off and ready?'
    source_reader = csv.DictReader(source_text)
    # The first row after the header is always skipped
    # (presumably a description row - confirm against the sheet).
    next(source_reader, None)
    for source_dict in source_reader:
        already_imported = DataSource.objects.filter(
            source_id=source_dict['Source ID']).exists()
        if (already_imported
                or source_dict['Source title'] in MISSING_VALUES
                or source_dict['Signed-off and ready?'].lower() != "yes"):
            continue

        print("source: ", source_dict['Source title'])

        # An unparsable access date is stored as None.
        new_source = DataSource(
            source_id=source_dict['Source ID'],
            title=source_dict['Source title'],
            organisation=source_dict['Organisation '],
            description=source_dict['Long description of the data source'],
            date_of_access=parse_date(source_dict['Date of access']),
            link_to_data=source_dict['Link to the source'],
            geography=source_dict['Geography information'],
            internal_notes=source_dict['Internal notes'],
            licence=source_dict['Licence'])
        # NOTE: 'Keyword search' is not imported; the topic tagging code
        # was disabled.

        # Authors
        authors = build_authors(
            source_dict['Analyst that worked on the data'])
        if authors:
            new_source.authors = json.dumps(authors)
        new_source.save()

    # Datasets
    # Columns of the dataset sheet:
    #   'Dataset ID', 'What is the title of the data set?',
    #   'What DI publication is this dataset associated with?',
    #   'What is a long description of the data set?', 'Release date?',
    #   'Geography information', 'Geographic coding', 'Unit',
    #   'Keyword search', 'Internal notes',
    #   'Analyst that worked on the data', 'Licence', 'Suggested citation',
    #   'Source 1', 'Source 2 (optional)' ... 'Source 9 (optional)', 'Done',
    #   'File location Excel', 'File name Excel', 'File location csv',
    #   'File name csv', 'File notes', 'Signed-off and ready?'
    source_keys = [
        'Source 1', 'Source 2 (optional)', 'Source 3 (optional)',
        'Source 4 (optional)', 'Source 5 (optional)',
        'Source 6 (optional)', 'Source 7 (optional)',
        'Source 8 (optional)', 'Source 9 (optional)'
    ]
    download_columns = (
        ("File name Excel", ".xlsx"),
        ("File name csv", ".csv"),
    )

    dataset_reader = csv.DictReader(dataset_text)
    # Same as for the sources: the first row after the header is skipped.
    next(dataset_reader, None)
    for dataset_dict in dataset_reader:
        dataset_title = dataset_dict['What is the title of the data set?']
        already_imported = DatasetPage.objects.filter(
            dataset_id=dataset_dict['Dataset ID']).exists()
        if (already_imported
                or dataset_title in MISSING_VALUES
                or dataset_dict['Signed-off and ready?'].lower() != "yes"):
            continue

        print("Dataset: ", dataset_title)

        # An unparsable release date falls back to the current time.
        release_date = (parse_date(dataset_dict['Release date?'])
                        or datetime.now())

        meta_json = [
            {"type": block_type, "value": wrap_p(dataset_dict[column])}
            for block_type, column in META_FIELDS
            if dataset_dict[column] not in MISSING_VALUES
        ]

        new_dataset = DatasetPage(
            title=dataset_title,
            dataset_id=dataset_dict['Dataset ID'],
            dataset_title=dataset_title,
            release_date=release_date,
            meta_data=json.dumps(meta_json))
        # NOTE: 'Keyword search' is not imported; the topic tagging code
        # was disabled.

        dataset_listing.add_child(instance=new_dataset)

        # Authors
        authors = build_authors(
            dataset_dict['Analyst that worked on the data'])
        if authors:
            new_dataset.authors = json.dumps(authors)
        new_dataset.save_revision().publish()

        # Link the dataset to every live page whose title matches one of
        # the "|"-separated publication titles.
        publication_titles = dataset_dict[
            'What DI publication is this dataset associated with?']
        if publication_titles not in MISSING_VALUES:
            for pub_title in (title.strip()
                              for title in publication_titles.split("|")):
                live_page = Page.objects.filter(
                    title=pub_title).live().first()
                if live_page is None:
                    continue
                pub_page = live_page.specific
                for page_class, link_model in PUBLICATION_LINK_MODELS:
                    if isinstance(pub_page, page_class):
                        link_model(item=pub_page,
                                   dataset=new_dataset).save()
                        break

        # Link the dataset to the data sources named in the source columns;
        # titles without a stored DataSource are ignored.
        for source_key in source_keys:
            key_val = dataset_dict[source_key]
            if key_val in MISSING_VALUES:
                continue
            try:
                related_datasource = DataSource.objects.get(title=key_val)
            except DataSource.DoesNotExist:
                continue
            DataSetSource(page=new_dataset,
                          source=related_datasource).save()

        # Attach the Excel and CSV files, when named in the sheet.
        for column, extension in download_columns:
            if dataset_dict[column] not in MISSING_VALUES:
                attach_download(new_dataset, dataset_dict[column], extension)

    self.stdout.write(self.style.SUCCESS('Called successfully'))
import sys
from boxsdk import OAuth2, Client

# handle auth and create box api client
# NOTE(review): the access token is embedded directly in the source.
TOKEN = 'VY7Hizc9HrkYG4KzqOfRiwamAtv1vC0C'
auth = OAuth2(None, None, access_token=TOKEN)
box = Client(auth)

# fetch the current user's details from box, then show the login
# attribute followed by the api response as a dictionary
me = box.user().get()
print('logged in to box as', me.login)
print(me.response_object)

# folder id comes from the first command line argument when one is given
# (sys.argv[0] is the script itself); otherwise 0, which is always the
# user's root folder
MY_FOLDER_ID = sys.argv[1] if len(sys.argv) > 1 else 0

# get folders details from box
my_folder = box.folder(MY_FOLDER_ID).get()

# print info about folder