return None
    if 'entries' not in folder_list['item_collection']:
        return None

    for entry in folder_list['item_collection']['entries']:
        if 'name' in entry and 'type' in entry:
            if entry['name'] == item_name and entry['type'] == item_type:
                return entry['id']

    return None

# Authentication from settings file
box_auth = JWTAuth.from_settings_file(BOX_CONFIG)

# Get auth client
box_client = Client(box_auth)

# Loop through the arguments downloading the files
for idx in range(1, argc):
    # Get the filename to download and the path
    file_path = sys.argv[idx]
    filefolder, filename = os.path.split(file_path)
    if not filename:
        print("An invalid file download was requested: '" + file_path + "'")
        continue

    # Find the ID of the folder the file is to be in
    folder_id = "0"
    if filefolder:
        parts = filefolder.split('/')
        for one_folder in parts:
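            # Hedged continuation -- the original snippet is truncated here.
            # A plausible body resolves each path component to its folder ID
            # using the lookup helper at the top of this example; that helper's
            # real name is not shown, so get_item_id() is a hypothetical stand-in.
            folder_info = box_client.folder(folder_id=folder_id).get()
            next_id = get_item_id(folder_info, one_folder, 'folder')
            if next_id is None:
                folder_id = None
                break
            folder_id = next_id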
Example No. 2
    'EM': 'EM',
    'PIPELINE': 'PI'
},
          inplace=True)
#df['folderid'] = ''

root_mt = df.dropna(subset=['MT'])
root_em = df.dropna(subset=['EM'])
root_pi = df.dropna(subset=['PI'])
print(root_mt.count())
print(root_em.count())
print(root_pi.count())
#root_download = pd.DataFrame(columns=['Archivo_planilla', 'Archivo_box', 'Carpeta'])
root_box = pd.DataFrame(columns=['Archivo_box', 'Nombre', 'Id', 'Carpeta'])

client = Client(oauth)
#root_folder = client.folder(folder_id='0')
items = client.folder(folder_id='134563649698').get_items()

#for archivo in root_mt["name"]:
#    largo = len(archivo)

for item in items:
    #print('{0} {1} FOLDER "{2}"'.format(item.type.capitalize(), item.id, item))
    sub_items = client.folder(folder_id=item.id).get_items()
    for sub_item in sub_items:
        print('{0} {1} SUB "{2}"'.format(sub_item.type.capitalize(),
                                         sub_item.id, sub_item.name))
        new_row_box = {
            'Archivo_box': sub_item.name,
            'Nombre': sub_item.name[0:25],
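            # Hedged completion -- the snippet is truncated here. The remaining
            # keys are inferred from the root_box columns defined above; using
            # the parent folder's name for 'Carpeta' is an assumption.
            'Id': sub_item.id,
            'Carpeta': item.name,
        }
        root_box = pd.concat([root_box, pd.DataFrame([new_row_box])],
                             ignore_index=True)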
Example No. 3
def sync_module(Lochness: 'lochness.config', subject: 'subject.metadata',
                module_name: 'box.module_name', dry: bool):
    '''Sync box data for the subject'''

    # only the module_name string without 'box.'
    module_basename = module_name.split('.')[1]

    # delete on success
    delete = delete_on_success(Lochness, module_basename)
    logger.debug(f'delete_on_success for {module_basename} is {delete}')

    for bx_sid in subject.box[module_name]:
        logger.debug(f'exploring {subject.study}/{subject.id}')
        _passphrase = keyring.passphrase(Lochness, subject.study)
        enc_key = enc.kdf(_passphrase)

        client_id, client_secret, api_token = keyring.box_api_token(
            Lochness, module_name)

        # box authentication
        auth = OAuth2(
            client_id=client_id,
            client_secret=client_secret,
            access_token=api_token,
        )
        client = Client(auth)

        bx_base = base(Lochness, module_basename)

        # get the id of the bx_base path in box
        bx_base_obj = get_box_object_based_on_name(client, bx_base, '0')

        if bx_base_obj is None:
            logger.debug('Root of the box is not found')
            continue

        # loop through the items defined for the BOX data
        for datatype, products in iter(
                Lochness['box'][module_basename]['file_patterns'].items()):
            subject_obj = get_box_object_based_on_name(client, bx_sid,
                                                       bx_base_obj.id)

            if subject_obj is None:
                logger.debug(f'{bx_sid} is not found under {bx_base_obj}')
                continue

            datatype_obj = get_box_object_based_on_name(
                client, datatype, subject_obj.id)

            # full path
            bx_head = join(bx_base, datatype, bx_sid)

            logger.debug('walking %s', bx_head)

            # if the directory is empty
            if datatype_obj is None:
                continue

            # walk through the root directory
            for root, dirs, files in walk_from_folder_object(
                    bx_head, datatype_obj):

                for box_file_object in files:
                    bx_tail = join(basename(root), box_file_object.name)
                    product = _find_product(bx_tail, products, subject=bx_sid)
                    if not product:
                        continue

                    protect = product.get('protect', False)
                    output_base = subject.protected_folder \
                                  if protect else subject.general_folder

                    encrypt = product.get('encrypt', False)
                    key = enc_key if encrypt else None

                    processed = product.get('processed', False)

                    # For DPACC, get processed from the config.yml
                    output_base = tree.get(datatype,
                                           output_base,
                                           processed=processed,
                                           BIDS=Lochness['BIDS'])

                    compress = product.get('compress', False)

                    save(box_file_object, (root, box_file_object.name),
                         output_base,
                         key=key,
                         compress=compress,
                         delete=delete,
                         dry=dry)
Example No. 4
from boxsdk import OAuth2, Client

auth = OAuth2(
    client_id='c0hjuh3tjr90sloycc4g0jcdvid1yjs2',
    client_secret='w5oJLroCuN4TQ8bEjmJycwFmOexZXB2g',
    access_token='pQS9qEYWw6aelLvibViRpxk6bigoP7Ji',
)
client = Client(auth)
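
# A quick sanity check (sketch): assuming the developer token above is still
# valid, fetch the current user to confirm the client is authenticated.
me = client.user().get()
print('Logged in as', me.login)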
Example No. 5
    def _authenticate(self, developer_token):
        # https://*.app.box.com/developers/console
        oauth2 = OAuth2(client_id=self.box_client_id,
                        client_secret=self.box_client_secret,
                        access_token=developer_token)
        self.box = Client(oauth2)
Example No. 6
        print('\x1b[36m{} {} {}\x1b[0m'.format(method, url, pformat(kwargs)))
        response = super(LoggingNetwork, self).request(method, url,
                                                       access_token, **kwargs)
        if response.ok:
            print('\x1b[32m{}\x1b[0m'.format(response.content))
        else:
            print('\x1b[31m{}\n{}\n{}\x1b[0m'.format(
                response.status_code,
                response.headers,
                pformat(response.content),
            ))
        return response


oauth2 = OAuth2(CLIENT_ID, CLIENT_SECRET, access_token=ACCESS_TOKEN)
client = Client(oauth2, LoggingNetwork())


def upload(filepath, typechange=None, headers=True, namefile='output'):
    """
	upload is a function that uploads files directly from local to the Box cloud
	Inputs:
		filepath: string path to the filepath
		typechange: string such as "html", "xml" or "json" denoting the type of file to be saved as
		headers: boolean if the file has a header or not
		namefile: string of the file name to be saved as 
	"""
    if typechange == None:
        box_file = client.folder('0').upload(filepath)
    elif headers == False:
        #defaults to abc... if no headers given
Example No. 7
def jwt(cred_fp=DEFAULT_CONFIG):
    # Load JWT config file from default location
    config = JWTAuth.from_settings_file(os.path.expanduser(cred_fp))
    return Client(config)
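
# Example use (sketch): build a client from the default JWT config and confirm
# it works by fetching the authenticated service account's login.
box = jwt()
print(box.user().get().login)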
Example No. 8
#!/usr/bin/python3

import threading
import os, random
import time
from boxsdk import JWTAuth
from boxsdk import Client
from queue import Queue
from itertools import cycle

sdk = JWTAuth.from_settings_file('./38487735_v17qtrdu_config.json')

Box = Client(sdk)
API_users = cycle(
    (Box.user(user_id='6713645944'), Box.user(user_id='6713647144'),
     Box.user(user_id='6713648344'), Box.user(user_id='6713649544'),
     Box.user(user_id='6713650744'), Box.user(user_id='6713651944'),
     Box.user(user_id='6713653144'), Box.user(user_id='6713654344'),
     Box.user(user_id='6713655544'), Box.user(user_id='6713656744'),
     Box.user(user_id='6713657944'), Box.user(user_id='6713659144'),
     Box.user(user_id='6713660344'), Box.user(user_id='6713661544'),
     Box.user(user_id='6713662744')))

total_files = 10000
max_threads = 60
target_folder = 61251909299


def upload_file(filename):
    Box.as_user(next(API_users)).folder(folder_id=target_folder).upload(
        "/tmp/files/" + filename, file_name=filename)
Example No. 9
def box_client(box_oauth, mock_box_network):
    # pylint:disable=redefined-outer-name
    return Client(box_oauth, network_layer=mock_box_network)
Example No. 10
from boxsdk import JWTAuth, Client

# Get config.json from box.com dev console
config = JWTAuth.from_settings_file('config.json')

client = Client(config)

user_to_impersonate = client.user(user_id='12687310242')
user_client = client.as_user(user_to_impersonate)


def upload_large_file(file, size):
    print("sdk_jwt")
    file_size = size
    upload_session = client.as_user(user_to_impersonate).folder(
        '112221918845').create_upload_session(file_size, file.filename)
    print('Created upload session {0} with chunk size of {1} bytes'.format(
        upload_session.id, upload_session.part_size))

    chunked_uploader = upload_session.get_chunked_uploader(file, file_size)
    uploaded_file = chunked_uploader.start()
    print('File "{0}" uploaded to Box with file ID {1}'.format(
        uploaded_file.name, uploaded_file.id))
    print(uploaded_file)
    return "done"

    # To commit the file - looks like it's not needed
    # print(chunked_uploader)
    # sha1 = hashlib.sha1()
    # file_attributes = {
    #     "description": "File has been uploaded via Chunked Upload"
Example No. 11
def create_appuser():
    service_account_auth = create_service_auth()
    service_account_client = Client(service_account_auth)
    app_user = service_account_client.user(user_id='15143905465')
    return app_user
Example No. 12
    def handle(self, *args, **options):
        def wrap_p(text):
            return "<p>{}</p>".format(text)

        MISSING_VALUES = ["", " ", ".", "NA", "N/A", "N.A."]

        # Box Auth
        config = JWTAuth.from_settings_file(
            os.path.join(settings.BASE_DIR, 'box_config.json'))
        client = Client(config)

        def recurse_items(folder_items, box_items):
            for item in folder_items:
                if type(item) is Folder:
                    sub_folder_items = client.folder(
                        folder_id=item.id).get_items()
                    box_items = recurse_items(sub_folder_items, box_items)
                else:
                    box_items[item.name.lower()] = item.id
            return box_items

        box_items = {}
        folder_items = client.folder(folder_id="93089112686").get_items()
        box_items = recurse_items(folder_items, box_items)

        # Create IA if not already present
        dataset_listing = DataSetListing.objects.live().first()
        if not dataset_listing:
            data_section = DataSectionPage.objects.live().first()
            if not data_section:
                home_page = HomePage.objects.live().first()
                data_section = DataSectionPage(title="Data")
                home_page.add_child(instance=data_section)
                data_section.save_revision().publish()
            dataset_listing = DataSetListing(title="Datasets")
            data_section.add_child(instance=dataset_listing)
            dataset_listing.save_revision().publish()

        # Fetch data and parse
        source_csv_url = "https://docs.google.com/spreadsheets/d/1pDbdncnm1TF41kJJX2WjZ2Wq9juOvUqU/export?format=csv&id=1pDbdncnm1TF41kJJX2WjZ2Wq9juOvUqU&gid=2086173829"
        dataset_csv_url = "https://docs.google.com/spreadsheets/d/1pDbdncnm1TF41kJJX2WjZ2Wq9juOvUqU/export?format=csv&id=1pDbdncnm1TF41kJJX2WjZ2Wq9juOvUqU&gid=1736754230"

        source_response = requests.get(source_csv_url)
        source_response.encoding = 'utf-8'
        source_text = source_response.iter_lines(decode_unicode=True)
        dataset_response = requests.get(dataset_csv_url)
        dataset_response.encoding = 'utf-8'
        dataset_text = dataset_response.iter_lines(decode_unicode=True)

        # Data sources
        """
        (Pdb) source_dict.keys()
        dict_keys(['Source ID', 'Source title', 'Organisation ', 'Long description of the data source', 'Date of access', 'Link to the source', 'Geography information', 'Keyword search', 'Internal notes', 'Analyst that worked on the data', 'Licence', 'Check', 'Signed-off and ready?'])
        """
        skip = True
        source_reader = csv.DictReader(source_text)
        for source_dict in source_reader:
            if skip:
                skip = False
            else:
                source_check = DataSource.objects.filter(
                    source_id=source_dict['Source ID'])
                if not source_check and source_dict[
                        'Source title'] not in MISSING_VALUES and source_dict[
                            'Signed-off and ready?'].lower() == "yes":
                    print("source: ", source_dict['Source title'])
                    if type(source_dict['Date of access']) is not datetime:
                        try:
                            date_of_access = datetime.strptime(
                                source_dict['Date of access'], "%d/%m/%Y")
                        except (ValueError, TypeError) as e:
                            date_of_access = None
                    else:
                        date_of_access = source_dict['Date of access']

                    # try:
                    #     tag_list = [tag.strip() for tag in source_dict['Keyword search'].split(",") if len(tag.strip()) < 100 and len(tag.strip()) > 0]
                    # except AttributeError:
                    #     tag_list = []
                    new_source = DataSource(
                        source_id=source_dict['Source ID'],
                        title=source_dict['Source title'],
                        organisation=source_dict['Organisation '],
                        description=source_dict[
                            'Long description of the data source'],
                        date_of_access=date_of_access,
                        link_to_data=source_dict['Link to the source'],
                        geography=source_dict['Geography information'],
                        internal_notes=source_dict['Internal notes'],
                        licence=source_dict['Licence'])
                    # new_source.topics.add(*tag_list)

                    # Authors
                    author_names = source_dict[
                        'Analyst that worked on the data']
                    authors = []
                    if author_names not in MISSING_VALUES:
                        author_names_list = [
                            author.strip()
                            for author in author_names.split(",")
                        ]
                        for author_name in author_names_list:
                            internal_author_page_qs = TeamMemberPage.objects.filter(
                                name=author_name)
                            if internal_author_page_qs:
                                author_obj = {
                                    "type": "internal_author",
                                    "value": internal_author_page_qs.first().pk
                                }
                            else:
                                author_obj = {
                                    "type": "external_author",
                                    "value": {
                                        "name": author_name,
                                        "title": "",
                                        "photograph": None,
                                        "page": ""
                                    }
                                }
                            authors.append(author_obj)
                    if authors:
                        new_source.authors = json.dumps(authors)
                    new_source.save()

        # Datasets
        """
        (Pdb) dataset_dict.keys()
        dict_keys(['Dataset ID', 'What is the title of the data set?', 'What DI publication is this dataset associated with?', 'What is a long description of the data set?', 'Release date?', 'Geography information', 'Geographic coding', 'Unit', 'Keyword search', 'Internal notes', 'Analyst that worked on the data', 'Licence', 'Suggested citation', 'Source 1', 'Source 2 (optional)', 'Source 3 (optional)', 'Source 4 (optional)', 'Source 5 (optional)', 'Source 6 (optional)', 'Source 7 (optional)', 'Source 8 (optional)', 'Source 9 (optional)', 'Done', 'File location Excel', 'File name Excel', 'File location csv', 'File name csv', 'File notes', 'Signed-off and ready?'])
        """
        source_keys = [
            'Source 1', 'Source 2 (optional)', 'Source 3 (optional)',
            'Source 4 (optional)', 'Source 5 (optional)',
            'Source 6 (optional)', 'Source 7 (optional)',
            'Source 8 (optional)', 'Source 9 (optional)'
        ]
        skip = True
        dataset_reader = csv.DictReader(dataset_text)
        for dataset_dict in dataset_reader:
            if skip:
                skip = False
            else:
                dataset_check = DatasetPage.objects.filter(
                    dataset_id=dataset_dict['Dataset ID'])
                if not dataset_check and dataset_dict[
                        'What is the title of the data set?'] not in MISSING_VALUES and dataset_dict[
                            'Signed-off and ready?'].lower() == "yes":
                    print("Dataset: ",
                          dataset_dict['What is the title of the data set?'])
                    if type(dataset_dict['Release date?']) is not datetime:
                        try:
                            release_date = datetime.strptime(
                                dataset_dict['Release date?'], "%d/%m/%Y")
                        except (ValueError, TypeError) as e:
                            release_date = datetime.now()
                    else:
                        release_date = dataset_dict['Release date?']

                    meta_json = []
                    if dataset_dict[
                            'What is a long description of the data set?'] not in MISSING_VALUES:
                        meta_json.append({
                            "type":
                            "description",
                            "value":
                            wrap_p(dataset_dict[
                                'What is a long description of the data set?'])
                        })
                    if dataset_dict[
                            'Geography information'] not in MISSING_VALUES:
                        meta_json.append({
                            "type":
                            "geography",
                            "value":
                            wrap_p(dataset_dict['Geography information'])
                        })
                    if dataset_dict['Geographic coding'] not in MISSING_VALUES:
                        meta_json.append({
                            "type":
                            "geographic_coding",
                            "value":
                            wrap_p(dataset_dict['Geographic coding'])
                        })
                    if dataset_dict['Unit'] not in MISSING_VALUES:
                        meta_json.append({
                            "type": "unit",
                            "value": wrap_p(dataset_dict['Unit'])
                        })
                    if dataset_dict['Internal notes'] not in MISSING_VALUES:
                        meta_json.append({
                            "type":
                            "internal_notes",
                            "value":
                            wrap_p(dataset_dict['Internal notes'])
                        })
                    if dataset_dict['Licence'] not in MISSING_VALUES:
                        meta_json.append({
                            "type":
                            "licence",
                            "value":
                            wrap_p(dataset_dict['Licence'])
                        })
                    if dataset_dict[
                            'Suggested citation'] not in MISSING_VALUES:
                        meta_json.append({
                            "type":
                            "citation",
                            "value":
                            wrap_p(dataset_dict['Suggested citation'])
                        })

                    new_dataset = DatasetPage(
                        title=dataset_dict[
                            'What is the title of the data set?'],
                        dataset_id=dataset_dict['Dataset ID'],
                        dataset_title=dataset_dict[
                            'What is the title of the data set?'],
                        release_date=release_date,
                        meta_data=json.dumps(meta_json))

                    # try:
                    #     tag_list = [tag.strip() for tag in dataset_dict['Keyword search'].split(",") if len(tag.strip()) < 100 and len(tag.strip()) > 0]
                    # except AttributeError:
                    #     tag_list = []
                    # new_dataset.topics.add(*tag_list)

                    dataset_listing.add_child(instance=new_dataset)

                    # Authors
                    author_names = dataset_dict[
                        'Analyst that worked on the data']
                    authors = []
                    if author_names not in MISSING_VALUES:
                        author_names_list = [
                            author.strip()
                            for author in author_names.split(",")
                        ]
                        for author_name in author_names_list:
                            internal_author_page_qs = TeamMemberPage.objects.filter(
                                name=author_name)
                            if internal_author_page_qs:
                                author_obj = {
                                    "type": "internal_author",
                                    "value": internal_author_page_qs.first().pk
                                }
                            else:
                                author_obj = {
                                    "type": "external_author",
                                    "value": {
                                        "name": author_name,
                                        "title": "",
                                        "photograph": None,
                                        "page": ""
                                    }
                                }
                            authors.append(author_obj)
                    if authors:
                        new_dataset.authors = json.dumps(authors)

                    new_dataset.save_revision().publish()

                    if dataset_dict[
                            'What DI publication is this dataset associated with?'] not in MISSING_VALUES:
                        pub_titles = [
                            pub_title.strip() for pub_title in dataset_dict[
                                'What DI publication is this dataset associated with?']
                            .split("|")
                        ]
                        for pub_title in pub_titles:
                            pub_check = Page.objects.filter(
                                title=pub_title).live()
                            if pub_check:
                                pub_page = pub_check.first().specific
                                if isinstance(pub_page, PublicationPage):
                                    PublicationPageDataset(
                                        item=pub_page,
                                        dataset=new_dataset).save()
                                elif isinstance(pub_page,
                                                PublicationSummaryPage):
                                    PublicationSummaryPageDataset(
                                        item=pub_page,
                                        dataset=new_dataset).save()
                                elif isinstance(pub_page,
                                                PublicationChapterPage):
                                    PublicationChapterPageDataset(
                                        item=pub_page,
                                        dataset=new_dataset).save()
                                elif isinstance(pub_page,
                                                PublicationAppendixPage):
                                    PublicationAppendixPageDataset(
                                        item=pub_page,
                                        dataset=new_dataset).save()
                                elif isinstance(pub_page,
                                                LegacyPublicationPage):
                                    LegacyPublicationPageDataset(
                                        item=pub_page,
                                        dataset=new_dataset).save()
                                elif isinstance(pub_page,
                                                ShortPublicationPage):
                                    ShortPublicationPageDataset(
                                        item=pub_page,
                                        dataset=new_dataset).save()

                    for source_key in source_keys:
                        key_val = dataset_dict[source_key]
                        if key_val not in MISSING_VALUES:
                            try:
                                related_datasource = DataSource.objects.get(
                                    title=key_val)
                                DataSetSource(
                                    page=new_dataset,
                                    source=related_datasource).save()
                            except DataSource.DoesNotExist:
                                pass

                    if dataset_dict["File name Excel"] not in MISSING_VALUES:
                        item_name = dataset_dict["File name Excel"].lower(
                        ) + ".xlsx"
                        try:
                            item_id = box_items[item_name]
                            f = BytesIO()
                            client.file(item_id).download_to(f)
                            doc = Document(
                                title=dataset_dict["File name Excel"])
                            doc.file.save(item_name, File(f), save=True)
                            doc.save()
                            download = DatasetDownloads(
                                page=new_dataset,
                                title=dataset_dict["File name Excel"],
                                file=doc)
                            download.save()
                        except KeyError:
                            self.stdout.write(
                                self.style.WARNING(item_name + " not found."))

                    if dataset_dict["File name csv"] not in MISSING_VALUES:
                        item_name = dataset_dict["File name csv"].lower(
                        ) + ".csv"
                        try:
                            item_id = box_items[item_name]
                            f = BytesIO()
                            client.file(item_id).download_to(f)
                            doc = Document(title=dataset_dict["File name csv"])
                            doc.file.save(item_name, File(f), save=True)
                            doc.save()
                            download = DatasetDownloads(
                                page=new_dataset,
                                title=dataset_dict["File name csv"],
                                file=doc)
                            download.save()
                        except KeyError:
                            self.stdout.write(
                                self.style.WARNING(item_name + " not found."))

        self.stdout.write(self.style.SUCCESS('Called successfully'))
Example No. 13
import sys
from boxsdk import OAuth2, Client

# handle auth and create box api client
TOKEN = 'VY7Hizc9HrkYG4KzqOfRiwamAtv1vC0C'
auth = OAuth2(None, None, access_token=TOKEN)
box = Client(auth)

# get user details from box
me = box.user().get()

# print function writes to screen
# here we write the login attribute
print('logged in to box as', me.login)

# print api response in dictionary
print(me.response_object)

# 0 is always the user's root folder
MY_FOLDER_ID = 0

# sys.argv is list of command line arguments
# len function returns count of list items
# get second command line arg if present
# indexes are 0-based
if len(sys.argv) > 1:
    MY_FOLDER_ID = sys.argv[1]

# get folders details from box
my_folder = box.folder(MY_FOLDER_ID).get()
# print info about folder
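# (hedged completion -- the listing is cut off above; a likely final line)
print(my_folder.name, 'contains', my_folder.item_collection['total_count'], 'items')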