Example #1
def index(request):
    pip = os.path.join(sys.exec_prefix, 'bin', 'pip')
    if not os.path.isfile(pip):
        pip = 'pip'
    SHELL_COMMANDS = (
        ('Hostname','hostname'),
        ('hg version', 'hg id'),
        ('git version', "git log --pretty=format:'%h' -n 1"),
        ('hg branch', 'hg branch'),
        ('git branch', 'git rev-parse --abbrev-ref HEAD'),
        ('MySQL version', 'mysql --version'),
        ('Local Packages', '%s freeze -l' % pip)
    )
    SD = OrderedDict()
    for k,v in sorted(settings_list(), key=lambda x: x[0]):
        SD[k] = v
    context = RequestContext(request, {
        'args': sys.argv,
        'exe': sys.executable,
        'settings': SD,
        })

    context['versions'] = OrderedDict()
    # get versions
    curr_dir = os.path.realpath(os.path.dirname(__file__))
    for name, shell_command in SHELL_COMMANDS:
        try:
            result = utils.run_shell_command(shell_command, curr_dir)
            if result:
                if isinstance(result, list):
                    result = '<br>'.join(result)  # join list output into one HTML string
                context['versions'][name] = result
        except:
            pass
    # machine status    
    context['machine'] = OrderedDict()
    if sys.platform == 'darwin':
        context['machine']['Uptime'] = 'not done yet on MacOS'
        context['machine']['Disk Space'] = 'not done yet on MacOS'
    elif sys.platform == 'win32':
        context['machine']['Uptime'] = 'not done yet on Windows'
        context['machine']['Disk Space'] = 'not done yet on Windows'
    else:
        context['machine']['Uptime'] = utils.server_uptime()
        context['machine']['Disk Space'] = utils.disk_usage('/')._asdict()
    if os.path.exists(settings.MEDIA_ROOT):
        context['machine']['Media Folder'] = utils.sizeof_fmt(utils.folder_size(settings.MEDIA_ROOT))

    context['stats'] = utils.get_available_stats()
    context['apps'] = [(app.__name__, ', '.join([model.__name__ for model in models])) for app, models in all_concrete_models()]
    context['relations'] = [[(model.__name__, ', '.join(['%s (%s) through %s' % (relation.__name__, relation.__module__, field.__class__.__name__)
                                                        for field, relation in relations]), app.__name__) 
                                                            for model, relations in rel_info] 
                                                                for app, rel_info in all_relations()]
    #context['rel_graph'] = 
    
    context['config_warnings'] = utils.get_configuration_warnings()

    return render_to_response('dashboard/index.html', context)
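Every example in this listing exercises a folder_size helper, and this view also calls utils.sizeof_fmt. Neither function is shown in the snippet, so below is a minimal sketch of what they might look like; the names and signatures are assumptions inferred from the call sites above, not the dashboard's actual utils module.

import os


def folder_size(path):
    # Recursively sum the size in bytes of all regular files under path
    # (assumed behaviour, matching how the examples use the result).
    total = 0
    for dirpath, dirnames, filenames in os.walk(path):
        for filename in filenames:
            fp = os.path.join(dirpath, filename)
            if os.path.isfile(fp):  # skip broken symlinks
                total += os.path.getsize(fp)
    return total


def sizeof_fmt(num, suffix='B'):
    # Render a byte count as a human-readable string, e.g. 2048 -> '2.0KB'.
    for unit in ('', 'K', 'M', 'G', 'T'):
        if abs(num) < 1024.0:
            return '%3.1f%s%s' % (num, unit, suffix)
        num /= 1024.0
    return '%3.1f%s%s' % (num, 'P', suffix)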
Example #2
 def download(self):
     work_folder = os.path.dirname(smart_str(self.resource.content_root()))
     print "- CREATING DIR..."
     subprocess.call("mkdir -vp %s" % work_folder, shell=True)
     # run downloader
     urltodownload = str(self.resource.resource_download_url)
     basetarget = str(work_folder)
     try:
         downloader(urltodownload, basetarget)
         self.resource.status = 'downloaded'
         size = folder_size(work_folder)
         self.resource.size = size
         contents = os.listdir(work_folder)
         if contents:
             self.resource.trigger = contents[0]
             self.resource.trigger_extensions = self.resource.trigger.rsplit('.', 1)[-1]  # file extension
             self.resource.resource_downloaded_file = self.resource.trigger
     except:
         self.resource.status = 'error'
         self.resource.save()
         raise
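A side note on the mkdir call above: building a shell string with %s breaks on paths containing spaces or shell metacharacters. A sketch of a safer equivalent that avoids the shell entirely (not the project's actual code):

import errno
import os


def ensure_dir(path):
    # Create path and any missing parents; tolerate it already existing.
    try:
        os.makedirs(path)
    except OSError as exc:
        if exc.errno != errno.EEXIST:
            raise

Because os.makedirs takes the path as a plain argument, no quoting or escaping is needed.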
Example #3
 def post_save(self):
     """Called when the dataset save process (tar, dumps, etc.) is done."""
     # get_option may return its default, so default to '' (a list has no
     # .split) and drop empty entries when the option is unset
     ftp_backup = [h for h in self.get_option('ftp_backup', '').split(',') if h]
     for host in ftp_backup:
         host_options = self.savior.hosts[host]
         kwargs = {
             "dataset_name": self.name,
             "local_saves_directory": self.savior.save_path,
             "dataset_save_id": self.savior.stamp_str,
         }
         connector = mapping.MAPPING['ftpupload'](
                 host_options=host_options,
                 **kwargs 
                 )
         connector.upload()
     keep_local_saves = self.convert_to_boolean(self.get_option('keep_local_saves'))
     self.size += folder_size(self.current_save_directory, human=False)
     if not keep_local_saves:
         self.remove_local_save()
     self.remove_old_saves()
     return True
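The upload step resolves a backend class out of mapping.MAPPING and instantiates it with host options plus dataset metadata. The real ftpupload connector is not shown; this stub only illustrates the dispatch pattern, and every name in it is an assumption:

class FTPUpload(object):
    def __init__(self, host_options=None, **kwargs):
        self.host_options = host_options or {}
        self.kwargs = kwargs

    def upload(self):
        # A real connector would push the local saves directory named in
        # self.kwargs to the FTP host described by self.host_options.
        pass


MAPPING = {'ftpupload': FTPUpload}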
Example #4
def upload_manager():
    global config, default_check_interval
    try:
        default_check_interval = config['local_folder_check_interval']
        logger.debug("Started upload manager for %r", config['local_folder'])
        while True:
            time.sleep(60 * config['local_folder_check_interval'])

            # restore the check interval after the extended sleep (25 hours)
            # that follows a rate-limit ban
            if config['local_folder_check_interval'] == 1500:
                config['local_folder_check_interval'] = default_check_interval
                logger.info(
                    "Restored local_folder_check_interval to %d minutes after an extended sleep (25 hours) due "
                    "to the last upload being cancelled due to rate limits!",
                    config['local_folder_check_interval'])
                if config['pushover_app_token'] and config[
                        'pushover_user_token']:
                    utils.send_pushover(
                        config['pushover_app_token'],
                        config['pushover_user_token'],
                        "local_folder_check_interval has been reset back to %d minutes after a 25 hour "
                        "sleep due to ratelimits!" %
                        config['local_folder_check_interval'])

            logger.debug("Checking size of %r", config['local_folder'])
            size = utils.folder_size(config['local_folder'],
                                     config['du_excludes'])
            if size is not None and size > 0:
                if size >= config['local_folder_size']:
                    logger.debug("Local folder has %d gigabytes, %d too many!",
                                 size, size - config['local_folder_size'])

                    # check if files are opened, skip this upload if so
                    opened_files = utils.opened_files(config['local_folder'],
                                                      config['lsof_excludes'])
                    if opened_files:
                        for item in opened_files:
                            logger.debug("File is being accessed: %r", item)
                        logger.debug(
                            "Local folder has %d file(s) open, skipping upload until next check...",
                            len(opened_files))
                        # send skip notification
                        if config['pushover_app_token'] and config[
                                'pushover_user_token']:
                            utils.send_pushover(
                                config['pushover_app_token'],
                                config['pushover_user_token'],
                                "Upload process of %d gigabytes temporarily skipped.\n"
                                "%d file(s) are currently being accessed." %
                                (size, len(opened_files)))
                        continue

                    # remove _HIDDEN~ markers before upload
                    # (we don't want to delete a hidden file from the remote
                    # after it has already been replaced)
                    logger.debug("Purging _HIDDEN~ before upload commences")
                    remove_hidden()

                    # send start notification
                    if config['pushover_app_token'] and config[
                            'pushover_user_token']:
                        utils.send_pushover(
                            config['pushover_app_token'],
                            config['pushover_user_token'],
                            "Upload process started. %d gigabytes to upload." %
                            size)

                    # rclone move local_folder to local_remote
                    logger.debug("Moving data from %r to %r...",
                                 config['local_folder'],
                                 config['local_remote'])
                    upload_cmd = utils.rclone_move_command(
                        config['local_folder'], config['local_remote'],
                        config['rclone_transfers'], config['rclone_checkers'],
                        config['rclone_bwlimit'], config['rclone_excludes'],
                        config['rclone_chunk_size'], config['dry_run'])
                    logger.debug("Using: %r", upload_cmd)

                    start_time = timeit.default_timer()
                    utils.run_command(upload_cmd, config)
                    time_taken = timeit.default_timer() - start_time
                    logger.debug("Moving finished in %s",
                                 utils.seconds_to_string(time_taken))

                    # remove empty directories
                    if config['rclone_remove_empty_on_upload']:
                        time.sleep(5)
                        utils.remove_empty_directories(config)

                    new_size = utils.folder_size(config['local_folder'],
                                                 config['du_excludes'])
                    logger.debug("Local folder is now left with %d gigabytes",
                                 new_size)

                    # send finish notification
                    if config['pushover_app_token'] and config[
                            'pushover_user_token']:
                        utils.send_pushover(
                            config['pushover_app_token'],
                            config['pushover_user_token'],
                            "Upload process finished in %s. %d gigabytes left over."
                            % (utils.seconds_to_string(time_taken), new_size))

                else:
                    logger.debug(
                        "Local folder is still under the max size by %d gigabytes",
                        config['local_folder_size'] - size)

    except Exception:
        logger.exception("Exception occurred in upload manager")
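Here folder_size takes an excludes argument and its result is compared against thresholds in gigabytes, which suggests a du-backed helper roughly like the following (an assumption; the project's utils module is not shown):

import logging
import subprocess

logger = logging.getLogger(__name__)


def folder_size(path, excludes=None):
    # Return the size of path in whole gigabytes via GNU du, or None on failure.
    cmd = ['du', '-s', '--block-size=1G']
    for pattern in excludes or []:
        cmd.append('--exclude=%s' % pattern)
    cmd.append(path)
    try:
        output = subprocess.check_output(cmd).decode('utf-8')
        return int(output.split()[0])
    except (subprocess.CalledProcessError, OSError, ValueError, IndexError):
        logger.exception("Unable to measure %r", path)
        return None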
Example #5
    def handle(self, *args, **options):
        # REGISTERING SOURCE
        source, created = Source.objects.get_or_create(pk=SOURCE_ID, url=SOURCE_URL, slug=SOURCE_SLUG, name=SOURCE_NAME)
        logger.info("SOURCE: %s, Created: %s " % (source, created))
        # pagesets as arguments
        sync = options.get('sync')
        get = options.get('get')
        nodownload = options.get('nodownload')
        force_download = options.get('force_download')
        range_values = options.get('range_values')
        # GET SPECIFIC ITEMS, AND DO SOMETHING
        if sync:
            if args:
                print "ARGS:",args
                user = args[0]
                language_code = YOUTUBE_USERS[user]
                language,created = Language.objects.get_or_create(code=language_code)
            # try to get the source from database. this will create
            source, created = Source.objects.get_or_create(pk=SOURCE_ID, url=SOURCE_URL, slug=SOURCE_SLUG, name=SOURCE_NAME)
            print "Source created?",created
            # get total of videos per user:
            try:
                logger.info("USER: %s, LANGUAGE: %s" % (user, language))
                print "GETTING TOTAL OF VIDEOS..."
                BASE_URL = "https://gdata.youtube.com/feeds/api/users/%s/uploads" % user
                f = urllib2.urlopen(BASE_URL)
                data = f.read()
                f.close()
                p = parseString(data)
                a = p.getElementsByTagName('openSearch:totalResults')
                try:
                    total_items = int(a[0].childNodes[0].data)
                    logger.info("TOTAL VIDEOS: %d" % total_items)
                    # loop in all items
                    for index in range(1, total_items, 50):
                        logger.info("ITEM INDEX ID: %d" % index)
                        MOD_URL = BASE_URL + "?start-index=" + str(index) + "&max-results=50"
                        logger.info("HITTING: %s" % MOD_URL)
                        f = urllib2.urlopen(MOD_URL)
                        data = f.read()
                        f.close()
                        p = parseString(data)
                        urls = []
                        # debug
                        print "URLS"
                        for entry in p.getElementsByTagName("entry"):
                            print entry.getElementsByTagName('id')[0].childNodes[0].data
                        for entry in p.getElementsByTagName("entry"):
                            url = entry.getElementsByTagName('id')[0].childNodes[0].data
                            title = entry.getElementsByTagName('title')[0].childNodes[0].data
                            youtubeid = url.split("/")[-1]
                            youtube_url = "http://www.youtube.com/watch?v=%s" % youtubeid
                            logger.info("URL to HIT: %s" % youtube_url)
                            # get or create resource
                            resource,created = Resource.objects.get_or_create(
                                resource_reference_string=youtubeid, source=source, resource_url=youtube_url, language=language
                            )
                            resource.category = Category.objects.filter(code__in=['video', 'video-class'])
                            if not os.path.isdir(resource.content_root_path()):
                                try:
                                    os.makedirs(resource.content_root_path())
                                except:
                                    print "ERROR! CANT CREATE %s!" %  resource.content_root_path()
                                    raise

                            logger.info("GRID: %s, CREATED: %s STATUS: %s" % (resource.id, created, resource.status))
                            if resource.status == "installed":
                                logger.info("installed. passing")
                            else:                                
                                #get more data from youtube
                                json_url = "http://gdata.youtube.com/feeds/api/videos/%s?v=2&alt=jsonc" % youtubeid
                                request = urllib2.urlopen(json_url)
                                json_data = json.load(request)
                                description = json_data['data'].get('description')
                                duration = json_data['data'].get('duration')
                                resource_pageviews = json_data['data'].get('viewCount')
                                #tags = ', '.join(json_data['data'].get('tags'))
                                tags = ''
                                # download using : https://github.com/NFicano/python-youtube-download
                                if downloader == "youtube.py":
                                    yt = youtube.YouTube()
                                    yt.url = youtube_url
                                    print "URL",yt.url
                                    yt.filename = youtubeid
                                    yt.filter("mp4")[0].download(resource.content_root_path(), youtubeid)
                                    resource.trigger = "%s.mp4" % yt.filename
                                    reload(youtube)
                                else:
                                    dlcmd = 'python %s/youtube-dl.py -c --write-info-json --write-description -f 18 %s' % (settings.INSTANCE(), youtube_url)
                                    logger.info("COMMAND: %s" % dlcmd)
                                    resource.create_content_root()
                                    os.chdir(resource.content_root_path())
                                    try:
                                        p = subprocess.call(dlcmd, shell=True)
                                        resource.status = "installed"
                                        resource.enabled = True
                                        resource.trigger = "%s.%s" % (youtubeid, "mp4")
                                    except:
                                        resource.enabled = False
                                        resource.status = "error"
                                resource.tags = tags
                                resource.title = title
                                resource.author = "http://www.youtube.com/user/%s" % user
                                resource.duration = duration
                                resource.size = folder_size(resource.content_root_path())
                                resource.resource_pageviews = resource_pageviews
                                resource.save()
                                # generate thumbs
                                resource.generate_thumb()
                except:
                    print "ERROR!"
                    raise

            except:
                print "ERROR, USER NOT LISTED ON SCRIPT"
                print "OPTIONS ARE --sync: %s" % ", ".join(YOUTUBE_USERS)
                raise
        else:
            print "OPTIONS ARE --sync: %s" % ", ".join(YOUTUBE_USERS) 
            
            
            
Example #6
 def handle(self, *args, **options):
     # pagesets as arguments
     if args:
         try:
             start,finish = args[0].split(',')
         except:
             pass
     else:
         start,finish = 1,SOURCE_TOTAL_PAGES
     sync = options.get('sync')
     get = options.get('get')
     nodownload = options.get('nodownload')
     force_download = options.get('force_download')
     range_values = options.get('range_values')
     if get:
         grids = get[0].split(",")
         for grid in grids:
             print "GRID:",grid
             resource = Resource.objects.get(pk=grid)
             resourceitem = PortalDoProfessorItem(resource, False)
             print "GRID TITLE: %s" % resourceitem.title
             resourceitem.download()                
     if sync:
         # try to get the source from database
         source, created = Source.objects.get_or_create(pk=SOURCE_ID, url=SOURCE_URL, slug=SOURCE_SLUG, name=SOURCE_NAME)
         logger.info("Source created? %s" %created)
         print "TOTAL PAGES: %s" % SOURCE_TOTAL_PAGES
         all_pages = range(SOURCE_TOTAL_PAGES)
         #all_pages.reverse()
         for page in all_pages[int(start):int(finish)]:
             logger.info("PAGE %s" % page)
             url = "http://portaldoprofessor.mec.gov.br/recursos.html?pagina=%s&tamanhoPagina=%s&ajax" % (page, SOURCE_ITEMS_PER_PAGE)
             logger.info("hitting %s" % url)
             f = urllib.urlopen(url)
             s = f.read()
             f.close()
             logger.info("parsing...")
             print "parsing..."
             soup = BeautifulSoup(s)
             in_page_items = len(soup.findAll('tr'))
             logger.info("IN_PAGE_ITEMS: %s" % in_page_items)
             # for each individual resource
             i = 0
             for resource_item in range(1,in_page_items):
                 logger.info("#######"*4)
                 try:
                     id = soup('tr')[resource_item].first('a').attrs[0][1].split('=')[1]
                     cat = soup('tr')[resource_item].findAll('img')[0].attrs[0][1].split("/")[1].split("_")[1].split(".")[0]
                 except:
                     id = "error%s" % i
                     i += 1
                     cat = ''
                 # resource informations
                 resource_url = "%sfichaTecnica.html?id=%s" % (SOURCE_URL, id)
                 resource,created = Resource.objects.get_or_create(
                     resource_reference_string=id, source=source, resource_url=resource_url
                 )
                 first_status = resource.status
                 logger.info("Created? %s" % created)
                 logger.info("DBITEM? %s" % resource.pk)
                 logger.info("PAGE? %s" % page)
                 logger.info("FIRST STATUS: %s" % first_status)
                 try:
                     category_object = Category.objects.get(pk=CATEGORY_DICT[cat])
                 except:
                     category_object = ""
                 if resource.status != 'installed' and resource.status != 'downloaded' and resource.status != 'error':
                     resource.status = "processing"
                 # START CLASS
                 r = PortalDoProfessorItem(resource, created)
                 r.parse()
                 logger.info("TITLE: %s" % r.title)
                 try:
                     r.resource.category.add(category_object)
                 except:
                     pass
                 try:
                     r.save()
                 except Exception, e:
                     logger.error('ERROR PARSING ID: %d', r.resource.pk)
                     r.resource.status = 'error'
                     logger.error('EXCEPTION: %s', e)
                     # even here the tag field can be truncated
                     # and break the save
                     try:
                         r.save()
                     except Exception, e:
                         logger.error('EXCEPTION: %s', e)
                         r.resource.tags = ''
                         try:
                             r.resource.save()
                         except:
                             pass
                 if nodownload:
                     logger.info("NOT DOWNLOADING! STATUS: %s" % r.resource.status)
                 else:
                     if force_download or first_status != 'downloaded' and first_status != 'installed' and first_status != 'error':
                         try:
                             logger.info("FORCING DOWNLOAD? %s, FIRST STATUS: %s" % (force_download, first_status))
                             r.download()
                             r.resource.status = 'downloaded'
                             r.resource.save()
                             r.finish()
                         except Exception, e:
                             logger.error("ERROR DOWNLOADING")
                             logger.error('EXCEPTION: %s', e)
                             r.resource.status = 'error'
                             try:
                                 r.resource.save()
                             except Exception, e:
                                 logger.error('EXCEPTION: %s', e)
                                 logger.error("DEAD END")
                                 pass
                     else:
                         logger.info("-- CONTENT ALREADY MARKED AS DOWNLOADED")
                         r.size = folder_size(r.resource.content_root())
                         try:
                             r.save()
                         except: 
                             pass
Example #7
 def handle(self, *args, **options):
     # pagesets as arguments
     if args:
         try:
             start, finish = args[0].split(',')
         except:
             pass
     else:
         start, finish = 1, SOURCE_TOTAL_PAGES
     sync = options.get('sync')
     get = options.get('get')
     nodownload = options.get('nodownload')
     force_download = options.get('force_download')
     range_values = options.get('range_values')
     if get:
         grids = get[0].split(",")
         for grid in grids:
             print "GRID:", grid
             resource = Resource.objects.get(pk=grid)
             resourceitem = PortalDoProfessorItem(resource, False)
             print "GRID TITLE: %s" % resourceitem.title
             resourceitem.download()
     if sync:
         # try to get the source from database
         source, created = Source.objects.get_or_create(pk=SOURCE_ID,
                                                        url=SOURCE_URL,
                                                        slug=SOURCE_SLUG,
                                                        name=SOURCE_NAME)
         logger.info("Source created? %s" % created)
         print "TOTAL PAGES: %s" % SOURCE_TOTAL_PAGES
         all_pages = range(SOURCE_TOTAL_PAGES)
         #all_pages.reverse()
         for page in all_pages[int(start):int(finish)]:
             logger.info("PAGE %s" % page)
             url = "http://portaldoprofessor.mec.gov.br/recursos.html?pagina=%s&tamanhoPagina=%s&ajax" % (
                 page, SOURCE_ITEMS_PER_PAGE)
             logger.info("hitting %s" % url)
             f = urllib.urlopen(url)
             s = f.read()
             f.close()
             logger.info("parsing...")
             print "parsing..."
             soup = BeautifulSoup(s)
             in_page_items = len(soup.findAll('tr'))
             logger.info("IN_PAGE_ITEMS: %s" % in_page_items)
             # for each individual resource
             i = 0
             for resource_item in range(1, in_page_items):
                 logger.info("#######" * 4)
                 try:
                     id = soup('tr')[resource_item].first(
                         'a').attrs[0][1].split('=')[1]
                     cat = soup('tr')[resource_item].findAll(
                         'img')[0].attrs[0][1].split("/")[1].split(
                             "_")[1].split(".")[0]
                 except:
                     id = "error%s" % i
                     i += 1
                     cat = ''
                 # resource informations
                 resource_url = "%sfichaTecnica.html?id=%s" % (SOURCE_URL,
                                                               id)
                 resource, created = Resource.objects.get_or_create(
                     resource_reference_string=id,
                     source=source,
                     resource_url=resource_url)
                 first_status = resource.status
                 logger.info("Created? %s" % created)
                 logger.info("DBITEM? %s" % resource.pk)
                 logger.info("PAGE? %s" % page)
                 logger.info("FIRST STATUS: %s" % first_status)
                 try:
                     category_object = Category.objects.get(
                         pk=CATEGORY_DICT[cat])
                 except:
                     category_object = ""
                 if resource.status != 'installed' and resource.status != 'downloaded' and resource.status != 'error':
                     resource.status = "processing"
                 # START CLASS
                 r = PortalDoProfessorItem(resource, created)
                 r.parse()
                 logger.info("TITLE: %s" % r.title)
                 try:
                     r.resource.category.add(category_object)
                 except:
                     pass
                 try:
                     r.save()
                 except Exception, e:
                     logger.error('ERROR PARSING ID: %d', r.resource.pk)
                     r.resource.status = 'error'
                     logger.error('EXCEPTION: %s', e)
                     # even here the tag field can be truncated
                     # and break the save
                     try:
                         r.save()
                     except Exception, e:
                         logger.error('EXCEPTION: %s', e)
                         r.resource.tags = ''
                         try:
                             r.resource.save()
                         except:
                             pass
                 if nodownload:
                     logger.info("NOT DOWNLOADING! STATUS: %s" %
                                 r.resource.status)
                 else:
                     if force_download or first_status != 'downloaded' and first_status != 'installed' and first_status != 'error':
                         try:
                             logger.info(
                                 "FORCING DOWNLOAD? %s, FIRST STATUS: %s" %
                                 (force_download, first_status))
                             r.download()
                             r.resource.status = 'downloaded'
                             r.resource.save()
                             r.finish()
                         except Exception, e:
                             logger.error("ERROR DOWNLOADING")
                             logger.error('EXCEPTION: %s', e)
                             r.resource.status = 'error'
                             try:
                                 r.resource.save()
                             except Exception, e:
                                 logger.error('EXCEPTION: %s', e)
                                 logger.error("DEAD END")
                                 pass
                     else:
                         logger.info(
                             "-- CONTENT ALREADY MARKED AS DOWNLOADED")
                         r.size = folder_size(r.resource.content_root())
                         try:
                             r.save()
                         except:
                             pass
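The nested try/except blocks in both handle variants implement a save-with-fallback: if a save fails (the inline comment blames an over-long tags field), the tags are cleared and the save is retried once. The same idea as a compact helper, sketched here rather than taken from the project:

def save_with_tag_fallback(item, logger):
    # Try a normal save; on failure clear the tags field and retry once.
    try:
        item.save()
        return True
    except Exception as e:
        logger.error('EXCEPTION: %s', e)
        item.resource.tags = ''
        try:
            item.resource.save()
            return True
        except Exception as e:
            logger.error('EXCEPTION: %s', e)
            return False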
Example #8
def index(request):
    pip = os.path.join(sys.exec_prefix, 'bin', 'pip')
    if not os.path.isfile(pip):
        pip = 'pip'
    SHELL_COMMANDS = (('Hostname', 'hostname'), ('hg version', 'hg id'),
                      ('git version', "git log --pretty=format:'%h' -n 1"),
                      ('hg branch', 'hg branch'),
                      ('git branch', 'git rev-parse --abbrev-ref HEAD'),
                      ('MySQL version', 'mysql --version'),
                      ('Local Packages', '%s freeze -l' % pip))
    SD = OrderedDict()
    for k, v in sorted(settings_list(), key=lambda x: x[0]):
        SD[k] = v
    context = RequestContext(request, {
        'args': sys.argv,
        'exe': sys.executable,
        'settings': SD,
    })

    context['versions'] = OrderedDict()
    # get versions
    curr_dir = os.path.realpath(os.path.dirname(__file__))
    for name, shell_command in SHELL_COMMANDS:
        try:
            result = utils.run_shell_command(shell_command, curr_dir)
            if result:
                if isinstance(result, list):
                    result = '<br>'.join(result)  # join list output into one HTML string
                context['versions'][name] = result
        except:
            pass
    # machine status
    context['machine'] = OrderedDict()
    if sys.platform == 'darwin':
        context['machine']['Uptime'] = 'not done yet on MacOS'
        context['machine']['Disk Space'] = 'not done yet on MacOS'
    elif sys.platform == 'win32':
        context['machine']['Uptime'] = 'not done yet on Windows'
        context['machine']['Disk Space'] = 'not done yet on Windows'
    else:
        context['machine']['Uptime'] = utils.server_uptime()
        context['machine']['Disk Space'] = utils.disk_usage('/')._asdict()
    if os.path.exists(settings.MEDIA_ROOT):
        context['machine']['Media Folder'] = utils.sizeof_fmt(
            utils.folder_size(settings.MEDIA_ROOT))

    context['stats'] = utils.get_available_stats()
    context['apps'] = [(app.__name__,
                        ', '.join([model.__name__ for model in models]))
                       for app, models in all_concrete_models()]
    context['relations'] = [[(model.__name__, ', '.join([
        '%s (%s) through %s' %
        (relation.__name__, relation.__module__, field.__class__.__name__)
        for field, relation in relations
    ]), app.__name__) for model, relations in rel_info]
                            for app, rel_info in all_relations()]
    #context['rel_graph'] =

    context['config_warnings'] = utils.get_configuration_warnings()

    return render_to_response('dashboard/index.html', context)
Example #9
def org_clone(org):
    """Clone all public non-forked repos from the specified org.

    Repos are cloned to subfolders under the 'folder' setting in config.json.
    """
    # optional list of org/repos to be skipped ...
    if os.path.isfile("skiplist.txt"):
        skiplist = open("skiplist.txt").read().lower().splitlines()
    else:
        skiplist = []

    print("Org".ljust(21) + "Repo".ljust(61) +
          "KB estimate  KB actual  seconds KB/sec")
    print(20 * "-" + " " + 60 * "-" + " " +
          "----------- ----------- ------- -------")
    # if log file doesn't exist, create it
    logfile = os.path.join(SETTINGS["folder"], "logfile.csv")
    if not os.path.isfile(logfile):
        open(logfile, "w").write(
            "datetime,org,repo,KB-estimate,KB-actual,seconds,KB/second\n")

    org_folder = os.path.join(SETTINGS["folder"], org)
    if SETTINGS["overwrite"]:
        folder_del(org_folder)  # delete existing org data
        os.makedirs(org_folder)  # create empty org folder
    else:
        # In non-overwrite mode, only create org folder if it doesn't exist.
        if not os.path.exists(org_folder):
            os.makedirs(org_folder)

    tot_estimate = 0  # total estimated repo size (from GitHub API)
    tot_actual = 0  # total actual size on disk
    tot_seconds = 0  # total elapsed time

    for repo, size_api in repolist(org):

        if f"{org}/{repo}".lower() in skiplist:
            continue  # repos in skiplist are not cloned

        start = default_timer()
        folder = os.path.join(org_folder, repo)

        if not SETTINGS["overwrite"]:
            # Don't clone this repo if target folder exists and is non-empty.
            if non_empty_folder(folder):
                continue

        print(f"{org:20} {repo:60}   ", end="")

        Repo.clone_from("https://github.com/" + org + "/" + repo + ".git",
                        folder)

        size_actual = folder_size(folder) / 1024
        elapsed = default_timer() - start

        tot_estimate += size_api
        tot_actual += size_actual
        tot_seconds += elapsed

        print(
            f"{size_api:9,.0f}   {size_actual:9,.0f} {elapsed:7.2f} {size_actual/elapsed:7.0f}"
        )

        timestamp = str(datetime.datetime.now())[:19]
        open(logfile, "a").write(",".join([
            timestamp,
            org,
            repo,
            str(round(size_api)),
            str(round(size_actual)),
            str(round(elapsed, 2)),
            str(round(size_actual / elapsed)),
        ]) + "\n")

    avg_kb_per_second = 0 if tot_seconds == 0 else tot_actual / tot_seconds
    print("TOTALS:".rjust(84) + f"{tot_estimate:9,.0f}   {tot_actual:9,.0f} "
          f"{tot_seconds:7.2f} {avg_kb_per_second:7.0f}\n")