示例#1
0
文件: dashclient.py 项目: Neyt/portia
def deploy_project(name, apikey, changed_files=None, repo=None,
                   branch='master'):
    """Zip up a Git-backed project and push the archive to Dash.

    :param name: project name; used to open the repository when ``repo`` is
        not given, sent as the ``project`` request parameter and appended
        to the returned URL.
    :param apikey: sent as the ``apikey`` request parameter.
    :param changed_files: optional iterable of repository paths. When given,
        only the spiders named by the paths starting with ``spiders/`` are
        handed to the archiver; when ``None`` the archiver receives ``None``.
    :param repo: already opened repository; opened via ``Repoman`` if ``None``.
    :param branch: branch passed to the archiver.
    :returns: ``{'status': 'ok', 'schedule_url': <project url>}`` when the
        upload answers with HTTP 200.
    :raises DeployError: for any other HTTP status; the message carries the
        response body.
    """
    project_repo = Repoman.open_repo(name) if repo is None else repo
    archiver = GitProjectArchiver(
        project_repo,
        branch=branch,
        ignore_deleted=False,
        version=(0, 9),
        required_files=REQUIRED_FILES,
    )

    # None is forwarded untouched; otherwise collect the spider names that
    # correspond to the changed spider paths.
    if changed_files is None:
        spider_names = None
    else:
        spider_names = set()
        for path in changed_files:
            if path.startswith('spiders/'):
                spider_names.add(archiver._spider_name(path))

    zipped = archiver.archive(spider_names)
    query = {'apikey': apikey, 'project': name}
    upload = ('archive', zipped, 'application/zip')
    response = requests.post(
        DASH_API_URL + 'as/import.json?version=portia',
        files=[('archive', upload)],
        params=query
    )

    if response.status_code != 200:
        raise DeployError('Deploy to Dash failed: %s' % response.text)

    # Drop the last two '/'-separated pieces of the API URL and point at the
    # project page instead.
    base_url = DASH_API_URL.rsplit('/', 2)[0]
    return {
        'status': 'ok',
        'schedule_url': base_url + '/p/' + name
    }
示例#2
0
def _archive_project(name, buff):
    """Archive a project stored in GIT into a zip file.

    Every file on the ``master`` branch of the project's repository is
    read. Files whose path does not start with ``spiders`` are copied into
    the archive unchanged. Spider definitions (``spiders/<spider>.json``)
    are written with their templates (``spiders/<spider>/<template>.json``)
    embedded under the ``templates`` key and with ``template_names``
    removed. In each template, extractor ids that are not keys of
    ``extractors.json`` are dropped.

    :param name: project name handed to ``Repoman.open_repo``.
    :param buff: target handed to ``zipfile.ZipFile`` (opened in "w" mode).
    """
    repo = Repoman.open_repo(name)
    now = datetime.now().timetuple()[:6]
    archive = zipfile.ZipFile(buff, "w", zipfile.ZIP_DEFLATED)
    # try/finally so the zip is closed even when reading or parsing a
    # repository file fails half-way through.
    try:
        files_list = repo.list_files_for_branch('master')
        extractors = {}
        if 'extractors.json' in files_list:
            extractors = json.loads(
                repo.file_contents_for_branch('extractors.json', 'master'))

        seen_files = set()
        spiders = {}
        templates = defaultdict(list)
        for file_path in files_list:
            file_contents = repo.file_contents_for_branch(file_path, 'master')
            if not file_path.startswith('spiders'):
                _add_to_archive(archive, file_path, file_contents, now)
                seen_files.add(file_path)
                continue

            # The parse has to sit inside the try: previously it was outside,
            # so the ValueError handler could never catch a malformed file.
            try:
                as_json = json.loads(file_contents)
            except ValueError:
                continue

            parts = file_path.split("/")
            if len(parts) == 2:
                # spider json
                spider_name = parts[1].rsplit(".", 1)[0]
                spiders[spider_name] = file_path, as_json
            elif len(parts) == 3:
                # template json: keep only extractor ids that are defined
                declared = as_json.get('extractors', {})
                as_json['extractors'] = {
                    field: [eid for eid in eids if eid in extractors]
                    for field, eids in declared.items()
                }
                templates[parts[1]].append(as_json)
            seen_files.add(file_path)

        # Add empty placeholders for missing files required by dash
        for file_path in {'extractors.json', 'items.json'} - seen_files:
            _add_to_archive(archive, file_path, '{}', now)

        for spider_name, (path, json_spec) in spiders.items():
            # A spider without 'template_names' must not abort the archive.
            json_spec.pop('template_names', None)
            json_spec['templates'] = templates[spider_name]
            _add_to_archive(archive, path, json.dumps(json_spec), now)
    finally:
        archive.close()
示例#3
0
def _archive_project(name, buff, files=None, repo=None, branch='master'):
    """Archive a project stored in GIT into a zip file.

    Files whose path does not start with ``spiders`` are copied unchanged.
    For every spider touched by the processed paths, the spider definition
    ``spiders/<spider>.json`` is written once, with all of its templates
    found on the branch embedded under ``templates`` and ``template_names``
    removed. Extractor ids missing from ``extractors.json`` are dropped
    from each template. Empty ``{}`` placeholders are added for
    ``extractors.json`` and ``items.json`` when they were not processed.

    :param name: project name; only used to open the repository when
        ``repo`` is ``None``.
    :param buff: target handed to ``zipfile.ZipFile`` (opened in "w" mode).
    :param files: optional list of paths to archive; defaults to every file
        on ``branch``. Templates are always looked up in the full listing.
    :param repo: already opened repository, or ``None`` to open it by name.
    :param branch: branch to read file listings and contents from.
    """
    if repo is None:
        repo = Repoman.open_repo(name)
    now = datetime.now().timetuple()[:6]
    archive = zipfile.ZipFile(buff, "w", zipfile.ZIP_DEFLATED)
    # try/finally so the zip is closed even if a repository read fails.
    try:
        all_files = repo.list_files_for_branch(branch)
        files_list = all_files if files is None else files

        # Group every path with more than two components by its second
        # component (the spider directory name for spiders/<name>/<tpl>).
        template_paths = defaultdict(list)
        for file_path in all_files:
            split_file_path = file_path.split('/')
            if len(split_file_path) > 2:
                template_paths[split_file_path[1]].append(file_path)
        extractors = json.loads(
            repo.file_contents_for_branch('extractors.json', branch) or '{}')

        seen_files = set()
        spiders = set()
        for file_path in files_list:
            if not file_path.startswith('spiders'):
                file_contents = repo.file_contents_for_branch(file_path,
                                                              branch)
                _add_to_archive(archive, file_path, file_contents, now)
                seen_files.add(file_path)
                continue

            try:
                parts = file_path.split("/")
                if len(parts) >= 2:
                    spider_name = parts[1]
                    if spider_name.endswith('.json'):
                        spider_name = spider_name[:-5]
                    if spider_name not in spiders:
                        # A template path stands in for its spider file.
                        if len(parts) > 2:
                            file_path = 'spiders/' + spider_name + '.json'
                        # NOTE(review): a missing file presumably comes back
                        # as None, making json.loads raise TypeError, which
                        # is handled below as "deleted" -- confirm.
                        as_json = json.loads(
                            repo.file_contents_for_branch(file_path, branch))
                        templates = []
                        for template_path in template_paths.get(spider_name,
                                                                []):
                            seen_files.add(template_path)
                            # Ignore deleted or unparsable templates. The
                            # parse must be inside this try, otherwise its
                            # TypeError reaches the outer handler and the
                            # whole spider is reported as deleted.
                            try:
                                json_template = json.loads(
                                    repo.file_contents_for_branch(
                                        template_path, branch))
                            except (TypeError, ValueError):
                                continue
                            # Keep only extractor ids that are defined
                            declared = json_template.get('extractors', {})
                            json_template['extractors'] = {
                                field: [eid for eid in eids
                                        if eid in extractors]
                                for field, eids in declared.items()
                            }
                            templates.append(json_template)
                        # Record the suffix-less name so later paths of the
                        # same spider are recognised and not archived again.
                        spiders.add(spider_name)
                        as_json.pop('template_names', None)
                        as_json['templates'] = templates
                        _add_to_archive(archive, file_path,
                                        json.dumps(as_json), now)
            except TypeError:
                if not ALLOW_DELETE:
                    continue
                # Handle Deleted Spiders: the marker never depended on the
                # file's previous contents, so nothing else is read here.
                _add_to_archive(archive, file_path,
                                json.dumps({'deleted': True}), now)
            seen_files.add(file_path)

        # Add empty placeholders for missing files required by dash
        for file_path in {'extractors.json', 'items.json'} - seen_files:
            _add_to_archive(archive, file_path, '{}', now)
    finally:
        archive.close()
示例#4
0
def _archive_project(name, buff, files=None, repo=None, branch='master',
                     ignore_deleted=False):
    """Archive a project stored in GIT into a zip file.

    Files whose path does not start with ``spiders`` are copied unchanged
    when they have content. For every spider touched by the processed
    paths, the spider definition ``spiders/<spider>.json`` is written once,
    with all of its templates found on the branch embedded under
    ``templates`` and ``template_names`` removed. Extractor ids missing
    from ``extractors.json`` are dropped from each template. Empty ``{}``
    placeholders are added for ``extractors.json`` and ``items.json`` when
    they were not archived.

    :param name: project name; only used to open the repository when
        ``repo`` is ``None``.
    :param buff: target handed to ``zipfile.ZipFile`` (opened in "w" mode).
    :param files: optional list of paths to archive; defaults to every file
        on ``branch``. Templates are always looked up in the full listing.
    :param repo: already opened repository, or ``None`` to open it by name.
    :param branch: branch to read file listings and contents from.
    :param ignore_deleted: when true, a spider whose loading raises
        ``TypeError`` is skipped; otherwise ``{"deleted": true}`` is
        written at its path.
    """
    if repo is None:
        repo = Repoman.open_repo(name)
    now = datetime.now().timetuple()[:6]
    archive = zipfile.ZipFile(buff, "w", zipfile.ZIP_DEFLATED)
    # try/finally so the zip is closed even if a repository read fails.
    try:
        all_files = repo.list_files_for_branch(branch)
        files_list = all_files if files is None else files

        # Group every path with more than two components by its second
        # component (the spider directory name for spiders/<name>/<tpl>).
        template_paths = defaultdict(list)
        for file_path in all_files:
            split_file_path = file_path.split('/')
            if len(split_file_path) > 2:
                template_paths[split_file_path[1]].append(file_path)
        extractors = json.loads(
            repo.file_contents_for_branch('extractors.json', branch) or '{}')

        seen_files = set()
        spiders = set()
        for file_path in files_list:
            if not file_path.startswith('spiders'):
                file_contents = repo.file_contents_for_branch(file_path,
                                                              branch)
                # Files without content are left out of the archive.
                if file_contents:
                    _add_to_archive(archive, file_path, file_contents, now)
                    seen_files.add(file_path)
                continue

            try:
                parts = file_path.split("/")
                if len(parts) >= 2:
                    spider_name = parts[1]
                    if spider_name.endswith('.json'):
                        spider_name = spider_name[:-5]
                    if spider_name not in spiders:
                        # A template path stands in for its spider file.
                        if len(parts) > 2:
                            file_path = 'spiders/' + spider_name + '.json'
                        # NOTE(review): a missing file presumably comes back
                        # as None, making json.loads raise TypeError, which
                        # is handled below as "deleted" -- confirm.
                        as_json = json.loads(
                            repo.file_contents_for_branch(file_path, branch))
                        templates = []
                        for template_path in template_paths.get(spider_name,
                                                                []):
                            seen_files.add(template_path)
                            # Ignore deleted or unparsable templates. The
                            # parse must be inside this try, otherwise its
                            # TypeError reaches the outer handler and the
                            # whole spider is reported as deleted.
                            try:
                                json_template = json.loads(
                                    repo.file_contents_for_branch(
                                        template_path, branch))
                            except (TypeError, ValueError):
                                continue
                            # Keep only extractor ids that are defined
                            declared = json_template.get('extractors', {})
                            json_template['extractors'] = {
                                field: [eid for eid in eids
                                        if eid in extractors]
                                for field, eids in declared.items()
                            }
                            templates.append(json_template)
                        # Record the suffix-less name so later paths of the
                        # same spider are recognised and not archived again.
                        spiders.add(spider_name)
                        as_json.pop('template_names', None)
                        as_json['templates'] = templates
                        _add_to_archive(archive, file_path,
                                        json.dumps(as_json), now)
            except TypeError:
                if ignore_deleted:
                    continue
                # Handle Deleted Spiders: the marker never depended on the
                # file's previous contents, so nothing else is read here.
                _add_to_archive(archive, file_path,
                                json.dumps({'deleted': True}), now)

        # Add empty placeholders for missing files required by dash
        for file_path in {'extractors.json', 'items.json'} - seen_files:
            _add_to_archive(archive, file_path, '{}', now)
    finally:
        archive.close()
示例#5
0
 def test_create(self):
     """Print the file list and the changed files of branch 'marcos'.

     Opens the 'new_project_5' repository through ``self._connection``.
     Nothing is asserted; the method only prints what the repository
     reports.
     """
     repoman = Repoman.open_repo('new_project_5', self._connection)
     # A parenthesised single-argument print outputs the same text under
     # Python 2 and is also valid Python 3 syntax.
     print(repoman.list_files_for_branch('marcos'))
     print(repoman.get_branch_changed_files('marcos'))
示例#6
0
 def test_create(self):
     """Print the file list and the changed files of branch 'marcos'.

     Opens the 'new_project_5' repository through ``self._connection``.
     Nothing is asserted; the method only prints what the repository
     reports.
     """
     repoman = Repoman.open_repo('new_project_5', self._connection)
     # A parenthesised single-argument print outputs the same text under
     # Python 2 and is also valid Python 3 syntax.
     print(repoman.list_files_for_branch('marcos'))
     print(repoman.get_branch_changed_files('marcos'))