示例#1
0
def discoverNamespace(namespace):
    """Build the discovery response describing a namespace and its members.

    The namespace is accepted when its wikified form is listed by
    ``DumpdataProvider.getMembers("")`` or is the empty string; otherwise
    ``ResourceNotFoundException`` is raised.

    Returns a dict with the keys ``classifier``, ``name``, ``members`` and
    ``namespace``. The dict is also handed to ``setWikidata`` before the
    members are filled in.
    """
    # Reject only when the wikified name is neither a known member nor empty.
    is_known = wikifyNamespace(namespace) in DumpdataProvider.getMembers("")
    if not (is_known or wikifyNamespace(namespace) == ""):
        raise ResourceNotFoundException()

    response = {"classifier": "Namespace", "name": namespace, "members": []}

    # gather wiki data
    setWikidata(response, "Namespace", wikifyNamespace(namespace))
    response["namespace"] = "Namespace"

    # gather member data
    member_entries = response["members"]
    for member_name in DumpdataProvider.getMembers(namespace):
        resource = os.path.join(base_uri, namespace, member_name).replace(" ", "_")
        member_entries.append(
            {
                "resource": resource,
                "classifier": "Namespace member",
                "name": member_name,
            }
        )

    return response
示例#2
0
def discoverNamespace(namespace):
    """Build the discovery response describing a namespace and its members.

    The namespace is accepted when its wikified form is listed by
    ``DumpdataProvider.getMembers('')`` or is the empty string; otherwise
    ``ResourceNotFoundException`` is raised.

    Returns a dict with the keys ``classifier``, ``name``, ``members`` and
    ``namespace``. The dict is also handed to ``setWikidata`` before the
    members are filled in.
    """
    # Reject only when the wikified name is neither a known member nor empty.
    is_known = wikifyNamespace(namespace) in DumpdataProvider.getMembers('')
    if not (is_known or wikifyNamespace(namespace) == ''):
        raise ResourceNotFoundException()

    response = {'classifier': 'Namespace', 'name': namespace, 'members': []}

    # gather wiki data
    setWikidata(response, 'Namespace', wikifyNamespace(namespace))
    response['namespace'] = 'Namespace'

    # gather member data
    member_entries = response['members']
    for member_name in DumpdataProvider.getMembers(namespace):
        resource = os.path.join(base_uri, namespace, member_name).replace(' ', '_')
        member_entries.append({
            'resource': resource,
            'classifier': 'Namespace member',
            'name': member_name,
        })

    return response
示例#3
0
def discoverNamespaceMember(namespace, member):
    """Build the discovery response for one member of a namespace.

    Raises ``ResourceNotFoundException`` when the UTF-8 decoded member name
    is not among ``DumpdataProvider.getMembers(namespace)``.

    Returns a dict with ``folders``, ``files``, ``classifier``, ``name``,
    ``github`` and ``namespace``; a ``module`` entry is added when the
    namespace is ``"modules"``. The dict is also handed to ``setWikidata``.
    """
    if member.decode("utf_8") not in DumpdataProvider.getMembers(namespace):
        raise ResourceNotFoundException()

    github_url = DumpdataProvider.getGithub(namespace, member)
    response = {
        "folders": [],
        "files": [],
        "classifier": "Namespace member",
        "name": member,
        "github": github_url,
    }

    # gather wiki data
    wiki_namespace = wikifyNamespace(namespace)
    setWikidata(response, wiki_namespace, member)
    response["namespace"] = wiki_namespace

    # gather member data; spaces in the member name become underscores in the path
    listing_path = os.path.join(namespace, member.replace(" ", "_"))
    files, dirs = DumpdataProvider.getDirContent(listing_path)

    # folders are listed first, then files
    for classifier, names, bucket in (("Folder", dirs, "folders"), ("File", files, "files")):
        for entry_name in names:
            response[bucket].append(
                {
                    "resource": os.path.join(base_uri, listing_path, entry_name),
                    "classifier": classifier,
                    "name": entry_name,
                }
            )

    if namespace == "modules":
        response["module"] = DumpdataProvider.getModuleDescription(member)

    return response
示例#4
0
def discoverMemberPath(namespace, member, path):
    """Build the discovery response for a folder inside a namespace member.

    Returns a dict with ``folders``, ``files``, ``classifier``, ``name`` (the
    basename of ``path``), ``github`` and ``namespace``. When a GitHub value
    is available, ``path`` is joined onto it. The dict is also handed to
    ``setWikidata``.
    """
    folder_name = os.path.basename(path)

    # update github data: only extend the link when there is one
    github_url = DumpdataProvider.getGithub(namespace, member)
    if github_url:
        github_url = os.path.join(github_url, path)

    response = {
        "folders": [],
        "files": [],
        "classifier": "Folder",
        "name": folder_name,
        "github": github_url,
    }

    # gather wiki data
    wiki_namespace = wikifyNamespace(namespace)
    setWikidata(response, wiki_namespace, member)
    response["namespace"] = wiki_namespace

    # gather member data
    listing_path = os.path.join(namespace, member, path)
    files, dirs = DumpdataProvider.getDirContent(listing_path)

    # folders are listed first, then files
    for classifier, names, bucket in (("Folder", dirs, "folders"), ("File", files, "files")):
        for entry_name in names:
            response[bucket].append(
                {
                    "resource": os.path.join(base_uri, listing_path, entry_name),
                    "classifier": classifier,
                    "name": entry_name,
                }
            )

    return response
        def query_and_analyze_entity(entity):
            """Request an entity's resource and check the response against it.

            Returns a 4-tuple ``(entity, parsed_response, time_taken, error)``.
            On success ``parsed_response`` is the decoded JSON body and
            ``error`` is ``None``. On failure ``parsed_response`` is ``None``
            and ``error`` is an ``(error_name, message)`` pair.
            ``time_taken`` is the wall-clock duration of the request in
            seconds, or ``-1`` when the request was skipped because the
            resource URL had already been assigned.

            Errors while decoding the JSON body or reading its ``classifier``
            and ``name`` keys are not caught here and propagate to the caller.
            """
            # Check-and-register must be atomic; the lock is released on every
            # exit path, including the early return.
            with assigned_resources_mutex:
                if entity.resource in assigned_resources:
                    # URLs need to be unique for self.entity_errors
                    entity.resource += " (duplicate-" + str(uuid.uuid1()) + ")"
                    return entity, None, -1,\
                           ("ResourceAlreadyAssignedError",
                            "Resource url was already assigned to another "
                            "entity.")
                assigned_resources.append(entity.resource)

            time_before = time.time()
            try:
                # For some reason this call still produces a traceback if we
                # catch the exception further down, how to suppress these?
                # Seems like Django logging. Do we want to disable that for the
                # test? How?
                response = client.get(entity.resource, {
                    'format': 'json',
                    'validate': 'true'
                })
            except Exception as e:
                # Report any request failure as an error tuple named after
                # the exception type instead of aborting.
                time_taken = time.time() - time_before
                return entity, None, time_taken,\
                       (type(e).__name__, str(e))

            time_taken = time.time() - time_before

            if response.status_code != 200:
                return entity, None, time_taken,\
                       ("ResponseStatusNotOkError",
                        "Did not return status ok (200) but instead '{}'."
                        .format(response.status_code))

            parsed_response = json.loads(response.content)
            parsed_classifier = parsed_response['classifier']
            parsed_name = parsed_response['name']
            # Compare against the wikified name when wikifyNamespace yields a
            # usable one (non-empty and not the literal string 'None').
            wikified_name = wikifyNamespace(parsed_name)
            if wikified_name and wikified_name != 'None':
                parsed_name = wikified_name

            if entity.classifier != parsed_classifier:
                return entity, None, time_taken,\
                       ("WrongClassifierError",
                        "Did not have expected classifier '{}' but instead '{}'."
                        .format(entity.classifier, parsed_classifier))
            if entity.name != parsed_name:
                return entity, None, time_taken,\
                       ("WrongNameError",
                        "Did not have expected name '{}' but instead '{}'."
                        .format(entity.name, parsed_name))
            return entity, parsed_response, time_taken, None
        def query_and_analyze_entity(entity):
            """Request an entity's resource and check the response against it.

            Returns a 4-tuple ``(entity, parsed_response, time_taken, error)``.
            On success ``parsed_response`` is the decoded JSON body and
            ``error`` is ``None``. On failure ``parsed_response`` is ``None``
            and ``error`` is an ``(error_name, message)`` pair.
            ``time_taken`` is the wall-clock duration of the request in
            seconds, or ``-1`` when the request was skipped because the
            resource URL had already been assigned.

            Errors while decoding the JSON body or reading its ``classifier``
            and ``name`` keys are not caught here and propagate to the caller.
            """
            # Check-and-register must be atomic; the lock is released on every
            # exit path, including the early return.
            with assigned_resources_mutex:
                if entity.resource in assigned_resources:
                    # URLs need to be unique for self.entity_errors
                    entity.resource += " (duplicate-" + str(uuid.uuid1()) + ")"
                    return entity, None, -1,\
                           ("ResourceAlreadyAssignedError",
                            "Resource url was already assigned to another "
                            "entity.")
                assigned_resources.append(entity.resource)

            time_before = time.time()
            try:
                # For some reason this call still produces a traceback if we
                # catch the exception further down, how to suppress these?
                # Seems like Django logging. Do we want to disable that for the
                # test? How?
                response = client.get(entity.resource,
                                      {'format': 'json', 'validate': 'true'})
            except Exception as e:
                # Report any request failure as an error tuple named after
                # the exception type instead of aborting.
                time_taken = time.time() - time_before
                return entity, None, time_taken,\
                       (type(e).__name__, str(e))

            time_taken = time.time() - time_before

            if response.status_code != 200:
                return entity, None, time_taken,\
                       ("ResponseStatusNotOkError",
                        "Did not return status ok (200) but instead '{}'."
                        .format(response.status_code))

            parsed_response = json.loads(response.content)
            parsed_classifier = parsed_response['classifier']
            parsed_name = parsed_response['name']
            # Compare against the wikified name when wikifyNamespace yields a
            # usable one (non-empty and not the literal string 'None').
            wikified_name = wikifyNamespace(parsed_name)
            if wikified_name and wikified_name != 'None':
                parsed_name = wikified_name

            if entity.classifier != parsed_classifier:
                return entity, None, time_taken,\
                       ("WrongClassifierError",
                        "Did not have expected classifier '{}' but instead '{}'."
                        .format(entity.classifier, parsed_classifier))
            if entity.name != parsed_name:
                return entity, None, time_taken,\
                       ("WrongNameError",
                        "Did not have expected name '{}' but instead '{}'."
                        .format(entity.name, parsed_name))
            return entity, parsed_response, time_taken, None
示例#7
0
def discoverMemberFile(namespace, member, path, file):
    """Build the discovery response for a single file of a namespace member.

    Raises ``ResourceNotFoundException`` when ``DumpdataProvider.exists``
    reports the joined file path as missing.

    Returns a dict with ``geshi``, ``fragments``, ``classifier``, ``name``,
    ``github``, ``namespace`` and ``derived``. ``language`` is added when the
    metadata provides one, and ``content`` when a geshi code is defined.
    The dict is also handed to ``setWikidata`` and ``setCommitInfos``.
    """
    filePath = os.path.join(namespace, member, path, file)
    if not DumpdataProvider.exists(filePath):
        raise ResourceNotFoundException()

    # if no geshi code is defined, then we'll return basically "geshi : null" and nothing else
    locator, extractor, geshi, language = DumpdataProvider.getMetadata(filePath)

    response = {
        "geshi": geshi,
        "fragments": [],
        "classifier": "File",
        "name": file,
        "github": DumpdataProvider.getGithub(namespace, member),
    }

    if language:
        response["language"] = language

    # update github data
    if response["github"]:
        response["github"] = os.path.join(response["github"], path, file)

    # gather wiki data
    wikiNS = wikifyNamespace(namespace)
    setWikidata(response, wikiNS, member)

    response["namespace"] = wikiNS

    # gather member data - if there is a fact extractor, then we also want to
    # give back selectable fragments. An OSError while gathering them is
    # tolerated; fragments mapped before the failure are kept.
    try:
        extractedFacts = DumpdataProvider.getFacts(filePath, extractor)
        for fragment in extractedFacts.get("fragments", []):
            fragmentPath = os.path.join(fragment["classifier"], fragment["name"])
            response["fragments"].append(mapFragment(filePath, fragmentPath, fragment))
    except OSError:
        pass

    # gather content - if there is a geshi code, we should be able to get content
    if geshi:
        response["content"] = DumpdataProvider.read(filePath)

    # commit infos
    setCommitInfos(response, filePath)

    response["derived"] = DumpdataProvider.getDerivedFiles(filePath)

    return response
示例#8
0
def discoverMemberFile(namespace, member, path, file):
    """Build the discovery response for a single file of a namespace member.

    Raises ``ResourceNotFoundException`` when ``DumpdataProvider.exists``
    reports the joined file path as missing.

    Returns a dict with ``geshi``, ``fragments``, ``classifier``, ``name``,
    ``github``, ``namespace`` and ``derived``. ``language`` is added when the
    metadata provides one, and ``content`` when a geshi code is defined.
    The dict is also handed to ``setWikidata`` and ``setCommitInfos``.
    """
    filePath = os.path.join(namespace, member, path, file)
    if not DumpdataProvider.exists(filePath):
        raise ResourceNotFoundException()

    # without a geshi code the response carries "geshi : null" and no content
    _locator, extractor, geshi, language = DumpdataProvider.getMetadata(filePath)
    github_url = DumpdataProvider.getGithub(namespace, member)

    response = {
        'geshi': geshi,
        'fragments': [],
        'classifier': 'File',
        'name': file,
        'github': github_url,
    }

    if language:
        response['language'] = language

    # update github data: point the link at this file when a link exists
    if github_url:
        response['github'] = os.path.join(github_url, path, file)

    # gather wiki data
    wiki_namespace = wikifyNamespace(namespace)
    setWikidata(response, wiki_namespace, member)
    response['namespace'] = wiki_namespace

    # gather member data - extracted facts provide the selectable fragments.
    # An OSError is tolerated; fragments mapped before the failure are kept.
    try:
        facts = DumpdataProvider.getFacts(filePath, extractor)
        for frag in facts.get('fragments', []):
            frag_path = os.path.join(frag['classifier'], frag['name'])
            response['fragments'].append(mapFragment(filePath, frag_path, frag))
    except OSError:
        pass

    # gather content - a geshi code means the content should be readable
    if geshi:
        response['content'] = DumpdataProvider.read(filePath)

    # commit infos
    setCommitInfos(response, filePath)

    response['derived'] = DumpdataProvider.getDerivedFiles(filePath)

    return response
示例#9
0
def discoverNamespaceMember(namespace, member):
    """Build the discovery response for one member of a namespace.

    Raises ``ResourceNotFoundException`` when the UTF-8 decoded member name
    is not among ``DumpdataProvider.getMembers(namespace)``.

    Returns a dict with ``folders``, ``files``, ``classifier``, ``name``,
    ``github`` and ``namespace``; a ``module`` entry is added when the
    namespace is ``'modules'``. The dict is also handed to ``setWikidata``.
    """
    if member.decode('utf_8') not in DumpdataProvider.getMembers(namespace):
        raise ResourceNotFoundException()

    github_url = DumpdataProvider.getGithub(namespace, member)
    response = {
        'folders': [],
        'files': [],
        'classifier': 'Namespace member',
        'name': member,
        'github': github_url,
    }

    # gather wiki data
    wiki_namespace = wikifyNamespace(namespace)
    setWikidata(response, wiki_namespace, member)
    response['namespace'] = wiki_namespace

    # gather member data; spaces in the member name become underscores in the path
    listing_path = os.path.join(namespace, member.replace(' ', '_'))
    files, dirs = DumpdataProvider.getDirContent(listing_path)

    # folders are listed first, then files
    for classifier, names, bucket in (('Folder', dirs, 'folders'), ('File', files, 'files')):
        for entry_name in names:
            response[bucket].append({
                'resource': os.path.join(base_uri, listing_path, entry_name),
                'classifier': classifier,
                'name': entry_name,
            })

    if namespace == 'modules':
        response['module'] = DumpdataProvider.getModuleDescription(member)

    return response
示例#10
0
def discoverMemberPath(namespace, member, path):
    """Build the discovery response for a folder inside a namespace member.

    Returns a dict with ``folders``, ``files``, ``classifier``, ``name`` (the
    basename of ``path``), ``github`` and ``namespace``. When a GitHub value
    is available, ``path`` is joined onto it. The dict is also handed to
    ``setWikidata``.
    """
    folder_name = os.path.basename(path)

    # update github data: only extend the link when there is one
    github_url = DumpdataProvider.getGithub(namespace, member)
    if github_url:
        github_url = os.path.join(github_url, path)

    response = {
        'folders': [],
        'files': [],
        'classifier': 'Folder',
        'name': folder_name,
        'github': github_url,
    }

    # gather wiki data
    wiki_namespace = wikifyNamespace(namespace)
    setWikidata(response, wiki_namespace, member)
    response['namespace'] = wiki_namespace

    # gather member data
    listing_path = os.path.join(namespace, member, path)
    files, dirs = DumpdataProvider.getDirContent(listing_path)

    # folders are listed first, then files
    for classifier, names, bucket in (('Folder', dirs, 'folders'), ('File', files, 'files')):
        for entry_name in names:
            response[bucket].append({
                'resource': os.path.join(base_uri, listing_path, entry_name),
                'classifier': classifier,
                'name': entry_name,
            })

    return response
示例#11
0
def discoverFileFragment(namespace, member, path, file, fragment):
    """Build the discovery response for a fragment inside a member's file.

    Raises ``ResourceNotFoundException`` when ``DumpdataProvider.exists``
    reports the joined file path as missing.

    The response always carries ``geshi``, ``fragments``, ``github`` and
    ``namespace``; ``language`` is added when the metadata provides one.
    ``classifier``, ``name`` and the nested ``fragments`` are filled in only
    when the requested fragment is found among the extracted facts.
    ``content`` is added when line numbers come from those facts or a
    locator is available; in that case the line range is also appended to a
    non-empty ``github`` link. The dict is also handed to ``setWikidata``
    and ``setCommitInfos``.
    """
    filePath = os.path.join(namespace, member, path, file)
    if not DumpdataProvider.exists(filePath):
        raise ResourceNotFoundException()

    # remove trailing slash, if there is one
    if fragment.endswith("/"):
        fragment = fragment[:-1]

    # if no geshi code is defined, then we'll return basically "geshi : null"
    locator, extractor, geshi, language = DumpdataProvider.getMetadata(filePath)

    # name and classifier are set later (in the extractor phase)
    response = {"geshi": geshi, "fragments": [], "github": DumpdataProvider.getGithub(namespace, member)}

    if language:
        response["language"] = language

    # update github data
    if response["github"]:
        response["github"] = os.path.join(response["github"], path, file)

    # gather wiki data
    wikiNS = wikifyNamespace(namespace)
    setWikidata(response, wikiNS, member)

    response["namespace"] = wikiNS

    # gather member data
    lineNumbers = None
    try:
        extractedFacts = DumpdataProvider.getFacts(filePath, extractor)
        # TODO There has to be a better way to do this
        for f1 in extractedFacts["fragments"]:
            selected, fragmentPath = find(f1, fragment)
            if selected:
                response["classifier"] = selected["classifier"]
                response["name"] = selected["name"]
                if "startLine" in selected:
                    lineNumbers = {"from": selected["startLine"], "to": selected["endLine"]}
                for f2 in selected.get("fragments", []):
                    response["fragments"].append(mapFragment(filePath, fragmentPath, f2))
                break
    except Exception:
        # Fact extraction is best-effort: on failure keep whatever was
        # gathered so far and fall back to the locator below. Only
        # Exception is caught so KeyboardInterrupt/SystemExit still propagate.
        pass

    # gather content
    if lineNumbers or locator:
        if not lineNumbers:
            lineNumbers = DumpdataProvider.getFragment(filePath, fragment, locator)

        # "from" is used minus one as the range start, "to" as its exclusive end
        fragmentText = DumpdataProvider.read(filePath, range(lineNumbers["from"] - 1, lineNumbers["to"]))
        response["content"] = fragmentText
        if response["github"]:
            response["github"] += "#L{0}-{1}".format(lineNumbers["from"], lineNumbers["to"])

    setCommitInfos(response, filePath)

    return response
示例#12
0
def discoverFileFragment(namespace, member, path, file, fragment):
    """Build the discovery response for a fragment inside a member's file.

    Raises ``ResourceNotFoundException`` when ``DumpdataProvider.exists``
    reports the joined file path as missing.

    The response always carries ``geshi``, ``fragments``, ``github`` and
    ``namespace``; ``language`` is added when the metadata provides one.
    ``classifier``, ``name`` and the nested ``fragments`` are filled in only
    when the requested fragment is found among the extracted facts.
    ``content`` is added when line numbers come from those facts or a
    locator is available; in that case the line range is also appended to a
    non-empty ``github`` link. The dict is also handed to ``setWikidata``
    and ``setCommitInfos``.
    """
    filePath = os.path.join(namespace, member, path, file)
    if not DumpdataProvider.exists(filePath):
        raise ResourceNotFoundException()

    #remove trailing slash, if there is one
    if fragment.endswith('/'):
        fragment = fragment[:-1]

    #if no geshi code is defined, then we'll return basically "geshi : null"
    locator, extractor, geshi, language = DumpdataProvider.getMetadata(filePath)

    #name and classifier are set later (in the extractor phase)
    response = {
        'geshi'    : geshi,
        'fragments': [],
        'github'   : DumpdataProvider.getGithub(namespace,member)
    }

    if language:
        response['language'] = language

    #update github data
    if response['github']:
        response['github'] = os.path.join(response['github'], path, file)

    #gather wiki data
    wikiNS = wikifyNamespace(namespace)
    setWikidata(response, wikiNS, member)

    response['namespace'] = wikiNS

    #gather member data
    lineNumbers = None
    try:
        extractedFacts = DumpdataProvider.getFacts(filePath, extractor)
        #TODO There has to be a better way to do this
        for f1 in extractedFacts['fragments']:
            selected, fragmentPath = find(f1, fragment)
            if selected:
                response['classifier'] = selected['classifier']
                response['name'] = selected['name']
                if 'startLine' in selected:
                    lineNumbers = {'from':selected['startLine'], 'to': selected['endLine']}
                for f2 in selected.get('fragments',[]):
                    response['fragments'].append(mapFragment(filePath, fragmentPath, f2))
                break
    except Exception:
        #Fact extraction is best-effort: on failure keep whatever was
        #gathered so far and fall back to the locator below. Only
        #Exception is caught so KeyboardInterrupt/SystemExit still propagate.
        pass

    #gather content
    if lineNumbers or locator:
        if not lineNumbers:
            lineNumbers = DumpdataProvider.getFragment(filePath, fragment, locator)

        #'from' is used minus one as the range start, 'to' as its exclusive end
        fragmentText = DumpdataProvider.read(filePath, range(lineNumbers['from'] - 1, lineNumbers['to']))
        response['content'] = fragmentText
        if response['github']:
            response['github'] += '#L{0}-{1}'.format(lineNumbers['from'], lineNumbers['to'])

    setCommitInfos(response, filePath)

    return response