示例#1
0
    def test_get_course_key_from_nonascii_url(self):
        """Check course-key extraction for legacy course IDs with non-ASCII characters.

        The valid ID must come back unchanged when the extracted key is
        converted to unicode; the invalid ID must produce no key at all.
        """
        url_template = u"https://courses.edx.org/courses/{course_id}/stuff"

        url = url_template.format(course_id=VALID_NONASCII_LEGACY_COURSE_ID)
        course_key = opaque_key_util.get_course_key_from_url(url)
        # assertEqual is the supported spelling; assertEquals is a deprecated alias.
        self.assertEqual(unicode(course_key), VALID_NONASCII_LEGACY_COURSE_ID)

        url = url_template.format(course_id=INVALID_NONASCII_LEGACY_COURSE_ID)
        course_key = opaque_key_util.get_course_key_from_url(url)
        self.assertIsNone(course_key)
示例#2
0
    def _parse_server_event(self, event):
        # Always check context first for server events.
        org_id = event.get('context', {}).get('org_id')
        if org_id:
            return org_id

        # Try to infer the institution from the event data
        evt_type = event['event_type']
        if '/courses/' in evt_type:
            course_key = opaque_key_util.get_course_key_from_url(evt_type)
            if course_key and '/' not in unicode(course_key):
                return course_key.org
            else:
                # It doesn't matter if we found a good deprecated key.
                # We need to provide backwards-compatibility.
                return get_slash_value(evt_type, 2)
        elif '/' in evt_type:
            return None
        else:
            # Specific server logging. One-off parser for each type.
            # Survey of logs showed 4 event types:
            # reset_problem, save_problem_check,
            # save_problem_check_fail, save_problem_fail.  All
            # four of these have a problem_id, which for legacy events
            # we could extract from.  For newer events, we assume this
            # won't be needed, because context will be present.
            try:
                return get_slash_value(event['event']['problem_id'], 2)
            except Exception:  # pylint: disable=broad-except
                return None

        return None
    def _parse_server_event(self, event):
        # Always check context first for server events.
        org_id = event.get('context', {}).get('org_id')
        if org_id:
            return org_id

        # Try to infer the institution from the event data
        evt_type = event['event_type']
        if '/courses/' in evt_type:
            course_key = opaque_key_util.get_course_key_from_url(evt_type)
            if course_key and '/' not in unicode(course_key):
                return course_key.org
            else:
                # It doesn't matter if we found a good deprecated key.
                # We need to provide backwards-compatibility.
                return get_slash_value(evt_type, 2)
        elif '/' in evt_type:
            return None
        else:
            # Specific server logging. One-off parser for each type.
            # Survey of logs showed 4 event types:
            # reset_problem, save_problem_check,
            # save_problem_check_fail, save_problem_fail.  All
            # four of these have a problem_id, which for legacy events
            # we could extract from.  For newer events, we assume this
            # won't be needed, because context will be present.
            try:
                return get_slash_value(event['event']['problem_id'], 2)
            except Exception:  # pylint: disable=broad-except
                return None

        return None
def get_course_id(event, from_url=False):
    """Return the course ID for ``event``, or None when it cannot be determined.

    A ``course_id`` found in the event context is returned only if
    ``opaque_key_util.is_valid_course_id`` accepts it; an invalid one is
    logged and yields None.  When the context carries no course ID and
    ``from_url`` is true, the ID is parsed from the ``event_type`` URL of
    server events or the ``page`` URL of browser events.
    """
    context = event.get('context')
    if context is None:
        # Assume it's old, and not worth logging...
        return None

    # A course ID supplied by the context is authoritative, but must validate.
    candidate = context.get('course_id', '')
    if candidate:
        if not opaque_key_util.is_valid_course_id(candidate):
            log.error("encountered event with bogus course_id: %s", event)
            return None
        return candidate

    if not from_url:
        return None

    # Pick the event field that holds a URL for this event source: implicit
    # server events keep it in `event_type`, browser events in `page`.
    origin = event.get('event_source')
    if origin == 'server':
        url_field = 'event_type'
    elif origin == 'browser':
        url_field = 'page'
    else:
        url_field = None

    url = event.get(url_field, '') if url_field is not None else ''

    parsed_key = opaque_key_util.get_course_key_from_url(url)
    return unicode(parsed_key) if parsed_key else None
示例#5
0
def get_course_id(event, from_url=False):
    """Return the course ID for ``event``, or None when it cannot be determined.

    A ``course_id`` found in the event context is returned only if
    ``opaque_key_util.is_valid_course_id`` accepts it; an invalid one is
    logged and yields None.  When the context carries no course ID and
    ``from_url`` is true, the ID is parsed from the ``event_type`` URL of
    server events or the ``page`` URL of browser events.
    """
    context = event.get('context')
    if context is None:
        # Assume it's old, and not worth logging...
        return None

    # A course ID supplied by the context is authoritative, but must validate.
    candidate = context.get('course_id', '')
    if candidate:
        if not opaque_key_util.is_valid_course_id(candidate):
            log.error("encountered event with bogus course_id: %s", event)
            return None
        return candidate

    if not from_url:
        return None

    # Pick the event field that holds a URL for this event source: implicit
    # server events keep it in `event_type`, browser events in `page`.
    origin = event.get('event_source')
    if origin == 'server':
        url_field = 'event_type'
    elif origin == 'browser':
        url_field = 'page'
    else:
        url_field = None

    url = event.get(url_field, '') if url_field is not None else ''

    parsed_key = opaque_key_util.get_course_key_from_url(url)
    return unicode(parsed_key) if parsed_key else None
    def get_course_id(self, event):
        """
        Return the course ID for ``event``, or None when it cannot be found.

        A truthy ``course_id`` in the event context is returned unchanged.
        Otherwise the ID is parsed from a URL: ``event_type`` for server
        events, ``page`` for browser events, and an empty string for any
        other event source.
        """

        # TODO: This is an arbitrary way to get the course_id. A more complete
        # routine should deal with all the corner cases as in the `get_org_id`
        # function below. The subset of event that will return a course_id is
        # considered a compromise between the events that are useful and
        # increasing the complexity of the code.

        # Try to get the course from the context.  ``or {}`` also covers an
        # explicit null context, which ``dict.get``'s default does not.
        course_id = (event.get('context') or {}).get('course_id')
        if course_id:
            return course_id

        # Try to get the course_id from the URLs in `event_type` (for implicit
        # server events) and `page` (for browser events).
        source = event.get('event_source')
        if source == 'server':
            url = event.get('event_type', '')
        elif source == 'browser':
            url = event.get('page', '')
        else:
            url = ''

        course_key = opaque_key_util.get_course_key_from_url(url)
        if course_key:
            return unicode(course_key)

        return None
    def get_course_id(self, event):
        """
        Return the course ID for ``event``, or None when it cannot be found.

        A truthy ``course_id`` in the event context is returned unchanged.
        Otherwise the ID is parsed from a URL: ``event_type`` for server
        events, ``page`` for browser events, and an empty string for any
        other event source.
        """

        # TODO: This is an arbitrary way to get the course_id. A more complete
        # routine should deal with all the corner cases as in the `get_org_id`
        # function below. The subset of event that will return a course_id is
        # considered a compromise between the events that are useful and
        # increasing the complexity of the code.

        # Try to get the course from the context.  ``or {}`` also covers an
        # explicit null context, which ``dict.get``'s default does not.
        course_id = (event.get('context') or {}).get('course_id')
        if course_id:
            return course_id

        # Try to get the course_id from the URLs in `event_type` (for implicit
        # server events) and `page` (for browser events).
        source = event.get('event_source')
        if source == 'server':
            url = event.get('event_type', '')
        elif source == 'browser':
            url = event.get('page', '')
        else:
            url = ''

        course_key = opaque_key_util.get_course_key_from_url(url)
        if course_key:
            return unicode(course_key)

        return None
示例#8
0
    def _parse_browser_event(self, event):
        # TODO: Note that for browser events we are not using the org_id from the context.

        page = event['page']
        if 'courses' in page:
            # This is different than the original algorithm in that it assumes
            # the page contains a valid coursename.  The original code
            # merely looked for what followed "http[s]://<host>/courses/"
            # (and also hoped there were no extra slashes or different content).
            course_key = opaque_key_util.get_course_key_from_url(page)
            if course_key and '/' not in unicode(course_key):
                return course_key.org
            else:
                # It doesn't matter if we found a good deprecated key.
                # We need to provide backwards-compatibility.
                return get_slash_value(page, 4)

        return None
    def _parse_browser_event(self, event):
        # TODO: Note that for browser events we are not using the org_id from the context.

        page = event['page']
        if 'courses' in page:
            # This is different than the original algorithm in that it assumes
            # the page contains a valid coursename.  The original code
            # merely looked for what followed "http[s]://<host>/courses/"
            # (and also hoped there were no extra slashes or different content).
            course_key = opaque_key_util.get_course_key_from_url(page)
            if course_key and '/' not in unicode(course_key):
                return course_key.org
            else:
                # It doesn't matter if we found a good deprecated key.
                # We need to provide backwards-compatibility.
                return get_slash_value(page, 4)

        return None
 def test_get_course_key_from_url(self, course_id):
     """Expect the key parsed from a courses URL to convert back to ``course_id``."""
     url = u"https://courses.edx.org/courses/{course_id}/stuff".format(
         course_id=course_id)
     course_key = opaque_key_util.get_course_key_from_url(url)
     # assertEqual is the supported spelling; assertEquals is a deprecated alias.
     self.assertEqual(unicode(course_key), course_id)
 def test_get_course_key_from_invalid_url(self, course_id):
     """Expect no course key when ``course_id`` is substituted into a courses URL."""
     url_template = u"https://courses.edx.org/courses/{course_id}/stuff"
     bad_url = url_template.format(course_id=course_id)
     self.assertIsNone(opaque_key_util.get_course_key_from_url(bad_url))
示例#12
0
 def test_get_course_key_from_invalid_url(self):
     """Expect no course key for a courses URL built from INVALID_LEGACY_COURSE_ID."""
     url_template = "https://courses.edx.org/courses/{course_id}/stuff"
     bad_url = url_template.format(course_id=INVALID_LEGACY_COURSE_ID)
     self.assertIsNone(opaque_key_util.get_course_key_from_url(bad_url))
示例#13
0
 def test_get_course_key_from_legacy_url(self):
     """Expect the key parsed from a legacy courses URL to convert back to the legacy ID."""
     url = "https://courses.edx.org/courses/{course_id}/stuff".format(course_id=VALID_LEGACY_COURSE_ID)
     course_key = opaque_key_util.get_course_key_from_url(url)
     # assertEqual is the supported spelling; assertEquals is a deprecated alias.
     self.assertEqual(unicode(course_key), VALID_LEGACY_COURSE_ID)
示例#14
0
 def test_get_course_key_from_url(self, block_id):
     """Expect an xblock URL built from ``block_id`` to resolve to VALID_COURSE_ID."""
     # The leftover debugging print of the URL was dropped: it added noise to
     # test output and could fail on an ASCII-only stdout for non-ASCII ids.
     url = u"https://courses.edx.org/xblock/{block_id}?stuff=things".format(
         block_id=block_id)
     course_key = opaque_key_util.get_course_key_from_url(url)
     # assertEqual is the supported spelling; assertEquals is a deprecated alias.
     self.assertEqual(unicode(course_key), VALID_COURSE_ID)
    def get_org_id(self, item):
        """
        Attempt to determine the organization that is associated with this particular event.

        This method may return incorrect results, so a white list of
        valid organization names is used to filter out the noise.

        Server events use the ``org_id`` from their context when it is set,
        and otherwise fall back to the ``event_type`` URL or the event's
        ``problem_id``.  Browser events are resolved from their ``page`` URL
        only.  An unexpected exception while inspecting the event is logged
        and results in None.

        None is returned if no org information is found in the item.
        """
        def get_slash_value(input_value, index):
            """Return index value after splitting input on slashes."""
            try:
                return input_value.split('/')[index]
            except IndexError:
                return None

        try:
            # Different behavior based on type of event source.
            if item['event_source'] == 'server':
                # Always check context first for server events.  ``or {}``
                # lets an event with an explicit null context fall through
                # to the inference below instead of raising AttributeError.
                org_id = (item.get('context') or {}).get('org_id')
                if org_id:
                    return org_id

                # Try to infer the institution from the event data
                evt_type = item['event_type']
                if '/courses/' in evt_type:
                    course_key = opaque_key_util.get_course_key_from_url(evt_type)
                    if course_key and '/' not in unicode(course_key):
                        return course_key.org
                    else:
                        # It doesn't matter if we found a good deprecated key.
                        # We need to provide backwards-compatibility.
                        return get_slash_value(evt_type, 2)
                elif '/' in evt_type:
                    return None
                else:
                    # Specific server logging. One-off parser for each type.
                    # Survey of logs showed 4 event types:
                    # reset_problem, save_problem_check,
                    # save_problem_check_fail, save_problem_fail.  All
                    # four of these have a problem_id, which for legacy events
                    # we could extract from.  For newer events, we assume this
                    # won't be needed, because context will be present.
                    try:
                        return get_slash_value(item['event']['problem_id'], 2)
                    except Exception:  # pylint: disable=broad-except
                        return None
            elif item['event_source'] == 'browser':
                # Note that the context of browser events is ignored.
                page = item['page']
                if 'courses' in page:
                    # This is different than the original algorithm in that it assumes
                    # the page contains a valid coursename.  The original code
                    # merely looked for what followed "http[s]://<host>/courses/"
                    # (and also hoped there were no extra slashes or different content).
                    course_key = opaque_key_util.get_course_key_from_url(page)
                    if course_key and '/' not in unicode(course_key):
                        return course_key.org
                    else:
                        # It doesn't matter if we found a good deprecated key.
                        # We need to provide backwards-compatibility.
                        return get_slash_value(page, 4)
            else:
                # TODO: Handle other event source values (e.g. task or mobile).
                return None

        except Exception:  # pylint: disable=broad-except
            log.exception('Unable to determine institution for event: %s',
                          unicode(item).encode('utf8'))

        # Reached for browser pages without 'courses' and after a logged failure.
        return None
示例#16
0
 def test_get_course_key_from_invalid_block_url(self, block_id):
     """Expect no course key for an xblock URL built from ``block_id``."""
     # The leftover debugging print of the URL was dropped: it added noise to
     # test output and could fail on an ASCII-only stdout for non-ASCII ids.
     url = u"https://courses.edx.org/xblock/{block_id}?stuff=things".format(
         block_id=block_id)
     course_key = opaque_key_util.get_course_key_from_url(url)
     self.assertIsNone(course_key)
 def test_get_course_key_from_invalid_url(self):
     """Expect no course key for a courses URL built from INVALID_LEGACY_COURSE_ID."""
     url_template = "https://courses.edx.org/courses/{course_id}/stuff"
     bad_url = url_template.format(course_id=INVALID_LEGACY_COURSE_ID)
     self.assertIsNone(opaque_key_util.get_course_key_from_url(bad_url))
 def test_get_course_key_from_url(self, course_id):
     """Expect the key parsed from a courses URL to convert back to ``course_id``."""
     url = u"https://courses.edx.org/courses/{course_id}/stuff".format(course_id=course_id)
     course_key = opaque_key_util.get_course_key_from_url(url)
     # assertEqual is the supported spelling; assertEquals is a deprecated alias.
     self.assertEqual(unicode(course_key), course_id)
 def test_get_course_key_from_legacy_url(self):
     """Expect the key parsed from a legacy courses URL to convert back to the legacy ID."""
     url = "https://courses.edx.org/courses/{course_id}/stuff".format(course_id=VALID_LEGACY_COURSE_ID)
     course_key = opaque_key_util.get_course_key_from_url(url)
     # assertEqual is the supported spelling; assertEquals is a deprecated alias.
     self.assertEqual(unicode(course_key), VALID_LEGACY_COURSE_ID)
 def test_get_course_key_from_invalid_url(self, course_id):
     """Expect no course key when ``course_id`` is substituted into a courses URL."""
     url_template = u"https://courses.edx.org/courses/{course_id}/stuff"
     bad_url = url_template.format(course_id=course_id)
     self.assertIsNone(opaque_key_util.get_course_key_from_url(bad_url))
    def get_org_id(self, item):
        """
        Attempt to determine the organization that is associated with this particular event.

        This method may return incorrect results, so a white list of
        valid organization names is used to filter out the noise.

        Server events use the ``org_id`` from their context when it is set,
        and otherwise fall back to the ``event_type`` URL or the event's
        ``problem_id``.  Browser events are resolved from their ``page`` URL
        only.  An unexpected exception while inspecting the event is logged
        and results in None.

        None is returned if no org information is found in the item.
        """
        def get_slash_value(input_value, index):
            """Return index value after splitting input on slashes."""
            try:
                return input_value.split('/')[index]
            except IndexError:
                return None

        try:
            # Different behavior based on type of event source.
            if item['event_source'] == 'server':
                # Always check context first for server events.  ``or {}``
                # lets an event with an explicit null context fall through
                # to the inference below instead of raising AttributeError.
                org_id = (item.get('context') or {}).get('org_id')
                if org_id:
                    return org_id

                # Try to infer the institution from the event data
                evt_type = item['event_type']
                if '/courses/' in evt_type:
                    course_key = opaque_key_util.get_course_key_from_url(evt_type)
                    if course_key and '/' not in unicode(course_key):
                        return course_key.org
                    else:
                        # It doesn't matter if we found a good deprecated key.
                        # We need to provide backwards-compatibility.
                        return get_slash_value(evt_type, 2)
                elif '/' in evt_type:
                    return None
                else:
                    # Specific server logging. One-off parser for each type.
                    # Survey of logs showed 4 event types:
                    # reset_problem, save_problem_check,
                    # save_problem_check_fail, save_problem_fail.  All
                    # four of these have a problem_id, which for legacy events
                    # we could extract from.  For newer events, we assume this
                    # won't be needed, because context will be present.
                    try:
                        return get_slash_value(item['event']['problem_id'], 2)
                    except Exception:  # pylint: disable=broad-except
                        return None
            elif item['event_source'] == 'browser':
                # Note that the context of browser events is ignored.
                page = item['page']
                if 'courses' in page:
                    # This is different than the original algorithm in that it assumes
                    # the page contains a valid coursename.  The original code
                    # merely looked for what followed "http[s]://<host>/courses/"
                    # (and also hoped there were no extra slashes or different content).
                    course_key = opaque_key_util.get_course_key_from_url(page)
                    if course_key and '/' not in unicode(course_key):
                        return course_key.org
                    else:
                        # It doesn't matter if we found a good deprecated key.
                        # We need to provide backwards-compatibility.
                        return get_slash_value(page, 4)
            else:
                # TODO: Handle other event source values (e.g. task or mobile).
                return None

        except Exception:  # pylint: disable=broad-except
            log.exception('Unable to determine institution for event: %s', unicode(item).encode('utf8'))

        # Reached for browser pages without 'courses' and after a logged failure.
        return None