Exemplo n.º 1
0
def test_fifteen_retries(yahoo_response):
    """A persistent HTTP 500 ends in ``Recoverable`` after exactly 15 requests."""
    mocked = yahoo_response('v1/groups/groupname/', ygError={}, status=500)
    api = YahooGroupsAPI('groupname')

    with raises(yahoogroupsapi.Recoverable):
        api.HackGroupInfo()

    # The client must have given up only after its 15th attempt.
    attempts = len(mocked.calls)
    assert attempts == 15
Exemplo n.º 2
0
def test_one_retry(yahoo_response):
    """One HTTP 500 followed by a success returns the successful payload after two requests."""
    expected = {'ok': 'on second try'}

    # Register a failing response first, then the successful one.
    yahoo_response('v1/groups/groupname/', ygError={}, status=500)
    mocked = yahoo_response('v1/groups/groupname/', expected)

    api = YahooGroupsAPI('groupname')
    payload = api.HackGroupInfo()

    assert len(mocked.calls) == 2
    assert payload == expected
Exemplo n.º 3
0
def test_unauthorized_error(yahoo_response):
    """A 401 error response raises ``Unauthorized`` after a single request."""
    error_body = {
        "hostname": "gapi17.grp.bf1.yahoo.com",
        "httpStatus": 401,
        "errorMessage": "User does not have READ permission for GROUP. Mess...",
        "errorCode": 1103,
        "sid": "SID:YHOO:groups.yahoo.com:00000000000000000000000000000000:0",
    }
    mocked = yahoo_response('v1/groups/groupname/', ygError=error_body,
                            ygPerms=YGPERMS_NONE, status=401)
    api = YahooGroupsAPI('groupname')

    with raises(yahoogroupsapi.Unauthorized):
        api.HackGroupInfo()

    # Exactly one request is expected: the 401 must not be retried.
    assert len(mocked.calls) == 1
Exemplo n.º 4
0
def test_get_json(yahoo_response, cookies):
    """``get_json`` requests the expected v2 URL, sends the cookies and returns the payload."""
    mocked = yahoo_response('v2/groups/groupname/files/a/2?param1=c&param2=4', {'result': 'returned data'})

    api = YahooGroupsAPI('groupname', cookies)
    data = api.get_json('files', 'a', 2, param1='c', param2=4)

    # Parse the Cookie header of the first recorded request back into name -> value pairs.
    sent = SimpleCookie()
    sent.load(mocked.calls[0].request.headers['Cookie'])
    sent_values = {name: morsel.value for name, morsel in sent.items()}
    assert dict(cookies) == sent_values

    assert data == {'result': 'returned data'}
Exemplo n.º 5
0
def test_warc_enabled():
    """With a WARC writer set, one API call yields a response record then a request record."""
    # Note that this does not use the responses mocking framework, as it conflicts with the warc captures.
    # This makes a real request to Yahoo, so might fail.
    target = 'https://groups.yahoo.com/api/v1/groups/test/'

    api = YahooGroupsAPI('test')
    buffer_writer = BufferWARCWriter(gzip=False)
    api.set_warc_writer(buffer_writer)
    api.HackGroupInfo()

    expected = [(target, 'response'), (target, 'request')]

    # Collect (target URI, record type) for every record captured in the buffer.
    actual = []
    for record in ArchiveIterator(buffer_writer.get_stream()):
        actual.append((record.rec_headers['WARC-Target-URI'], record.rec_type))

    assert expected == actual
def test_get_json(response, cookies):
    """``get_json`` sends the cookies and returns the contents of the ``ygData`` envelope.

    NOTE(review): this variant relies on the Python 2 ``Cookie`` module and
    ``dict.iteritems`` -- confirm the target interpreter before porting.
    """
    endpoint = 'https://groups.yahoo.com/api/v2/groups/groupname/files/a/2?param1=c&param2=4'
    envelope = {'ygData': {'result': 'returned data'}}
    response.add(responses.GET, endpoint, json=envelope)

    api = YahooGroupsAPI('groupname', cookies)
    data = api.get_json('files', 'a', 2, param1='c', param2=4)

    # Parse the Cookie header of the first recorded request back into name -> value pairs.
    sent = Cookie.SimpleCookie()
    sent.load(response.calls[0].request.headers['Cookie'])
    sent_values = {}
    for name, morsel in sent.iteritems():
        sent_values[name] = morsel.value
    assert dict(cookies) == sent_values

    assert data == {'result': 'returned data'}
Exemplo n.º 7
0
            help='Only archive database')

    pe = p.add_argument_group(title='Email Options')
    pe.add_argument('-r', '--no-reattach', action='store_true',
            help="Don't reattach attachment files to email")
    pe.add_argument('-s', '--no-save', action='store_true',
            help="Don't save email attachments as individual files")

    p.add_argument('--overwrite', action='store_true',
                   help="Re-download and overwrite existing files and messages")

    p.add_argument('group', type=str)

    args = p.parse_args()

    yga = YahooGroupsAPI(args.group, args.cookie_t, args.cookie_y)
    if args.username:
        password = args.password or getpass.getpass()
        print "logging in..."
        if not yga.login(args.username, password):
            print "Login failed"
            sys.exit(1)

    if not (args.email or args.files or args.photos or args.database):
        args.email = args.files = args.photos = args.database = True
    skip_existing = not args.overwrite

    with Mkchdir(args.group):
        if args.email:
            with Mkchdir('email'):
                archive_email(yga,
Exemplo n.º 8
0
            raise e
        coloredlogs.install(level=log_level, **log_format)
    else:
        log_stdout_handler = logging.StreamHandler(sys.stdout)
        log_stdout_handler.setLevel(log_level)
        log_stdout_handler.setFormatter(log_formatter)
        root_logger.addHandler(log_stdout_handler)

    cookie_jar = init_cookie_jar(args.cookie_file, args.cookie_t,
                                 args.cookie_y, args.cookie_e)

    headers = {}
    if args.user_agent:
        headers['User-Agent'] = args.user_agent

    yga = YahooGroupsAPI(args.group, cookie_jar, headers, delay=args.delay)

    if not (args.email or args.files or args.photos or args.database
            or args.links or args.calendar or args.about or args.polls
            or args.attachments or args.members):
        args.email = args.files = args.photos = args.database = args.links = args.calendar = args.about = \
            args.polls = args.attachments = args.members = True

    with Mkchdir(args.group, sanitize=False):
        log_file_handler = logging.FileHandler('archive.log')
        log_file_handler.setFormatter(log_formatter)
        root_logger.addHandler(log_file_handler)

        if args.warc:
            try:
                from warcio import WARCWriter
Exemplo n.º 9
0
        help=
        'Output WARC file of raw network requests. [Requires warcio package installed]'
    )

    p.add_argument('-v', '--verbose', action='store_true')

    p.add_argument('group', type=str)

    args = p.parse_args()

    if not args.verbose:
        log_stdout_handler.setLevel(logging.INFO)

    cookie_jar = init_cookie_jar(args.cookie_file, args.cookie_t,
                                 args.cookie_y, args.cookie_e)
    yga = YahooGroupsAPI(args.group, cookie_jar)

    if not (args.email or args.files or args.photos or args.database
            or args.links or args.calendar or args.about or args.polls
            or args.attachments or args.members):
        args.email = args.files = args.photos = args.database = args.links = args.calendar = args.about = \
            args.polls = args.attachments = args.members = True

    with Mkchdir(args.group):
        log_file_handler = logging.FileHandler('archive.log')
        log_file_handler.setFormatter(log_formatter)
        root_logger.addHandler(log_file_handler)

        if args.warc:
            try:
                from warcio import WARCWriter
Exemplo n.º 10
0
def test_not_authenticated_error(yahoo_response):
    """A 307 response ends in ``NotAuthenticated`` after 15 requests."""
    mocked = yahoo_response('v1/groups/groupname/', status=307)
    api = YahooGroupsAPI('groupname')

    with raises(yahoogroupsapi.NotAuthenticated):
        api.HackGroupInfo()

    attempts = len(mocked.calls)
    assert attempts == 15
Exemplo n.º 11
0
def test_correct_ua(yahoo_response):
    """A ``User-Agent`` supplied through ``headers`` is sent on the outgoing request."""
    mocked = yahoo_response('v1/groups/groupname/', {})
    custom_headers = {'User-Agent': 'test'}

    api = YahooGroupsAPI('groupname', headers=custom_headers)
    api.HackGroupInfo()

    first_request = mocked.calls[0].request
    assert first_request.headers['user-agent'] == 'test'
Exemplo n.º 12
0
    po.add_argument('-i', '--photos', action='store_true',
            help='Only archive photo galleries')
    po.add_argument('-d', '--database', action='store_true',
            help='Only archive database')

    pe = p.add_argument_group(title='Email Options')
    pe.add_argument('-r', '--no-reattach', action='store_true',
            help="Don't reattach attachment files to email")
    pe.add_argument('-s', '--no-save', action='store_true',
            help="Don't save email attachments as individual files")

    p.add_argument('group', type=str)

    args = p.parse_args()

    yga = YahooGroupsAPI(args.group, args.cookie_t, args.cookie_y)
    if args.username:
        password = args.password or getpass.getpass()
        print "logging in..."
        if not yga.login(args.username, password):
            print "Login failed"
            sys.exit(1)

    if not (args.email or args.files or args.photos or args.database):
        args.email = args.files = args.photos = args.database = True

    with Mkchdir(args.group):
        if args.email:
            with Mkchdir('email'):
                archive_email(yga, reattach=(not args.no_reattach), save=(not args.no_save))
        if args.files:
Exemplo n.º 13
0
    pe.add_argument('-s',
                    '--no-save',
                    action='store_true',
                    help="Don't save email attachments as individual files")
    pe.add_argument('--html',
                    action='store_false',
                    help="Don't save the non-raw version of message")

    p.add_argument('group', type=str)

    args = p.parse_args()

    if not args.verbose:
        log_stdout_handler.setLevel(logging.INFO)

    yga = YahooGroupsAPI(args.group, args.cookie_t, args.cookie_y,
                         args.cookie_e)

    if not (args.email or args.files or args.photos or args.database
            or args.links or args.calendar or args.about or args.polls
            or args.attachments or args.members):
        args.email = args.files = args.photos = args.database = args.links = args.calendar = args.about = \
            args.polls = args.attachments = args.members = True

    with Mkchdir(args.group):
        log_file_handler = logging.FileHandler('archive.log')
        log_file_handler.setFormatter(log_formatter)
        root_logger.addHandler(log_file_handler)

        if args.email:
            with Mkchdir('email'):
                archive_email(yga, save=(not args.no_save), html=args.html)
Exemplo n.º 14
0
def test_not_authenticated_error(yahoo_response):
    """A 307 response currently ends in ``Recoverable`` after 15 requests."""
    mocked = yahoo_response('v1/groups/groupname/', status=307)
    api = YahooGroupsAPI('groupname')

    # Temporary fix, replaced: yahoogroupsapi.NotAuthenticated
    with raises(yahoogroupsapi.Recoverable):
        api.HackGroupInfo()

    attempts = len(mocked.calls)
    assert attempts == 15
Exemplo n.º 15
0
def main():
    """Command-line entry point: set up logging, build the API client and run the selected archivers.

    Steps, in order:

    1. Parse arguments and configure the root logger (coloured output when
       ``args.colour`` is set, otherwise a plain stdout handler).
    2. Build the cookie jar, optional ``User-Agent`` header and the
       ``YahooGroupsAPI`` client.
    3. If no content type was requested, enable every type except the full
       email download.
    4. Inside the group's directory, attach a file log handler, optionally
       open a WARC output file, and run each enabled archiver in its own
       sub-directory.

    The WARC file, when opened, is closed even if an archiver raises.

    Exits the process (``SystemExit``) when an optional package required by
    ``--colour`` or ``--warc`` handling is not installed.
    """
    args = parse_arguments()

    # Setup logging
    root_logger = logging.getLogger()
    root_logger.setLevel(logging.DEBUG)

    log_format = {
        'fmt': '%(asctime)s %(levelname)s %(name)s %(message)s',
        'datefmt': '%Y-%m-%d %H:%M:%S.%f %Z'
    }
    log_formatter = CustomFormatter(**log_format)

    if args.verbose:
        log_level = logging.DEBUG
    elif args.quiet:
        log_level = logging.ERROR
    else:
        log_level = logging.INFO

    if args.colour:
        try:
            import coloredlogs
        except ImportError:
            sys.exit(
                "Error: Coloured logging output requires the 'coloredlogs' package to be installed."
            )
        coloredlogs.install(level=log_level, **log_format)
    else:
        log_stdout_handler = logging.StreamHandler(sys.stdout)
        log_stdout_handler.setLevel(log_level)
        log_stdout_handler.setFormatter(log_formatter)
        root_logger.addHandler(log_stdout_handler)

    cookie_jar = init_cookie_jar(args.cookie_file, args.cookie_t,
                                 args.cookie_y, args.cookie_e)

    headers = {}
    if args.user_agent:
        headers['User-Agent'] = args.user_agent

    yga = YahooGroupsAPI(args.group, cookie_jar, headers, min_delay=args.delay)

    # Default to all unique content. This includes topics and raw email,
    # but not the full email download since that would duplicate html emails we get through topics.
    if not (args.email or args.files or args.photos or args.database
            or args.links or args.calendar or args.about or args.polls
            or args.attachments or args.members or args.topics or args.raw):
        args.files = args.photos = args.database = args.links = args.calendar = args.about = \
            args.polls = args.attachments = args.members = args.topics = args.raw = True

    with Mkchdir(args.group, sanitize=False):
        log_file_handler = logging.FileHandler('archive.log', 'a', 'utf-8')
        log_file_handler.setFormatter(log_formatter)
        root_logger.addHandler(log_file_handler)

        fhwarc = None
        if args.warc:
            # Guard only the import, so a missing package is the sole thing
            # reported as "warcio not installed".
            try:
                from warcio import WARCWriter
            except ImportError:
                logging.error(
                    'WARC output requires the warcio package to be installed.')
                sys.exit(1)
            fhwarc = open('data.warc.gz', 'ab')

        try:
            if fhwarc is not None:
                warc_writer = WARCWriter(fhwarc)
                warcmeta = warc_writer.create_warcinfo_record(
                    fhwarc.name, WARC_META_PARAMS)
                warc_writer.write_record(warcmeta)
                yga.set_warc_writer(warc_writer)

            if args.overwrite:
                hacky_vars['file'] = True

            if args.email:
                with Mkchdir('email'):
                    archive_email(yga,
                                  message_subset=args.ids,
                                  start=args.start,
                                  stop=args.stop,
                                  noAttachments=args.noattachments)
            if args.files:
                with Mkchdir('files'):
                    archive_files(yga)
            if args.photos:
                with Mkchdir('photos'):
                    archive_photos(yga)
            if args.topics:
                with Mkchdir('topics'):
                    archive_topics(yga, noAttachments=args.noattachments)
            if args.raw:
                # Raw email shares the 'email' directory with the full download.
                with Mkchdir('email'):
                    archive_email(yga,
                                  message_subset=args.ids,
                                  start=args.start,
                                  stop=args.stop,
                                  skipHTML=True)
            if args.database:
                with Mkchdir('databases'):
                    archive_db(yga)
            if args.links:
                with Mkchdir('links'):
                    archive_links(yga)
            if args.about:
                with Mkchdir('about'):
                    archive_about(yga)
            if args.polls:
                with Mkchdir('polls'):
                    archive_polls(yga)
            if args.attachments:
                with Mkchdir('attachments'):
                    archive_attachments(yga)
            if args.members:
                with Mkchdir('members'):
                    archive_members(yga)
            if args.calendar:
                with Mkchdir('calendar'):
                    archive_calendar(yga)
        finally:
            # Close the WARC file even when an archiver fails part-way.
            if fhwarc is not None:
                fhwarc.close()