Пример #1
0
def __main__():
    """Rebuild the 'W3C Web Standards' scroll from the standards listing page.

    The listing page is read from the local cache file ``FILE``; when that
    file does not exist it is downloaded from ``URL`` and cached first.
    Every ``ul#container > li`` element is converted with ``li_to_event``
    and the resulting events are uploaded in one batch after any existing
    scroll with the same title has been deleted.

    ``FILE``, ``URL``, ``li_to_event`` and ``UnscrollClient`` are defined
    outside this block.
    """
    def get_content():
        """Return the listing page as bytes, downloading it on a cache miss."""
        try:
            # Read and close the cache file here instead of handing an open
            # handle to the caller; both paths now return bytes.
            with open(FILE, 'rb') as cached:
                return cached.read()
        except FileNotFoundError:
            resp = requests.get(URL)
            # Fail loudly rather than caching an HTTP error page forever.
            resp.raise_for_status()
            with open(FILE, 'wb') as cached:
                cached.write(resp.content)
            print(resp.content)
            return resp.content

    # Fetch the page exactly once.
    soup = bs4.BeautifulSoup(get_content(), "lxml")
    lis = soup.select('ul#container > li')
    events = [li_to_event(li) for li in lis]
    c = UnscrollClient()

    TITLE = 'W3C Web Standards'
    favthumb = c.cache_thumbnail(
        'https://2.bp.blogspot.com/-70GFD8HsG3I/VMKLC7IoiBI/AAAAAAAAIck/GCu0LIY3PCU/s1600/Logo%2BW3C.png'
    )
    # Drop any previous copy so the batch upload starts from a clean scroll.
    c.delete_scroll_with_title(TITLE)
    c.__batch__(scroll_title=TITLE, thumbnail=favthumb['url'], events=events)
    print(len(events))
Пример #2
0
def create(newsgroup, dir, maxyear):
    """Recreate the scroll for one Usenet newsgroup and load its events.

    Any existing scroll titled after the newsgroup is deleted, the shared
    thumbnail (``THUMBNAIL_URL``) is cached, a fresh scroll is created and
    the actual event import is delegated to ``newsgroup_to_events``.

    Args:
        newsgroup: Newsgroup name; its string form is used as the scroll title.
        dir: Passed through unchanged to ``newsgroup_to_events``.
            (The name shadows the builtin but is part of the public signature.)
        maxyear: Passed through unchanged to ``newsgroup_to_events``.
    """
    _title = '{}'.format(newsgroup)
    api = UnscrollClient()
    api.delete_scroll_with_title(_title)
    favthumb = api.cache_thumbnail(THUMBNAIL_URL)
    scroll = api.create_or_retrieve_scroll(
        _title,
        description='Usenet message board archives',
        link='https://archive.org/details/usenethistorical',
        with_thumbnail=favthumb['url'],
        subtitle='Collection via Usenet Historical Collection',
    )
    newsgroup_to_events(newsgroup, scroll, api, dir, maxyear)
Пример #3
0
def load_data(begin=None,
              end=None,
              title=None,
              slug=None,
              thumbnail_url=None,
              delete=False):
    """Import one Archive.org item as a scroll and post its events.

    The item's metadata JSON is fetched from
    ``https://archive.org/metadata/<slug>``. Events are produced by
    ``extract_events`` and posted one by one, each event and each API
    response being pretty-printed along the way.

    Args:
        begin: Passed through to ``extract_events``.
        end: Passed through to ``extract_events``.
        title: Scroll title; when ``None`` the item's metadata title is used.
        slug: Archive.org item identifier used to build the URLs.
        thumbnail_url: Thumbnail source; when ``None`` it is obtained from
            ``extract_thumbnail``.
        delete: When exactly ``True``, an existing scroll with the same
            title is deleted before the scroll is created or retrieved.
    """
    item_link = 'https://archive.org/details/{}'.format(slug)
    response = requests.get('https://archive.org/metadata/{}'.format(slug))
    payload = response.json()

    metadata = payload.get('metadata')
    scroll_title = metadata.get('title') if title is None else title
    description = metadata.get('description')

    events = extract_events(payload, 'MP3', item_link, begin, end)

    if thumbnail_url is None:
        thumbnail_source = extract_thumbnail(payload)
    else:
        thumbnail_source = thumbnail_url

    api = UnscrollClient()
    cached_thumbnail_url = api.cache_thumbnail(thumbnail_source).get('url')

    if delete is True:
        api.delete_scroll_with_title(scroll_title)

    print('XXXXXXX{}'.format(scroll_title))
    scroll = api.create_or_retrieve_scroll(
        scroll_title,
        subtitle='via Archive.org',
        public=True,
        description=description,
        link=item_link,
        citation='',
        with_thumbnail=cached_thumbnail_url,
    )

    for event in events:
        pprint.pprint(event)
        created = api.create_event(event, scroll)
        pprint.pprint(created.json())
Пример #4
0
def __main__():
    """Rebuild the 'Amazon PR' scroll and fill it via ``get_releases``.

    Logs in, deletes any existing scroll with that title, caches the logo
    thumbnail, creates (or retrieves) the scroll, prints it, and then hands
    the client and scroll to ``get_releases``.
    """
    scroll_title = 'Amazon PR'
    logo_url = 'http://media.corporate-ir.net/media_files/IROL/17/176060/img/logos/amazon_logo_RGB.jpg'

    client = UnscrollClient()
    client.login()
    client.delete_scroll_with_title(scroll_title)

    cached_logo = client.cache_thumbnail(logo_url)

    scroll = client.create_or_retrieve_scroll(
        scroll_title,
        description='A set of press releases from the Amazon Press Room.',
        link='http://phx.corporate-ir.net/phoenix.zhtml?c=176060&p=irol-news&nyo=0',
        citation='Amazon Press Room',
        with_thumbnail=cached_logo.get('url'),
    )
    print(scroll)

    get_releases(client, scroll)
Пример #5
0
def __main__():
    """Rebuild the 'IETF RFCs' scroll from the cached RFC index.

    Deletes any existing scroll with that title, caches the IETF logo,
    parses ``cache/rfc/rfc-index.xml`` with ``xmltodict``, converts each
    ``rfc-entry`` with ``rfc_to_event``, uploads everything in one batch
    and prints the number of events.
    """
    title = 'IETF RFCs'
    client = UnscrollClient()
    client.delete_scroll_with_title(title)
    logo = client.cache_thumbnail(
        'https://ietf.org/media/images/ietf-logo.original.png')

    # Load the RFC index and turn every entry into an event.
    with open('cache/rfc/rfc-index.xml', 'r') as index_file:
        index_xml = index_file.read()
    entries = xmltodict.parse(index_xml)['rfc-index']['rfc-entry']
    events = [rfc_to_event(entry) for entry in entries]

    # Upload in a single batch.
    client.__batch__(scroll_title=title,
                     thumbnail=logo['url'],
                     events=events)
    print(len(events))
Пример #6
0
def __main__():
    """Rebuild the 'Studs Terkel Interviews' scroll and post its shows.

    Caches the portrait thumbnail, deletes any existing scroll with the
    same title, creates (or retrieves) the scroll and delegates the upload
    of the individual shows to ``post_shows``.
    """
    title = "Studs Terkel Interviews"
    portrait_url = "https://upload.wikimedia.org/wikipedia/commons/0/0b/Studs_Terkel_-_1979-1.jpg"

    api = UnscrollClient()
    cached_portrait_url = api.cache_thumbnail(portrait_url).get('url')

    api.delete_scroll_with_title(title)

    description = (
        '<b>Via the Studs Terkel Radio Archive at WFMT</b>: '
        'In his 45 years on WFMT radio, Studs Terkel talked to the 20th '
        'century’s most interesting people.'
    )
    scroll = api.create_or_retrieve_scroll(
        title,
        description=description,
        link='https://studsterkel.wfmt.com/',
        with_thumbnail=cached_portrait_url,
        subtitle='Collection via WFMT',
    )

    post_shows(api, scroll)
Пример #7
0
def save_met():
    """Rebuild 'The Met' scroll from the local SQLite cache of the collection.

    Logs in, deletes and recreates the scroll, then reads every row of the
    ``collection`` table in ``/home/unscroll/cache/met.db``. A row becomes
    an event only when all of the following hold:

    * its date is accepted by ``UnscrollDate(...).is_okay()``,
    * it has a non-NULL image and date,
    * the image exists in the local ``met-images`` cache, and
    * posting that image as a thumbnail returns a result.

    Each created event is printed.
    """
    c = UnscrollClient()
    c.login()
    c.delete_scroll_with_title('The Met')
    scroll = c.create_or_retrieve_scroll('The Met')

    conn = sqlite3.connect('/home/unscroll/cache/met.db')
    try:
        conn.row_factory = sqlite3.Row
        sqlc = conn.cursor()
        sqlc.execute("SELECT * FROM collection LIMIT -1 OFFSET 0")
        rows = sqlc.fetchall()
    finally:
        # All rows are materialised, so release the database before the
        # (slow) upload loop instead of leaking the connection.
        conn.close()

    for row in rows:
        ud = UnscrollDate(row['date'], begin=-2000, end=2018)
        if not ud.is_okay():
            continue

        img = row['image']
        # Check for NULLs before touching the image URL; re.sub() on None
        # would raise TypeError.
        if img is None or row['date'] is None:
            continue

        local_img = re.sub(r'https?://images.metmuseum.org/', '', img)
        local = '/home/unscroll/cache/met-images/{}'.format(local_img)
        if not file_exists(local):
            continue

        thumb = c.post_thumbnail(local)
        if thumb is None:
            continue

        # `in` on a sqlite3.Row tests the row's values, so the column name
        # has to be looked up through keys(). Skip empty/NULL media too.
        medium = ''
        if 'medium' in row.keys() and row['medium']:
            medium = ' ({})'.format(row['medium'])

        d = {
            'title': row['title'] + medium,
            'text': row['description'],
            'resolution': ud.resolution,
            'ranking': 0,
            'content_url': 'https://www.metmuseum.org{}'.format(row['url']),
            'with_thumbnail': thumb.get('url'),
            'source_name': 'The Met',
            'source_url': 'https://www.metmuseum.org/',
            'when_happened': ud.when_happened,
            'when_original': ud.when_original,
        }
        e = c.create_event(d, scroll)
        print(e)