Пример #1
0
  def test_input_stream(self):
    """Read the first record of a repo:// document via input_stream_for.

    A FakeTask is pushed onto the task stack for the duration of the test
    and is popped again in a ``finally`` clause, so a failing lookup or
    assertion cannot leave the fake task behind for later tests.
    """
    task.push(FakeTask())
    try:
      input_stream = datasources.input_stream_for(None, 0, 'repo://dir1/doc1.txt', None)
      # The result is indexed here: element 0 is the object that is iterated.
      eq_('Hi mom!', input_stream[0].next())
    finally:
      task.pop()
Пример #2
0
  def test_input_stream(self):
    """Read the first record of a repo:// document via input_stream_for.

    A FakeTask is pushed onto the task stack for the duration of the test
    and is popped again in a ``finally`` clause, so a failing lookup or
    assertion cannot leave the fake task behind for later tests.
    """
    task.push(FakeTask())
    try:
      input_stream = datasources.input_stream_for(None, 0, 'repo://dir1/doc1.txt', None)
      # Each record yielded by the stream is read() to obtain its content.
      eq_('Hi mom!', input_stream.next().read())
    finally:
      task.pop()
Пример #3
0
 def test_http(self):
   """An http:// url resolves to an HTTPSource with a single segment for the given window."""
   http_source = datasources.source_for('http://google.com/')
   assert isinstance(http_source, HTTPSource)

   window_start = datetime(2011, 5, 31)
   window_end = datetime(2011, 6, 1)
   segment_urls = http_source.segment_between(window_start, window_end)
   eq_(len(segment_urls), 1)

   # Finally open a stream for the segment; the result is not inspected.
   stream_params = Params()
   opened = datasources.input_stream_for(None, None, segment_urls[0], stream_params)
Пример #4
0
    def test_http(self):
        """An http:// url resolves to an HTTPSource with a single segment.

        The segment is then handed to input_stream_for; the returned stream
        is not inspected.
        """
        http_source = datasources.source_for('http://google.com/')
        assert isinstance(http_source, HTTPSource)

        window_start = datetime(2011, 5, 31)
        window_end = datetime(2011, 6, 1)
        segment_urls = http_source.segment_between(window_start, window_end)
        eq_(len(segment_urls), 1)

        stream_params = Params()
        opened = datasources.input_stream_for(
            None, None, segment_urls[0], stream_params)
Пример #5
0
    def test_look_order(self):
      """
      A url/mimetype combination that has not been registered must make
      input_stream_for return None.
      """
      unregistered = Params()
      unregistered.content_type = "application/not-registered"
      bogus_url = "bogus-scheme://example.com/foo"

      result = datasources.input_stream_for(None, None, bogus_url, unregistered)

      eq_(result, None)
Пример #6
0
        def test_look_order(self):
            """
            A url/mimetype combination that has not been registered must make
            input_stream_for return None.
            """
            unregistered = Params()
            unregistered.content_type = "application/not-registered"
            bogus_url = "bogus-scheme://example.com/foo"

            result = datasources.input_stream_for(
                None, None, bogus_url, unregistered)

            eq_(result, None)
Пример #7
0
def map_input_stream(stream, size, url, params):
  """
  Looks up an input stream if one is registered, if not
  falls back to disco's defaults.

  The arguments are passed unchanged to datasources.input_stream_for and,
  when that returns a falsy value, to disco.func.map_input_stream.  A tuple
  result of length 3 is unpacked as (input_stream, size, url); otherwise it
  is unpacked as (input_stream, url).  If params has a content_type
  attribute, the stream is finally passed through the reader that
  datasources.reader_for_mimetype returns for that mimetype.

  Returns the resulting input stream.
  """

  # Warning: when this function is called by the disco node the globals in
  # this module will no longer be visible, which is why everything is
  # imported here and accessed through datasources.
  # (schemesplit is not referenced below; the import is kept as in the
  # original.)
  from disco.util import schemesplit
  import disco.func
  from triv.io import datasources
  from triv.io.task import task

  # Note: Task is a global set by disco, but not necessarily seen by other
  # objects; we push it onto the context stack, which allows it to be
  # imported by our modules that need it.
  try:
    task.push(Task)
  except NameError:
    # Task is not defined: we are running under a test.
    pass
  input_stream = datasources.input_stream_for(stream, size, url, params)

  if not input_stream:
    # We don't handle the given url, see if vanilla disco modules can...
    try:
      # This is normally cleared when we're done iterating.
      task.pop()
    except IndexError:
      # Nothing was pushed (e.g. the NameError path above).
      pass

    input_stream = disco.func.map_input_stream(stream, size, url, params)

  # Same code in classic/worker...
  if isinstance(input_stream, tuple):
    if len(input_stream) == 3:
      input_stream, size, url = input_stream
    else:
      input_stream, url = input_stream

  if hasattr(params, 'content_type'):
    reader = datasources.reader_for_mimetype(params.content_type)
    input_stream = reader(input_stream, size, url, params)

  # Parenthesised single-argument form prints the same text on Python 2 and
  # is valid syntax on Python 3.
  print("using input stream {}".format(input_stream))
  return input_stream
Пример #8
0
def map_input_stream(stream, size, url, params):
  """
  Return the input stream registered for the url, or fall back to
  disco.func.map_input_stream when datasources has none for it.
  """
  from disco.util import schemesplit
  import disco.func
  from triv.io import datasources, task

  datasources.load()
  # Note: Task is a global set by disco; we push it onto the context stack,
  # which allows it to be imported by the modules that need it.
  task.push(Task)
  handled = datasources.input_stream_for(stream, size, url, params)
  if not handled:
    # We don't handle the given url: undo the push (normally cleared when
    # we're done iterating) and see if vanilla disco modules can.
    task.pop()
    return disco.func.map_input_stream(stream,size,url,params)
  return handled
Пример #9
0
def map_input_stream(stream, size, url, params):
    """Return the registered input stream for the url, else disco's default.

    When datasources.input_stream_for yields nothing truthy, the pushed Task
    is popped again and disco.func.map_input_stream handles the request.
    """
    from disco.util import schemesplit
    import disco.func
    from triv.io import datasources, task

    datasources.load()
    # Note: Task is a global set by disco; we push it onto the context stack,
    # which allows it to be imported by the modules that need it.
    task.push(Task)
    handled = datasources.input_stream_for(stream, size, url, params)
    if not handled:
        # We don't handle the given url: undo the push (normally cleared
        # when we're done iterating) and see if vanilla disco modules can.
        task.pop()
        return disco.func.map_input_stream(stream, size, url, params)
    return handled
 def __test_warc_mime_type(self):
   """Request an input stream for segment[0] using a freshly created Params()."""
   warc_params = Params()
   stream = datasources.input_stream_for(None, None, segment[0], warc_params)
Пример #11
0
    def __test_mock_url_stream(self):
        """The first record read from the stream for urls[0] is (0, ['1', '2', '3'])."""
        stream = datasources.input_stream_for(None, 0, urls[0], None)
        first_record = stream.next()
        expected = (0, ['1', '2', '3'])

        self.assertSequenceEqual(first_record, expected)
Пример #12
0
  def __test_mock_url_stream(self):
    """The first record read from the stream for urls[0] is (0, ['1', '2', '3'])."""
    stream = datasources.input_stream_for(None, 0, urls[0], None)
    first_record = stream.next()
    expected = (0, ['1', '2', '3'])

    self.assertSequenceEqual(first_record, expected)
 def __test_warc_mime_type(self):
     """Create a Params() and request an input stream for segment[0].

     NOTE(review): `segment` is not defined inside this block and the
     returned stream is not checked in the lines visible here; confirm
     where `segment` comes from and whether more of this test follows.
     """
     params = Params()
     input_stream = datasources.input_stream_for(None, None, segment[0],
                                                 params)