from contextlib import closing
from functools import partial
import os
import re
import urllib2

from lxml import html, cssselect

from sync_media_function import sync_media
from metafuncs import branch, combine, maybe, tryit, getitem, cache, Pipe


# Support functions
def is_not_none(obj):
    """Return True when *obj* is anything other than None."""
    return obj is not None


def unique(sequence):
    """Return the distinct items of *sequence* as a list.

    Items are passed through a set, so they must be hashable and the order
    of the returned list is unspecified.
    """
    distinct = set(sequence)
    return list(distinct)
# Document loader.
get_html = html.parse  # :: str -> ElementTree
# Selector for every <img> element.
img_tags = cssselect.CSSSelector('img')  # :: ElementTree -> List[Element]
# Source-attribute lookups, assembled innermost-first.
# NOTE(review): presumably maybe(f, g) falls back to g when f yields
# nothing -- confirm against metafuncs.
_src_from_srcset = maybe(getitem('data-srcset'))  # home page
_src_from_data_src = maybe(getitem('data-src'), _src_from_srcset)  # photo pages
get_src = maybe(getitem('src'), _src_from_data_src)  # normal
# Finds inline CSS ``background-image: url(...)`` declarations.
# NOTE(review): the capture group also includes the closing ")" of url(...);
# confirm how callers consume it before changing the pattern.
BACKGROUND_IMAGE_REGEX = re.compile(r'background-image: url\((.*?\))')


def read_page(url):
    """Fetch *url* and return the response body.

    The response object is closed explicitly once the body has been read,
    instead of being left open until it is garbage collected.
    """
    with closing(urllib2.urlopen(url)) as response:
        return response.read()
def path_to_url(path):
    """Return a ``file://`` URL built from the absolute form of *path*."""
    absolute = os.path.abspath(path)
    return "file://" + absolute
# Captures ('/media', <intermediate directories>, <file name containing a dot>).
PATH_SPLIT_REGEX = re.compile(r'^(/media)/(.*)/(.*?\..*?)$')
# Matches any path that contains a '/media/' segment.
MEDIA_PATH_REGEX = re.compile(r'/media/')


def split_path_parts(src):  # :: str -> PathParts
    """Split a media path into its three PATH_SPLIT_REGEX groups.

    Raises AttributeError when *src* does not match the pattern, because
    ``match`` then returns None.
    """
    matched = PATH_SPLIT_REGEX.match(src)
    return matched.groups()


def is_media_path(path):
    """Return True when *path* contains ``/media/`` anywhere."""
    found = MEDIA_PATH_REGEX.search(path)
    return found is not None
def fixup_cdn(url):
    """Prefix protocol-relative CDN URLs (``//cdn...``) with ``http:``.

    Any other URL is returned unchanged.
    """
    if url.startswith('//cdn'):
        return "http:" + url
    return url



def crop(prefix):
    def wrapped(word):
Exemplo n.º 2
0
from sync_media_function import sync_media
from metafuncs import branch, combine, maybe, tryit, getitem, cache

# Composable = Pipe
from monad.composable import Composable
import functools
# Short alias for functools.partial.
F = functools.partial


# Support functions
def is_not_none(obj):
    """Tell whether *obj* is a real value, i.e. not None."""
    return obj is not None


def unique(sequence):
    """Collapse *sequence* to a list of its distinct items.

    The items go through a set, so they must be hashable and the result
    order is unspecified.
    """
    seen = set(sequence)
    return list(seen)
# Document loader.
get_html = html.parse  # :: str -> ElementTree
# Selector for every <img> element.
img_tags = cssselect.CSSSelector('img')  # :: ElementTree -> List[Element]
# Source-attribute lookups, assembled innermost-first.
# NOTE(review): presumably maybe(f, g) falls back to g when f yields
# nothing -- confirm against metafuncs.
_src_from_srcset = maybe(getitem('data-srcset'))  # home page
_src_from_data_src = maybe(getitem('data-src'), _src_from_srcset)  # photo pages
get_src = maybe(getitem('src'), _src_from_data_src)  # normal
# Finds inline CSS ``background-image: url(...)`` declarations.
# NOTE(review): the capture group also includes the closing ")" of url(...);
# confirm how callers consume it before changing the pattern.
BACKGROUND_IMAGE_REGEX = re.compile(r'background-image: url\((.*?\))')


def read_page(url):
    """Fetch *url* and return the response body.

    The response object is closed explicitly once the body has been read,
    instead of being left open until it is garbage collected.
    """
    with closing(urllib2.urlopen(url)) as response:
        return response.read()
# Captures ('/media', <intermediate directories>, <file name containing a dot>).
PATH_SPLIT_REGEX = re.compile(r'^(/media)/(.*)/(.*?\..*?)$')
# Matches any path that contains a '/media/' segment.
MEDIA_PATH_REGEX = re.compile(r'/media/')


def split_path_parts(src):  # :: str -> PathParts
    """Split a media path into its three PATH_SPLIT_REGEX groups.

    Raises AttributeError when *src* does not match the pattern, because
    ``match`` then returns None.
    """
    matched = PATH_SPLIT_REGEX.match(src)
    return matched.groups()


def is_media_path(path):
    """Return True when *path* contains ``/media/`` anywhere."""
    found = MEDIA_PATH_REGEX.search(path)
    return found is not None

# Composite work-horse functions
# Retrieve src-like properties from <img> tags
get_img_srcs = (
    Composable()  # :: Location
    >> cache(get_html)  # :: ElementTree
    >> img_tags  # :: List[Element]