Exemplo n.º 1
0
    def parse(self, response):
        """Scrape the current page if it is a detail page, then follow links.

        The page URL is tested against ``CONFIG['detail_link_rule']``; on a
        match a single ``LiepinItem`` is yielded.  Afterwards every ``<a href>``
        on the page is resolved against the page URL and, if it matches a list
        or detail rule, scheduled with this same method as callback.

        :param response: the downloaded page handed over by the crawler.
        :return: a generator of ``LiepinItem`` and ``Request`` objects.
        """
        sel = Selector(response)
        current_url = response.url

        # Detail-page extraction.  ``any`` guarantees exactly one item even when
        # the URL satisfies several detail rules.
        if any(is_match(current_url, rule) for rule in CONFIG['detail_link_rule']):
            # Three fields share this node set, so evaluate the XPath once.
            company_info = sel.xpath('//div[@class="right-post-top"]/div/text()').extract()

            item = LiepinItem()
            # Fields receive the whole extracted list (not a single element) to
            # avoid "index out of range"; the trailing comment on each line
            # records the index of interest.
            item['url'] = current_url
            item['name'] = sel.xpath('//div[@class="title-info "]/h1/text()').extract()  # [0]
            item['company_name'] = sel.xpath('//div[@class="title-info "]/h3/a/text()').extract()  # [0]
            # Each field gets its own copy so later in-place edits stay independent.
            item['company_size'] = list(company_info)  # [4]
            item['company_address'] = list(company_info)  # [-1]
            item['company_type'] = list(company_info)  # [-3]
            item['pay'] = sel.xpath('//p[@class="job-main-title"]/text()').extract()  # [0]
            item['publish_time'] = sel.xpath('//p[@class="basic-infor"]/span[2]/text()').extract()  # [-1]
            item['requires'] = sel.xpath('//div[@class="resume clearfix"]/span/text()').extract()
            yield item

        # Follow every list-page and detail-page link; each URL is requested at
        # most once per page, however many rules it matches.
        for href in sel.xpath('//a/@href').extract():
            url = urljoin(current_url, href)

            is_list_page = any(
                is_match(url, rule) for rule in CONFIG['list_link_rule'])
            if is_list_page or any(
                    is_match(url, rule) for rule in CONFIG['detail_link_rule']):
                yield Request(url, callback=self.parse)
Exemplo n.º 2
0
    def _match_song(self, song):
        """Search the API for ``song`` and return the store id of the first match.

        The query is built from ``song.title`` and ``song.artist``.  A hit
        counts as a match when ``is_match`` accepts both its title and its
        artist.

        :param song: object exposing ``title`` and ``artist`` attributes.
        :return: the ``storeId`` of the first matching hit, or ``None`` when
                 there is no match (including when the response carries no
                 ``song_hits`` at all).
        """
        query = f'{song.title} {song.artist}'
        results = self._api.search(query)

        # ``.get`` yields None when the key is absent; fall back to an empty
        # list so a response without song hits means "no match", not TypeError.
        song_hits = results.get('song_hits') or []
        hits = [song_hit['track'] for song_hit in song_hits]

        for hit in hits:
            title_ok = is_match(hit['title'], song.title)
            if title_ok and is_match(hit['artist'], song.artist):
                return hit['storeId']

        return None
Exemplo n.º 3
0
def search_topic(args):
    """Build search results for the LeetCode topics matching a query.

    Every entry of ``LC_TOPICS`` whose key matches ``args["query"]`` (per
    ``is_match``) produces one ``SearchResult`` pointing at that topic,
    optionally narrowed by ``args["difficulty"]``.  When no topic matches, a
    single result performing a plain LeetCode search for the query is
    returned instead.

    :param args: mapping with the keys ``"query"`` and ``"difficulty"``.
    :return: a non-empty list of ``SearchResult`` objects.
    """
    results = []

    query = args["query"]
    level = args["difficulty"]
    if level:
        difficulty_api = "difficulty=" + level
        difficulty = "[%s]" % level
    else:
        # No difficulty given: both the URL fragment and the label stay empty.
        difficulty_api = ""
        difficulty = ""

    for topic_key, topic in LC_TOPICS.items():
        if not is_match(query, topic_key):
            continue

        topic_name = topic[0]
        title = "%s %s" % (topic_name, difficulty)
        subtitle = "Search LeetCode Topic '%s' %s" % (topic_name, difficulty)
        url = "{0}?topicSlugs={1}&{2}".format(LEETCODE_URL, topic[1],
                                              difficulty_api)
        results.append(SearchResult(title, subtitle, url))

    if results:
        return results

    # Fallback: no topic matched, so offer a free-text search.
    fallback_subtitle = "Search LeetCode for '%s'" % query
    fallback_url = "{0}?search={1}".format(LEETCODE_URL, query)
    results.append(SearchResult(query, fallback_subtitle, fallback_url))

    return results
Exemplo n.º 4
0
    def test_is_match_same(self):
        """Expect is_match() to return True for two equal strings."""
        self.assertEqual(utils.is_match('1', "1"), True)
Exemplo n.º 5
0
    def find(self, prop, query, output_prop) -> [str, list]:
        """Look up entries of ``self.data`` whose ``prop`` matches ``query``.

        An entry qualifies when it contains ``prop`` and ``utils.is_match``
        accepts its value against ``query``.

        :param prop: key to test in each entry.
        :param query: value to match against; a falsy query matches nothing.
        :param output_prop: key whose value is returned for each match.
        :return: ``""`` when the query is falsy or nothing matches, the single
                 ``output_prop`` value when exactly one entry matches, or a
                 list of ``output_prop`` values when several entries match.
        """
        if not query:
            return ""

        matches = [
            entry for entry in self.data
            if prop in entry and utils.is_match(entry[prop], query)
        ]

        if not matches:
            return ""
        if len(matches) == 1:
            # A single match is returned bare rather than wrapped in a list.
            return matches[0][output_prop]
        return [entry[output_prop] for entry in matches]
Exemplo n.º 6
0
def match_song(artist, track):
    """
        Search Google Music for "<track> <artist>" and return the store
        listing id of the first song hit that ``is_match`` accepts on both
        title and artist.

    :param artist:  Name of the artist to search for and match against.
    :param track:   Name of the track to search for and match against.
    :return:    The ``storeId`` of the first accepted song hit, or None when
                no hit is accepted.
    """
    search_results = gmusic.search(track + " " + artist)

    # Unwrap every hit up front; each search hit nests its data under 'track'.
    candidates = [entry['track'] for entry in search_results['song_hits']]

    for candidate in candidates:
        if not is_match(candidate['title'], track):
            continue
        if is_match(candidate['artist'], artist):
            return candidate['storeId']

    return None
Exemplo n.º 7
0
def plain_search_match(search):
    """
       Search Google Music with a plain-text query and return the store
       listing id of the first song hit whose title ``is_match`` accepts
       against that same query.

       :param search:  The plain-text query.
       :return:    The ``storeId`` of the first accepted song hit, or None
                   when no hit is accepted.
       """
    response = gmusic.search(search)

    # Unwrap every hit up front; each search hit nests its data under 'track'.
    tracks = [entry['track'] for entry in response['song_hits']]

    accepted_ids = (
        candidate['storeId']
        for candidate in tracks
        if is_match(candidate['title'], search)
    )
    return next(accepted_ids, None)
Exemplo n.º 8
0
def TarIt(source, destDir, excludes=None):
    """Create a gzip-compressed tar archive of ``source`` inside ``destDir``.

    The archive name is ``<hostname>-<source>-<timestamp>.tar.gz`` where
    slashes and spaces in ``source`` are replaced by underscores.

    :param source: path of the directory (or file) to archive.
    :param destDir: directory in which the archive is created.
    :param excludes: optional exclusion spec handed to ``utils.is_match``;
                     when given, unreadable paths (per ``utils.can_read``)
                     are skipped as well.  Falsy means "archive everything".
    :return: the path of the archive that was created.
    """
    now = datetime.datetime.now()
    # NOTE: '%s' (seconds since the epoch) is a platform-specific strftime
    # extension; it is kept so archive names stay in the established format.
    destFile = os.path.join(
        destDir,
        socket.gethostname()
        + '-' + source.replace('/', '_').replace(' ', '_')
        + '-' + now.strftime('%Y%m%d-%s')
        + '.tar.gz'
    )

    # The context manager closes the archive, which writes the end-of-archive
    # blocks and flushes the gzip stream before the path is returned.
    with tarfile.open(destFile, 'w:gz') as out:
        if excludes:
            # Skip a path if it is on the exclude list or cannot be read.
            # NOTE(review): the ``exclude`` keyword is deprecated in favour of
            # ``filter`` in newer tarfile versions; ``filter`` receives a
            # TarInfo rather than a path, so migrating needs care.
            out.add(
                source,
                recursive=True,
                exclude=lambda x: bool(
                    utils.is_match(x, excludes) or not utils.can_read(x)),
            )
        else:
            out.add(source, recursive=True)

    print('Successfully created archive %s' % destFile)
    return destFile
Exemplo n.º 9
0
# Make sure the destination folder exists before anything is moved into it.
output_folder = os.path.join(path, output)
if output_folder not in folders:
    try:
        os.mkdir(output_folder)
    except FileExistsError as err:
        print(f"Folder already exists {output_folder}... {err}")

# Bookkeeping: documents already handled, and how many moves were performed.
checked_files = []
moved_files_counter = 0

# Visit every document and move it once per not-yet-handled match.
for doc in docs:
    print(f"> Looking for {doc.name}")

    matches = []
    for candidate in docs:
        if is_match(doc, candidate) and candidate not in checked_files:
            matches.append(candidate)

    # Mark the document as handled only after its matches were collected, so
    # the document itself can still appear among them.
    checked_files.append(doc)

    if not matches:
        print("Not matches")
        continue

    for match in matches:
        checked_files.append(match)
        # NOTE(review): the source of the move is always ``doc`` while the
        # target name comes from ``match``; with more than one match the
        # second rename would find ``doc`` already moved -- confirm intent.
        new_doc_path = os.path.join(output_folder, match.name) + doc.filetype
        os.rename(doc.full_path, new_doc_path)
        moved_files_counter += 1
        print(f"\tMoved file {match.name} to {output}")

print(f"Moved {moved_files_counter} of {len(docs)} elements")
Exemplo n.º 10
0
                   help='Cuts common to both signal & background')
group.add_argument('--cuts_bkg', action='store_true', help='Background only cuts')
group.add_argument('--cuts_sig', action='store_true', help='Signal only cuts')
opts = optparser.parse_args()

import sys
import os
# Bail out early when the configuration file is missing.
if not os.path.exists(opts.conf):
    sys.exit('File not found: {}'.format(opts.conf))

from tmvaconfig import ConfigFile
conf = ConfigFile(opts.conf)
n = conf.read()

# Keep only the sessions accepted by is_match.  A real list (rather than the
# lazy object filter() returns on Python 3) keeps the emptiness test, the
# join and the loop below all working on the same data.
sessions = [s for s in conf.sessions() if is_match(s, opts.sessions)]

if not sessions:
    sys.exit('No matching sessions!')
# Single-argument print() behaves identically on Python 2 and Python 3.
print(' '.join(sessions))

# Nothing further was requested: listing the sessions was the whole job.
if not (opts.cvars or opts.nvars or opts.cuts_both or opts.cuts_sig
        or opts.cuts_bkg):
    sys.exit(0)

# helper: join an iterable of strings, one per line
one_per_line = lambda i: '\n'.join(i)

for session in sessions:
    config = conf.get_session_config(session)
    print('\n{}:'.format(config._name))
Exemplo n.º 11
0
    def test_is_match_bool(self):
        """Expect is_match() to return True for the string "true" and the bool True."""
        self.assertEqual(utils.is_match("true", True), True)
Exemplo n.º 12
0
    def test_is_match_mix(self):
        """Expect is_match() to return True for the integer 1 and the string "1"."""
        self.assertEqual(utils.is_match(1, "1"), True)
Exemplo n.º 13
0
                   help='Background only cuts')
group.add_argument('--cuts_sig', action='store_true', help='Signal only cuts')
opts = optparser.parse_args()

import sys
import os
# Bail out early when the configuration file is missing.
if not os.path.exists(opts.conf):
    sys.exit('File not found: {}'.format(opts.conf))

from tmvaconfig import ConfigFile

conf = ConfigFile(opts.conf)
n = conf.read()

# Keep only the sessions accepted by is_match.  A real list (rather than the
# lazy object filter() returns on Python 3) keeps the emptiness test, the
# join and the loop below all working on the same data.
sessions = [s for s in conf.sessions() if is_match(s, opts.sessions)]

if not sessions:
    sys.exit('No matching sessions!')
# Single-argument print() behaves identically on Python 2 and Python 3.
print(' '.join(sessions))

# Nothing further was requested: listing the sessions was the whole job.
if not (opts.cvars or opts.nvars or opts.cuts_both or opts.cuts_sig
        or opts.cuts_bkg):
    sys.exit(0)

# helper: join an iterable of strings, one per line
one_per_line = lambda i: '\n'.join(i)

for session in sessions:
    config = conf.get_session_config(session)
    print('\n{}:'.format(config._name))
Exemplo n.º 14
0
# Command line: --file is the TypeScript source to read, --output the file
# that receives the alphabetically ordered types.
parser = argparse.ArgumentParser(description="Script that orders alphabetically a typescript file with types on it")
# Required: without it there is nothing to open.
parser.add_argument("--file", type=str, required=True, help="File containing the typescript's types")
parser.add_argument("--output", type=str, default="orderTypes.ts", help="File containing the ordered types")

args = parser.parse_args()
file_name = args.file
output_file_name = args.output

# Read the whole input first; the context manager closes the file afterwards.
with open(file_name, "r") as original_file:
    lines = original_file.readlines()

list_types = []

for line in lines:
    if line == "\n":
        # Blank lines carry no information.
        continue
    if is_match(line):
        # A line accepted by is_match starts a new model.
        list_types.append(TypescriptModel(line))
    else:
        # Any other line is attached to the most recently started model.
        list_types[-1].push_property(line)

list_types.sort(key=lambda x: x.name)  # order the models by their name

# Write each model's name and properties, then one separator line.  "\n" is
# used because text mode already translates it to the platform line ending;
# writing os.linesep here would double the carriage return on Windows.
with open(output_file_name, "w") as output_file:
    for model in list_types:
        output_file.write(model.name)
        output_file.writelines(model.properties)
        output_file.write("\n")

print(f"Done! {output_file_name} created.")