示例#1
0
 def test_get_file(self):
     """get_file() loads the test corpus and a corpus from a sub-directory."""
     import pycorpora
     expected_tests = ["one", "two", "three"]

     corpus = pycorpora.get_file('pycorpora_test', 'test')
     self.assertEqual(type(corpus), dict)
     self.assertEqual(corpus['tests'], expected_tests)

     nested = pycorpora.get_file('pycorpora_test/subdir', 'another_test')
     self.assertIsNotNone(nested)
     # NOTE(review): the two checks below look at the first corpus again,
     # not at the nested one -- possibly a copy-paste slip; kept unchanged.
     self.assertEqual(type(corpus), dict)
     self.assertEqual(corpus['tests'], expected_tests)
示例#2
0
 def test_get_file(self):
     """get_file() loads the test corpus and a corpus from a sub-directory."""
     import pycorpora
     expected_tests = ["one", "two", "three"]

     corpus = pycorpora.get_file('pycorpora_test', 'test')
     self.assertEqual(type(corpus), dict)
     self.assertEqual(corpus['tests'], expected_tests)

     nested = pycorpora.get_file('pycorpora_test/subdir', 'another_test')
     self.assertIsNotNone(nested)
     # NOTE(review): the two checks below look at the first corpus again,
     # not at the nested one -- possibly a copy-paste slip; kept unchanged.
     self.assertEqual(type(corpus), dict)
     self.assertEqual(corpus['tests'], expected_tests)
示例#3
0
 def test_cache(self):
     """A corpus shows up in pycorpora.cache once it has been loaded."""
     import pycorpora
     test_key = 'data/pycorpora_test/test.json'
     nested_key = 'data/pycorpora_test/subdir/another_test.json'

     # Loading through get_file() populates the cache entry.
     self.assertNotIn(test_key, pycorpora.cache)
     pycorpora.get_file('pycorpora_test', 'test')
     self.assertIn(test_key, pycorpora.cache)

     # Loading through attribute access populates the cache as well.
     nested = pycorpora.pycorpora_test.subdir.another_test
     self.assertIsNotNone(nested)
     self.assertIn(nested_key, pycorpora.cache)
示例#4
0
 def test_cache(self):
     """A corpus shows up in pycorpora.cache once it has been loaded."""
     import pycorpora
     test_key = 'data/pycorpora_test/test.json'
     nested_key = 'data/pycorpora_test/subdir/another_test.json'

     # Loading through get_file() populates the cache entry.
     self.assertNotIn(test_key, pycorpora.cache)
     pycorpora.get_file('pycorpora_test', 'test')
     self.assertIn(test_key, pycorpora.cache)

     # Loading through attribute access populates the cache as well.
     nested = pycorpora.pycorpora_test.subdir.another_test
     self.assertIsNotNone(nested)
     self.assertIn(nested_key, pycorpora.cache)
示例#5
0
def get_random_corpus():
    """Pick a random corpus file and return ``(title, corpus)``.

    A random category and a random file inside it are loaded through ``pc``.
    The file's ``description`` entry is turned into a Markdown-style heading
    and stored in the module-level ``title``; the value of the last other key
    is returned as a list.

    Returns:
        tuple: ``(title, corpus)``.  ``corpus`` is an empty list when the file
        holds nothing besides a description.

    Raises:
        NameError: If the file has no ``description`` key and the global
            ``title`` has never been set.
    """
    global title
    category = choice(pc.get_categories())
    # Named ``data`` rather than ``dict`` so the builtin is not shadowed.
    data = pc.get_file(category, choice(pc.get_files(category)))

    # Default so a description-only file does not leave ``corpus`` unbound.
    corpus = []
    for key, value in data.items():
        if key == 'description':
            if 'List' in value or 'list' in value:
                heading = value.replace('list', 'Marriage')
                heading = heading.replace('List', 'Marriage')
                title = '##' + heading
            else:
                title = '##The Marriage of ' + value
        else:
            # When several non-description keys exist, the last one wins.
            corpus = list(value)
    return title, corpus
示例#6
0
def main():
    """Interactively print generated names until the user declines.

    Each prompt that is not answered with n/no/q/quit prints one name for a
    randomly chosen sex.  Ctrl-C ends the loop silently.
    """
    # TODO: Replace with better, sex-separated data
    first_names = pycorpora.get_file('humans', 'firstNames')['firstNames']
    namor = SoundNamor(first_names)
    stop_answers = ('n', 'no', 'q', 'quit')

    try:
        while True:
            answer = input('Generate name?')
            if answer.lower() in stop_answers:
                return

            sex = random.choice(['F', 'M'])
            generated = ''.join(namor.generate_name(sex))
            print(sex, ':', generated)
    except KeyboardInterrupt:
        # Leaving with Ctrl-C is a normal way to quit.
        pass
示例#7
0
def get_tarot_cards(astrological_sign: str) -> List[str]:
    """Return the tarot entry associated with ``astrological_sign``.

    On the first call the module-level ``ASTRO_TAROT`` mapping is filled: for
    every tarot card, each value of ``ZODIAC`` is scored by the summed
    ``word_similarity`` between the card's keywords and the sign's traits,
    divided by the number of keywords, and the card is stored under the sign
    with the *lowest* score.  Later cards overwrite earlier ones that landed
    on the same sign.

    NOTE(review): the stored value is the whole tarot entry (it is indexed
    with ``['name']`` below), so the ``List[str]`` return annotation looks
    inaccurate -- confirm against callers.

    Args:
        astrological_sign: Key to look up; must match a ``ZODIAC`` value
            exactly as stored.

    Raises:
        KeyError: If no card ended up assigned to ``astrological_sign``.
    """
    if not ASTRO_TAROT:
        tarots = pycorpora.get_file(
            'divination', 'tarot_interpretations')['tarot_interpretations']
        for tarot in tarots:
            # The keywords depend only on the card: look them up once per
            # card instead of once per (card, sign) pair.
            keywords = get_tarot_keywords(tarot['name'])
            if not keywords:
                # A card without keywords cannot be scored.  Skipping it
                # avoids a ZeroDivisionError that would leave ASTRO_TAROT
                # half-filled (and never rebuilt on later calls).
                continue

            signs: Dict[str, float] = {}
            for sign in ZODIAC.values():
                traits = get_astrological_traits(sign)

                total: float = 0
                for keyword in keywords:
                    total += sum(
                        word_similarity(keyword, trait) for trait in traits)

                signs[sign] = total / len(keywords)

            ASTRO_TAROT[min(signs.items(), key=lambda x: x[1])[0]] = tarot

    return ASTRO_TAROT[astrological_sign]
示例#8
0
     pycorpora.materials.get_file("natural-materials")["natural materials"]
 ],
 "drunkeness":
 pycorpora.words.states_of_drunkenness.get("states_of_drunkenness"),
 "personal_noun":
 pycorpora.words.personal_nouns.get("personalNouns"),
 "person_description":
 pycorpora.humans.descriptions.get("descriptions"),
 "occupation":
 pycorpora.humans.occupations.get("occupations"),
 "mood":
 pycorpora.humans.moods.get("moods"),
 "diagnosis":
 [c.get("desc") for c in pycorpora.medicine.diagnoses.get("codes")],
 "greek_titans":
 pycorpora.get_file("mythology/greek_titans")["greek_titans"],
 "vegetable":
 pycorpora.foods.vegetables.get("vegetables"),
 "fruit":
 pycorpora.foods.fruits.get("fruits"),
 "wine_taste":
 pycorpora.foods.wine_descriptions.get("wine_descriptions"),
 "condiment":
 [c.lower() for c in pycorpora.foods.condiments.get("condiments")],
 "knot":
 pycorpora.technology.knots.get("knots"),
 "unusual_thing": [
     "<+feature gemstone>#gemstone#</+>",
     "<+feature fauna>#common_animal#</+>",
     "<+feature fruit flora>#fruit#</+>",
     "<+feature vegetable flora>#vegetable#</+>"
示例#9
0
def get_astrological_traits(astrological_sign: str) -> List[str]:
    """Return the keywords listed for a western zodiac sign.

    The sign name is capitalized before it is looked up in the
    ``divination/zodiac`` corpus.

    Raises:
        KeyError: If the capitalized sign is not in the corpus.
    """
    zodiac = pycorpora.get_file('divination', 'zodiac')
    sign_entry = zodiac['western_zodiac'][astrological_sign.capitalize()]
    return sign_entry['keywords']
示例#10
0
                avg: float = 0
                for keyword in keywords:
                    avg += sum(
                        map(lambda x: word_similarity(keyword, x), traits))
                avg /= len(keywords)

                signs[sign] = avg

            ASTRO_TAROT[min(signs.items(), key=lambda x: x[1])[0]] = tarot

    return ASTRO_TAROT[astrological_sign]


# Lookup table built once at import time: tarot card name -> its keywords.
tarot_keywords = {
    card['name']: card['keywords']
    for card in pycorpora.get_file(
        'divination', 'tarot_interpretations')['tarot_interpretations']
}


@lru_cache()
def get_tarot_keywords(tarot_card: str) -> List[str]:
    """Return the keywords stored in ``tarot_keywords`` for ``tarot_card``.

    Raises:
        KeyError: If ``tarot_card`` is not a key of ``tarot_keywords``.
    """
    return tarot_keywords[tarot_card]


# Fetch the NLTK "names" corpus before names.words() reads from it below.
nltk.download('names')

# Female first names: the NLTK word list merged with Provider's list,
# de-duplicated and immutable.
FIRST_NAMES_FEMALE = frozenset(names.words('female.txt')).union(
    Provider.first_names_female)

# Male first names, built the same way.
FIRST_NAMES_MALE = frozenset(names.words('male.txt')).union(
    Provider.first_names_male)
示例#11
0
    def __init__(self, input_texts: str) -> None:
        """Load NLP resources and rewrite patterns, then collect synonyms.

        Downloads the NLTK ``punkt`` data, loads the spaCy
        ``en_core_web_lg`` model, sets up an LSA summarizer, reads any
        previously stored synonyms from ``src/syns.yaml`` and the pattern
        tables from ``src/spreadr_shreddr/data.yaml`` (both paths are
        relative to the current working directory), and finally walks every
        token of every sentence of ``input_texts``, looking up synonyms for
        tokens not seen before and appending each new entry to
        ``src/syns.yaml``.

        Args:
            input_texts: Iterated with ``for text in input_texts``; every
                item is cleaned and sentence-tokenized.
                NOTE(review): annotated ``str``, but the loop treats each
                item as a whole text, so an iterable of strings is
                presumably expected -- confirm against callers.
        """
        nltk.download('punkt')

        self.nlp = spacy.load('en_core_web_lg')

        self.summarizer = LsaSummarizer(Stemmer('english'))
        self.summarizer.stop_words = get_stop_words('english')

        self.cleaner = CleaningProcessor()

        # Synonym cache keyed by "<lowercased token><separator><tag>";
        # a value of None records "looked up, nothing usable found".
        self.synonyms: Dict[str, Optional[List[str]]] = {}
        if path.isfile('src/syns.yaml'):
            with open('src/syns.yaml', 'r') as f:
                self.synonyms = yaml.safe_load(f)

        # safe_load() returns None for an empty file.
        if self.synonyms is None:
            self.synonyms = {}

        self.patterns: Dict[str, str] = OrderedDict()
        # Not filled in this method.
        self.rev_patterns: Dict[str, str] = OrderedDict()

        with open('src/spreadr_shreddr/data.yaml', 'r') as f:
            data = yaml.safe_load(f)

        # Later updates overwrite earlier entries with the same key.
        self.patterns.update(data['shorten'])
        self.patterns.update(data['expand'])

        data['filler'].extend(
            pycorpora.get_file('humans', 'prefixes')['prefixes'])

        # Filler words (and name prefixes) are replaced by the empty string.
        self.patterns.update({k: '' for k in data['filler']})

        # "first second" -> compound word, unless a pattern already exists.
        for obj in pycorpora.get_file('words', 'compounds')['compounds']:
            key = '{} {}'.format(obj['firstWord'], obj['secondWord'])
            if key not in self.patterns:
                self.patterns[key] = obj['compoundWord']

        # Add a capitalized variant of every pattern collected so far.
        self.patterns.update(
            {k.capitalize(): v.capitalize()
             for k, v in self.patterns.items()})

        self.brits = data['brit_am']
        # Inverse mapping of ``self.brits``.
        self.murcans = {v: k for k, v in self.brits.items()}

        changed = False
        api = Datamuse()
        for text in input_texts:
            # ``>>=`` hands the text to the cleaner; its exact effect is
            # defined by CleaningProcessor, which is not visible here.
            text >>= self.cleaner

            for sent in sent_tokenize(text):
                for index, word in enumerate(self.nlp(sent)):
                    orth = word.orth_.lower()
                    # ``self.separator`` is not assigned in this method --
                    # presumably a class attribute.
                    key = self.separator.join((orth, word.tag_))

                    if key not in self.synonyms:
                        changed = True
                        syns: List[str] = []

                        # Only query Datamuse for supported parts of speech
                        # and words with at most one WordNet synset.
                        if (word.pos_ in UNIVERSAL_TO_DATAMUSE
                                and len(wn.synsets(orth)) <= 1):
                            res = api.words(ml=orth)

                            if len(res) > 0:
                                # NOTE(review): ``sent`` comes straight from
                                # sent_tokenize(); if it is a str, this join
                                # puts a space between every character --
                                # confirm that is what _get_synonyms expects.
                                syns = self._get_synonyms(
                                    ' '.join(sent), (index, word), res)

                        # Keep the result only when there is more than one
                        # synonym; otherwise remember the miss as None.
                        if len(syns) > 1:
                            self.synonyms[key] = syns
                        else:
                            self.synonyms[key] = None

                    # Persist each new entry right away by appending it to
                    # the YAML file, then reset the flag.
                    if changed:
                        changed = False
                        with open('src/syns.yaml', 'a') as f:
                            f.write(yaml.dump({key: self.synonyms[key]}))
def assemble():
    """Generate the whole book: chapters, front matter, closing pages, credits.

    Resets the module-level ``credits`` dict, then:

    1. builds ``credits['chapter_count']`` chapters, each from a randomly
       picked animal for which stack() returns something other than 0,
       rendered by prepare_chapter();
    2. renders the front matter (pages 1-7) through tpl();
    3. renders the two closing pages and calls make_credits().

    Raises:
        ValueError: From ``random.randrange`` if the animal list runs out
            before every chapter has found a usable animal.
    """
    # actually make the thing
    global credits
    credits = {
        'nouns':[],
        'houses':[],
        'characters':[],
        'concepts':[],
        'raw_txt':'',
        'chapter_count':7,
        'chapter_titles':[],
        'character_icons':[]
    }

    page_counter = 6

    # First, make the chapters.  Work on a copy of the animal list: entries
    # are removed as they are tried, and the list pycorpora hands back may
    # be the object it keeps in its cache.
    animals = list(pycorpora.get_file("animals","common")['animals'])

    for chapter_counter in range(credits['chapter_count']):
        # Pick animals at random (never the same one twice) until stack()
        # produces a valid set, i.e. returns something other than 0.
        result = 0
        while result == 0:
            candidate = animals.pop(random.randrange(0, len(animals)))
            result = stack(candidate, 55)

        # prepare_chapter() returns the last page number it wrote; the next
        # chapter starts two pages after that, so 10 + 2 = 12, e.g.
        page_counter = prepare_chapter(
            result, int(page_counter) + 2, int(chapter_counter) + 1)

    # prepare the frontmatter
    # 1r = frontcover
    # 2l = blank
    # 3r = title page
    # 4l = dedication
    # 5r = toc
    # 6l = blank
    # 7r = introduction
    # 8l = blank, chapter 1 page 0

    booktitle = book_title()
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print("This book is called " + booktitle)

    tpl("templates/frontcover.html","pages/00001r.html",[("book_title",booktitle)])

    # blank page
    tpl("templates/template.html","pages/00002l.html",[("","")])

    # title page
    tpl("templates/titlepage.html","pages/00003r.html",[("book_title",booktitle)])

    # dedication page: one icon per kid, with inline style attributes removed
    kid_icons = '<div id="kids">'
    for kid in ["Cecily","Daniel","Serena","Wendy"]:
        kid_icons += re.sub(r"style=\".+?\"","",get_icon(kid,"333333"))
    kid_icons += "</div>"
    tpl("templates/dedication.html","pages/00004l.html",[("kids",kid_icons)])

    # toc -- this one has to be a bit more manual
    toc_entries = []
    for ch, (chapter_title, chapter_page) in enumerate(credits['chapter_titles']):
        toc_entries.append(
            '<div class="toc-entry"><span>Chapter ' + str(ch + 1)
            + '</span><span>' + chapter_title
            + '</span><span>' + str(chapter_page - 8) + '</span></div>')
    toc_entries.append(
        '<div class="toc-entry"><span>&nbsp;</span><span>Credits</span><span>'
        + str(int(page_counter) - 3) + '</span></div>')
    tpl("templates/toc.html","pages/00005r.html",[("toc","".join(toc_entries))])

    # blank page
    tpl("templates/template.html","pages/00006l.html",[("","")])

    # introduction
    housecount = quantify("house",amount=len(credits['chapter_titles']))
    peoplecount = quantify("person",amount=len(credits['characters']))
    character_names = ", ".join(credits['characters'])
    # Word count: number of pieces after splitting the raw text on spaces.
    word_count = str(len(re.split(r" +", credits['raw_txt'])))

    tpl("templates/preface.html","pages/00007r.html",[("house_count",housecount),("people_count",peoplecount),("character_names", character_names),("word_count",word_count)])

    # make a The End
    last_page = int(page_counter)
    tpl("templates/last_page.html","pages/" + str(last_page + 2).zfill(5) + ".html",[("character_names", character_names)])

    tpl("templates/the_end.html","pages/" + str(last_page + 3).zfill(5) + ".html",[("character_icons"," ".join(credits['character_icons']))])

    # make the credits
    make_credits(credits,str(last_page + 2))

    print("Generation complete")
def prepare_chapter(content,startpage,chapter_number):
    """Render all pages of one chapter and return its last page number.

    Picks a character name, icon and color palette, writes the chapter's
    blank page and title page, then writes one left/right page pair per
    entry of ``content``.  The chapter text accumulates from page to page:
    each page prepends its own object to the text of the previous pages.

    Also appends to the module-level ``credits`` dict: the character name,
    the character icon, the ``(chapter title, startpage + 3)`` pair and the
    tag-stripped text of every page.

    Args:
        content: Result of stack().  ``content[0]`` is used as the chapter's
            object and the other entries are indexed with ``[0]`` to get a
            phrase.  NOTE(review): the exact shape is defined by stack(),
            which is not visible here -- confirm.
        startpage: Integer page number of the blank page opening the chapter.
        chapter_number: Chapter number shown on the title page and used in
            the house image file name.

    Returns:
        str: The page number of the last left-hand page written.
    """
    global credits

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(content)
    # put the list from stack() in reverse
    ordered = list(content[::-1])

    # this will hold the chapter text as it accumulates
    chapter = ''

    # this chooses a color palette for this chapter
    colors = pal()

    # pick a name for our main character
    # (the file name is spelled "firstNames", matching the key read from it;
    # the lowercase spelling only resolves on case-insensitive filesystems)
    jack = random.choice(pycorpora.get_file("humans","firstNames")['firstNames'])
    credits['characters'].append(jack)

    # get an image for this character
    jack_icon_first = get_icon(jack,random.choice(colors),random.choice(["man","girl","boy","woman","baby","child","grandmother","dude"]))

    # same icon with its inline style attributes removed
    jack_icon = re.sub(r"style=\".+?\"","",jack_icon_first)

    credits["character_icons"].append(jack_icon_first)

    # chapter object
    chapter_object = content[0]

    # make a chapter title
    chapter_title = str( jack +
                        " and the " +
                        str(chapter_object) +
                        " that " +
                        a(specify(pastify(content[1][0]))) )

    credits['chapter_titles'].append((chapter_title, startpage + 3))

    # loop through each concept in the stack
    for page in range(len(ordered)):

        print("Working on page " + str(page))

        # the page number: left-hand pages advance two at a time
        page_number = str( startpage + ((page * 2) + 2) )

        # a color for this concept
        color = random.choice(colors)

        # The last entry of ``ordered`` is the chapter object itself
        # (content[0]), which is used as-is; every other entry contributes
        # its first element, run through specify() and pastify().
        if (page == len(ordered) - 1):
            the_concept = chapter_object
        else:
            the_concept = pastify(specify(ordered[page][0]))

        # isolate the current object
        the_thing = the_concept.split(" the ")[-1]

        # find an icon. This returns an <img> tag for the icon
        the_icon_first = get_icon(the_thing,color)
        the_icon = re.sub(r"style=\".+?\"","",the_icon_first)

        if (page == 0): # at the beginning of the chapter

            # make the blank page
            tpl("templates/template.html", "pages/" + str(startpage).zfill(5) + "l.html",[])

            # make the chapter title page
            print("Making the title page ")
            tpl("templates/chaptertitlepage.html",
                "pages/" + str(startpage + 1).zfill(5) + "r.html",
                [("chapter_number",str(chapter_number)),("chapter_title",chapter_title),("character_name",jack),("character_icon",str(jack_icon))])

            next_concept = ""
            # start the chapter string
            chapter = "that " + jack + " " + random.choice(["built.","built.","built.","built.","built.","built.","built.","built of brick.","divided into several rooms.","found in a neighborhood.","located on an estate."])
        else:
            # Link this page's object to the previous page's: the part of
            # the previous phrase before " the " becomes "that <...> the".
            next_concept = "<span> that " + pastify(specify(ordered[page - 1][0])).split(" the ")[0] + " the </span>"

        page_content = [
            ("pn",str(int(page_number) - 6)),
            ("chapter_content",chapter),
            ("icon",the_icon),
            ("first_line","This is the <span class='page-object' style='color:#" + color + "'>" + the_thing + "</span>" + next_concept)
        ]

        tpl("templates/template.html","pages/" + page_number.zfill(5) + "l.html",page_content)

        if (page == 0):
            # the first right-hand page gets the house picture as background
            if (not os.path.isfile("images/house-" + str(chapter_number) + "-watercolor.jpg")):
                get_flickr_image("house",chapter_number)

            tpl("templates/rtemplate.html","pages/" + str(int(page_number) + 1).zfill(5) + "r.html",[("imagery",jack_icon_first + "<!-- imagery -->"),("pgbackground","<div class='pg' style='background-image: url(../images/house-" + str(chapter_number) + "-watercolor.jpg)'>")])
        else:
            # later right-hand pages use the previous right-hand page as
            # their template and add this page's icon to it
            tpl ("pages/" + str(int(page_number) - 1).zfill(5) + "r.html",  "pages/" + str(int(page_number) + 1).zfill(5) + "r.html",[("imagery",the_icon_first + "<!-- imagery -->")])

        # prepend this page's object and link to the chapter text before it loops again
        chapter = "<span class='page-object' style='color:#" + color + "'>" + the_thing + "</span>" + next_concept + chapter
        # strip the markup to keep a plain-text copy for the word count
        raw_txt = re.sub('<[^<]+?>', '', chapter)
        credits['raw_txt'] += " This is the " + raw_txt

    return page_number
示例#14
0
            for l in syn.lemmas():
                ret.append(l.name().replace("_", " "))
        ret.append(w)
        synonyms[w] = list(set(ret))
    if len(synonyms[w]) == 0: return [w]
    return synonyms[w]


def synexp(ws):
    """Return the de-duplicated synonyms of every word in ``ws`` as a list.

    Each word is expanded with syns(); the order of the result is whatever
    the intermediate set yields.
    """
    return list({synonym for word in ws for synonym in syns(word)})


# Unpleasant substances and creatures: the abridged body-fluids corpus plus
# a hand-written list, expanded with synonyms.
gross = synexp(
    pycorpora.get_file(
        "materials", "abridged-body-fluids")["abridged body fluids"]
    + [
        "matted hair", "rotting meat", "maggots", "feces", "used chewing gum",
        "teeth", "fingers", "spiders", "snakes", "flies", "wasps", "bees",
        "yellowjackets", "worms"
    ]
)

# Pool of materials.  The first group (metals, natural materials, packaging,
# plastic brands) is repeated five times in the list; flowers, body parts,
# decorative stones and the gross items appear once.
materials = (
    (
        pycorpora.get_file("materials", "layperson-metals")["layperson metals"]
        + pycorpora.get_file(
            "materials", "natural-materials")["natural materials"]
        + pycorpora.materials["packaging"]["packaging"]
        + pycorpora.get_file("materials", "plastic-brands")["plastic brands"]
    ) * 5
    + pycorpora.plants.flowers["flowers"]
    + pycorpora.humans.bodyParts["bodyParts"]
    + pycorpora.get_file(
        "materials", "decorative-stones")["decorative stones"]
    + gross
)
cities = (list(