Exemplo n.º 1
0
 def setUp(self):
     """Prepare a rosie engine with the ``net`` package and ``net.any`` compiled.

     Loads librosie from ``librosiedir``, stores a new engine on
     ``self.engine``, imports the ``net`` package and stores the compiled
     ``net.any`` pattern on ``self.net_any``.  Each step is asserted truthy.
     """
     rosie.load(librosiedir, quiet=True)
     engine = rosie.engine()
     self.engine = engine
     self.assertTrue(engine)

     # import_pkg returns (ok, package name, errors); only the flag is checked.
     imported, _pkgname, _import_errs = engine.import_pkg(b'net')
     self.assertTrue(imported)

     # compile returns (pattern, errors); only the pattern is kept.
     compiled, _compile_errs = engine.compile(b'net.any')
     self.net_any = compiled
     self.assertTrue(compiled)
Exemplo n.º 2
0
 def __init__(self):
     """Create a rosie engine and compile the ``csv.comma`` and ``all.things`` patterns.

     When the ``ROSIE_HOME`` environment variable is set (and non-empty),
     librosie is loaded from ``<ROSIE_HOME>/src/librosie/local`` first;
     otherwise no explicit ``rosie.load`` call is made.
     """
     custom_home = os.getenv('ROSIE_HOME')
     if custom_home:
         # The user asked for their own rosie installation.
         local_lib = os.path.join(custom_home, 'src/librosie/local')
         rosie.load(local_lib)

     self.engine = rosie.engine()
     for package in (b'all', b'csv'):
         self.engine.import_pkg(package)

     # compile returns (pattern, errors); the errors are not inspected.
     self.csv_pattern, _ = self.engine.compile(b'csv.comma')
     self.all_pattern, _ = self.engine.compile(b'all.things')
Exemplo n.º 3
0
 def loadRosieEngine(self):
     """Load librosie from ``./lib`` and compile the date patterns used by this object.

     Sets ``self.date_patterns`` to the compiled ``date.any`` pattern and
     ``self.date_check`` to a two-element list of compiled patterns built
     from the RPL expressions below (the second one refers to the
     ``short_year`` binding loaded into the engine first).
     """
     rosie.load('./lib', quiet=True)
     engine = rosie.engine()
     engine.import_package("date")
     self.date_patterns = engine.compile("date.any")

     # Must be loaded before the second expression is compiled, since that
     # expression references short_year.
     engine.load("short_year = [0-9]{ 2 }")

     month_word_rpl = '{ { "APR" / "OCT" / "MARCH" / "SEPT" } [:digit:]* }'
     partial_date_rpl = (
         '{ { date.month { "-" / [/] } short_year} / '
         '{ date.day { "-" / [/] } date.month } }'
     )
     self.date_check = [
         engine.compile(rpl) for rpl in (month_word_rpl, partial_date_rpl)
     ]
Exemplo n.º 4
0
def readFile():
    """Collect, per CSV column, the rosie pattern types seen in ``pattern.csv``.

    The first row of the file is consumed only to determine the number of
    columns.  Every following cell is first tried against the pattern names
    already recorded for its column; if none of them fully matches, the cell
    is matched against ``all.thing`` and the type reported by ``rosieSub``
    is added to the column's set (unless that type is ``all.thing`` itself).
    Progress is printed to stdout as the file is processed.

    Returns:
        A list with one ``set`` of pattern-type names per column of the
        first row, or an empty list when the file has no rows at all.
    """
    librosiedir = './lib'
    rosie.load(librosiedir, quiet=True)
    engine = rosie.engine()
    for pack in ('all', 'ts', 'date', 'time', 'net', 'num', 'id', 'word'):
        engine.import_package(pack)

    # Compile each pattern name at most once instead of once per cell.
    # NOTE(review): assumes compiled pattern objects can be reused across
    # fullmatch calls - confirm against the rosie binding in use.
    compiled = {}

    def pattern_for(name):
        if name not in compiled:
            compiled[name] = engine.compile(name)
        return compiled[name]

    # newline='' is what the csv module asks for so that newlines embedded
    # in quoted fields are handled by the reader itself.
    with open('pattern.csv', 'r', newline='') as csvfile:
        reader = csv.reader(csvfile)
        first_row = next(reader, None)
        if first_row is None:
            # Completely empty file: nothing to analyse.
            return []
        arrayOfPatterns = [set() for _ in first_row]

        for row in reader:
            # zip() stops at the shorter side, so blank/short rows no longer
            # raise IndexError and extra trailing cells are ignored as before.
            for known_types, element in zip(arrayOfPatterns, row):
                match = None
                for name in known_types:
                    match = pattern_for(name).fullmatch(element)
                    if match is not None:
                        break
                if match is not None:
                    # An already-recorded pattern covers this cell.
                    continue

                match = pattern_for('all.thing').fullmatch(element)
                if match is None:
                    print('NO MATCH', element)
                    continue

                best_match = rosieSub(match.rosie_match)
                print(best_match['type'], ',', best_match['data'])
                if best_match['type'] != 'all.thing':
                    known_types.add(best_match['type'])
                    print(arrayOfPatterns)

            print()

    return arrayOfPatterns
Exemplo n.º 5
0
 def date_format(element):
     """Rewrite a date string according to the rosie ``date.*`` type it matches.

     The string is fully matched against ``date.any``; the type of the first
     sub-match selects how the sub-match ``data`` fields are reordered:

     * ``date.us_long``   -> ``"<a> <b>, <c>"`` (a leading ``date.day_name``
       sub-match, when present, is skipped)
     * ``date.eur``       -> fields 1, 0, 2 joined with ``/``
     * ``date.spaced``    -> the input with spaces replaced by ``/``
     * ``date.spaced_en`` or comma-free ``date.rfc2822``
                          -> ``"<1> <2>, <0>"``
     * ``date.rfc2822`` containing a comma
                          -> the text after the last comma, formatted again
     * ``date.us_short``  -> ``"<1> <0>, <2>"``

     Args:
         element: The text to reformat.

     Returns:
         The reformatted string, or ``element`` unchanged when it does not
         match ``date.any`` or its type is not one of the above.
     """
     librosiedir = './lib'
     rosie.load(librosiedir, quiet=True)
     engine = rosie.engine()
     engine.import_package("date")
     date_patterns = engine.compile("date.any")

     matched = date_patterns.fullmatch(element)
     if matched is None:
         # Not a date at all: behave like the unrecognised-format fall-through
         # instead of failing on ``None.rosie_match``.
         return element

     top = matched.rosie_match['subs'][0]
     type_of_format = top['type']

     def part(index):
         # Looked up lazily so formats without nested sub-matches are only
         # touched by the branches that actually need them.
         return top['subs'][index]['data']

     if type_of_format == "date.us_long":
         # Skip the leading day name when the match carries one.
         offset = 1 if top['subs'][0]['type'] == "date.day_name" else 0
         return (part(offset) + " " + part(offset + 1) + ", "
                 + part(offset + 2))
     if type_of_format == "date.eur":
         return part(1) + "/" + part(0) + "/" + part(2)
     if type_of_format == "date.spaced":
         return element.replace(" ", "/")
     if type_of_format == "date.rfc2822" and "," in element:
         # Drop everything up to the last comma and format the remainder.
         index = element.rindex(",")
         return date_format(element[index + 1:].strip())
     if type_of_format in ("date.spaced_en", "date.rfc2822"):
         return part(1) + " " + part(2) + ", " + part(0)
     if type_of_format == "date.us_short":
         return part(1) + " " + part(0) + ", " + part(2)
     return element
Exemplo n.º 6
0
 def setUp(self):
     """Load librosie from ``librosiedir`` and store a new engine on ``self.engine``."""
     rosie.load(librosiedir, quiet=True)
     fresh_engine = rosie.engine()
     self.engine = fresh_engine
Exemplo n.º 7
0
 def test(self):
     """Check that an engine can be created and that librosie reports a path."""
     rosie.load(librosiedir, quiet=True)
     created_engine = rosie.engine()
     assert created_engine
     library_path = rosie.librosie_path()
     assert library_path