Example #1
def index(url):
    '''
       View function to show the data in a human readable fashion
       @param string url:
          The url given in the browser
       @return string:
           Graph marked up in HTML
    '''

    dobj = {}
    if url is None:
        flash('You need to pass in a url')
    else: 
        path_pts = urlpath.split_path(url)
        ppath =''
        
        if urlpath.urlpath(path_pts[0]) is True:
            if not ppath:
                #ppath = urlpath.get_url(url)
                p = GraphParser()
                dobj = p.data_parse(url)
        else:
            flash('The url does not appear to be valid. Please check')  
    
    htmlstr = '<div id="parsedata">'
    for k in dobj:
        htmlstr += "<p>%s</p>" % k
        #for nk, nv in v.iteritems():
        #    htmlstr += '<p>----------</p><p>%s : %s</p>'% (nk, nv)
            #htmlstr += '<p>----------</p><p>'+str(nk)+' : '+str(nv)+'</p>'

    htmlstr += '</div>'
    return htmlstr
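A minimal sketch of how a view like index() might be wired up. Flask is an assumption here (suggested by the call to flash()); the route name and the 'url' query parameter are placeholders, not part of the original example.

from flask import Flask, request

app = Flask(__name__)
app.secret_key = 'change-me'  # flash() needs a session secret; placeholder value

@app.route('/parse')
def parse_view():
    # e.g. /parse?url=http://example.com/page
    return index(request.args.get('url'))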
Example #2
    def __init__(self, phrase='', count = None):

        self.DG = self.create_graph()
        self.pp = GraphParser('settings/urdu-meter.yaml',blank=' ')
        self.lp = GraphParser('settings/long.yaml',blank='b')
        self.sp = GraphParser('settings/short.yaml',blank='b')
        self.components = []
        self.count = count

        if phrase!='':
            self.init_from_phrase(phrase)
Example #3
    def __init__(self, phrase='', count = None):
        ''' Count refers to matra count; can be an int or list of ints'''
        self.DG = self.create_graph()
        self.pp = GraphParser('settings/urdu-meter.yaml',blank=' ') # token parser
        self.lp = GraphParser('settings/long.yaml',blank='b')  # long parser
        self.sp = GraphParser('settings/short.yaml',blank='b') # short parser
        self.components = []

        if count:
            assert type(count) in [list,int]
            if type(count)==int:
                count = [count]
            for x in count: assert x>0

        self.count = count

        if phrase!='':
            self.init_from_phrase(phrase)
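The count handling above accepts either an int or a list of ints and wraps a bare int in a list. A small hypothetical sketch (SomeMeterGraph stands in for whichever class this __init__ belongs to, which the excerpt does not show):

g1 = SomeMeterGraph('=-=-=-=', count=11)    # int is wrapped, so self.count == [11]
g2 = SomeMeterGraph('=-=-=-=', count=[11])  # a list is kept as-is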
Example #4
class CustomMeterGraph(MeterGraph):

    def get_matra_count(self, x):
        y = 0
        for c in x:
            if c=='=': y+=2
            if c=='-': y+=1
        return y

    def __init__(self, phrase='', count = None):
        ''' Count refers to matra count; can be an int or list of ints'''
        self.DG = self.create_graph()
        self.pp = GraphParser('settings/urdu-meter.yaml',blank=' ') # token parser
        self.lp = GraphParser('settings/long.yaml',blank='b')  # long parser
        self.sp = GraphParser('settings/short.yaml',blank='b') # short parser
        self.components = []

        if count:
            assert type(count) in [list,int]
            if type(count)==int:
                count = [count]
            for x in count: assert x>0

        self.count = count

        if phrase!='':
            self.init_from_phrase(phrase)

    def transcription_of(self,inp, join_ch=''):
        ''' Provides transcription (into metrical units) of input
        '''
        p = self.pp.parse(inp)
        if p.matches:
            return join_ch.join([x.production for x in p.matches])
     
    def branch(self,syllables,ending=False, number_of_repeats=0, optional=False, weight=0, skip_if_matched=False):
        return Branch(syllables,ending,number_of_repeats,optional,weight,skip_if_matched)

    def create_graph(self):
        DG=nx.DiGraph()
        DG.add_node(0,type='0') # this is the start
        return DG


    def add_graph_edge(self, curr_node_ids, new_node_id,optional=NOT_OPTIONAL,weight=0):
        DG = self.DG
        for curr_node_id in curr_node_ids:
            DG.add_edge(curr_node_id,new_node_id)
            curr_type = DG.node[curr_node_id]['type']
            new_type  = DG.node[new_node_id]['type']
            edge = DG[curr_node_id][new_node_id]
            if (curr_type,new_type) in settings.bad_types:
                edge['bad_combos'] = settings.bad_types[(curr_type, new_type)]
            if optional != NOT_OPTIONAL:
                edge['optional'] = optional  #allow preference for ignoring
            edge['weight'] = weight

    def end_nodes_of_component(self, component):
        component_type = type(component).__name__
        assert component_type in ['Fork','MeterSegment']
        if component_type=='Fork':
            end_nodes = [segment.end_node for segment in component.segments]
        else:
            end_nodes = [component.end_node]
        return end_nodes

    def add_fork(self,branches,optional=NOT_OPTIONAL, number_of_repeats=0):

        fork_number_of_repeats = number_of_repeats
        logging.debug('inside add_fork, number of repeats %s %s', number_of_repeats, branches)

        assert type(branches)==list

        for b in branches: assert type(b).__name__=='Branch'

        DG = self.DG
        if len(self.components)==0:
            prev_nodes=[0] # start from the beginning

            prev_optional = NOT_OPTIONAL
        else: # there are previous components
            prev_component = self.components[-1]
            prev_nodes = self.end_nodes_of_component(prev_component)
            prev_optional = prev_component.optional

        fork = Fork(segments=[],optional=optional)
        branch_starts = []
        branch_ends = []
        for branch in branches:

            curr_nodes = prev_nodes #return to original nodes

            start_node = len(DG.nodes())
            branch_starts.append(start_node)
            last_node = start_node - 1

            syllables = branch.syllables
            ending = branch.ending
            number_of_repeats = branch.number_of_repeats

            for i,s in enumerate(syllables):

                new_node = len(DG.nodes())

                DG.add_node(new_node, type=s)

                if i==len(syllables)-1 and ending:
                    DG.node[new_node]['ending'] = True

                self.add_graph_edge(curr_nodes,new_node)

                curr_nodes = [new_node]

                if i==0 and prev_optional!=NOT_OPTIONAL: #TODO:allow for multiple optionals
                   last_optional = len(self.components)-2
                   optionals = []
                   l = last_optional
                   optionals=[l]
#                   while l>=0 and self.components[l].optional!=NOT_OPTIONAL:
#                       optionals.append(l)
#                       l=l-1
                   for o in optionals: #this might explode
                      #TODO: THIS MAY BE BUGGY.
                      assert o > -1
                      end_nodes = self.end_nodes_of_component(self.components[o])
                      self.add_graph_edge(end_nodes, start_node,optional=True)#self.components[o].optional)
                if i+1==len(syllables):
                    branch_ends.append(new_node)
            if number_of_repeats>0:
                pass
       #DO NOT ALLOW REPEATS ON BRANCHES
            #    if i+1 == (len(syllables)) and number_of_repeats > 0:
    #                print i, curr_node, start_node
      #              self.add_graph_edge(curr_nodes, start_node)
            m = MeterSegment(syllables=syllables, ending=ending,number_of_repeats=number_of_repeats,optional=optional,start_node=start_node, end_node=start_node+len(syllables)-1)
            fork.segments.append(m)
        if fork_number_of_repeats>0:
            logging.debug('repeats inside fork')
            for j in branch_starts:
                    self.add_graph_edge(branch_ends,j)

        self.components.append(fork)

    def add_segment(self,syllables, ending=False, number_of_repeats=0,optional=NOT_OPTIONAL):
        # get ends of previous nodes
        DG = self.DG
        start_node = len(DG.nodes()) # where this segment will start
        last_node = start_node - 1 # this is the last node in the graph

        if len(self.components)==0:
            prev_nodes=[0] # start from the beginning
            prev_optional = NOT_OPTIONAL
        else: # there are previous components
            prev_component = self.components[-1]
            prev_nodes = self.end_nodes_of_component(prev_component)
            prev_optional = prev_component.optional

        curr_nodes = prev_nodes

        for i,s in enumerate(syllables):

            new_node = len(DG.nodes())

            DG.add_node(new_node, type=s)

            if i==len(syllables)-1 and ending:
                DG.node[new_node]['ending'] = True

            self.add_graph_edge(curr_nodes,new_node)

            curr_nodes = [new_node]

            if i==0 and prev_optional!=NOT_OPTIONAL: #TODO:allow for multiple optionals
                last_optional = len(self.components)-2

                l = last_optional
                optionals = [l]
                for o in optionals: #this might explode
                    assert o >-2
                    if  o == -1:
                        end_nodes = [0]
                    else:
                        end_nodes = self.end_nodes_of_component(self.components[o])

                    self.add_graph_edge(end_nodes, start_node,optional=self.components[0].optional)

            if i+1 == (len(syllables)) and number_of_repeats > 0:
                self.add_graph_edge(curr_nodes, start_node)
        m = MeterSegment(syllables=syllables, ending=ending,number_of_repeats=number_of_repeats,optional=optional,start_node=start_node, end_node=start_node+len(syllables)-1)
        self.components.append(m)

    def graph_scan(self, in_string, parse='', ignore_skipping = False):
        #print 'in graph_scan'
        completed_scans = [] # holds complete scans
        if parse == '':
            parse = self.pp.parse(in_string) # holds output, matches
            scan_tokens = self.lp.tokenize(parse.output)
        else:
            scan_tokens = self.pp.tokenize(parse)
        logging.debug('parsed as %s',parse)
        # this generates scan_tokens from the scan of the input string, e.g. ['b','c','v'], using the long parser (lp)
        logging.debug('scan tokens %s',scan_tokens)
#        print 'scan_tokens',scan_tokens
        # this function descends into node (node_id), passing current token_i, matches, and a string represent
        DG = self.DG

        def descend_node(node_id, token_i, matches, matched_so_far):
            logging.debug('descending node_id'+str(node_id))
            successors = sorted(self.DG.successors(node_id),
                                key=lambda k: self.DG[node_id][k]['weight'])
            for successor_id in successors:
                #print ignore_skipping
                if ignore_skipping==False and 'skip_if_matched' in self.DG[node_id][successor_id] and len(completed_scans)>0:
                    logging.debug('********skipping!')
                    continue

                node_type = self.DG.node[successor_id]['type']
                assert node_type in ('=','-')

                if node_type=='=':
                    parser = self.lp
                else:
                    parser = self.sp
                    if node_type=='-' and ignore_skipping==False and len(completed_scans)>0:
                        #if len(self.lp.match_all_at(scan_tokens,token_i))>1: # Long matches possible, so moving along
                        logging.debug('skipping wild shorts at node %d',successor_id)
                        continue
                if 'optional' in self.DG[node_id][successor_id]:# check the edge if it's optional
                    logging.debug('found an optional edge')

                for m in parser.match_all_at(scan_tokens, token_i):
                    #print '   matched ', m.tokens, m.production
                    # next, check to make sure that this is not a bad combination
                    # do so by looking for constraints on the edge
                    # note: this could be added as a constraint to match_all_at() as not_starting_with ...

                    if len(matches)>0: # if already matched something
 #                       print 'already matched'
                        a = matches[-1].found # details of previous match
                        b = m.production # details of current match
                        if 'bad_combos' in self.DG[node_id][successor_id]: # constraints set on this edge
                            if (a,b) in self.DG[node_id][successor_id]['bad_combos']:
                                logging.debug('found bad combos %s',(a,b))
                                continue # abort! bad combination
                    orig_tokens =[]
                    for i in range(token_i, token_i+len(m.tokens)):
                        orig_tokens +=parse.matches[i].tokens

                    # generate node_ipa

                    node_ipa = u''

                    for tkn in orig_tokens:
                        if tkn in phonemes.phonemes:
                            node_ipa +=phonemes.phonemes[tkn]
                        else:
                            print 'could not find token',tkn,'in ',phonemes.phonemes
                    if node_ipa.endswith(u'ː̃'):#, node_ipa): # if nasal after long symbol, switch
                        node_ipa = node_ipa[0:-2]+u'̃ː'
                    if m.production.startswith('s_') and node_ipa.endswith(u'ː'):
                        node_ipa = node_ipa[0:-1]+u'ˑ'


                    # advance token index based on length of match tokens

                    # generate match data

                    matched_tokens = m.tokens


                    match_data = NodeMatch(node_type=node_type,
                                           matched_tokens = matched_tokens,
                                           node_id=node_id,
                                           orig_tokens=orig_tokens,
                                           ipa = node_ipa,
                                           found=m.production,
                                           token_i=token_i)

                    new_token_i = token_i + len(matched_tokens)

                    so_far=matched_so_far + node_type

                    curr_matches = list(matches)

                    curr_matches.append(match_data)

                    if new_token_i == len(scan_tokens):
                        logging.debug('AT THE END')
                        logging.debug(curr_matches)
                        logging.debug('node is %d%s',successor_id,self.DG.node[successor_id])


                        if 'ending' in self.DG.node[successor_id]:
                            logging.debug('AT THE END REALLY')

                            count_okay = True

                            if self.count:

                                count=0
                                for x in so_far:
                                    if x=='=': count+=2
                                    if x=='-': count+=1

                                count_okay = count in self.count

                            if count_okay == True:
                                completed_scans.append(ScanResult(scan=so_far, matches=curr_matches, meter_type='CUSTOM'))
                                match_node = successor_id
                            else: # count not okay
                                pass
                        else:
                            pass # doesn't match and at end, so don't continue
                    else:
                        descend_node(successor_id, new_token_i,curr_matches,so_far)
        descend_node(0, 0, [], '')
        return completed_scans



    def init_from_phrase(self,phrase):

        self.initial_phrase = phrase
        self.parse_meter(phrase)

    def parse_meter(self,phrase):

        x  = '(?:'
        x +=   '(?:'
        x +=     '(?P<required_group>\[.+?\])'+'|'
        x +=     '(?P<optional_group>\(.+?\))'
        x +=   ')'
        x +=   '(?P<repeated_group>\+)?'
        x += ')|'
        x += '(?P<regular>[=-]+)'

        my_re = re.compile(x)

        matches = [m for m in my_re.finditer(phrase)]

        endings = [False] * len(matches)


        optionals = [m.group('optional_group') is not None for m in matches]

        ending_start = len(matches)-1

        while optionals[ending_start]==True:
            ending_start-=1
        for i in range(ending_start, len(endings)):
            endings[i]=True




        for i,m in enumerate(matches):

            if m.group('required_group') is not None or m.group('optional_group') is not None:

                optional_on = m.group('optional_group') is not None
                repeat_on = m.group('repeated_group') is not None
                if repeat_on:
                    phrase = m.group(0)[1:-2]
                else:
                    phrase = m.group(0)[1:-1]
                internal_groups = phrase.split('|')

                logging.debug('processing group '+phrase+' optional:'+str(optional_on)+' repeat:'+str(repeat_on))
#                print 'optional = ',optional_on,'repeat',repeat_on

                if optional_on:
                    optional_setting = OPTIONAL
                else:
                    optional_setting = NOT_OPTIONAL

                if repeat_on:
                    number_of_repeats = 3
                else:
                    number_of_repeats = 0

                ending = endings[i]
                #.set_trace()
                if len(internal_groups)==1:
                    self.add_segment(internal_groups[0], number_of_repeats=number_of_repeats, optional=optional_setting,
                                     ending=ending)

                elif len(internal_groups)>0:
                    branches = [ self.branch(j,ending=ending, number_of_repeats=number_of_repeats, weight=w) for w,j in enumerate(internal_groups)]
                    self.add_fork(branches, optional = optional_setting, number_of_repeats=number_of_repeats)
            else:
                self.add_segment(m.group(0), optional=NOT_OPTIONAL,ending=endings[i])
#               print 'non-group found', m.group(0)


    def get_scan_as_string(self,x):
        z = self.graph_scan(x)
        if z:
            return z[0].scan # add space for spreadsheet viewing
        else:
            return ''

    def get_all_scans_as_string(self,x, ignore_skipping = True, separator = '\n'):
        z = self.graph_scan(x, ignore_skipping = ignore_skipping)
        if z:
            return separator.join([x.scan for x in z])
        else:
            return ''
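parse_meter() defines a small notation for meter phrases: bare runs of '=' (long) and '-' (short), '[...]' for a required group, '(...)' for an optional group, '|' inside a group for alternative branches, and a trailing '+' to allow repetition. A hedged usage sketch; the notation string and input line below are placeholders, and the settings/*.yaml files plus the supporting classes (Branch, Fork, MeterSegment, NodeMatch, ScanResult) are assumed to be importable as in the other examples:

g = CustomMeterGraph('=-==[=-|-=](=)')    # build the meter graph from the notation
scans = g.graph_scan(u'dil hii to hai')   # input must follow the urdu-meter.yaml scheme
for result in scans:
    print('%s (%s)' % (result.scan, result.meter_type))  # meter_type is 'CUSTOM' here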
Example #5
    def __init__(self):

        self.DG = self.create_graph()
        self.pp = GraphParser('settings/urdu-meter.yaml',blank=' ')
        self.lp = GraphParser('settings/long.yaml',blank='b')
        self.sp = GraphParser('settings/short.yaml',blank='b')
Example #6
class MeterGraph:
    
    def __init__(self):

        self.DG = self.create_graph()
        self.pp = GraphParser('settings/urdu-meter.yaml',blank=' ')
        self.lp = GraphParser('settings/long.yaml',blank='b')
        self.sp = GraphParser('settings/short.yaml',blank='b')

    def create_graph(self):
        DG=nx.DiGraph()
        
        DG.add_node(0,type='0') # this is the start
        
        for meter in settings.meters_with_feet:
            
            meter_type = settings.meters_with_feet[meter]
            meter_full_description = meter
            
            meter = meter.replace('/','') # ignore feet denominator for now
            
            node_id = 0
            
            curr_node = 0
            
            
            for i,c in enumerate(meter):
        
                found_it = False
        
                for n in DG.successors(curr_node):
        
                    node = DG.node[n]
        
                    if node['type']==c:
                        curr_node = n
                        found_it = True
                        break
        
                if found_it==False:
                    new_node = len(DG.nodes())            
                    DG.add_node(new_node, type=c)            
                    DG.add_edge(curr_node,new_node)
                    
                    # now add constraints to the edge, stored as a 'bad_combos' attribute
                  #  print DG.node[curr_node]['type']
                    old_c = DG.node[curr_node]['type']
                    if (old_c,c) in settings.bad_types:
                        DG[curr_node][new_node]['bad_combos'] = settings.bad_types[(old_c, c)]
               #         print 'yes found ',old_c,c, bad_types[(old_c,c)]
                    curr_node = new_node
                if i==len(meter)-1: # store some metrical data at the last node
                    DG.node[curr_node]['meter_type'] = meter_type
                    DG.node[curr_node]['meter_full_description'] = meter_full_description
        return DG

# this is the graph scan.
     
    def graph_scan(self, in_string): 

        completed_scans = [] # holds complete scans
        
        #from collections import namedtuple
        
        # this is the scan of the input string to bcv, etc.
    #    scan = pp.parse(in_string)
        parse = self.pp.parse(in_string) # holds output, matches
        #pd = pp.parse_details # details on the matched tokens, rules, etc.
        
        # this generates scan_tokens from the scan of the input string, e.g. ['b','c','v'], using the long parser (lp)
        scan_tokens = self.lp.tokenize(parse.output)
       # print scan_tokens
     #   print 'scan_tokens are ',scan_tokens
        # This is a check to see that the short and long parsers match
        # TODO: remove later
        
    #    import collections
    #    assert collections.Counter(scan_tokens) == collections.Counter(lp.tokenize(scan))
        
        # this function descends into node (node_id), passing current token_i, matches, and a string represent
        def descend_node(node_id, token_i, matches, matched_so_far):
      
            for successor_id in self.DG.successors(node_id):
       #         print "  in successor ",successor_id, DG.node[successor_id]
    #            print scan_tokens
    #            print "\nDESCENDED INTO node, ", node_id, DG.node[node_id], "token_i ",token_i
    #            print "  So far ",matched_so_far
                node_type = self.DG.node[successor_id]['type']
                assert node_type in ('=','-')
                
                if node_type=='=': 
        #            print 'using lp'
                    parser = self.lp
                else:
         #           print 'using sp'
                    parser = self.sp
                
    #            print "  TRYING ",node_type, " from node ", node_id
                # for each match m at token_i of scan_tokens 
                # m contains ['tokens', 'start', 'rule_id', 'rule']
                # m['rule'] contain ['tokens', 'production']
                # TODO: declunkify.
        #       print ".. here , at ", token_i," of ", scan_tokens, "parser finds: ",parser.match_all_at(scan_tokens, token_i)
                for m in parser.match_all_at(scan_tokens, token_i):
    #                print '   matched ', m.tokens, m.production
                    # next, check to make sure that this is not a bad combination
                    # do so by looking for constraints on the edge
                    # note: this could be added as a constraint to match_all_at() as not_starting_with ...

                    if len(matches)>0: # if already matched something
                        a = matches[-1].found # details of previous match
                        b = m.production#**['rule']['production']   # details of current match 
                        if 'bad_combos' in self.DG[node_id][successor_id]: # if 'bad_combos' in the a,b's edge
   #                         print ' WARNING! BAD COMBO',a,b
   #                         print ' matches are ... '
    #                        print matches
                          #  print 'checking bad combos at ',node_id, successor_id
                          #  print 'trying ',(a,b,'in ',self.DG[node_id][successor_id]['bad_combos']

                            if (a,b) in self.DG[node_id][successor_id]['bad_combos']: # if it's bad
            
                          #      print '   aborting! found ',a,b
                                continue # abort! bad combination

                    # determine orig_tokens
                    # meaning, what is matched from original input and parsed to b,c,s, etc.
        
                    orig_tokens =[]
                    for i in range(token_i, token_i+len(m.tokens)):#['tokens'])):
                        #parse = pp.parse(in_string)
                        #print type(parse.matches[i].tokens)
                        orig_tokens.append(parse.matches[i].tokens)
    #                    orig_tokens.append(pd[i]['rule']['tokens'])  ## parser details here
                        # this will break if 'rule' is None
                        
     
                    # advance token index based on length of match tokens

                    # generate match data

                    matched_tokens = m.tokens
    #                print "   accepting ",matched_tokens
                    match_data = NodeMatch(node_type=node_type,
                                           matched_tokens = matched_tokens,
                                           node_id=node_id,
                                           orig_tokens=orig_tokens,
                                           found=m.production,
                                           token_i=token_i)
    #                print matches
    #                print match_data
        #            print match_data
                    # advance token_i 
                    
                    new_token_i = token_i + len(matched_tokens)
                    
                 ##   matches.append(match_data)
                    
                    so_far=matched_so_far + node_type
                    
                    #print ' so far',so_far
     
                    # if we're at the end
                    curr_matches = list(matches)
                    #print "curr = ",matches
                    curr_matches.append(match_data)
                    #print "~~~~\n",curr_matches
                    
                   # print curr_matches
                    if new_token_i == len(scan_tokens) and 'meter_type' in self.DG.node[successor_id]:
                        #and 'meter_type' in DG.node[s]:# and len(DG.successors(s))==0:
                        #print 'made it!', successor_id, DG.successors(successor_id),DG.node[successor_id], so_far
                        completed_scans.append(ScanResult(scan=so_far, matches=curr_matches, meter_type=self.DG.node[successor_id]['meter_type']))
                        #,"matches"]) # used for completed scans

                   #     for x in matches:
                   #         print x

                        match_node = successor_id
                        #print DG.node(match_node,data=True)#[match_node]
                    else:                  
                        descend_node(successor_id, new_token_i,curr_matches,so_far)
        # start descent into node 0 of the graph, at token_i 0, with no matches       
        descend_node(0, 0, [], '')

        return completed_scans

    def draw_graph(self):

        g = self.DG

        pos=nx.spring_layout(g)
    
        plt.figure(figsize=(15,15))

        labels=dict((n,d['type']) for n,d in g.nodes(data=True)) # need to change labels for 0,1,etc.

        nx.draw(g, pos, labels=labels, node_color='#A0CBE2', node_size=200)
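draw_graph() only draws onto the current matplotlib figure; displaying or saving it is left to the caller. A minimal sketch, assuming the module imports networkx as nx and matplotlib.pyplot as plt, and that settings.meters_with_feet and the settings/*.yaml files are available (none of which are shown in these excerpts):

mg = MeterGraph()
mg.draw_graph()
plt.savefig('meter-graph.png')   # or plt.show() for interactive use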