# 使用70行python代码实现一个递归下降解析器的教程

‘*’:’mul’, ‘/’:’mul’,
‘(‘:’lpar’, ‘)’:’rpar’}
token = namedtuple(‘token’, [‘name’, ‘value’])

split_expr = re.findall(‘[\d.]+|[%s]’ % ”.join(token_map), expr)
tokens = [token(token_map.get(x, ‘num’), x) for x in split_expr]

‘1.2 / ( 11+3)’ –> [‘1.2’, ‘/’, ‘(‘, ’11’, ‘+’, ‘3’, ‘)’]

[‘1.2’, ‘/’, ‘(‘, ’11’, ‘+’, ‘3’, ‘)’]
->
[token(name=’num’, value=’1.2′), token(name=’mul’, value=’/’), token(name=’lpar’, value='(‘), token(name=’num’, value=’11’), token(name=’add’, value=’+’), token(name=’num’, value=’3′), token(name=’rpar’, value=’)’)]

mul: mul mul atom | atom;
atom: num | ‘(‘ add ‘)’ | neg;
neg: ‘-‘ atom;

(如果您还不理解上述语法，请阅读我之前发表的文章)

rule_map = {
‘mul’ : [‘atom mul mul’, ‘atom’],
‘atom’: [‘num’, ‘lpar add rpar’, ‘neg’],
}

lr版本使用了左递归的模式。当ll解析器遇到递归的时候，它会尝试去匹配规则。所以，当左递归发生是，解析器会进入无穷递归。甚至连聪明的ll解析器例如antlr也逃避不了这个问题，它会以友好的错误提示代替无穷的递归，而不像我们这个玩具解析器那样。

rulematch = namedtuple(‘rulematch’, [‘name’, ‘matched’])
def match(rule_name, tokens):
if tokens and rule_name == tokens[0].name: # 是否匹配标识?
return rulematch(tokens[0], tokens[1:])
for expansion in rule_map.get(rule_name, ()): # 是否匹配规则?
remaining_tokens = tokens
matched_subrules = []
for subrule in expansion.split():
matched, remaining_tokens = match(subrule, remaining_tokens)
if not matched:
break # 运气不好，跳出循环，处理下一个扩展定义!
matched_subrules.append(matched)
else:
return rulematch(rule_name, matched_subrules), remaining_tokens
return none, none # 无匹配结果

>>> tokens = [token(name=’num’, value=’1.2′), token(name=’mul’, value=’/’), token(name=’lpar’, value='(‘), token (name=’num’, value=’11’), token(name=’add’, value=’+’), token(name=’num’, value=’3′), token(name=’rpar’, value=’)’)]
(rulematch(name=’add’, matched=[rulematch(name=’mul’, matched=[rulematch(name=’atom’, matched=[token(name=’num’, value=’1.2′)]), token(name=’mul’, value=’/’), rulematch(name=’mul’, matched=[rulematch(name=’atom’, matched=[token(name=’lpar’, value='(‘), rulematch(name=’add’, matched=[rulematch(name=’mul’, matched=[rulematch(name=’atom’, matched=[token(name=’num’, value=’11’)])]), token(name=’add’, value=’+’), rulematch(name=’add’, matched=[rulematch(name=’mul’, matched=[rulematch(name=’atom’, matched=[token(name=’num’, value=’3′)])])])]), token(name=’rpar’, value=’)’)])])])]), [])

mul
atom
num ‘1.2’
mul ‘/’
mul
atom
lpar ‘(‘
mul
atom
num ’11’
mul
atom
num ‘3’
rpar ‘)’

mul
atom
num 8
mul ‘/’
mul
atom
num 4
mul ‘/’
mul
atom
num 2

def _recurse_tree(tree, func):
return map(func, tree.matched) if tree.name in rule_map else tree[1]
def flatten_right_associativity(tree):
new = _recurse_tree(tree, flatten_right_associativity)
if tree.name in fix_assoc_rules and len(new)==3 and new[2].name==tree.name:
new[-1:] = new[-1].matched
return rulematch(tree.name, new)

def build_left_associativity(tree):
new_nodes = _recurse_tree(tree, build_left_associativity)
if tree.name in fix_assoc_rules:
while len(new_nodes)>3:
new_nodes[:3] = [rulematch(tree.name, new_nodes[:3])]
return rulematch(tree.name, new_nodes)

bin_calc_map = {‘*’:mul, ‘/’:p, ‘+’:add, ‘-‘:sub}
def calc_binary(x):
while len(x) > 1:
x[:3] = [ bin_calc_map[x[1]](x[0], x[2]) ]
return x[0]
calc_map = {
‘num’ : float,
‘atom’: lambda x: x[len(x)!=1],
‘neg’ : lambda (op,num): (num,-num)[op==’-‘],
‘mul’ : calc_binary,
}
def evaluate(tree):
solutions = _recurse_tree(tree, evaluate)
return calc_map.get(tree.name, lambda x:x)(solutions)

if __name__ == ‘__main__’:
while true:
print( calc(raw_input(‘> ‘)) )

”’a calculator implemented with a top-down, recursive-descent parser”’
# author: erez shinan, dec 2012
import re, collections
token = collections.namedtuple(‘token’, [‘name’, ‘value’])
rulematch = collections.namedtuple(‘rulematch’, [‘name’, ‘matched’])
rule_map = {
‘mul’ : [‘atom mul mul’, ‘atom’],
‘atom’: [‘num’, ‘lpar add rpar’, ‘neg’],
}
bin_calc_map = {‘*’:mul, ‘/’:p, ‘+’:add, ‘-‘:sub}
def calc_binary(x):
while len(x) > 1:
x[:3] = [ bin_calc_map[x[1]](x[0], x[2]) ]
return x[0]
calc_map = {
‘num’ : float,
‘atom’: lambda x: x[len(x)!=1],
‘neg’ : lambda (op,num): (num,-num)[op==’-‘],
‘mul’ : calc_binary,
}
def match(rule_name, tokens):
if tokens and rule_name == tokens[0].name: # match a token?
for expansion in rule_map.get(rule_name, ()): # match a rule?
remaining_tokens = tokens
matched_subrules = []
for subrule in expansion.split():
matched, remaining_tokens = match(subrule, remaining_tokens)
if not matched:
break # no such luck. next expansion!
matched_subrules.append(matched)
else:
return rulematch(rule_name, matched_subrules), remaining_tokens
def _recurse_tree(tree, func):
return map(func, tree.matched) if tree.name in rule_map else tree[1]
def flatten_right_associativity(tree):
new = _recurse_tree(tree, flatten_right_associativity)
if tree.name in fix_assoc_rules and len(new)==3 and new[2].name==tree.name:
new[-1:] = new[-1].matched
return rulematch(tree.name, new)
def evaluate(tree):
solutions = _recurse_tree(tree, evaluate)
return calc_map.get(tree.name, lambda x:x)(solutions)
def calc(expr):
split_expr = re.findall(‘[\d.]+|[%s]’ % ”.join(token_map), expr)
tokens = [token(token_map.get(x, ‘num’), x) for x in split_expr]