import random
from copy import deepcopy
from typing import Tuple, List, Union
from mwptoolkit.utils.enum_type import SpecialTokens,NumMask,EPT
from mwptoolkit.utils.preprocess_tool.number_operator import constant_number
[docs]def from_infix_to_postfix(expression):
r"""convert infix equation to postfix equation.
Args:
expression (list): infix expression.
Returns:
(list): postfix expression.
"""
st = list()
res = list()
priority = {"<BRG>": 0, "=": 1, "+": 2, "-": 2, "*": 3, "/": 3, "^": 4}
for e in expression:
if e in ["(", "["]:
st.append(e)
elif e == ")":
c = st.pop()
while c != "(":
res.append(c)
c = st.pop()
elif e == "]":
c = st.pop()
while c != "[":
res.append(c)
c = st.pop()
elif e in priority:
while len(st) > 0 and st[-1] not in ["(", "["] and priority[e] <= priority[st[-1]]:
res.append(st.pop())
st.append(e)
else:
res.append(e)
while len(st) > 0:
res.append(st.pop())
return res
[docs]def from_infix_to_prefix(expression):
r"""convert infix equation to prefix equation
Args:
expression (list): infix expression.
Returns:
(list): prefix expression.
"""
st = list()
res = list()
priority = {"<BRG>": 0, "=": 1, "+": 2, "-": 2, "*": 3, "/": 3, "^": 4}
expression = deepcopy(expression)
expression.reverse()
for e in expression:
if e in [")", "]"]:
st.append(e)
elif e == "(":
c = st.pop()
while c != ")":
res.append(c)
c = st.pop()
elif e == "[":
c = st.pop()
while c != "]":
res.append(c)
c = st.pop()
elif e in priority:
while len(st) > 0 and st[-1] not in [")", "]"] and priority[e] < priority[st[-1]]:
res.append(st.pop())
st.append(e)
else:
res.append(e)
while len(st) > 0:
res.append(st.pop())
res.reverse()
return res
[docs]def from_prefix_to_postfix(expression):
r"""convert prefix equation to postfix equation
Args:
expression (list): prefix expression.
Returns:
(list): postfix expression.
"""
st = list()
expression = deepcopy(expression)
expression.reverse()
for symbol in expression:
if symbol not in ['+', '-', '*', '/', '^', "=", "<BRG>"]:
st.append([symbol])
else:
n1 = st.pop()
n2 = st.pop()
st.append(n1 + n2 + [symbol])
res = st.pop()
return res
[docs]def from_postfix_to_prefix(expression):
r"""convert postfix equation to prefix equation
Args:
expression (list): postfix expression.
Returns:
(list): prefix expression.
"""
st = list()
for symbol in expression:
if symbol not in ['+', '-', '*', '/', '^', "=", "<BRG>"]:
st.append([symbol])
else:
n1 = st.pop() #2
n2 = st.pop() #3
st.append([symbol] + n2 + n1)
res = st.pop()
return res
[docs]def from_prefix_to_infix(expression):
r"""convert prefix equation to infix equation
Args:
expression (list): prefix expression.
Returns:
(list): infix expression.
"""
st = list()
last_op = []
priority = {"<BRG>": 0, "=": 1, "+": 2, "-": 2, "*": 3, "/": 3, "^": 4}
expression = deepcopy(expression)
expression.reverse()
for symbol in expression:
if symbol not in ['+', '-', '*', '/', '^', "=", "<BRG>"]:
st.append([symbol])
else:
n_left = st.pop()
n_right = st.pop()
left_first = False
right_first = False
if len(n_left) > 1 and priority[last_op.pop()] < priority[symbol]:
left_first = True
if len(n_right) > 1 and priority[last_op.pop()] <= priority[symbol]:
right_first = True
if left_first:
n_left = ['('] + n_left + [')']
if right_first:
n_right = ['('] + n_right + [')']
st.append(n_left + [symbol] + n_right)
last_op.append(symbol)
res = st.pop()
return res
[docs]def from_postfix_to_infix(expression):
r"""convert postfix equation to infix equation
Args:
expression (list): postfix expression.
Returns:
(list): infix expression.
"""
st = list()
last_op = []
priority = {"<BRG>": 0, "=": 1, "+": 2, "-": 2, "*": 3, "/": 3, "^": 4}
for symbol in expression:
if symbol not in ['+', '-', '*', '/', '^', "=", "<BRG>"]:
st.append([symbol])
else:
n_right = st.pop()
n_left = st.pop()
left_first = False
right_first = False
if len(n_right) > 1 and priority[last_op.pop()] <= priority[symbol]:
right_first = True
if len(n_left) > 1 and priority[last_op.pop()] < priority[symbol]:
left_first = True
if left_first:
n_left = ['('] + n_left + [')']
if right_first:
n_right = ['('] + n_right + [')']
st.append(n_left + [symbol] + n_right)
last_op.append(symbol)
res = st.pop()
return res
[docs]def from_infix_to_multi_way_tree(expression):
res = []
st = []
level = 0
for e in expression:
if e in ['(', '[']:
level += 1
st.append(e)
elif e in [')', ']']:
level -= 1
st.append(e)
if level == 0:
sub_res = from_infix_to_multi_way_tree(st[1:-1])
res.append(sub_res)
st = []
else:
if level != 0:
st.append(e)
else:
res.append(e)
return res
[docs]def postfix_parser(equation, memory: list) -> int:
"""
Read Op-token postfix equation and transform it into Expression-token sequence.
:param List[Union[str,Tuple[str,Any]]] equation:
List of op-tokens to be parsed into a Expression-token sequence
Item of this list should be either
- an operator string
- a tuple of (operand source, operand value)
:param list memory:
List where previous execution results of expressions are stored
:rtype: int
:return:
Size of stack after processing. Value 1 means parsing was done without any free expression.
"""
stack = []
for tok in equation:
if tok in EPT.OPERATORS:
# If token is an operator, form expression and push it into the memory and stack.
op = EPT.OPERATORS[tok]
arity = op['arity']
# Retrieve arguments
args = stack[-arity:]
stack = stack[:-arity]
# Store the result with index where the expression stored
stack.append((EPT.ARG_MEM, len(memory)))
# Store the expression into the memory.
memory.append((tok, args))
else:
# Push an operand before meet an operator
stack.append(tok)
return len(stack)
[docs]def orig_infix_to_postfix(equation: Union[str, List[str]], number_token_map: dict, free_symbols: list,
join_output: bool = True):
"""
Read infix equation string and convert it into a postfix string
:param Union[str,List[str]] equation:
Either one of these.
- A single string of infix equation. e.g. "5 + 4"
- Tokenized sequence of infix equation. e.g. ["5", "+", "4"]
:param dict number_token_map:
Mapping from a number token to its anonymized representation (e.g. N_0)
:param list free_symbols:
List of free symbols (for return)
:param bool join_output:
True if the output need to be joined. Otherwise, this method will return the tokenized postfix sequence.
:rtype: Union[str, List[str]]
:return:
Either one of these.
- A single string of postfix equation. e.g. "5 4 +"
- Tokenized sequence of postfix equation. e.g. ["5", "4", "+"]
"""
# Template in ALG514/DRAW is already tokenized, without parenthesis.
# Template in MAWPS is also tokenized but contains parenthesis.
output_tokens = []
postfix_stack = []
# Tokenize the string if that is not tokenized yet.
if type(equation) is str:
equation = equation.strip().split(' ')
# Read each token
for tok in equation:
# Ignore blank token
if not tok:
continue
if tok == ')':
# Pop until find the opening paren '('
while postfix_stack:
top = postfix_stack.pop()
if top == '(':
# Discard the matching '('
break
else:
output_tokens.append(top)
elif tok in '*/+-=(':
# '(' has the highest precedence when in the input string.
precedence = EPT.OPERATOR_PRECEDENCE.get(tok, 1E9)
while postfix_stack:
# Pop until the top < current_precedence.
# '(' has the lowest precedence in the stack.
top = postfix_stack[-1]
if EPT.OPERATOR_PRECEDENCE.get(top, -1E9) < precedence:
break
else:
output_tokens.append(postfix_stack.pop())
postfix_stack.append(tok)
elif EPT.NUMBER_PATTERN.fullmatch(tok) is not None:
# Just output the operand.
positive, const_code = constant_number(tok)
if not positive:
const_code = const_code + '_NEG'
output_tokens.append(const_code)
elif tok in number_token_map:
# Just output the operand
output_tokens += number_token_map[tok]
else:
# This is a variable name
# Just output the operand.
if tok not in free_symbols:
free_symbols.append(tok)
tok = 'X_%s' % free_symbols.index(tok)
output_tokens.append(tok)
while postfix_stack:
output_tokens.append(postfix_stack.pop())
if join_output:
return ' '.join(output_tokens)
else:
return output_tokens
[docs]def infix_to_postfix(equation, free_symbols: list,
join_output: bool = True):
output_tokens = []
postfix_stack = []
# Tokenize the string if that is not tokenized yet.
if type(equation) is str:
equation = equation.strip().split(' ')
# Read each token
for tok in equation:
# Ignore blank token
if not tok:
continue
if tok == ')':
# Pop until find the opening paren '('
while postfix_stack:
top = postfix_stack.pop()
if top == '(':
# Discard the matching '('
break
else:
output_tokens.append(top)
elif tok in '*/+-=^(':
# '(' has the highest precedence when in the input string.
precedence = EPT.OPERATOR_PRECEDENCE.get(tok, 1E9)
while postfix_stack:
# Pop until the top < current_precedence.
# '(' has the lowest precedence in the stack.
top = postfix_stack[-1]
if EPT.OPERATOR_PRECEDENCE.get(top, -1E9) < precedence:
break
else:
output_tokens.append(postfix_stack.pop())
postfix_stack.append(tok)
elif EPT.NUMBER_PATTERN.fullmatch(tok) is not None:
# Just output the operand.
positive, const_code = constant_number(tok)
if not positive:
const_code = const_code + '_NEG'
output_tokens.append(const_code)
elif 'NUM_' in tok:
output_tokens.append('N_'+tok[4:])
else:
# This is a variable name
# Just output the operand.
if tok not in free_symbols:
free_symbols.append(tok)
tok = 'X_%s' % free_symbols.index(tok)
output_tokens.append(tok)
while postfix_stack:
output_tokens.append(postfix_stack.pop())
if join_output:
return ' '.join(output_tokens)
else:
return output_tokens
[docs]def operator_mask(expression):
template = []
for symbol in expression:
if isinstance(symbol, list):
sub_temp = operator_mask(symbol)
template.append(sub_temp)
elif symbol in ["+", "-", "*", "/", "^", "=", "<BRG>"]:
template.append(SpecialTokens.OPT_TOKEN)
else:
template.append(symbol)
return template
[docs]def trans_symbol_2_number(equ_list, num_list):
"""transfer mask symbol in equation to number.
Args:
equ_list (list): equation.
num_list (list): number list.
Return:
(list): equation.
"""
symbol_list = NumMask.number
new_equ_list = []
for symbol in equ_list:
if 'NUM' in symbol:
index = symbol_list.index(symbol)
new_equ_list.append(str(num_list[index]))
else:
new_equ_list.append(symbol)
return new_equ_list
[docs]def EN_rule1_stat(datas, sample_k=100):
"""equation norm rule1
Args:
datas (list): dataset.
sample_k (int): number of random sample.
Returns:
(list): classified equations. equivalent equations will be in the same class.
"""
rule_1 = []
for data in datas:
temp_data = data
equ_list = data["equation"]
rule_1.append(equ_list)
samples = random.sample(range(10, 100), k=sample_k)
random.shuffle(samples)
ans_dict = {}
for equ_list in rule_1:
new_equ = trans_symbol_2_number(equ_list, samples)
new_equ = ''.join(new_equ)
new_equ = new_equ.replace("^", "**", 10)
new_equ = new_equ.replace("[", "(", 10)
new_equ = new_equ.replace("]", ")", 10)
try:
ans = eval(new_equ)
except:
ans = float("inf")
try:
ans_dict[ans].append(equ_list)
except:
ans_dict[ans] = []
ans_dict[ans].append(equ_list)
class_list = []
for k, v in ans_dict.items():
class_list.append(v)
for i in range(50):
samples = random.sample(range(10, 100), k=sample_k)
random.shuffle(samples)
class_copy = deepcopy(class_list)
class_list = []
for equ_lists in class_copy:
ans_dict = {}
for equ_list in equ_lists:
new_equ = trans_symbol_2_number(equ_list, samples)
new_equ = ''.join(new_equ)
new_equ = new_equ.replace("^", "**", 10)
new_equ = new_equ.replace("[", "(", 10)
new_equ = new_equ.replace("]", ")", 10)
try:
ans = eval(new_equ)
except:
ans = float("inf")
try:
ans_dict[ans].append(equ_list)
except:
ans_dict[ans] = []
ans_dict[ans].append(equ_list)
for k, v in ans_dict.items():
class_list.append(v)
class_copy = deepcopy(class_list)
class_list = []
for equ_lists in class_copy:
class_list_temp = []
for equ_list in equ_lists:
if equ_list not in class_list_temp:
class_list_temp.append(equ_list)
class_list_temp = sorted(class_list_temp, key=lambda x: len(x), reverse=False)
class_list.append(class_list_temp)
return class_list
[docs]def EN_rule2(equ_list):
"""equation norm rule2
Args:
equ_list (list): equation.
Returns:
list: equivalent equation.
"""
new_list = []
i = 0
while i < len(equ_list):
if (i + 4) < len(equ_list) and ('NUM' in equ_list[i] or equ_list[i].isalpha()) and '+' in equ_list[i + 1] and (
'NUM' in equ_list[i + 2] or equ_list[i + 2].isalpha()) and '+' in equ_list[i + 3] and ('NUM' in equ_list[i + 4] or equ_list[i + 4].isalpha()):
if i - 1 >= 0 and equ_list[i - 1] in ['/', '-', '*']:
new_list.append(equ_list[i])
i += 1
continue
if i + 5 < len(equ_list) and equ_list[i + 5] in ['/', '-', '*']:
new_list.append(equ_list[i])
i += 1
continue
temp = [equ_list[i], equ_list[i + 2], equ_list[i + 4]]
sort_temp = sorted(temp)
new_temp = sort_temp[0:1] + ['+'] + sort_temp[1:2] + ['+'] + sort_temp[2:3]
new_list += new_temp
i += 5
elif (i + 4) < len(equ_list) and ('NUM' in equ_list[i] or equ_list[i].isalpha()) and '*' in equ_list[i + 1] and (
'NUM' in equ_list[i + 2] or equ_list[i + 2].isalpha()) and '*' in equ_list[i + 3] and ('NUM' in equ_list[i + 4] or equ_list[i + 4].isalpha()):
if i - 1 >= 0 and equ_list[i - 1] in ['/', '-']:
new_list.append(equ_list[i])
i += 1
continue
if i + 5 < len(equ_list) and equ_list[i + 5] in ['/', '-']:
new_list.append(equ_list[i])
i += 1
continue
temp = [equ_list[i], equ_list[i + 2], equ_list[i + 4]]
sort_temp = sorted(temp)
new_temp = sort_temp[0:1] + ['*'] + sort_temp[1:2] + ['*'] + sort_temp[2:3]
new_list += new_temp
i += 5
elif (i + 2) < len(equ_list) and ('NUM' in equ_list[i] or equ_list[i].isalpha()) and '+' in equ_list[i + 1] and ('NUM' in equ_list[i + 2] or equ_list[i + 2].isalpha()):
if i - 1 >= 0 and equ_list[i - 1] in ['/', '-', '*']:
new_list.append(equ_list[i])
i += 1
continue
if i + 3 < len(equ_list) and equ_list[i + 3] in ['/', '-', '*']:
new_list.append(equ_list[i])
i += 1
continue
temp = [equ_list[i], equ_list[i + 2]]
sort_temp = sorted(temp)
new_temp = sort_temp[0:1] + ['+'] + sort_temp[1:2]
new_list += new_temp
i += 3
elif (i + 2) < len(equ_list) and ('NUM' in equ_list[i] or equ_list[i].isalpha()) and '*' in equ_list[i + 1] and ('NUM' in equ_list[i + 2] or equ_list[i + 2].isalpha()):
if i - 1 >= 0 and equ_list[i - 1] in ['/', '-']:
new_list.append(equ_list[i])
i += 1
continue
if i + 3 < len(equ_list) and equ_list[i + 3] in ['/', '-']:
new_list.append(equ_list[i])
i += 1
continue
temp = [equ_list[i], equ_list[i + 2]]
sort_temp = sorted(temp)
new_temp = sort_temp[0:1] + ['*'] + sort_temp[1:2]
new_list += new_temp
i += 3
else:
new_list.append(equ_list[i])
i += 1
return new_list