Source code for mwptoolkit.utils.preprocess_tool.number_operator

import re
import copy

from word2number import w2n

from mwptoolkit.utils.enum_type import NumMask,EPT

def trans_symbol_2_number(equ_list, num_list):
    """Replace number-mask symbols in an equation with the numbers they stand for.

    A token is treated as a mask symbol when it contains the substring
    ``'NUM'``. Its position inside ``NumMask.number`` is used as the index
    into ``num_list``.

    Args:
        equ_list (list): equation tokens, possibly containing mask symbols.
        num_list (list): numbers, ordered like the mask symbols.

    Returns:
        (list): a new equation in which every mask symbol is replaced by
        ``str()`` of its number; all other tokens are kept unchanged.

    Raises:
        ValueError: if a token contains ``'NUM'`` but is not listed in
            ``NumMask.number``.
        IndexError: if ``num_list`` has no entry for a mask symbol.
    """
    mask_symbols = NumMask.number
    return [
        str(num_list[mask_symbols.index(token)]) if 'NUM' in token else token
        for token in equ_list
    ]
def fraction_word_to_num(number_sentence):
    """Convert a hyphenated English fraction (e.g. ``'two-thirds'``) to a float.

    Numerators range from one to nine; denominators range from third to
    tenth (singular and plural spellings, plus ``quarter`` and the
    ``forth`` spelling variant). Lookup is case-insensitive.

    Args:
        number_sentence (str): english expression such as ``'three-fourths'``.

    Returns:
        (float): value of the fraction.

    Raises:
        KeyError: if the lower-cased expression is not a supported fraction.
    """
    numerators = {
        'one': 1, 'two': 2, 'three': 3, 'four': 4, 'five': 5,
        'six': 6, 'seven': 7, 'eight': 8, 'nine': 9,
    }
    denominators = {
        'third': 3, 'thirds': 3,
        'quarter': 4, 'forth': 4, 'fourth': 4, 'fourths': 4,
        'fifth': 5, 'fifths': 5,
        'sixth': 6, 'sixths': 6,
        'seventh': 7, 'sevenths': 7,
        'eighth': 8, 'eighths': 8,
        'ninth': 9, 'ninths': 9,
        'tenth': 10, 'tenths': 10,
    }
    # Every numerator/denominator pairing is a valid key, e.g. 'six-ninths'.
    fraction = {
        top_word + '-' + bottom_word: top / bottom
        for top_word, top in numerators.items()
        for bottom_word, bottom in denominators.items()
    }
    return fraction[number_sentence.lower()]
def english_word_2_num(sentence_list, fraction_acc=None):
    """Replace runs of English number words with their numeric value.

    Consecutive tokens that are number words (``'twenty-one'``, ``'hundred'``,
    ``'point'`` ...) are collected into one run and converted with
    ``w2n.word_to_num``. A few runs are deliberately left as words:

    * a lone ``'point'`` or a lone ``'one'`` (both are common non-numeric
      words),
    * the pair ``'one' 'point'``.

    A run that ends in ``'point'`` is converted without that last word, and
    ``'point'`` is emitted after the number.

    Known limitation: mixed forms such as ``'4.9 million'`` are not merged,
    because ``'4.9'`` is not a number word.

    Args:
        sentence_list (list): list of words (strings).
        fraction_acc (int|None): number of decimal places kept (by
            truncation) when converting fraction words such as
            ``'two-thirds'``; if None, fraction words are not converted.

    Returns:
        (list): new list of tokens with number words replaced by strings of
        the corresponding numbers.
    """
    units = ['one', 'two', 'three', 'four', 'five', 'six', 'seven', 'eight', 'nine']
    tens = ['twenty', 'thirty', 'forty', 'fifty', 'sixty', 'seventy', 'eighty', 'ninety']
    # A set gives O(1) membership tests inside the scanning loop.
    number_words = set(
        ['zero', 'ten',
         'eleven', 'twelve', 'thirteen', 'fourteen', 'fifteen', 'sixteen',
         'seventeen', 'eighteen', 'nineteen',
         'hundred', 'thousand', 'million', 'billion',
         'point']
        + units + tens
    )
    number_words.update(ten + '-' + unit for ten in tens for unit in units)

    converted = []
    total = len(sentence_list)
    idx = 0
    while idx < total:
        word = sentence_list[idx]
        if word.lower() not in number_words:
            converted.append(word)
            idx += 1
            continue

        # Collect the maximal run of number words. The bounds check matters:
        # without it a number word at the very end of the sentence would
        # index past the list and raise IndexError.
        end = idx
        while end < total and sentence_list[end].lower() in number_words:
            end += 1
        run = sentence_list[idx:end]
        idx = end

        # 'point' is compared case-insensitively, consistent with how the
        # run itself was matched.
        ends_with_point = run[-1].lower() == 'point'
        numeric_part = run[:-1] if ends_with_point else run

        if not numeric_part or (len(numeric_part) == 1 and numeric_part[0].lower() == 'one'):
            # Lone 'point', lone 'one', or 'one point': keep the words as is.
            converted.extend(run)
            continue

        converted.append(str(w2n.word_to_num(' '.join(numeric_part))))
        if ends_with_point:
            converted.append(run[-1])

    if fraction_acc is None:
        return converted

    denominators = ['third', 'thirds', 'quarter', 'forth', 'fourth', 'fourths',
                    'fifth', 'sixth', 'seventh', 'eighth', 'ninth', 'tenth',
                    'fifths', 'sixths', 'sevenths', 'eighths', 'ninths', 'tenths']
    fraction_words = {unit + '-' + denom for unit in units for denom in denominators}

    with_fractions = []
    for word in converted:
        if word.lower() in fraction_words:
            number = fraction_word_to_num(word)
            # Truncate (not round) to `fraction_acc` decimal places.
            number = int(number * 10 ** fraction_acc) / 10 ** fraction_acc
            with_fractions.append(str(number))
        else:
            with_fractions.append(word)
    return with_fractions
def split_number(text_list):
    """Separate a leading number expression from the characters that follow it.

    A token is split only when it *starts* with a number expression, which is
    one of:

    * a fraction in parentheses with optional digits around it, e.g.
      ``1(2/3)`` or ``(1/2)``,
    * a decimal with optional percent sign, e.g. ``3.5`` or ``3.5%``,
    * an integer with optional percent sign, e.g. ``12`` or ``12%``.

    Args:
        text_list (list): text list (strings).

    Returns:
        (list): processed text list; a token such as ``'12kg'`` becomes the
        two tokens ``'12'`` and ``'kg'``, tokens without a leading number
        are kept unchanged.
    """
    # Raw string: '\d', '\(' and '\.' are regex escapes, not string escapes.
    pattern = re.compile(r"\d*\(\d+/\d+\)\d*|\d+\.\d+%?|\d+%?")
    new_text = []
    for s in text_list:
        # match() anchors at position 0, which is the only position of interest.
        leading = pattern.match(s)
        if leading is None:
            new_text.append(s)
            continue
        new_text.append(leading.group())
        remainder = s[leading.end():]
        if remainder:
            new_text.append(remainder)
    return new_text
def joint_number(text_list):
    """Join five-token parenthesised groups (e.g. fractions) into one token.

    Whenever a ``'('`` token is followed exactly four positions later by a
    ``')'`` token, the five tokens are concatenated into a single string,
    e.g. ``['(', '1', '/', '2', ')']`` becomes ``'(1/2)'``. The three tokens
    in between are not inspected.

    Args:
        text_list (list): text list (strings).

    Returns:
        (list): processed text list.
    """
    merged = []
    size = len(text_list)
    pos = 0
    while pos < size:
        closes_at = pos + 4
        is_group = (
            text_list[pos] == '('
            and closes_at < size
            and text_list[closes_at] == ')'
        )
        if is_group:
            merged.append(''.join(text_list[pos:closes_at + 1]))
            pos = closes_at + 1
        else:
            merged.append(text_list[pos])
            pos += 1
    return merged
def joint_number_(text_list):
    """Join parenthesised digit/slash groups of any length into one token.

    For every ``'('`` token the next ``')'`` token is located. The span is a
    candidate when it is not empty (``'(' ')'``) and contains no further
    ``'('``. If every token of the candidate span is ``'('``, ``')'``,
    ``'/'`` or a digit string, the span is concatenated into a single token
    (e.g. ``'(12/345)'``). Otherwise the leading tokens that did qualify are
    emitted unchanged and scanning resumes at the first token that did not.

    Args:
        text_list (list): text list (strings).

    Returns:
        (list): processed text list.
    """
    fraction_symbols = ("(", ")", "/")
    new_list = []
    total = len(text_list)
    i = 0
    while i < total:
        token = text_list[i]
        if token != "(":
            new_list.append(token)
            i += 1
            continue

        try:
            close = text_list.index(")", i + 1)
        except ValueError:
            # No closing parenthesis anywhere after this one.
            close = None

        # Reject: unmatched '(', empty '()' pair, or a nested '(' before ')'.
        if close is None or close == i + 1 or "(" in text_list[i + 1:close + 1]:
            new_list.append(token)
            i += 1
            continue

        span = text_list[i:close + 1]
        accepted = []
        for word in span:
            if word in fraction_symbols or word.isdigit():
                accepted.append(word)
            else:
                break

        if len(accepted) == len(span):
            new_list.append(''.join(accepted))
        else:
            # Keep the qualifying prefix as separate tokens; the offending
            # token is handled by the next loop iteration.
            new_list.extend(accepted)
        i += len(accepted)
    return new_list
def constant_number(const):
    """Convert a number to a constant symbol string (e.g. ``'C_3'``).

    Used to avoid sympy's automatic simplification of operations over
    constants.

    Args:
        const (str|int|float|Expr): constant value to be converted. Strings
            may carry a ``'C_'`` or ``'const_'`` prefix and use ``'_'`` as
            the decimal point.

    Returns:
        (tuple): ``(non_negative, symbol)`` where ``non_negative`` tells
        whether the value is >= 0 and ``symbol`` is the constant symbol
        string built from the absolute value.
    """
    if type(const) is str:
        named_constants = ['C_pi', 'C_e', 'const_pi', 'const_e']
        if const in named_constants:
            # pi and e keep their names; only the prefix is normalised.
            return True, const.replace('const_', 'C_')
        # Strip the prefix, restore the decimal point, and convert the
        # resulting number through the numeric branches below.
        numeric_text = const.replace('C_', '').replace('const_', '').replace('_', '.')
        return constant_number(float(numeric_text))

    if type(const) is int or int(const) == float(const):
        # Integral value: drop anything after the decimal point.
        non_negative = const >= 0
        return non_negative, 'C_' + str(int(abs(const)))

    if abs(const - 3.14) < 1E-2:
        # Values written as 3.14 are treated as pi.
        return True, 'C_pi'
    if abs(const - 2.7182) < 1E-4:
        # Values written as 2.7182 are treated as e.
        return True, 'C_e'

    # Non-integral value: '%.15f' is used because str() may produce
    # scientific notation such as 1.7E-3. The pattern is then expected to
    # trim the trailing zeros / float-precision noise (e.g. 0.05000000074).
    non_negative = const >= 0
    fixed_point = ('%.15f' % abs(const)).replace('.', '_')
    return non_negative, 'C_' + EPT.FOLLOWING_ZERO_PATTERN.sub('\\1', fixed_point)