I have some Numba-accelerated code in NoPython mode using numba.njit().
At some point I need to parse a str (or bytes) to get a float.
In pure Python, the way I would go about it is with float(), but that does not work:
import numba as nb


@nb.njit
def str2float(text):
    """Convert ``text`` to a float by delegating to the built-in ``float()``."""
    value = float(text)
    return value


# This call is the one that triggers the TypingError quoted after the snippet.
str2float("1.2")
TypingError: Failed in nopython mode pipeline (step: nopython frontend) No implementation of function Function(<class 'float'>) found for signature:
>>> float(unicode_type)
while I would like it to produce a float with value 1.2.
The following questions are somewhat related:
- this question discusses parsing to int (str/bytes-to-int)
- this question discusses the opposite, i.e. the float-to-str conversion
CodePudding user response:
While this is not yet supported (as of July 2022), you can implement something manually.
Below are two versions, one for str and one for bytes.
In the process of solving the task I use a str/bytes-to-int function, which parses the exponent of inputs written in exponential notation (e.g. 1.0e-02), and -- potentially -- a trim() function to pre-process inputs surrounded by whitespace ("C" whitespace characters: " ", "\n", "\r", "\t", "\v").
Both are presented separately (they are not defined in the code shown here) and are only used below.
From str
import math
import numba as nb
@nb.njit
def str2float_helper(text):
    """Parse the sign, digits and decimal point of a float literal.

    The text is scanned right-to-left, so the power of ten of each digit is
    simply the number of digits already consumed.

    Args:
        text: ``str`` holding an optional leading ``+``/``-``, decimal digits
            and at most one ``.``. No exponent part and no whitespace.

    Returns:
        A ``(number, sep_pos, valid)`` tuple. ``number`` is the signed integer
        formed by all digits with the decimal point removed, ``sep_pos`` is
        the count of digits to the right of the decimal point, and ``valid``
        is ``False`` when the text is empty, contains no digit, contains more
        than one ``.`` or contains any other character.
    """
    sep = ord(".")
    c_min = ord("0")
    c_max = ord("9")

    n = len(text)
    valid = n > 0
    # Determine the sign; a sign character occupies index 0, so the
    # right-to-left scan below must stop just before it.
    start = n - 1
    stop = -1
    sign = 1
    if valid:
        first = ord(text[0])
        if first == ord("+"):
            stop = 0
        elif first == ord("-"):
            sign = -1
            stop = 0
    # Parse the rest.
    # NOTE(review): `number` is built with integer arithmetic; under Numba
    # this is presumably a fixed-width integer, so very long digit runs could
    # overflow -- confirm.
    sep_pos = 0
    has_sep = False
    number = 0
    j = 0  # digits consumed so far == power of ten of the next digit
    for i in range(start, stop, -1):
        c = ord(text[i])
        if c_min <= c <= c_max:
            number += (c - c_min) * 10 ** j
            j += 1
        elif c == sep and not has_sep:
            # An explicit flag is needed: `sep_pos` is legitimately 0 for a
            # trailing "." and therefore cannot signal "separator seen".
            sep_pos = j
            has_sep = True
        else:
            valid = False
            break
    if j == 0:
        # Inputs such as ".", "+" or "-" carry no digit at all.
        valid = False
    return sign * number, sep_pos, valid
@nb.njit
def str2float(text):
    """Parse ``text`` into a float without calling ``float()``.

    Recognizes ``nan`` (in the three spellings tested below) and decimal
    literals with an optional ``e``/``E`` exponent. The mantissa is handled by
    ``str2float_helper`` and the exponent by ``str2int`` (defined elsewhere).

    Args:
        text: ``str`` to parse. Surrounding whitespace is not stripped.

    Returns:
        The parsed float, or ``None`` when the mantissa or the exponent is
        malformed.
    """
    if text == "nan" or text == "NAN" or text == "NaN":
        return math.nan
    lower_e = ord("e")
    upper_e = ord("E")
    # Distance of the last exponent marker from the end of the text
    # (-1: no marker). Indexing from the end avoids building a reversed copy.
    n = len(text)
    exp_pos = -1
    for i in range(n):
        c = ord(text[n - 1 - i])
        if c == lower_e or c == upper_e:
            exp_pos = i
            break
    if exp_pos > 0:
        exp_number = str2int(text[-exp_pos:])
        number, sep_pos, valid = str2float_helper(text[:-exp_pos - 1])
        if exp_number is None:
            # A malformed exponent invalidates the whole literal instead of
            # being silently read as exponent 0; the placeholder value only
            # keeps the arithmetic below well-typed.
            valid = False
            exp_number = 0
        result = number / 10.0 ** (sep_pos - exp_number) if valid else None
    else:
        # No marker, or a marker as the very last character; in the latter
        # case the helper rejects the text because of the letter.
        number, sep_pos, valid = str2float_helper(text)
        result = number / 10.0 ** sep_pos if valid else None
    return result
This should work similarly to float_() (defined below) which is a helper function that returns None instead of raising in case of parsing failure:
def float_(x):
    """Return ``float(x)``, or ``None`` when ``float()`` raises ValueError."""
    try:
        value = float(x)
    except ValueError:
        value = None
    return value
def is_close(x, y):
    """Tell whether two parse results agree.

    Args:
        x: A float, or ``None`` for a failed parse.
        y: A float, or ``None`` for a failed parse.

    Returns:
        ``True`` when both are ``None``, both are NaN, or both are numbers
        that are equal or close according to ``math.isclose``; ``False``
        otherwise.
    """
    # Test for None explicitly: 0.0 is falsy too, and a truthiness check would
    # let a (0.0, None) pair fall through to math.isclose() and raise.
    if x is None or y is None:
        return x is None and y is None
    return x == y or math.isclose(x, y) or (math.isnan(x) and math.isnan(y))
# Sample inputs: valid literals (with and without sign/exponent) followed by
# inputs that float() rejects or that need whitespace trimming.
numbers = (
    "", "NaN", "10", "32.1", "4123.43214e+05", "4123.43214E+05", "4123.43214e-05",
    "-31", "-12.3", "-4123.43214e+05", "-4123.43214E+05", "-4123.43214e-05",
    " 1321.432 \t ", "1 2", "1-2", "1e", "e1", "1.1e-200", "1.1e+200",
)
k = 24  # column width of the printed table
for number in numbers:
    expected = float_(number)
    actual = str2float(number)
    print(f"{number!r:{k}} {expected!s:{k}} {actual!s:{k}} {is_close(expected, actual)}")
# ''                       None                     None                     True
# 'NaN'                    nan                      nan                      True
# '10'                     10.0                     10.0                     True
# '32.1'                   32.1                     32.1                     True
# '4123.43214e+05'         412343214.0              412343214.0              True
# '4123.43214E+05'         412343214.0              412343214.0              True
# '4123.43214e-05'         0.0412343214             0.0412343214             True
# '-31'                    -31.0                    -31.0                    True
# '-12.3'                  -12.3                    -12.3                    True
# '-4123.43214e+05'        -412343214.0             -412343214.0             True
# '-4123.43214E+05'        -412343214.0             -412343214.0             True
# '-4123.43214e-05'        -0.0412343214            -0.0412343214            True
# ' 1321.432 \t '          1321.432                 None                     False
# '1 2'                    None                     None                     True
# '1-2'                    None                     None                     True
# '1e'                     None                     None                     True
# 'e1'                     None                     None                     True
# '1.1e-200'               1.1e-200                 1.0999999999999995e-200  True
# '1.1e+200'               1.1e+200                 1.1000000000000005e+200  True
(except for the trimming whitespaces part which can be added if needed).
Timewise, this is some 12x slower than pure Python:
# IPython ``%timeit`` magic (not plain Python): -n is the number of loops per
# run, -r the number of runs. The first line times the Numba str parser, the
# second the pure-Python reference.
%timeit -n 32 -r 32 [str2float(number) for number in numbers]
# 32 loops, best of 32: 80.3 µs per loop
%timeit -n 32 -r 32 [float_(number) for number in numbers]
# 32 loops, best of 32: 6.55 µs per loop
and hence only useful if this is needed as part of a more complex njit()-ed code.
From bytes
This is essentially a rewrite of the above to work with bytes. It typically only requires skipping some ord() calls (because indexing or iterating bytes provides the integer representation directly) and defining a helper that plays the role of the == operator, because that is not available for bytes.
@nb.njit
def bytes2float_helper(text):
    """Parse the sign, digits and decimal point of a float literal in bytes.

    The text is scanned right-to-left, so the power of ten of each digit is
    simply the number of digits already consumed.

    Args:
        text: ``bytes`` holding an optional leading ``+``/``-``, decimal
            digits and at most one ``.``. No exponent part and no whitespace.

    Returns:
        A ``(number, sep_pos, valid)`` tuple. ``number`` is the signed integer
        formed by all digits with the decimal point removed, ``sep_pos`` is
        the count of digits to the right of the decimal point, and ``valid``
        is ``False`` when the text is empty, contains no digit, contains more
        than one ``.`` or contains any other byte.
    """
    sep = ord(".")
    c_min = ord("0")
    c_max = ord("9")

    n = len(text)
    valid = n > 0
    # Determine the sign; a sign byte occupies index 0, so the right-to-left
    # scan below must stop just before it.
    start = n - 1
    stop = -1
    sign = 1
    if valid:
        first = text[0]  # indexing bytes already yields the integer code
        if first == ord("+"):
            stop = 0
        elif first == ord("-"):
            sign = -1
            stop = 0
    # Parse the rest.
    # NOTE(review): `number` is built with integer arithmetic; under Numba
    # this is presumably a fixed-width integer, so very long digit runs could
    # overflow -- confirm.
    sep_pos = 0
    has_sep = False
    number = 0
    j = 0  # digits consumed so far == power of ten of the next digit
    for i in range(start, stop, -1):
        c = text[i]
        if c_min <= c <= c_max:
            number += (c - c_min) * 10 ** j
            j += 1
        elif c == sep and not has_sep:
            # An explicit flag is needed: `sep_pos` is legitimately 0 for a
            # trailing "." and therefore cannot signal "separator seen".
            sep_pos = j
            has_sep = True
        else:
            valid = False
            break
    if j == 0:
        # Inputs such as b".", b"+" or b"-" carry no digit at all.
        valid = False
    return sign * number, sep_pos, valid
@nb.njit
def eqb(text_a, text_b):
    """Return True when both byte strings have equal length and content."""
    size = len(text_a)
    if size != len(text_b):
        return False
    # Element-wise comparison; this function stands in for `==` on bytes.
    for pos in range(size):
        if text_a[pos] != text_b[pos]:
            return False
    return True
@nb.njit
def bytes2float(text):
    """Parse ``text`` (bytes) into a float without calling ``float()``.

    Recognizes ``nan`` (in the three spellings tested below) and decimal
    literals with an optional ``e``/``E`` exponent. The mantissa is handled by
    ``bytes2float_helper`` and the exponent by ``bytes2int`` (defined
    elsewhere).

    Args:
        text: ``bytes`` to parse. Surrounding whitespace is not stripped.

    Returns:
        The parsed float, or ``None`` when the mantissa or the exponent is
        malformed.
    """
    if eqb(text, b"nan") or eqb(text, b"NAN") or eqb(text, b"NaN"):
        return math.nan
    lower_e = ord("e")
    upper_e = ord("E")
    # Distance of the last exponent marker from the end of the text
    # (-1: no marker). Indexing from the end avoids building a reversed copy.
    n = len(text)
    exp_pos = -1
    for i in range(n):
        c = text[n - 1 - i]
        if c == lower_e or c == upper_e:
            exp_pos = i
            break
    if exp_pos > 0:
        exp_number = bytes2int(text[-exp_pos:])
        number, sep_pos, valid = bytes2float_helper(text[:-exp_pos - 1])
        if exp_number is None:
            # A malformed exponent invalidates the whole literal instead of
            # being silently read as exponent 0; the placeholder value only
            # keeps the arithmetic below well-typed.
            valid = False
            exp_number = 0
        result = number / 10.0 ** (sep_pos - exp_number) if valid else None
    else:
        # No marker, or a marker as the very last byte; in the latter case
        # the helper rejects the text because of the letter.
        number, sep_pos, valid = bytes2float_helper(text)
        result = number / 10.0 ** sep_pos if valid else None
    return result
The interesting bit of this is that it has comparable speed to the pure Python counterpart (albeit marginally slower, by some 15%):
# Sample inputs: valid literals (with and without sign/exponent) followed by
# inputs that float() rejects or that need whitespace trimming.
numbers = (
    b"", b"NaN", b"10", b"32.1", b"4123.43214e+05", b"4123.43214E+05", b"4123.43214e-05",
    b"-31", b"-12.3", b"-4123.43214e+05", b"-4123.43214E+05", b"-4123.43214e-05",
    b" 1321.432 ", b"1 2", b"1-2", b"1e", b"e1", b"1.1e-200", b"1.1e+200",
)
k = 24  # column width of the printed table
for number in numbers:
    expected = float_(number)
    actual = bytes2float(number)
    print(f"{number!s:{k}} {expected!s:{k}} {actual!s:{k}} {is_close(expected, actual)}")
# b''                      None                     None                     True
# b'NaN'                   nan                      nan                      True
# b'10'                    10.0                     10.0                     True
# b'32.1'                  32.1                     32.1                     True
# b'4123.43214e+05'        412343214.0              412343214.0              True
# b'4123.43214E+05'        412343214.0              412343214.0              True
# b'4123.43214e-05'        0.0412343214             0.0412343214             True
# b'-31'                   -31.0                    -31.0                    True
# b'-12.3'                 -12.3                    -12.3                    True
# b'-4123.43214e+05'       -412343214.0             -412343214.0             True
# b'-4123.43214E+05'       -412343214.0             -412343214.0             True
# b'-4123.43214e-05'       -0.0412343214            -0.0412343214            True
# b' 1321.432 '            1321.432                 None                     False
# b'1 2'                   None                     None                     True
# b'1-2'                   None                     None                     True
# b'1e'                    None                     None                     True
# b'e1'                    None                     None                     True
# b'1.1e-200'              1.1e-200                 1.0999999999999995e-200  True
# b'1.1e+200'              1.1e+200                 1.1000000000000005e+200  True
# IPython ``%timeit`` magic (not plain Python): -n is the number of loops per
# run, -r the number of runs. The first line times the Numba bytes parser,
# the second the pure-Python reference.
%timeit -n 32 -r 32 [bytes2float(number) for number in numbers]
# 32 loops, best of 32: 8.84 µs per loop
%timeit -n 32 -r 32 [float_(number) for number in numbers]
# 32 loops, best of 32: 7.66 µs per loop
