Collect value statistics for formats

This notebook presents various statistics for a variety of float formats. Some of these are available directly on the FormatInfo class, and are presented for all formats. Others are obtained by enumerating all values (only for the ≤16-bit formats).

Statistics obtained from FormatInfo

  • name: Format

  • B: Bits in the format

  • P: Precision in bits

  • E: Exponent field width in bits

  • T: Trailing significand field width in bits

  • max: Largest finite value

  • min: Most negative finite value (typically equal to -max, unless the format is two's complement)

  • smallest: Smallest positive value

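  • smallest_normal: Smallest positive normal value, NaN if all finite values are subnormal

  • num_nans: Number of NaN encodings in the format

  • infs: Number of infinities (2 if the format has infinities, otherwise 0)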

%run utils.py
D = pandas_render  # from utils
import pandas
from functools import partial
from gfloat import *
from gfloat.formats import *

import numpy as np


# Special rendering for float values -
# if they don't round-trip through 5 significant digits, prefix with "≈"
# (approx=True) or render exactly as a rational times a power of two.
def render_float(approx: bool, v) -> str:
    """Render one table cell, flagging floats that a short decimal cannot hold exactly.

    Args:
        approx: Controls what happens when the 5-significant-digit decimal does
            not convert back to ``v``. ``True`` returns that decimal prefixed
            with "≈"; ``False`` returns ``float_pow2str(v)`` instead.
        v: The cell value. Anything that is not a ``float`` is passed to ``str``.

    Returns:
        The rendered string. NaN is rendered as "n/a".
    """
    if not isinstance(v, float):
        return str(v)

    if np.isnan(v):
        return "n/a"

    # Deliberately no minimum field width: padding the digits to a fixed width
    # puts blanks in front of short values, which then end up between the "≈"
    # marker and the number (e.g. "≈  1.9844").
    s = f"{v:.5g}"
    if float(s) == v:
        return s

    if approx:
        return "≈" + s
    return float_pow2str(v)


def collect_stats(fi: FormatInfo):
    """Build one summary-table row from the attributes of a ``FormatInfo``.

    The returned dict is keyed by column name. ``smallest_normal`` is NaN when
    ``fi.is_all_subnormal`` is set, ``num_nans`` is converted to ``float``, and
    ``infs`` is 2 when ``fi.has_infs`` is set and 0 otherwise.
    """
    if fi.is_all_subnormal:
        smallest_normal = np.nan
    else:
        smallest_normal = fi.smallest_normal

    inf_count = 2 if fi.has_infs else 0

    # Key order determines the column order of the resulting DataFrame.
    row = {
        "name": fi.name,
        "B": fi.bits,
        "P": fi.precision,
        "E": fi.expBits,
        "smallest": fi.smallest,
        "smallest_normal": smallest_normal,
        "max": fi.max,
        "num_nans": float(fi.num_nans),
        "infs": inf_count,
    }
    return row


# One row per known format, taken straight from FormatInfo; values that are
# not exact at 5 significant digits are shown with a leading "≈".
stats = list(map(collect_stats, all_formats))
df = pandas.DataFrame(data=stats)
D(df, format=partial(render_float, True))
name B P E smallest smallest_normal max num_nans infs
ocp_e2m1 4 2 2 0.5 1 6 0 0
ocp_e2m3 6 4 2 0.125 1 7.5 0 0
ocp_e3m2 6 3 3 0.0625 0.25 28 0 0
ocp_e4m3 8 4 4 ≈0.0019531 0.015625 448 2 0
ocp_e5m2 8 3 5 ≈1.5259e-05 ≈6.1035e-05 57344 6 2
p3109_p1 8 1 7 ≈2.1684e-19 ≈2.1684e-19 ≈9.2234e+18 1 2
p3109_p2 8 2 6 ≈2.3283e-10 ≈4.6566e-10 ≈2.1475e+09 1 2
p3109_p3 8 3 5 ≈7.6294e-06 ≈3.0518e-05 49152 1 2
p3109_p4 8 4 4 ≈0.00097656 0.0078125 224 1 2
p3109_p5 8 5 3 0.0078125 0.125 15 1 2
p3109_p6 8 6 2 0.015625 0.5 3.875 1 2
binary16 16 11 5 ≈5.9605e-08 ≈6.1035e-05 65504 2046 2
bfloat16 16 8 8 ≈9.1835e-41 ≈1.1755e-38 ≈3.3895e+38 254 2
binary32 32 24 8 ≈1.4013e-45 ≈1.1755e-38 ≈3.4028e+38 ≈1.6777e+07 2
binary64 64 53 11 4.9407e-324 ≈2.2251e-308 ≈1.7977e+308 ≈9.0072e+15 2
ocp_e8m0 8 1 8 ≈5.8775e-39 ≈5.8775e-39 ≈1.7014e+38 1 0
ocp_int8 8 8 0 0.015625 n/a ≈ 1.9844 0 0

Statistics computed by exhaustive inspection

  • lt1: Number of values x such that 0 < x < 1

  • gt1: Number of values x such that 1 < x < Inf

  • rt16: True if all values are exactly representable in IEEE binary16

  • min/maxSubnormal: Smallest/largest positive subnormal value, “n/a” if no values are subnormal

  • min/maxNormal: Smallest/largest positive normal value, “n/a” if no values are normal

def _min_max_or_nan(series):
    """Return ``(min, max)`` of *series*, or ``(nan, nan)`` when it has no entries."""
    # Emptiness is tested with ``.empty``: ``.any()`` would test the truthiness
    # of the values themselves rather than whether any rows were selected.
    if series.empty:
        return np.nan, np.nan
    return series.min(), series.max()


def compute_stats(fi: FormatInfo):
    """Compute statistics for *fi* by decoding every one of its code points.

    All ``2**fi.bits`` encodings are decoded with ``decode_float`` and loaded
    into a DataFrame whose ``fval`` and ``fclass`` columns are inspected. The
    results are cross-checked by assertion against ``fi.max``, ``fi.min``,
    ``fi.smallest_normal``, ``fi.smallest_subnormal`` and ``fi.smallest``.

    Returns:
        A dict with keys ``name``, ``B``, ``P``, ``E``, ``rt16``, ``lt1``,
        ``gt1``, ``minSubnormal``, ``maxSubnormal``, ``minNormal`` and
        ``maxNormal``. The normal/subnormal entries are NaN when the format has
        no positive values of that class.

    Raises:
        AssertionError: If an enumerated statistic disagrees with the
            corresponding ``FormatInfo`` attribute, or if some finite value
            does not round-trip through float32.
    """
    # Generate all values
    values = [decode_float(fi, i) for i in range(2**fi.bits)]
    df = pandas.DataFrame(values)
    fval = df["fval"]
    fclass = df["fclass"]

    # Compute statistics: lt1, gt1 (both intervals are open)
    total_01 = fval.between(0, 1, inclusive="neither").sum()
    total_1Inf = fval.between(1, np.inf, inclusive="neither").sum()

    # Compute statistics: maxFinite, minFinite
    finite_vals = fval[np.isfinite(fval)]
    assert finite_vals.max() == fi.max
    assert finite_vals.min() == fi.min

    is_positive = fval > 0

    # Compute statistics: minNormal, maxNormal (positive values only)
    minNormal, maxNormal = _min_max_or_nan(
        fval[(fclass == FloatClass.NORMAL) & is_positive]
    )
    assert np.isnan(maxNormal) or maxNormal == fi.max
    assert np.isnan(minNormal) or minNormal == fi.smallest_normal

    # Compute statistics: minSubnormal, maxSubnormal (positive values only)
    minSubnormal, maxSubnormal = _min_max_or_nan(
        fval[(fclass == FloatClass.SUBNORMAL) & is_positive]
    )
    assert np.isnan(minSubnormal) or minSubnormal == fi.smallest_subnormal

    # The smallest positive value is whichever of the two minima exists.
    assert np.nanmin([minSubnormal, minNormal]) == fi.smallest

    # Compute roundtrips: rt16, rt32.
    # Non-finite values are counted as round-tripping; overflow while narrowing
    # is expected for wide-range formats, so that warning is silenced.
    with np.errstate(over="ignore"):
        rt16 = (np.float64(np.float16(fval)) == np.float64(fval)) | ~np.isfinite(fval)
        rt32 = (np.float64(np.float32(fval)) == np.float64(fval)) | ~np.isfinite(fval)

    rt16 = rt16.all()
    rt32 = rt32.all()
    assert rt32  # If not, we should include rt32 in the table

    # Assemble the table row; key order determines the column order.
    return dict(
        name=fi.name,
        B=fi.bits,
        P=fi.precision,
        E=fi.expBits,
        rt16=rt16,
        lt1=total_01,
        gt1=total_1Inf,
        minSubnormal=minSubnormal,
        maxSubnormal=maxSubnormal,
        minNormal=minNormal,
        maxNormal=maxNormal,
    )


# Exhaustive enumeration is only run for formats of at most 16 bits.
stats = [compute_stats(fmt) for fmt in all_formats if fmt.bits <= 16]
df2 = pandas.DataFrame(data=stats)
D(df2, format=partial(render_float, True))
name B P E rt16 lt1 gt1 minSubnormal maxSubnormal minNormal maxNormal
ocp_e2m1 4 2 2 True 1 5 0.5 0.5 1 6
ocp_e2m3 6 4 2 True 7 23 0.125 0.875 1 7.5
ocp_e3m2 6 3 3 True 11 19 0.0625 0.1875 0.25 28
ocp_e4m3 8 4 4 True 55 70 ≈0.0019531 ≈0.013672 0.015625 448
ocp_e5m2 8 3 5 True 59 63 ≈1.5259e-05 ≈4.5776e-05 ≈6.1035e-05 57344
p3109_p1 8 1 7 False 62 63 n/a n/a ≈2.1684e-19 ≈9.2234e+18
p3109_p2 8 2 6 False 63 62 ≈2.3283e-10 ≈2.3283e-10 ≈4.6566e-10 ≈2.1475e+09
p3109_p3 8 3 5 True 63 62 ≈7.6294e-06 ≈2.2888e-05 ≈3.0518e-05 49152
p3109_p4 8 4 4 True 63 62 ≈0.00097656 ≈0.0068359 0.0078125 224
p3109_p5 8 5 3 True 63 62 0.0078125 ≈ 0.11719 0.125 15
p3109_p6 8 6 2 True 63 62 0.015625 ≈ 0.48438 0.5 3.875
binary16 16 11 5 True 15359 16383 ≈5.9605e-08 ≈6.0976e-05 ≈6.1035e-05 65504
bfloat16 16 8 8 False 16255 16383 ≈9.1835e-41 ≈1.1663e-38 ≈1.1755e-38 ≈3.3895e+38
ocp_e8m0 8 1 8 False 127 127 n/a n/a ≈5.8775e-39 ≈1.7014e+38
ocp_int8 8 8 0 True 63 63 0.015625 ≈ 1.9844 n/a n/a

Emit the same table, but with exact values

In this table, float values are printed as decimals, unless the decimals are not an exact representation of the value, in which case, they are printed as rationals (between 1 and 2) times 2^E.

# Re-render the exhaustive statistics with approx=False, so that values whose
# short decimal form is inexact go through float_pow2str instead of being
# shown as "≈"-prefixed decimals.
D(df2, format=partial(render_float, False))
name B P E rt16 lt1 gt1 minSubnormal maxSubnormal minNormal maxNormal
ocp_e2m1 4 2 2 True 1 5 0.5 0.5 1 6
ocp_e2m3 6 4 2 True 7 23 0.125 0.875 1 7.5
ocp_e3m2 6 3 3 True 11 19 0.0625 0.1875 0.25 28
ocp_e4m3 8 4 4 True 55 70 2^-9 7/4*2^-7 0.015625 448
ocp_e5m2 8 3 5 True 59 63 2^-16 3/2*2^-15 2^-14 57344
p3109_p1 8 1 7 False 62 63 n/a n/a 2^-62 2^63
p3109_p2 8 2 6 False 63 62 2^-32 2^-32 2^-31 2^31
p3109_p3 8 3 5 True 63 62 2^-17 3/2*2^-16 2^-15 49152
p3109_p4 8 4 4 True 63 62 2^-10 7/4*2^-8 0.0078125 224
p3109_p5 8 5 3 True 63 62 0.0078125 15/8*2^-4 0.125 15
p3109_p6 8 6 2 True 63 62 0.015625 31/16*2^-2 0.5 3.875
binary16 16 11 5 True 15359 16383 2^-24 1023/512*2^-15 2^-14 65504
bfloat16 16 8 8 False 16255 16383 2^-133 127/64*2^-127 2^-126 255/128*2^127
ocp_e8m0 8 1 8 False 127 127 n/a n/a 2^-127 2^127
ocp_int8 8 8 0 True 63 63 0.015625 127/64*2^0 n/a n/a

Tables in RST/Markdown

These are used to generate the gfloat documentation, but may be useful in other contexts, so they are left here.

from tabulate import tabulate

# RST table of the FormatInfo statistics, with inexact decimals marked "≈".
dfstr = df.map(partial(render_float, True))
rst_kwargs = dict(headers=df.columns, tablefmt="rst", showindex=False)
print(tabulate(dfstr, **rst_kwargs).replace(" nan", " n/a"))
========  ===  ===  ===  ===========  =================  ============  ===========  ======
name        B    P    E  smallest     smallest_normal    max           num_nans       infs
========  ===  ===  ===  ===========  =================  ============  ===========  ======
ocp_e2m1    4    2    2  0.5          1                  6             0                 0
ocp_e2m3    6    4    2  0.125        1                  7.5           0                 0
ocp_e3m2    6    3    3  0.0625       0.25               28            0                 0
ocp_e4m3    8    4    4  ≈0.0019531   0.015625           448           2                 0
ocp_e5m2    8    3    5  ≈1.5259e-05  ≈6.1035e-05        57344         6                 2
p3109_p1    8    1    7  ≈2.1684e-19  ≈2.1684e-19        ≈9.2234e+18   1                 2
p3109_p2    8    2    6  ≈2.3283e-10  ≈4.6566e-10        ≈2.1475e+09   1                 2
p3109_p3    8    3    5  ≈7.6294e-06  ≈3.0518e-05        49152         1                 2
p3109_p4    8    4    4  ≈0.00097656  0.0078125          224           1                 2
p3109_p5    8    5    3  0.0078125    0.125              15            1                 2
p3109_p6    8    6    2  0.015625     0.5                3.875         1                 2
binary16   16   11    5  ≈5.9605e-08  ≈6.1035e-05        65504         2046              2
bfloat16   16    8    8  ≈9.1835e-41  ≈1.1755e-38        ≈3.3895e+38   254               2
binary32   32   24    8  ≈1.4013e-45  ≈1.1755e-38        ≈3.4028e+38   ≈1.6777e+07       2
binary64   64   53   11  4.9407e-324  ≈2.2251e-308       ≈1.7977e+308  ≈9.0072e+15       2
ocp_e8m0    8    1    8  ≈5.8775e-39  ≈5.8775e-39        ≈1.7014e+38   1                 0
ocp_int8    8    8    0  0.015625     n/a                ≈  1.9844     0                 0
========  ===  ===  ===  ===========  =================  ============  ===========  ======
from tabulate import tabulate

# RST table of the FormatInfo statistics, with inexact decimals replaced by
# the exact rendering that render_float produces when approx is False.
dfstr = df.map(partial(render_float, False))
print(tabulate(dfstr, headers=df.columns, tablefmt="rst", showindex=False))
========  ===  ===  ===  ===========  =================  ========================================  ======================================  ======
name        B    P    E  smallest     smallest_normal    max                                       num_nans                                  infs
========  ===  ===  ===  ===========  =================  ========================================  ======================================  ======
ocp_e2m1    4    2    2  0.5          1                  6                                         0                                            0
ocp_e2m3    6    4    2  0.125        1                  7.5                                       0                                            0
ocp_e3m2    6    3    3  0.0625       0.25               28                                        0                                            0
ocp_e4m3    8    4    4  2^-9         0.015625           448                                       2                                            0
ocp_e5m2    8    3    5  2^-16        2^-14              57344                                     6                                            2
p3109_p1    8    1    7  2^-62        2^-62              2^63                                      1                                            2
p3109_p2    8    2    6  2^-32        2^-31              2^31                                      1                                            2
p3109_p3    8    3    5  2^-17        2^-15              49152                                     1                                            2
p3109_p4    8    4    4  2^-10        0.0078125          224                                       1                                            2
p3109_p5    8    5    3  0.0078125    0.125              15                                        1                                            2
p3109_p6    8    6    2  0.015625     0.5                3.875                                     1                                            2
binary16   16   11    5  2^-24        2^-14              65504                                     2046                                         2
bfloat16   16    8    8  2^-133       2^-126             255/128*2^127                             254                                          2
binary32   32   24    8  2^-149       2^-126             16777215/8388608*2^127                    8388607/4194304*2^23                         2
binary64   64   53   11  4.9407e-324  2^-1022            9007199254740991/9007199254740992*2^1024  4503599627370495/4503599627370496*2^53       2
ocp_e8m0    8    1    8  2^-127       2^-127             2^127                                     1                                            0
ocp_int8    8    8    0  0.015625     n/a                127/64*2^0                                0                                            0
========  ===  ===  ===  ===========  =================  ========================================  ======================================  ======
# Markdown version of the FormatInfo table. The DataFrame is dumped directly,
# so values are not passed through render_float and the index column is kept.
print(df.to_markdown())
|    | name     |   B |   P |   E |     smallest |   smallest_normal |              max |       num_nans |   infs |
|---:|:---------|----:|----:|----:|-------------:|------------------:|-----------------:|---------------:|-------:|
|  0 | ocp_e2m1 |   4 |   2 |   2 | 0.5          |      1            |     6            |    0           |      0 |
|  1 | ocp_e2m3 |   6 |   4 |   2 | 0.125        |      1            |     7.5          |    0           |      0 |
|  2 | ocp_e3m2 |   6 |   3 |   3 | 0.0625       |      0.25         |    28            |    0           |      0 |
|  3 | ocp_e4m3 |   8 |   4 |   4 | 0.00195312   |      0.015625     |   448            |    2           |      0 |
|  4 | ocp_e5m2 |   8 |   3 |   5 | 1.52588e-05  |      6.10352e-05  | 57344            |    6           |      2 |
|  5 | p3109_p1 |   8 |   1 |   7 | 2.1684e-19   |      2.1684e-19   |     9.22337e+18  |    1           |      2 |
|  6 | p3109_p2 |   8 |   2 |   6 | 2.32831e-10  |      4.65661e-10  |     2.14748e+09  |    1           |      2 |
|  7 | p3109_p3 |   8 |   3 |   5 | 7.62939e-06  |      3.05176e-05  | 49152            |    1           |      2 |
|  8 | p3109_p4 |   8 |   4 |   4 | 0.000976562  |      0.0078125    |   224            |    1           |      2 |
|  9 | p3109_p5 |   8 |   5 |   3 | 0.0078125    |      0.125        |    15            |    1           |      2 |
| 10 | p3109_p6 |   8 |   6 |   2 | 0.015625     |      0.5          |     3.875        |    1           |      2 |
| 11 | binary16 |  16 |  11 |   5 | 5.96046e-08  |      6.10352e-05  | 65504            | 2046           |      2 |
| 12 | bfloat16 |  16 |   8 |   8 | 9.18355e-41  |      1.17549e-38  |     3.38953e+38  |  254           |      2 |
| 13 | binary32 |  32 |  24 |   8 | 1.4013e-45   |      1.17549e-38  |     3.40282e+38  |    1.67772e+07 |      2 |
| 14 | binary64 |  64 |  53 |  11 | 4.94066e-324 |      2.22507e-308 |     1.79769e+308 |    9.0072e+15  |      2 |
| 15 | ocp_e8m0 |   8 |   1 |   8 | 5.87747e-39  |      5.87747e-39  |     1.70141e+38  |    1           |      0 |
| 16 | ocp_int8 |   8 |   8 |   0 | 0.015625     |    nan            |     1.98438      |    0           |      0 |