From 8b3e2189bc1245102ed03c478ef97d38a12747b6 Mon Sep 17 00:00:00 2001 From: lhdjply Date: Thu, 15 May 2025 11:03:14 +0800 Subject: [PATCH] feat: update python-gfloat to 0.3-1 Signed-off-by: lhdjply --- .gitignore | 164 + .pre-commit-config.yaml | 27 + .readthedocs.yaml | 15 + BUILDING.md | 13 + ChangeLog | 17 + LICENSE | 21 + PKG-INFO | 70 + README.md | 43 +- debian/changelog | 24 +- debian/compat | 1 - debian/control | 66 +- debian/copyright | 44 +- debian/gbp.conf | 2 + debian/python-gfloat-doc.docs | 2 + debian/rules | 16 +- debian/source/options | 1 + debian/watch | 4 + docs/Makefile | 20 + docs/make.bat | 35 + docs/requirements-rtd.txt | 1 + docs/source/01-decode.ipynb | 401 +++ docs/source/02-value-stats.ipynb | 436 +++ docs/source/03-value-tables.ipynb | 4114 ++++++++++++++++++++++ docs/source/api.rst | 43 + docs/source/conf.py | 54 + docs/source/formats.rst | 44 + docs/source/index.rst | 55 + docs/source/notebooks.rst | 13 + etc/check-copyright.sh | 12 + etc/package.sh | 22 + etc/test-check-copyright.sh | 30 + pyproject.toml | 40 + requirements-dev.txt | 21 + requirements-test.txt | 3 + requirements.txt | 1 + setup.cfg | 4 + src/gfloat.egg-info/PKG-INFO | 70 + src/gfloat.egg-info/SOURCES.txt | 45 + src/gfloat.egg-info/dependency_links.txt | 1 + src/gfloat.egg-info/requires.txt | 16 + src/gfloat.egg-info/top_level.txt | 1 + src/gfloat/__init__.py | 17 + src/gfloat/block.py | 192 + src/gfloat/decode.py | 98 + src/gfloat/formats.py | 262 ++ src/gfloat/printing.py | 55 + src/gfloat/round.py | 231 ++ src/gfloat/types.py | 408 +++ test/test_block.py | 30 + test/test_decode.py | 259 ++ test/test_encode.py | 47 + test/test_finfo.py | 32 + test/test_microxcaling.py | 91 + test/test_printing.py | 23 + test/test_round.py | 455 +++ 55 files changed, 8175 insertions(+), 37 deletions(-) create mode 100644 .gitignore create mode 100644 .pre-commit-config.yaml create mode 100644 .readthedocs.yaml create mode 100644 BUILDING.md create mode 100644 ChangeLog create mode 
100644 LICENSE create mode 100644 PKG-INFO delete mode 100644 debian/compat create mode 100644 debian/gbp.conf create mode 100644 debian/python-gfloat-doc.docs create mode 100644 debian/source/options create mode 100644 debian/watch create mode 100644 docs/Makefile create mode 100644 docs/make.bat create mode 100644 docs/requirements-rtd.txt create mode 100644 docs/source/01-decode.ipynb create mode 100644 docs/source/02-value-stats.ipynb create mode 100644 docs/source/03-value-tables.ipynb create mode 100644 docs/source/api.rst create mode 100644 docs/source/conf.py create mode 100644 docs/source/formats.rst create mode 100644 docs/source/index.rst create mode 100644 docs/source/notebooks.rst create mode 100755 etc/check-copyright.sh create mode 100644 etc/package.sh create mode 100644 etc/test-check-copyright.sh create mode 100644 pyproject.toml create mode 100644 requirements-dev.txt create mode 100644 requirements-test.txt create mode 100644 requirements.txt create mode 100644 setup.cfg create mode 100644 src/gfloat.egg-info/PKG-INFO create mode 100644 src/gfloat.egg-info/SOURCES.txt create mode 100644 src/gfloat.egg-info/dependency_links.txt create mode 100644 src/gfloat.egg-info/requires.txt create mode 100644 src/gfloat.egg-info/top_level.txt create mode 100644 src/gfloat/__init__.py create mode 100644 src/gfloat/block.py create mode 100644 src/gfloat/decode.py create mode 100644 src/gfloat/formats.py create mode 100644 src/gfloat/printing.py create mode 100644 src/gfloat/round.py create mode 100644 src/gfloat/types.py create mode 100644 test/test_block.py create mode 100644 test/test_decode.py create mode 100644 test/test_encode.py create mode 100644 test/test_finfo.py create mode 100644 test/test_microxcaling.py create mode 100644 test/test_printing.py create mode 100644 test/test_round.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..57fa349 --- /dev/null +++ b/.gitignore @@ -0,0 +1,164 @@ +# Copyright (c) 2024 Graphcore Ltd. 
All rights reserved. + +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. 
+# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/#use-with-ide +.pdm.toml + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. +#.idea/ +.vscode/settings.json +.vscode/launch.json diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..064619b --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,27 @@ +# Copyright (c) 2024 Graphcore Ltd. All rights reserved. 
+ +repos: +- repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.6.0 + hooks: + - id: check-yaml + - id: end-of-file-fixer + - id: trailing-whitespace + +- repo: https://github.com/psf/black + rev: 24.4.0 + hooks: + - id: black-jupyter + +- repo: local + hooks: + - id: etc/check-copyright.sh + name: check copyright + entry: etc/check-copyright.sh + language: script + exclude: | + (?x)( + ^docs/Makefile$| + ^docs/make.bat$| + (/|)requirements.*\.txt$ + ) diff --git a/.readthedocs.yaml b/.readthedocs.yaml new file mode 100644 index 0000000..64c2159 --- /dev/null +++ b/.readthedocs.yaml @@ -0,0 +1,15 @@ +# Copyright (c) 2024 Graphcore Ltd. All rights reserved. + +version: 2 + +build: + os: "ubuntu-22.04" + tools: + python: "3.10" + +python: + install: + - requirements: docs/requirements-rtd.txt + +sphinx: + configuration: docs/source/conf.py diff --git a/BUILDING.md b/BUILDING.md new file mode 100644 index 0000000..d8417a5 --- /dev/null +++ b/BUILDING.md @@ -0,0 +1,13 @@ + + +## BUILDING + +``` +pip install -e . +( cd docs && make html ) +``` + +#### Pushing +``` +sh etc/package.sh +``` diff --git a/ChangeLog b/ChangeLog new file mode 100644 index 0000000..6555896 --- /dev/null +++ b/ChangeLog @@ -0,0 +1,17 @@ + +0.3: Jun 10, 2024 + - Use python ints throughout, adding float64 to test + - Simplify round, fix directed rounding + - Rename "ival" to "code" in FloatValue + - Shorten format names from "format_info_*" to "*" + + +0.2: May 21, 2024 + - Add MX Formats + - Improved CI + - Add value table pretty-printing + +0.1: May 2, 2024 + - First released version + +Copyright (c) 2024 Graphcore Ltd. All rights reserved. diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..68dc306 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2023 Graphcore Ltd. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/PKG-INFO b/PKG-INFO new file mode 100644 index 0000000..0ecc625 --- /dev/null +++ b/PKG-INFO @@ -0,0 +1,70 @@ +Metadata-Version: 2.1 +Name: gfloat +Version: 0.3 +Summary: Generic floating point handling in Python +Author-email: Andrew Fitzgibbon +Classifier: Intended Audience :: Developers +Classifier: License :: OSI Approved :: MIT License +Classifier: Programming Language :: Python :: 3 +Classifier: Development Status :: 3 - Alpha +Requires-Python: >=3.8.1 +Description-Content-Type: text/markdown +License-File: LICENSE +Requires-Dist: numpy +Provides-Extra: dev +Requires-Dist: pytest; extra == "dev" +Requires-Dist: ml_dtypes; extra == "dev" +Requires-Dist: pre-commit; extra == "dev" +Requires-Dist: black; extra == "dev" +Requires-Dist: mypy; extra == "dev" +Requires-Dist: black[jupyter]; extra == "dev" +Requires-Dist: isort; extra == "dev" +Requires-Dist: sphinx==7.1.2; extra == "dev" +Requires-Dist: sphinx-rtd-theme==1.3.0rc1; extra == "dev" +Requires-Dist: sphinx_paramlinks; extra == "dev" +Requires-Dist: myst_nb; extra == "dev" +Requires-Dist: airium; extra == "dev" +Requires-Dist: pandas; extra == "dev" + + + +# gfloat: Generic floating-point types in Python + +An implementation of generic floating point encode/decode logic, +handling various current and proposed floating point types: + + - [IEEE 754](https://en.wikipedia.org/wiki/IEEE_754): Binary16, Binary32 + - [OCP Float8](https://www.opencompute.org/documents/ocp-8-bit-floating-point-specification-ofp8-revision-1-0-2023-06-20-pdf): E5M2, E4M3 + - [IEEE WG P3109](https://github.com/awf/P3109-Public/blob/main/Shared%20Reports/P3109%20WG%20Interim%20report.pdf): P{p} for p in 1..7 + - [OCP MX Formats](https://www.opencompute.org/documents/ocp-microscaling-formats-mx-v1-0-spec-final-pdf): E2M1, M2M3, E3M2, E8M0, INT8, and the MX block formats. 
+ +The library favours readability and extensibility over speed - for fast implementations of these datatypes see, for example, [ml_dtypes](https://github.com/jax-ml/ml_dtypes), +[bitstring](https://github.com/scott-griffiths/bitstring), +[MX PyTorch Emulation Library](https://github.com/microsoft/microxcaling). + +See https://gfloat.readthedocs.io for documentation, or dive into the notebooks to explore the formats. + +For example, here's a table from the [02-value-stats](docs/source/02-value-stats.ipynb) notebook: + +|name|B: Bits in the format|P: Precision in bits|E: Exponent field width in bits|0 + +# gfloat: Generic floating-point types in Python + +An implementation of generic floating point encode/decode logic, +handling various current and proposed floating point types: + + - [IEEE 754](https://en.wikipedia.org/wiki/IEEE_754): Binary16, Binary32 + - [OCP Float8](https://www.opencompute.org/documents/ocp-8-bit-floating-point-specification-ofp8-revision-1-0-2023-06-20-pdf): E5M2, E4M3 + - [IEEE WG P3109](https://github.com/awf/P3109-Public/blob/main/Shared%20Reports/P3109%20WG%20Interim%20report.pdf): P{p} for p in 1..7 + - [OCP MX Formats](https://www.opencompute.org/documents/ocp-microscaling-formats-mx-v1-0-spec-final-pdf): E2M1, E2M3, E3M2, E8M0, INT8, and the MX block formats. + +The library favours readability and extensibility over speed - for fast implementations of these datatypes see, for example, [ml_dtypes](https://github.com/jax-ml/ml_dtypes), +[bitstring](https://github.com/scott-griffiths/bitstring), +[MX PyTorch Emulation Library](https://github.com/microsoft/microxcaling). + +See https://gfloat.readthedocs.io for documentation, or dive into the notebooks to explore the formats. 
+ +For example, here's a table from the [02-value-stats](docs/source/02-value-stats.ipynb) notebook: + +|name|B: Bits in the format|P: Precision in bits|E: Exponent field width in bits|0 Sat, 28 Jan 2023 13:46:49 +0800 + -- Scott Kitterman Thu, 13 Jun 2024 00:15:48 -0400 + +python-gfloat (0.2.1-1) unstable; urgency=medium + + * New upstream release + + -- Scott Kitterman Tue, 28 May 2024 09:59:43 -0400 + +python-gfloat (0.1-2) unstable; urgency=medium + + * No change upload to build binaries on buildd + + -- Scott Kitterman Wed, 08 May 2024 21:31:55 -0400 + +python-gfloat (0.1-1) unstable; urgency=low + + * Initial release (Closes: #1070718) + + -- Scott Kitterman Tue, 07 May 2024 15:47:03 -0400 diff --git a/debian/compat b/debian/compat deleted file mode 100644 index b4de394..0000000 --- a/debian/compat +++ /dev/null @@ -1 +0,0 @@ -11 diff --git a/debian/control b/debian/control index cb7c4a0..7a2cda2 100644 --- a/debian/control +++ b/debian/control @@ -1,15 +1,55 @@ -Source: template-repository -Section: unknown +Source: python-gfloat +Section: python Priority: optional -Maintainer: Tsic404 -Build-Depends: debhelper (>= 11) -Standards-Version: 4.1.3 -Homepage: https://github.com/deepin-community/template-repository -#Vcs-Browser: https://salsa.debian.org/debian/deepin-community-template-repository -#Vcs-Git: https://salsa.debian.org/debian/deepin-community-template-repository.git +Maintainer: Debian Python Team +Uploaders: Scott Kitterman +Build-Depends: debhelper-compat (= 13), + pybuild-plugin-pyproject, python3-setuptools, + python3-all, + python3-myst-nb , + python3-numpy , + python3-sphinx , + python3-sphinx-paramlinks , + python3-sphinx-rtd-theme , +Standards-Version: 4.7.0 +Vcs-Git: https://salsa.debian.org/python-team/packages/python-gfloat.git +Vcs-Browser: https://salsa.debian.org/python-team/packages/python-gfloat +Homepage: https://github.com/graphcore-research/gfloat +Rules-Requires-Root: no -Package: template-repository -Architecture: any -Depends: 
${shlibs:Depends}, ${misc:Depends} -Description: - +Package: python3-gfloat +Architecture: all +Depends: ${misc:Depends}, ${python3:Depends}, +Suggests: python-gfloat-doc +Description: Python module of generic floating point encode/decode logic + An implementation of generic floating point encode/decode logic, handling + various current and proposed floating point types: + . + - IEEE 754: Binary16, Binary32 + - OCP Float8: E5M2, E4M3 + - IEEE WG P3109: P{p} for p in 1..7 + - OCP MX Formats: E2M1, E2M3, E3M2, E8M0, INT8, and the MX block formats. + . + The library favours readability and extensibility over speed - for fast + implementations of these datatypes see, for example, ml_dtypes, bitstring, MX + PyTorch Emulation Library. + +Package: python-gfloat-doc +Section: doc +Architecture: all +Multi-Arch: foreign +Depends: ${misc:Depends}, ${sphinxdoc:Depends} +Description: documentation for the gfloat Python library + An implementation of generic floating point encode/decode logic, handling + various current and proposed floating point types: + . + - IEEE 754: Binary16, Binary32 + - OCP Float8: E5M2, E4M3 + - IEEE WG P3109: P{p} for p in 1..7 + - OCP MX Formats: E2M1, E2M3, E3M2, E8M0, INT8, and the MX block formats. + . + The library favours readability and extensibility over speed - for fast + implementations of these datatypes see, for example, ml_dtypes, bitstring, MX + PyTorch Emulation Library. + . + This package provides documentation for python3-gfloat. 
diff --git a/debian/copyright b/debian/copyright index f5c805e..27b7a6e 100644 --- a/debian/copyright +++ b/debian/copyright @@ -1,22 +1,32 @@ Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/ -Upstream-Name: template-repository -Source: https://github.com/deepin-community/template-repository +Upstream-Name: gfloat +Upstream-Contact: Andrew Fitzgibbon +Source: Files: * -Copyright: 2023 Tsic404 -License: GPL-2+ - This package is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - . - This package is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. +Copyright: Copyright (c) 2023 - 2024 Graphcore Ltd. All rights reserved. +License: Expat + + +Files: debian/* +Copyright: 2024 Scott Kitterman +License: Expat + +License: Expat + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: . - You should have received a copy of the GNU General Public License - along with this program. If not, see + The above copyright notice and this permission notice shall be included in all + copies or substantial portions of the Software. . - On Debian systems, the complete text of the GNU General - Public License version 2 can be found in "/usr/share/common-licenses/GPL-2". 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + SOFTWARE. diff --git a/debian/gbp.conf b/debian/gbp.conf new file mode 100644 index 0000000..3879982 --- /dev/null +++ b/debian/gbp.conf @@ -0,0 +1,2 @@ +[DEFAULT] +debian-branch=debian/master diff --git a/debian/python-gfloat-doc.docs b/debian/python-gfloat-doc.docs new file mode 100644 index 0000000..1643905 --- /dev/null +++ b/debian/python-gfloat-doc.docs @@ -0,0 +1,2 @@ +.pybuild/docs/* +README.md diff --git a/debian/rules b/debian/rules index 2d33f6a..cf85d6f 100755 --- a/debian/rules +++ b/debian/rules @@ -1,4 +1,16 @@ -#!/usr/bin/make -f +#! 
/usr/bin/make -f +export PYBUILD_NAME=gfloat %: - dh $@ + dh $@ --with python3,sphinxdoc --buildsystem=pybuild + +execute_after_dh_auto_build-indep: +ifeq (,$(filter nodoc,$(DEB_BUILD_OPTIONS))) + cd docs && \ + PYTHONPATH=$(CURDIR)/src http_proxy='http://127.0.0.1:9/' https_proxy='https://127.0.0.1:9/' \ + sphinx-build -N -E -T -b html source $(CURDIR)/.pybuild/docs/html/ + rm -rf $(CURDIR)/.pybuild/docs/html/.doctrees +endif + +override_dh_auto_test: + # Requires unpackaged modules diff --git a/debian/source/options b/debian/source/options new file mode 100644 index 0000000..a1c7187 --- /dev/null +++ b/debian/source/options @@ -0,0 +1 @@ +extend-diff-ignore="src/gfloat.(egg-info|dist-info)/" diff --git a/debian/watch b/debian/watch new file mode 100644 index 0000000..950ebf0 --- /dev/null +++ b/debian/watch @@ -0,0 +1,4 @@ +version=4 +# try also https://pypi.debian.net/pylsqpack/watch +opts=uversionmangle=s/(rc|a|b|c)/~$1/ \ +https://pypi.debian.net/gfloat/gfloat-(.+)\.(?:zip|tgz|tbz|txz|(?:tar\.(?:gz|bz2|xz))) diff --git a/docs/Makefile b/docs/Makefile new file mode 100644 index 0000000..d0c3cbf --- /dev/null +++ b/docs/Makefile @@ -0,0 +1,20 @@ +# Minimal makefile for Sphinx documentation +# + +# You can set these variables from the command line, and also +# from the environment for the first two. +SPHINXOPTS ?= +SPHINXBUILD ?= sphinx-build +SOURCEDIR = source +BUILDDIR = build + +# Put it first so that "make" without argument is like "make help". +help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: help Makefile + +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
+%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/docs/make.bat b/docs/make.bat new file mode 100644 index 0000000..6247f7e --- /dev/null +++ b/docs/make.bat @@ -0,0 +1,35 @@ +@ECHO OFF + +pushd %~dp0 + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=sphinx-build +) +set SOURCEDIR=source +set BUILDDIR=build + +if "%1" == "" goto help + +%SPHINXBUILD% >NUL 2>NUL +if errorlevel 9009 ( + echo. + echo.The 'sphinx-build' command was not found. Make sure you have Sphinx + echo.installed, then set the SPHINXBUILD environment variable to point + echo.to the full path of the 'sphinx-build' executable. Alternatively you + echo.may add the Sphinx directory to PATH. + echo. + echo.If you don't have Sphinx installed, grab it from + echo.http://sphinx-doc.org/ + exit /b 1 +) + +%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% +goto end + +:help +%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% + +:end +popd diff --git a/docs/requirements-rtd.txt b/docs/requirements-rtd.txt new file mode 100644 index 0000000..e7ab444 --- /dev/null +++ b/docs/requirements-rtd.txt @@ -0,0 +1 @@ +.[dev] diff --git a/docs/source/01-decode.ipynb b/docs/source/01-decode.ipynb new file mode 100644 index 0000000..155f26b --- /dev/null +++ b/docs/source/01-decode.ipynb @@ -0,0 +1,401 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "# GFloat Basics\n", + "\n", + "This notebook shows the use of `decode_float` to explore properties of some float formats.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "# Install packages\n", + "from pandas import DataFrame\n", + "import numpy as np\n", + "\n", + "from gfloat import decode_float\n", + "from gfloat.formats import *" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## List all the values in a format\n", + 
"\n", + "The first example shows how to list all values in a given format.\n", + "We will choose the [OCP](https://www.opencompute.org/documents/ocp-8-bit-floating-point-specification-ofp8-revision-1-0-2023-12-01-pdf-1) E5M2 format.\n", + "\n", + "The object `format_info_ocp_e5m2` is from the `gfloat.formats` package, and describes the characteristics of that format:" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "FormatInfo(name='ocp_e5m2', k=8, precision=3, emax=15, has_nz=True, has_infs=True, num_high_nans=3, has_subnormals=True, is_signed=True, is_twos_complement=False)" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "format_info_ocp_e5m2" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We shall use the format to decode all values from 0..255, and gather them in a pandas DataFrame.\n", + "We see that `decode_float` returns a lot more than just the value - it also splits out the exponent, significand, and sign, and returns the `FloatClass`, which allows us to distinguish normal and subnormal numbers, as well as zero, infinity, and nan." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
fvalexpexpvalsignificandfsignificandsignbitfclass
code
00.000000e+000-1400.000FloatClass.ZERO
11.525879e-050-1410.250FloatClass.SUBNORMAL
23.051758e-050-1420.500FloatClass.SUBNORMAL
34.577637e-050-1430.750FloatClass.SUBNORMAL
46.103516e-051-1401.000FloatClass.NORMAL
........................
251-5.734400e+04301531.751FloatClass.NORMAL
252-inf311601.001FloatClass.INFINITE
253NaN311611.251FloatClass.NAN
254NaN311621.501FloatClass.NAN
255NaN311631.751FloatClass.NAN
\n", + "

256 rows × 7 columns

\n", + "
" + ], + "text/plain": [ + " fval exp expval significand fsignificand signbit \\\n", + "code \n", + "0 0.000000e+00 0 -14 0 0.00 0 \n", + "1 1.525879e-05 0 -14 1 0.25 0 \n", + "2 3.051758e-05 0 -14 2 0.50 0 \n", + "3 4.577637e-05 0 -14 3 0.75 0 \n", + "4 6.103516e-05 1 -14 0 1.00 0 \n", + "... ... ... ... ... ... ... \n", + "251 -5.734400e+04 30 15 3 1.75 1 \n", + "252 -inf 31 16 0 1.00 1 \n", + "253 NaN 31 16 1 1.25 1 \n", + "254 NaN 31 16 2 1.50 1 \n", + "255 NaN 31 16 3 1.75 1 \n", + "\n", + " fclass \n", + "code \n", + "0 FloatClass.ZERO \n", + "1 FloatClass.SUBNORMAL \n", + "2 FloatClass.SUBNORMAL \n", + "3 FloatClass.SUBNORMAL \n", + "4 FloatClass.NORMAL \n", + "... ... \n", + "251 FloatClass.NORMAL \n", + "252 FloatClass.INFINITE \n", + "253 FloatClass.NAN \n", + "254 FloatClass.NAN \n", + "255 FloatClass.NAN \n", + "\n", + "[256 rows x 7 columns]" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "fmt = format_info_ocp_e5m2\n", + "vals = [decode_float(fmt, i) for i in range(256)]\n", + "DataFrame(vals).set_index(\"code\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Additional format info: special values, min, max, dynamic range\n", + "\n", + "In addition, `FormatInfo` can tell us about other characteristics of each format.\n", + "To reproduce some of the OCP spec's tables 1 and 2:" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Format ocp_e4m3 ocp_e5m2 p3109_p3\n", + "Max exponent (emax) 8 15 15\n", + "Exponent bias 7 15 16\n", + "Infinities 0 2 2\n", + "Number of NaNs 2 6 1\n", + "Number of zeros 2 2 1\n", + "Max normal number 448.0 57344.0 49152.0\n", + "Min normal number 0.015625 6.103515625e-05 3.0517578125e-05\n", + "Min subnormal number 0.001953125 1.52587890625e-05 7.62939453125e-06\n", + "Dynamic range (binades) 18 32 33\n" + ] + } + ], + 
"source": [ + "def compute_dynamic_range(fi):\n", + " return np.log2(fi.max / fi.smallest)\n", + "\n", + "\n", + "for prop, probe in (\n", + " (\"Format \", lambda fi: fi.name.replace(\"format_info_\", \"\")),\n", + " (\"Max exponent (emax) \", lambda fi: fi.emax),\n", + " (\"Exponent bias \", lambda fi: fi.expBias),\n", + " (\"Infinities \", lambda fi: 2 * int(fi.has_infs)),\n", + " (\"Number of NaNs \", lambda fi: fi.num_nans),\n", + " (\"Number of zeros \", lambda fi: int(fi.has_zero) + int(fi.has_nz)),\n", + " (\"Max normal number \", lambda fi: fi.max),\n", + " (\"Min normal number \", lambda fi: fi.smallest_normal),\n", + " (\"Min subnormal number \", lambda fi: fi.smallest_subnormal),\n", + " (\"Dynamic range (binades)\", lambda x: round(compute_dynamic_range(x))),\n", + "):\n", + " print(\n", + " f\"{prop} {probe(format_info_ocp_e4m3):<20} {probe(format_info_ocp_e5m2):<20} {probe(format_info_p3109(3))}\"\n", + " )" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## How do subnormals affect dynamic range?\n", + "\n", + "Most, if not all, low-precision formats include subnormal numbers, as they increase the number of values near zero, and increase dynamic range.\n", + "A natural question is \"by how much?\". To answer this, we can create a mythical new format, a copy of `e4m3`, but with `has_subnormals` set to false." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "import copy\n", + "\n", + "e4m3_no_subnormals = copy.copy(format_info_ocp_e4m3)\n", + "e4m3_no_subnormals.has_subnormals = False" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "And now compute the dynamic range with and without:" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Dynamic range with subnormals = 17.807354922057606\n", + "Dynamic range without subnormals = 15.637429920615292\n", + "Ratio = 4.5\n" + ] + } + ], + "source": [ + "dr_with = compute_dynamic_range(format_info_ocp_e4m3)\n", + "dr_without = compute_dynamic_range(e4m3_no_subnormals)\n", + "\n", + "print(f\"Dynamic range with subnormals = {dr_with}\")\n", + "print(f\"Dynamic range without subnormals = {dr_without}\")\n", + "print(f\"Ratio = {2**(dr_with - dr_without):.1f}\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "ml_dtypes", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.0" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/docs/source/02-value-stats.ipynb b/docs/source/02-value-stats.ipynb new file mode 100644 index 0000000..cf15cb6 --- /dev/null +++ b/docs/source/02-value-stats.ipynb @@ -0,0 +1,436 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "# Collect value statistics for formats\n", + "\n", + "This notebook computes various statistics for a variety of float formats,\n", + "by exhaustively enumerating the values. 
Naturally, most of these statistics can be computed directly, and indeed many are already supplied on the `FormatInfo` class as methods, for example `max`, `smallest_subnormal`, etc. However this method serves as a useful cross-check against the direct formulae.\n", + "\n", + "## Statistics collected\n", + "\n", + " - name: Format\n", + " - B: Bits in the format\n", + " - P: Precision in bits\n", + " - E: Exponent field width in bits\n", + " - T: Trailing significand field width in bits\n", + " - lt1: Number of values x such that `0 < x < 1`\n", + " - gt1: Number of values x such that `1 < x < Inf`\n", + " - rt16: True if all values are exactly representable in IEEE binary16\n", + " - maxFinite: Largest finite value\n", + " - minFinite: Smallest finite value\n", + " - maxNormal: Largest finite normal value, NaN if all finite values are subnormal\n", + " - minNormal: Smallest positive normal value, NaN if all finite values are subnormal\n", + " - minSubnormal: Smallest positive subnormal value, NaN if no finite values are subnormal\n", + " - maxSubnormal: Largest subnormal value, NaN if no finite values are subnormal\n" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from gfloat import *\n", + "from gfloat.formats import *\n", + "\n", + "import pandas\n", + "import numpy as np\n", + "from IPython.display import HTML\n", + "\n", + "\n", + "def collect_stats(fi: FormatInfo):\n", + " # Generate all values\n", + " values = [decode_float(fi, i) for i in range(2**fi.bits)]\n", + " df = pandas.DataFrame(values)\n", + "\n", + " # Extract format information parameters\n", + " E = fi.expBits\n", + " S = fi.tSignificandBits\n", + "\n", + " # Compute statistics: lt1,gt1\n", + " fval = df[\"fval\"]\n", + " total_01 = fval.between(0, 1, inclusive=\"neither\").sum()\n", + " total_1Inf = fval.between(1, np.inf, inclusive=\"neither\").sum()\n", + "\n", + " # Compute statistics: maxFinite,minFinite\n", + " finite_vals
= fval[np.isfinite(fval)]\n", + " maxFinite = finite_vals.loc[finite_vals.idxmax()]\n", + " minFinite = finite_vals.loc[finite_vals.idxmin()]\n", + "\n", + " # Compute statistics: maxNormal,minNormal\n", + " normal_vals = fval[(df[\"fclass\"] == FloatClass.NORMAL) & (fval > 0)]\n", + " maxNormal = normal_vals.loc[normal_vals.idxmax()] if normal_vals.any() else np.nan\n", + " minNormal = normal_vals.loc[normal_vals.idxmin()] if normal_vals.any() else np.nan\n", + "\n", + " # Compute statistics: minSubnormal\n", + " pos_subnormal = fval[(df[\"fclass\"] == FloatClass.SUBNORMAL) & (fval > 0)]\n", + " maxSubnormal = (\n", + " pos_subnormal.loc[pos_subnormal.idxmax()] if pos_subnormal.any() else np.nan\n", + " )\n", + " minSubnormal = (\n", + " pos_subnormal.loc[pos_subnormal.idxmin()] if pos_subnormal.any() else np.nan\n", + " )\n", + "\n", + " # Compute roundtrips: rt16, rt32\n", + " with np.errstate(over=\"ignore\"):\n", + " rt16 = (np.float64(np.float16(fval)) == np.float64(fval)) | ~np.isfinite(fval)\n", + " rt32 = (np.float64(np.float32(fval)) == np.float64(fval)) | ~np.isfinite(fval)\n", + "\n", + " rt16 = rt16.all()\n", + " rt32 = rt32.all()\n", + " assert rt32 # If not, we should include rt32 in the table\n", + "\n", + " # Assemble tuple\n", + " return dict(\n", + " name=fi.name,\n", + " B=fi.bits,\n", + " P=fi.precision,\n", + " E=E,\n", + " T=S,\n", + " lt1=total_01,\n", + " gt1=total_1Inf,\n", + " rt16=rt16,\n", + " maxFinite=maxFinite,\n", + " minFinite=minFinite,\n", + " maxNormal=maxNormal,\n", + " minNormal=minNormal,\n", + " minSubnormal=minSubnormal,\n", + " maxSubnormal=maxSubnormal,\n", + " )\n", + "\n", + "\n", + "formats_to_check = (\n", + " tiny_formats\n", + " + fp8_formats\n", + " + [format_info_bfloat16, format_info_ocp_int8, format_info_ocp_e8m0]\n", + ")\n", + "stats = [collect_stats(fi) for fi in formats_to_check]\n", + "df = pandas.DataFrame(stats)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Emit HTML 
table" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
nameBPETlt1gt1rt16maxFiniteminFinitemaxNormalminNormalminSubnormalmaxSubnormal
ocp_e2m1422115True 6 -6 6 1 0.5 0.5
ocp_e2m36423723True 7.5 -7.5 7.5 1 0.125 0.875
ocp_e3m263321119True 28 -28 28 0.25 0.0625 0.1875
ocp_e4m384435570True 448 -448 4480.0156251*2^-97/4*2^-7
ocp_e5m283525963True 57344 -57344 573441*2^-141*2^-163/2*2^-15
p3109_p181706263False1*2^63-1*2^631*2^631*2^-62nannan
p3109_p282616362False1*2^31-1*2^311*2^311*2^-311*2^-321*2^-32
p3109_p383526362True 49152 -49152 491521*2^-151*2^-173/2*2^-16
p3109_p484436362True 224 -224 2240.00781251*2^-107/4*2^-8
p3109_p585346362True 15 -15 15 0.1250.007812515/8*2^-4
p3109_p686256362True 3.875 -3.875 3.875 0.50.01562531/16*2^-2
bfloat16168871625516383False255/128*2^127-255/128*2^127255/128*2^1271*2^-1261*2^-133127/64*2^-127
ocp_int888076363True127/64*2^0 -2nannan0.015625127/64*2^0
ocp_e8m08180127127False1*2^1271*2^-1271*2^1271*2^-127nannan
\n" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Special rendering for float values - if they don't render nicely in 10.5g,\n", + "# use float_pow2str\n", + "def render_float(v):\n", + " s = f\"{v:8.5g}\"\n", + " if not \"e\" in s and float(s) == v:\n", + " return s\n", + " else:\n", + " return float_pow2str(v)\n", + "\n", + "\n", + "for field in (\n", + " \"maxFinite\",\n", + " \"minFinite\",\n", + " \"maxNormal\",\n", + " \"minNormal\",\n", + " \"minSubnormal\",\n", + " \"maxSubnormal\",\n", + "):\n", + " df[field] = df[field].map(render_float)\n", + "\n", + "\n", + "HTML(df.style.hide().to_html())" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/docs/source/03-value-tables.ipynb b/docs/source/03-value-tables.ipynb new file mode 100644 index 0000000..0bd9d89 --- /dev/null +++ b/docs/source/03-value-tables.ipynb @@ -0,0 +1,4114 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "# Making value tables\n", + "\n", + "In this notebook, we generate value tables akin to those at [P3109](https://htmlpreview.github.io/?https://raw.githubusercontent.com/P3109/Public/main/Value%20Tables/html/index.html).\n", + "\n", + "These tables comprise one-line summaries of each float value in the form\n", + "```text\n", + "Code Binary = Exact binary E = Float16 equivalent Float16 binary E = Float Value\n", + "0x21 0_0100_001 = +0b1.001*2^-4 = 0_01011_0010000000 +0b1.0010000000*2^-4 = ~0.0703\n", + "```" + ] + }, + { + "cell_type": "code", +
"execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from gfloat import *\n", + "from gfloat.formats import *\n", + "import numpy as np\n", + "from IPython.display import HTML\n", + "import airium" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Define some helpers.\n", + "\n", + "### Render with underscores separating s_e_m\n", + "\n", + "E.g. `0_1011_110`. For formats with zero significand bits or zero exponent bits, we use `0_1011110_` or `0__1011110`." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "def str_bits_with_underscores(fi, fv):\n", + " # 0_1011110_\n", + " if fi.tSignificandBits == 0:\n", + " return f\"{fv.signbit}_{fv.exp:0{fi.expBits}b}_\"\n", + "\n", + " # 0__1011110\n", + " if fi.expBits == 0:\n", + " return f\"{fv.signbit}__{fv.significand:0{fi.tSignificandBits}b}\"\n", + "\n", + " # 0_101_1110\n", + " return (\n", + " f\"{fv.signbit}_{fv.exp:0{fi.expBits}b}_{fv.significand:0{fi.tSignificandBits}b}\"\n", + " )\n", + "\n", + "\n", + "fi = format_info_p3109(3)\n", + "assert str_bits_with_underscores(fi, decode_float(fi, 0x41)) == \"0_10000_01\"\n", + "\n", + "fi = format_info_p3109(1)\n", + "assert str_bits_with_underscores(fi, decode_float(fi, 0x41)) == \"0_1000001_\"\n", + "\n", + "fi = format_info_p3109(7)\n", + "assert str_bits_with_underscores(fi, decode_float(fi, 0x41)) == \"0_1_000001\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Render a binary16 value\n", + "\n", + "Returns two strings, like this:\n", + "```\n", + "'0_00010_1010000000', '+0b1.1010000000*2^-13'\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "import struct\n", + "\n", + "\n", + "def b16_str(val) -> tuple[str, str]:\n", + " \"\"\"\n", + " Represent VAL in binary16.\n", + "\n", + " If val does not convert exactly to binary16,\n", + " returns \"\"\n", + "
\"\"\"\n", + " with np.errstate(over=\"ignore\"):\n", + " b16 = np.float16(val)\n", + "\n", + " if float(b16) != val and np.isfinite(b16):\n", + " # Finite, but not representable in float16\n", + " return f\"\", \"\"\n", + " b16_int = struct.unpack(\"!H\", struct.pack(\"!e\", b16))[0]\n", + "\n", + " # bitstr is of the form 0_00000_1100000000\n", + " s = f\"{b16_int:016b}\"\n", + " e_str = s[1:6]\n", + " m_str = s[6:]\n", + " bitstr = f\"{s[0]}_{e_str}_{m_str}\"\n", + "\n", + " # pow2str is of the form '+0b0.1100000000*2^-15', or '' for nonfinite values\n", + " e = int(e_str, 2) - 15\n", + " m = int(m_str, 2)\n", + " leading_bit = 0 if e == -15 else 1\n", + " signstr = \"-\" if s[0] == \"1\" else \"+\"\n", + " if np.isfinite(b16):\n", + " pow2str = f\"{signstr}0b{leading_bit}.{m:010b}*2^{e}\"\n", + " else:\n", + " pow2str = \"\"\n", + " return bitstr, pow2str\n", + "\n", + "\n", + "assert b16_str(13 * 2**-16) == (\"0_00010_1010000000\", \"+0b1.1010000000*2^-13\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Print one table row" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "p3109_p3\n", + "0x00 0_00000_00 = 0.0\n", + "0x01 0_00000_01 = +0b0.01*2^-15 = 0_00000_0010000000 +0b0.0010000000*2^-15 = ~7.629e-06\n", + "0x07 0_00001_11 = +0b1.11*2^-15 = 0_00000_1110000000 +0b0.1110000000*2^-15 = ~5.341e-05\n", + "0x21 0_01000_01 = +0b1.01*2^-8 = 0_00111_0100000000 +0b1.0100000000*2^-8 = ~0.0049\n", + "0x40 0_10000_00 = +0b1.00*2^0 = 0_01111_0000000000 +0b1.0000000000*2^0 = 1.0\n", + "0x41 0_10000_01 = +0b1.01*2^0 = 0_01111_0100000000 +0b1.0100000000*2^0 = 1.25\n", + "0x7e 0_11111_10 = +0b1.10*2^15 = 0_11110_1000000000 +0b1.1000000000*2^15 = 49152.0\n", + "0x7f 0_11111_11 = inf\n", + "0x80 1_00000_00 = nan\n", + "0x81 1_00000_01 = -0b0.01*2^-15 = 1_00000_0010000000 -0b0.0010000000*2^-15 = ~-7.629e-06\n", + "0xe6 1_11001_10 = 
-0b1.10*2^9 = 1_11000_1000000000 -0b1.1000000000*2^9 = -768.0\n", + "0xfe 1_11111_10 = -0b1.10*2^15 = 1_11110_1000000000 -0b1.1000000000*2^15 = -49152.0\n", + "0xff 1_11111_11 = -inf\n", + "p3109_p1\n", + "0x00 0_0000000_ = 0.0\n", + "0x01 0_0000001_ = +0b1.0*2^-62 = = ~2.168e-19\n", + "0x07 0_0000111_ = +0b1.0*2^-56 = = ~1.388e-17\n", + "0x21 0_0100001_ = +0b1.0*2^-30 = = ~9.313e-10\n", + "0x40 0_1000000_ = +0b1.0*2^1 = 0_10000_0000000000 +0b1.0000000000*2^1 = 2.0\n", + "0x41 0_1000001_ = +0b1.0*2^2 = 0_10001_0000000000 +0b1.0000000000*2^2 = 4.0\n", + "0x7e 0_1111110_ = +0b1.0*2^63 = 0_11111_0000000000 = ~9.223e+18\n", + "0x7f 0_1111111_ = inf\n", + "0x80 1_0000000_ = nan\n", + "0x81 1_0000001_ = -0b1.0*2^-62 = = ~-2.168e-19\n", + "0xe6 1_1100110_ = -0b1.0*2^39 = 1_11111_0000000000 = ~-5.498e+11\n", + "0xfe 1_1111110_ = -0b1.0*2^63 = 1_11111_0000000000 = ~-9.223e+18\n", + "0xff 1_1111111_ = -inf\n" + ] + } + ], + "source": [ + "def str_tablerow(fi, fv: FloatValue, show_b16_info=True, vs_width=14, vs_d=8):\n", + " \"\"\"\n", + " Create a string of the form\n", + " 0x41 0_10000_01 = +0b1.01*2^0 = 1.25\n", + " optionally adding binary16 info\n", + " 0x41 0_10000_01 = +0b1.01*2^0 = 0_01111_0100000000 +0b1.0100000000*2^0 = 1.25\n", + " \"\"\"\n", + " text = []\n", + "\n", + " # 0x45 0_1000_101\n", + " text.append(f\"0x{fv.code:02x} {str_bits_with_underscores(fi, fv)}\")\n", + "\n", + " finite_nonzero = np.isfinite(fv.fval) and fv.fval != 0\n", + "\n", + " # = +0b1.101*2^-7 =\n", + " if finite_nonzero:\n", + "\n", + " def signstr(fv):\n", + " return \"-\" if fv.signbit else \"+\"\n", + "\n", + " b = \"0\" if fv.fclass == FloatClass.SUBNORMAL else \"1\"\n", + " binary_pow2 = f\"{signstr(fv)}0b{b}.{fv.significand:0{fi.tSignificandBits}b}*2^{fv.expval:<3}\"\n", + " text.append(binary_pow2)\n", + "\n", + " if show_b16_info and finite_nonzero:\n", + " b16_binary_str, b16_bscistr = b16_str(fv.fval)\n", + " text.append(f\"{b16_binary_str} {b16_bscistr}\")\n", + "\n", + " # 
1.125\n", + " text.append(float_tilde_unless_roundtrip_str(fv.fval, width=vs_width, d=vs_d))\n", + "\n", + " # Return tuple\n", + " return \" = \".join(text)\n", + "\n", + "\n", + "for fi in (format_info_p3109(3), format_info_p3109(1)):\n", + " print(fi.name)\n", + " for i in (\n", + " 0x00,\n", + " 0x01,\n", + " 0x07,\n", + " 0x21,\n", + " 0x40,\n", + " 0x41,\n", + " 0x7E,\n", + " 0x7F,\n", + " 0x80,\n", + " 0x81,\n", + " 0xE6,\n", + " 0xFE,\n", + " 0xFF,\n", + " ):\n", + " print(\n", + " str_tablerow(\n", + " fi, decode_float(fi, i), show_b16_info=True, vs_width=8, vs_d=4\n", + " )\n", + " )" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Make HTML table" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "

FP8 Value Table, ocp_e2m1

\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + "
0x00 0_00_0 = 0.0
\n", + "
\n", + "
0x08 1_00_0 = -0.0
\n", + "
\n", + "
0x01 0_00_1 = +0b0.1*2^0   = 0.5
\n", + "
\n", + "
0x09 1_00_1 = -0b0.1*2^0   = -0.5
\n", + "
\n", + "
0x02 0_01_0 = +0b1.0*2^0   = 1.0
\n", + "
\n", + "
0x0a 1_01_0 = -0b1.0*2^0   = -1.0
\n", + "
\n", + "
0x03 0_01_1 = +0b1.1*2^0   = 1.5
\n", + "
\n", + "
0x0b 1_01_1 = -0b1.1*2^0   = -1.5
\n", + "
\n", + "
0x04 0_10_0 = +0b1.0*2^1   = 2.0
\n", + "
\n", + "
0x0c 1_10_0 = -0b1.0*2^1   = -2.0
\n", + "
\n", + "
0x05 0_10_1 = +0b1.1*2^1   = 3.0
\n", + "
\n", + "
0x0d 1_10_1 = -0b1.1*2^1   = -3.0
\n", + "
\n", + "
0x06 0_11_0 = +0b1.0*2^2   = 4.0
\n", + "
\n", + "
0x0e 1_11_0 = -0b1.0*2^2   = -4.0
\n", + "
\n", + "
0x07 0_11_1 = +0b1.1*2^2   = 6.0
\n", + "
\n", + "
0x0f 1_11_1 = -0b1.1*2^2   = -6.0
\n", + "
" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "def mktbl(fi: FormatInfo, cols=4, skip_rows=None, **kw):\n", + " # Make tables\n", + " nvals = 2**fi.bits\n", + " rows = nvals // cols\n", + "\n", + " style = f\"\"\"\n", + " div.cell_output td {{\n", + " margin: 0pt;\n", + " text-align: left;\n", + " }}\n", + "\n", + " div.cell_output table {{\n", + " margin: 0pt;\n", + " text-align: left;\n", + " font-family: monospace;\n", + " font-size: xx-small;\n", + " font-weight: bold;\n", + " border-collapse: collapse;\n", + " }}\n", + "\n", + " \n", + " table {{\n", + " margin: 0pt;\n", + " font-family: monospace;\n", + " font-size: xx-small;\n", + " font-weight: bold;\n", + " border-collapse: collapse;\n", + " }}\n", + "\n", + " tr.blankrow {{\n", + " height: 4ex;\n", + " vertical-align: top;\n", + " }}\n", + " \n", + " td {{\n", + " text-align: left;\n", + " border: solid 2px #ccc;\n", + " width: {98/cols}%;\n", + " }}\n", + " \n", + " .special {{\n", + " color: #874723;\n", + " }}\n", + " \n", + " .subnormal {{\n", + " color: #0121a7;\n", + " }}\n", + " \n", + " .normal {{\n", + " }}\n", + " \n", + " @media (prefers-color-scheme: dark) {{\n", + " .special {{\n", + " color: orange;\n", + " }}\n", + "\n", + " .subnormal {{\n", + " color: cyan;\n", + " }}\n", + " \n", + " .normal {{\n", + " }}\n", + " }}\n", + "\n", + " pre {{\n", + " margin: 1pt 1pt 1pt 13pt;\n", + " display: inline;\n", + " }}\n", + "\"\"\"\n", + "\n", + " def table_style(fv):\n", + " \"\"\"\n", + " Select from the table entry styles defined in CSS above.\n", + " \"\"\"\n", + " if fv.fclass == FloatClass.SUBNORMAL:\n", + " return \"subnormal\"\n", + "\n", + " if fv.fclass == FloatClass.NORMAL:\n", + " return \"normal\"\n", + "\n", + " if fv.fclass == FloatClass.ZERO and not fv.signbit:\n", + " return \"normal\"\n", + "\n", + " # Everyting else is special\n", + " return \"special\"\n", + "\n", + " title = f\"FP8 
Value Table, {fi.name}\"\n", + " a = airium.Airium()\n", + " a.style(_t=style)\n", + " a.h3(_t=title)\n", + "\n", + " with a.table():\n", + " for i in range(0, rows):\n", + " if skip_rows and (skip_rows[0] <= i < skip_rows[1]):\n", + " if i == skip_rows[0]:\n", + " a.tr(klass=\"blankrow\").td(\"...\")\n", + " continue\n", + " trklass = \"blankrow\" if i > 0 and i % 16 == 0 else \"\"\n", + " with a.tr(klass=trklass):\n", + " for n in range(i, nvals, rows):\n", + " fv = decode_float(fi, n)\n", + " text = str_tablerow(fi, fv, show_b16_info=False, **kw)\n", + " a.td(klass=table_style(fv)).pre(_t=text)\n", + "\n", + " return str(a)\n", + "\n", + "\n", + "HTML(mktbl(format_info_ocp_e2m1, cols=2, vs_width=8, vs_d=3))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### OCP E2M3\n", + "\n", + "This 6-bit format has 64 encodings (32 non-negative values and their negations), with no `NaN` or `Inf`, but does have `-0`.\n", + "The positive subnormals are the linear ramp of eighths: [n/8 for n in 1:7].\n", + "\n", + "One might describe the format in text as:\n", + "\n", + "> zero to two by eighths, two to four by quarters, four to eight by halves\n", + "\n", + "where \"to\" is open-ended, or \"to\" is not \"thru\"." + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "

FP8 Value Table, ocp_e2m3

\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + "
0x00 0_00_000 = 0.0
\n", + "
\n", + "
0x20 1_00_000 = -0.0
\n", + "
\n", + "
0x01 0_00_001 = +0b0.001*2^0   = 0.125
\n", + "
\n", + "
0x21 1_00_001 = -0b0.001*2^0   = -0.125
\n", + "
\n", + "
0x02 0_00_010 = +0b0.010*2^0   = 0.25
\n", + "
\n", + "
0x22 1_00_010 = -0b0.010*2^0   = -0.25
\n", + "
\n", + "
0x03 0_00_011 = +0b0.011*2^0   = 0.375
\n", + "
\n", + "
0x23 1_00_011 = -0b0.011*2^0   = -0.375
\n", + "
\n", + "
0x04 0_00_100 = +0b0.100*2^0   = 0.5
\n", + "
\n", + "
0x24 1_00_100 = -0b0.100*2^0   = -0.5
\n", + "
\n", + "
0x05 0_00_101 = +0b0.101*2^0   = 0.625
\n", + "
\n", + "
0x25 1_00_101 = -0b0.101*2^0   = -0.625
\n", + "
\n", + "
0x06 0_00_110 = +0b0.110*2^0   = 0.75
\n", + "
\n", + "
0x26 1_00_110 = -0b0.110*2^0   = -0.75
\n", + "
\n", + "
0x07 0_00_111 = +0b0.111*2^0   = 0.875
\n", + "
\n", + "
0x27 1_00_111 = -0b0.111*2^0   = -0.875
\n", + "
\n", + "
0x08 0_01_000 = +0b1.000*2^0   = 1.0
\n", + "
\n", + "
0x28 1_01_000 = -0b1.000*2^0   = -1.0
\n", + "
\n", + "
0x09 0_01_001 = +0b1.001*2^0   = 1.125
\n", + "
\n", + "
0x29 1_01_001 = -0b1.001*2^0   = -1.125
\n", + "
\n", + "
0x0a 0_01_010 = +0b1.010*2^0   = 1.25
\n", + "
\n", + "
0x2a 1_01_010 = -0b1.010*2^0   = -1.25
\n", + "
\n", + "
0x0b 0_01_011 = +0b1.011*2^0   = 1.375
\n", + "
\n", + "
0x2b 1_01_011 = -0b1.011*2^0   = -1.375
\n", + "
\n", + "
0x0c 0_01_100 = +0b1.100*2^0   = 1.5
\n", + "
\n", + "
0x2c 1_01_100 = -0b1.100*2^0   = -1.5
\n", + "
\n", + "
0x0d 0_01_101 = +0b1.101*2^0   = 1.625
\n", + "
\n", + "
0x2d 1_01_101 = -0b1.101*2^0   = -1.625
\n", + "
\n", + "
0x0e 0_01_110 = +0b1.110*2^0   = 1.75
\n", + "
\n", + "
0x2e 1_01_110 = -0b1.110*2^0   = -1.75
\n", + "
\n", + "
0x0f 0_01_111 = +0b1.111*2^0   = 1.875
\n", + "
\n", + "
0x2f 1_01_111 = -0b1.111*2^0   = -1.875
\n", + "
\n", + "
0x10 0_10_000 = +0b1.000*2^1   = 2.0
\n", + "
\n", + "
0x30 1_10_000 = -0b1.000*2^1   = -2.0
\n", + "
\n", + "
0x11 0_10_001 = +0b1.001*2^1   = 2.25
\n", + "
\n", + "
0x31 1_10_001 = -0b1.001*2^1   = -2.25
\n", + "
\n", + "
0x12 0_10_010 = +0b1.010*2^1   = 2.5
\n", + "
\n", + "
0x32 1_10_010 = -0b1.010*2^1   = -2.5
\n", + "
\n", + "
0x13 0_10_011 = +0b1.011*2^1   = 2.75
\n", + "
\n", + "
0x33 1_10_011 = -0b1.011*2^1   = -2.75
\n", + "
\n", + "
0x14 0_10_100 = +0b1.100*2^1   = 3.0
\n", + "
\n", + "
0x34 1_10_100 = -0b1.100*2^1   = -3.0
\n", + "
\n", + "
0x15 0_10_101 = +0b1.101*2^1   = 3.25
\n", + "
\n", + "
0x35 1_10_101 = -0b1.101*2^1   = -3.25
\n", + "
\n", + "
0x16 0_10_110 = +0b1.110*2^1   = 3.5
\n", + "
\n", + "
0x36 1_10_110 = -0b1.110*2^1   = -3.5
\n", + "
\n", + "
0x17 0_10_111 = +0b1.111*2^1   = 3.75
\n", + "
\n", + "
0x37 1_10_111 = -0b1.111*2^1   = -3.75
\n", + "
\n", + "
0x18 0_11_000 = +0b1.000*2^2   = 4.0
\n", + "
\n", + "
0x38 1_11_000 = -0b1.000*2^2   = -4.0
\n", + "
\n", + "
0x19 0_11_001 = +0b1.001*2^2   = 4.5
\n", + "
\n", + "
0x39 1_11_001 = -0b1.001*2^2   = -4.5
\n", + "
\n", + "
0x1a 0_11_010 = +0b1.010*2^2   = 5.0
\n", + "
\n", + "
0x3a 1_11_010 = -0b1.010*2^2   = -5.0
\n", + "
\n", + "
0x1b 0_11_011 = +0b1.011*2^2   = 5.5
\n", + "
\n", + "
0x3b 1_11_011 = -0b1.011*2^2   = -5.5
\n", + "
\n", + "
0x1c 0_11_100 = +0b1.100*2^2   = 6.0
\n", + "
\n", + "
0x3c 1_11_100 = -0b1.100*2^2   = -6.0
\n", + "
\n", + "
0x1d 0_11_101 = +0b1.101*2^2   = 6.5
\n", + "
\n", + "
0x3d 1_11_101 = -0b1.101*2^2   = -6.5
\n", + "
\n", + "
0x1e 0_11_110 = +0b1.110*2^2   = 7.0
\n", + "
\n", + "
0x3e 1_11_110 = -0b1.110*2^2   = -7.0
\n", + "
\n", + "
0x1f 0_11_111 = +0b1.111*2^2   = 7.5
\n", + "
\n", + "
0x3f 1_11_111 = -0b1.111*2^2   = -7.5
\n", + "
" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "HTML(mktbl(format_info_ocp_e2m3, cols=2, vs_width=8, vs_d=3))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# And here's a 6-bit \"IEEE-754\" float:\n" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "

FP8 Value Table, 754-fp6

\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + "
0x00 0_000_00 = 0.0
\n", + "
\n", + "
0x20 1_000_00 = -0.0
\n", + "
\n", + "
0x01 0_000_01 = +0b0.01*2^-2  = 0.0625
\n", + "
\n", + "
0x21 1_000_01 = -0b0.01*2^-2  = -0.0625
\n", + "
\n", + "
0x02 0_000_10 = +0b0.10*2^-2  = 0.125
\n", + "
\n", + "
0x22 1_000_10 = -0b0.10*2^-2  = -0.125
\n", + "
\n", + "
0x03 0_000_11 = +0b0.11*2^-2  = 0.1875
\n", + "
\n", + "
0x23 1_000_11 = -0b0.11*2^-2  = -0.1875
\n", + "
\n", + "
0x04 0_001_00 = +0b1.00*2^-2  = 0.25
\n", + "
\n", + "
0x24 1_001_00 = -0b1.00*2^-2  = -0.25
\n", + "
\n", + "
0x05 0_001_01 = +0b1.01*2^-2  = 0.3125
\n", + "
\n", + "
0x25 1_001_01 = -0b1.01*2^-2  = -0.3125
\n", + "
\n", + "
0x06 0_001_10 = +0b1.10*2^-2  = 0.375
\n", + "
\n", + "
0x26 1_001_10 = -0b1.10*2^-2  = -0.375
\n", + "
\n", + "
0x07 0_001_11 = +0b1.11*2^-2  = 0.4375
\n", + "
\n", + "
0x27 1_001_11 = -0b1.11*2^-2  = -0.4375
\n", + "
\n", + "
0x08 0_010_00 = +0b1.00*2^-1  = 0.5
\n", + "
\n", + "
0x28 1_010_00 = -0b1.00*2^-1  = -0.5
\n", + "
\n", + "
0x09 0_010_01 = +0b1.01*2^-1  = 0.625
\n", + "
\n", + "
0x29 1_010_01 = -0b1.01*2^-1  = -0.625
\n", + "
\n", + "
0x0a 0_010_10 = +0b1.10*2^-1  = 0.75
\n", + "
\n", + "
0x2a 1_010_10 = -0b1.10*2^-1  = -0.75
\n", + "
\n", + "
0x0b 0_010_11 = +0b1.11*2^-1  = 0.875
\n", + "
\n", + "
0x2b 1_010_11 = -0b1.11*2^-1  = -0.875
\n", + "
\n", + "
0x0c 0_011_00 = +0b1.00*2^0   = 1.0
\n", + "
\n", + "
0x2c 1_011_00 = -0b1.00*2^0   = -1.0
\n", + "
\n", + "
0x0d 0_011_01 = +0b1.01*2^0   = 1.25
\n", + "
\n", + "
0x2d 1_011_01 = -0b1.01*2^0   = -1.25
\n", + "
\n", + "
0x0e 0_011_10 = +0b1.10*2^0   = 1.5
\n", + "
\n", + "
0x2e 1_011_10 = -0b1.10*2^0   = -1.5
\n", + "
\n", + "
0x0f 0_011_11 = +0b1.11*2^0   = 1.75
\n", + "
\n", + "
0x2f 1_011_11 = -0b1.11*2^0   = -1.75
\n", + "
\n", + "
0x10 0_100_00 = +0b1.00*2^1   = 2.0
\n", + "
\n", + "
0x30 1_100_00 = -0b1.00*2^1   = -2.0
\n", + "
\n", + "
0x11 0_100_01 = +0b1.01*2^1   = 2.5
\n", + "
\n", + "
0x31 1_100_01 = -0b1.01*2^1   = -2.5
\n", + "
\n", + "
0x12 0_100_10 = +0b1.10*2^1   = 3.0
\n", + "
\n", + "
0x32 1_100_10 = -0b1.10*2^1   = -3.0
\n", + "
\n", + "
0x13 0_100_11 = +0b1.11*2^1   = 3.5
\n", + "
\n", + "
0x33 1_100_11 = -0b1.11*2^1   = -3.5
\n", + "
\n", + "
0x14 0_101_00 = +0b1.00*2^2   = 4.0
\n", + "
\n", + "
0x34 1_101_00 = -0b1.00*2^2   = -4.0
\n", + "
\n", + "
0x15 0_101_01 = +0b1.01*2^2   = 5.0
\n", + "
\n", + "
0x35 1_101_01 = -0b1.01*2^2   = -5.0
\n", + "
\n", + "
0x16 0_101_10 = +0b1.10*2^2   = 6.0
\n", + "
\n", + "
0x36 1_101_10 = -0b1.10*2^2   = -6.0
\n", + "
\n", + "
0x17 0_101_11 = +0b1.11*2^2   = 7.0
\n", + "
\n", + "
0x37 1_101_11 = -0b1.11*2^2   = -7.0
\n", + "
\n", + "
0x18 0_110_00 = +0b1.00*2^3   = 8.0
\n", + "
\n", + "
0x38 1_110_00 = -0b1.00*2^3   = -8.0
\n", + "
\n", + "
0x19 0_110_01 = +0b1.01*2^3   = 10.0
\n", + "
\n", + "
0x39 1_110_01 = -0b1.01*2^3   = -10.0
\n", + "
\n", + "
0x1a 0_110_10 = +0b1.10*2^3   = 12.0
\n", + "
\n", + "
0x3a 1_110_10 = -0b1.10*2^3   = -12.0
\n", + "
\n", + "
0x1b 0_110_11 = +0b1.11*2^3   = 14.0
\n", + "
\n", + "
0x3b 1_110_11 = -0b1.11*2^3   = -14.0
\n", + "
\n", + "
0x1c 0_111_00 = inf
\n", + "
\n", + "
0x3c 1_111_00 = -inf
\n", + "
\n", + "
0x1d 0_111_01 = nan
\n", + "
\n", + "
0x3d 1_111_01 = nan
\n", + "
\n", + "
0x1e 0_111_10 = nan
\n", + "
\n", + "
0x3e 1_111_10 = nan
\n", + "
\n", + "
0x1f 0_111_11 = nan
\n", + "
\n", + "
0x3f 1_111_11 = nan
\n", + "
" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from gfloat import FormatInfo\n", + "\n", + "fi = FormatInfo(\"754-fp6\", 6, 3, 3, True, True, 3, True, True, False)\n", + "HTML(mktbl(fi, cols=2, vs_width=8, vs_d=3))" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "

FP8 Value Table, P3109-fp6

\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + "
0x00 0_000_00 = 0.0
\n", + "
\n", + "
0x20 1_000_00 = nan
\n", + "
\n", + "
0x01 0_000_01 = +0b0.01*2^-3  = 0.03125
\n", + "
\n", + "
0x21 1_000_01 = -0b0.01*2^-3  = -0.03125
\n", + "
\n", + "
0x02 0_000_10 = +0b0.10*2^-3  = 0.0625
\n", + "
\n", + "
0x22 1_000_10 = -0b0.10*2^-3  = -0.0625
\n", + "
\n", + "
0x03 0_000_11 = +0b0.11*2^-3  = 0.09375
\n", + "
\n", + "
0x23 1_000_11 = -0b0.11*2^-3  = -0.09375
\n", + "
\n", + "
0x04 0_001_00 = +0b1.00*2^-3  = 0.125
\n", + "
\n", + "
0x24 1_001_00 = -0b1.00*2^-3  = -0.125
\n", + "
\n", + "
0x05 0_001_01 = +0b1.01*2^-3  = 0.15625
\n", + "
\n", + "
0x25 1_001_01 = -0b1.01*2^-3  = -0.15625
\n", + "
\n", + "
0x06 0_001_10 = +0b1.10*2^-3  = 0.1875
\n", + "
\n", + "
0x26 1_001_10 = -0b1.10*2^-3  = -0.1875
\n", + "
\n", + "
0x07 0_001_11 = +0b1.11*2^-3  = 0.21875
\n", + "
\n", + "
0x27 1_001_11 = -0b1.11*2^-3  = -0.21875
\n", + "
\n", + "
0x08 0_010_00 = +0b1.00*2^-2  = 0.25
\n", + "
\n", + "
0x28 1_010_00 = -0b1.00*2^-2  = -0.25
\n", + "
\n", + "
0x09 0_010_01 = +0b1.01*2^-2  = 0.3125
\n", + "
\n", + "
0x29 1_010_01 = -0b1.01*2^-2  = -0.3125
\n", + "
\n", + "
0x0a 0_010_10 = +0b1.10*2^-2  = 0.375
\n", + "
\n", + "
0x2a 1_010_10 = -0b1.10*2^-2  = -0.375
\n", + "
\n", + "
0x0b 0_010_11 = +0b1.11*2^-2  = 0.4375
\n", + "
\n", + "
0x2b 1_010_11 = -0b1.11*2^-2  = -0.4375
\n", + "
\n", + "
0x0c 0_011_00 = +0b1.00*2^-1  = 0.5
\n", + "
\n", + "
0x2c 1_011_00 = -0b1.00*2^-1  = -0.5
\n", + "
\n", + "
0x0d 0_011_01 = +0b1.01*2^-1  = 0.625
\n", + "
\n", + "
0x2d 1_011_01 = -0b1.01*2^-1  = -0.625
\n", + "
\n", + "
0x0e 0_011_10 = +0b1.10*2^-1  = 0.75
\n", + "
\n", + "
0x2e 1_011_10 = -0b1.10*2^-1  = -0.75
\n", + "
\n", + "
0x0f 0_011_11 = +0b1.11*2^-1  = 0.875
\n", + "
\n", + "
0x2f 1_011_11 = -0b1.11*2^-1  = -0.875
\n", + "
\n", + "
0x10 0_100_00 = +0b1.00*2^0   = 1.0
\n", + "
\n", + "
0x30 1_100_00 = -0b1.00*2^0   = -1.0
\n", + "
\n", + "
0x11 0_100_01 = +0b1.01*2^0   = 1.25
\n", + "
\n", + "
0x31 1_100_01 = -0b1.01*2^0   = -1.25
\n", + "
\n", + "
0x12 0_100_10 = +0b1.10*2^0   = 1.5
\n", + "
\n", + "
0x32 1_100_10 = -0b1.10*2^0   = -1.5
\n", + "
\n", + "
0x13 0_100_11 = +0b1.11*2^0   = 1.75
\n", + "
\n", + "
0x33 1_100_11 = -0b1.11*2^0   = -1.75
\n", + "
\n", + "
0x14 0_101_00 = +0b1.00*2^1   = 2.0
\n", + "
\n", + "
0x34 1_101_00 = -0b1.00*2^1   = -2.0
\n", + "
\n", + "
0x15 0_101_01 = +0b1.01*2^1   = 2.5
\n", + "
\n", + "
0x35 1_101_01 = -0b1.01*2^1   = -2.5
\n", + "
\n", + "
0x16 0_101_10 = +0b1.10*2^1   = 3.0
\n", + "
\n", + "
0x36 1_101_10 = -0b1.10*2^1   = -3.0
\n", + "
\n", + "
0x17 0_101_11 = +0b1.11*2^1   = 3.5
\n", + "
\n", + "
0x37 1_101_11 = -0b1.11*2^1   = -3.5
\n", + "
\n", + "
0x18 0_110_00 = +0b1.00*2^2   = 4.0
\n", + "
\n", + "
0x38 1_110_00 = -0b1.00*2^2   = -4.0
\n", + "
\n", + "
0x19 0_110_01 = +0b1.01*2^2   = 5.0
\n", + "
\n", + "
0x39 1_110_01 = -0b1.01*2^2   = -5.0
\n", + "
\n", + "
0x1a 0_110_10 = +0b1.10*2^2   = 6.0
\n", + "
\n", + "
0x3a 1_110_10 = -0b1.10*2^2   = -6.0
\n", + "
\n", + "
0x1b 0_110_11 = +0b1.11*2^2   = 7.0
\n", + "
\n", + "
0x3b 1_110_11 = -0b1.11*2^2   = -7.0
\n", + "
\n", + "
0x1c 0_111_00 = +0b1.00*2^3   = 8.0
\n", + "
\n", + "
0x3c 1_111_00 = -0b1.00*2^3   = -8.0
\n", + "
\n", + "
0x1d 0_111_01 = +0b1.01*2^3   = 10.0
\n", + "
\n", + "
0x3d 1_111_01 = -0b1.01*2^3   = -10.0
\n", + "
\n", + "
0x1e 0_111_10 = +0b1.10*2^3   = 12.0
\n", + "
\n", + "
0x3e 1_111_10 = -0b1.10*2^3   = -12.0
\n", + "
\n", + "
0x1f 0_111_11 = inf
\n", + "
\n", + "
0x3f 1_111_11 = -inf
\n", + "
" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# P3109\n", + "fi = FormatInfo(\"P3109-fp6\", 6, 3, 3, False, True, 0, True, True, False)\n", + "HTML(mktbl(fi, cols=2, vs_width=8, vs_d=3))" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "

FP8 Value Table, P3109-fp6

\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + "
0x00 0_000_00 = 0.0
\n", + "
\n", + "
0x20 1_000_00 = nan
\n", + "
\n", + "
0x01 0_000_01 = +0b0.01*2^-3  = 0.03125
\n", + "
\n", + "
0x21 1_000_01 = -0b0.01*2^-3  = -0.03125
\n", + "
\n", + "
0x02 0_000_10 = +0b0.10*2^-3  = 0.0625
\n", + "
\n", + "
0x22 1_000_10 = -0b0.10*2^-3  = -0.0625
\n", + "
\n", + "
0x03 0_000_11 = +0b0.11*2^-3  = 0.09375
\n", + "
\n", + "
0x23 1_000_11 = -0b0.11*2^-3  = -0.09375
\n", + "
\n", + "
0x04 0_001_00 = +0b1.00*2^-3  = 0.125
\n", + "
\n", + "
0x24 1_001_00 = -0b1.00*2^-3  = -0.125
\n", + "
\n", + "
0x05 0_001_01 = +0b1.01*2^-3  = 0.15625
\n", + "
\n", + "
0x25 1_001_01 = -0b1.01*2^-3  = -0.15625
\n", + "
\n", + "
0x06 0_001_10 = +0b1.10*2^-3  = 0.1875
\n", + "
\n", + "
0x26 1_001_10 = -0b1.10*2^-3  = -0.1875
\n", + "
\n", + "
0x07 0_001_11 = +0b1.11*2^-3  = 0.21875
\n", + "
\n", + "
0x27 1_001_11 = -0b1.11*2^-3  = -0.21875
\n", + "
\n", + "
0x08 0_010_00 = +0b1.00*2^-2  = 0.25
\n", + "
\n", + "
0x28 1_010_00 = -0b1.00*2^-2  = -0.25
\n", + "
\n", + "
0x09 0_010_01 = +0b1.01*2^-2  = 0.3125
\n", + "
\n", + "
0x29 1_010_01 = -0b1.01*2^-2  = -0.3125
\n", + "
\n", + "
0x0a 0_010_10 = +0b1.10*2^-2  = 0.375
\n", + "
\n", + "
0x2a 1_010_10 = -0b1.10*2^-2  = -0.375
\n", + "
\n", + "
0x0b 0_010_11 = +0b1.11*2^-2  = 0.4375
\n", + "
\n", + "
0x2b 1_010_11 = -0b1.11*2^-2  = -0.4375
\n", + "
\n", + "
0x0c 0_011_00 = +0b1.00*2^-1  = 0.5
\n", + "
\n", + "
0x2c 1_011_00 = -0b1.00*2^-1  = -0.5
\n", + "
\n", + "
0x0d 0_011_01 = +0b1.01*2^-1  = 0.625
\n", + "
\n", + "
0x2d 1_011_01 = -0b1.01*2^-1  = -0.625
\n", + "
\n", + "
0x0e 0_011_10 = +0b1.10*2^-1  = 0.75
\n", + "
\n", + "
0x2e 1_011_10 = -0b1.10*2^-1  = -0.75
\n", + "
\n", + "
0x0f 0_011_11 = +0b1.11*2^-1  = 0.875
\n", + "
\n", + "
0x2f 1_011_11 = -0b1.11*2^-1  = -0.875
\n", + "
\n", + "
0x10 0_100_00 = +0b1.00*2^0   = 1.0
\n", + "
\n", + "
0x30 1_100_00 = -0b1.00*2^0   = -1.0
\n", + "
\n", + "
0x11 0_100_01 = +0b1.01*2^0   = 1.25
\n", + "
\n", + "
0x31 1_100_01 = -0b1.01*2^0   = -1.25
\n", + "
\n", + "
0x12 0_100_10 = +0b1.10*2^0   = 1.5
\n", + "
\n", + "
0x32 1_100_10 = -0b1.10*2^0   = -1.5
\n", + "
\n", + "
0x13 0_100_11 = +0b1.11*2^0   = 1.75
\n", + "
\n", + "
0x33 1_100_11 = -0b1.11*2^0   = -1.75
\n", + "
\n", + "
0x14 0_101_00 = +0b1.00*2^1   = 2.0
\n", + "
\n", + "
0x34 1_101_00 = -0b1.00*2^1   = -2.0
\n", + "
\n", + "
0x15 0_101_01 = +0b1.01*2^1   = 2.5
\n", + "
\n", + "
0x35 1_101_01 = -0b1.01*2^1   = -2.5
\n", + "
\n", + "
0x16 0_101_10 = +0b1.10*2^1   = 3.0
\n", + "
\n", + "
0x36 1_101_10 = -0b1.10*2^1   = -3.0
\n", + "
\n", + "
0x17 0_101_11 = +0b1.11*2^1   = 3.5
\n", + "
\n", + "
0x37 1_101_11 = -0b1.11*2^1   = -3.5
\n", + "
\n", + "
0x18 0_110_00 = +0b1.00*2^2   = 4.0
\n", + "
\n", + "
0x38 1_110_00 = -0b1.00*2^2   = -4.0
\n", + "
\n", + "
0x19 0_110_01 = +0b1.01*2^2   = 5.0
\n", + "
\n", + "
0x39 1_110_01 = -0b1.01*2^2   = -5.0
\n", + "
\n", + "
0x1a 0_110_10 = +0b1.10*2^2   = 6.0
\n", + "
\n", + "
0x3a 1_110_10 = -0b1.10*2^2   = -6.0
\n", + "
\n", + "
0x1b 0_110_11 = +0b1.11*2^2   = 7.0
\n", + "
\n", + "
0x3b 1_110_11 = -0b1.11*2^2   = -7.0
\n", + "
\n", + "
0x1c 0_111_00 = +0b1.00*2^3   = 8.0
\n", + "
\n", + "
0x3c 1_111_00 = -0b1.00*2^3   = -8.0
\n", + "
\n", + "
0x1d 0_111_01 = +0b1.01*2^3   = 10.0
\n", + "
\n", + "
0x3d 1_111_01 = -0b1.01*2^3   = -10.0
\n", + "
\n", + "
0x1e 0_111_10 = +0b1.10*2^3   = 12.0
\n", + "
\n", + "
0x3e 1_111_10 = -0b1.10*2^3   = -12.0
\n", + "
\n", + "
0x1f 0_111_11 = +0b1.11*2^3   = 14.0
\n", + "
\n", + "
0x3f 1_111_11 = -0b1.11*2^3   = -14.0
\n", + "
" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# P3109\n", + "fi = FormatInfo(\"P3109-fp6\", 6, 3, 3, False, False, 0, True, True, False)\n", + "HTML(mktbl(fi, cols=2, vs_width=8, vs_d=3))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### OCP Formats: E5M2, E4M3" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "

FP8 Value Table, ocp_e5m2

\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + "
0x00 0_00000_00 = 0.0
\n", + "
\n", + "
0x40 0_10000_00 = +0b1.00*2^1   = 2.0
\n", + "
\n", + "
0x80 1_00000_00 = -0.0
\n", + "
\n", + "
0xc0 1_10000_00 = -0b1.00*2^1   = -2.0
\n", + "
\n", + "
0x01 0_00000_01 = +0b0.01*2^-14 = ~1.5259e-05
\n", + "
\n", + "
0x41 0_10000_01 = +0b1.01*2^1   = 2.5
\n", + "
\n", + "
0x81 1_00000_01 = -0b0.01*2^-14 = ~-1.5259e-05
\n", + "
\n", + "
0xc1 1_10000_01 = -0b1.01*2^1   = -2.5
\n", + "
\n", + "
0x02 0_00000_10 = +0b0.10*2^-14 = ~3.0518e-05
\n", + "
\n", + "
0x42 0_10000_10 = +0b1.10*2^1   = 3.0
\n", + "
\n", + "
0x82 1_00000_10 = -0b0.10*2^-14 = ~-3.0518e-05
\n", + "
\n", + "
0xc2 1_10000_10 = -0b1.10*2^1   = -3.0
\n", + "
\n", + "
0x03 0_00000_11 = +0b0.11*2^-14 = ~4.5776e-05
\n", + "
\n", + "
0x43 0_10000_11 = +0b1.11*2^1   = 3.5
\n", + "
\n", + "
0x83 1_00000_11 = -0b0.11*2^-14 = ~-4.5776e-05
\n", + "
\n", + "
0xc3 1_10000_11 = -0b1.11*2^1   = -3.5
\n", + "
\n", + "
0x04 0_00001_00 = +0b1.00*2^-14 = ~6.1035e-05
\n", + "
\n", + "
0x44 0_10001_00 = +0b1.00*2^2   = 4.0
\n", + "
\n", + "
0x84 1_00001_00 = -0b1.00*2^-14 = ~-6.1035e-05
\n", + "
\n", + "
0xc4 1_10001_00 = -0b1.00*2^2   = -4.0
\n", + "
\n", + "
0x05 0_00001_01 = +0b1.01*2^-14 = ~7.6294e-05
\n", + "
\n", + "
0x45 0_10001_01 = +0b1.01*2^2   = 5.0
\n", + "
\n", + "
0x85 1_00001_01 = -0b1.01*2^-14 = ~-7.6294e-05
\n", + "
\n", + "
0xc5 1_10001_01 = -0b1.01*2^2   = -5.0
\n", + "
\n", + "
0x06 0_00001_10 = +0b1.10*2^-14 = ~9.1553e-05
\n", + "
\n", + "
0x46 0_10001_10 = +0b1.10*2^2   = 6.0
\n", + "
\n", + "
0x86 1_00001_10 = -0b1.10*2^-14 = ~-9.1553e-05
\n", + "
\n", + "
0xc6 1_10001_10 = -0b1.10*2^2   = -6.0
\n", + "
\n", + "
0x07 0_00001_11 = +0b1.11*2^-14 = ~0.00011
\n", + "
\n", + "
0x47 0_10001_11 = +0b1.11*2^2   = 7.0
\n", + "
\n", + "
0x87 1_00001_11 = -0b1.11*2^-14 = ~-0.00011
\n", + "
\n", + "
0xc7 1_10001_11 = -0b1.11*2^2   = -7.0
\n", + "
\n", + "
0x08 0_00010_00 = +0b1.00*2^-13 = ~0.00012
\n", + "
\n", + "
0x48 0_10010_00 = +0b1.00*2^3   = 8.0
\n", + "
\n", + "
0x88 1_00010_00 = -0b1.00*2^-13 = ~-0.00012
\n", + "
\n", + "
0xc8 1_10010_00 = -0b1.00*2^3   = -8.0
\n", + "
\n", + "
0x09 0_00010_01 = +0b1.01*2^-13 = ~0.00015
\n", + "
\n", + "
0x49 0_10010_01 = +0b1.01*2^3   = 10.0
\n", + "
\n", + "
0x89 1_00010_01 = -0b1.01*2^-13 = ~-0.00015
\n", + "
\n", + "
0xc9 1_10010_01 = -0b1.01*2^3   = -10.0
\n", + "
\n", + "
0x0a 0_00010_10 = +0b1.10*2^-13 = ~0.00018
\n", + "
\n", + "
0x4a 0_10010_10 = +0b1.10*2^3   = 12.0
\n", + "
\n", + "
0x8a 1_00010_10 = -0b1.10*2^-13 = ~-0.00018
\n", + "
\n", + "
0xca 1_10010_10 = -0b1.10*2^3   = -12.0
\n", + "
\n", + "
0x0b 0_00010_11 = +0b1.11*2^-13 = ~0.00021
\n", + "
\n", + "
0x4b 0_10010_11 = +0b1.11*2^3   = 14.0
\n", + "
\n", + "
0x8b 1_00010_11 = -0b1.11*2^-13 = ~-0.00021
\n", + "
\n", + "
0xcb 1_10010_11 = -0b1.11*2^3   = -14.0
\n", + "
\n", + "
0x0c 0_00011_00 = +0b1.00*2^-12 = ~0.00024
\n", + "
\n", + "
0x4c 0_10011_00 = +0b1.00*2^4   = 16.0
\n", + "
\n", + "
0x8c 1_00011_00 = -0b1.00*2^-12 = ~-0.00024
\n", + "
\n", + "
0xcc 1_10011_00 = -0b1.00*2^4   = -16.0
\n", + "
\n", + "
0x0d 0_00011_01 = +0b1.01*2^-12 = ~0.00031
\n", + "
\n", + "
0x4d 0_10011_01 = +0b1.01*2^4   = 20.0
\n", + "
\n", + "
0x8d 1_00011_01 = -0b1.01*2^-12 = ~-0.00031
\n", + "
\n", + "
0xcd 1_10011_01 = -0b1.01*2^4   = -20.0
\n", + "
\n", + "
0x0e 0_00011_10 = +0b1.10*2^-12 = ~0.00037
\n", + "
\n", + "
0x4e 0_10011_10 = +0b1.10*2^4   = 24.0
\n", + "
\n", + "
0x8e 1_00011_10 = -0b1.10*2^-12 = ~-0.00037
\n", + "
\n", + "
0xce 1_10011_10 = -0b1.10*2^4   = -24.0
\n", + "
\n", + "
0x0f 0_00011_11 = +0b1.11*2^-12 = ~0.00043
\n", + "
\n", + "
0x4f 0_10011_11 = +0b1.11*2^4   = 28.0
\n", + "
\n", + "
0x8f 1_00011_11 = -0b1.11*2^-12 = ~-0.00043
\n", + "
\n", + "
0xcf 1_10011_11 = -0b1.11*2^4   = -28.0
\n", + "
\n", + "
0x30 0_01100_00 = +0b1.00*2^-3  = 0.125
\n", + "
\n", + "
0x70 0_11100_00 = +0b1.00*2^13  = 8192.0
\n", + "
\n", + "
0xb0 1_01100_00 = -0b1.00*2^-3  = -0.125
\n", + "
\n", + "
0xf0 1_11100_00 = -0b1.00*2^13  = -8192.0
\n", + "
\n", + "
0x31 0_01100_01 = +0b1.01*2^-3  = 0.15625
\n", + "
\n", + "
0x71 0_11100_01 = +0b1.01*2^13  = 10240.0
\n", + "
\n", + "
0xb1 1_01100_01 = -0b1.01*2^-3  = -0.15625
\n", + "
\n", + "
0xf1 1_11100_01 = -0b1.01*2^13  = -10240.0
\n", + "
\n", + "
0x32 0_01100_10 = +0b1.10*2^-3  = 0.1875
\n", + "
\n", + "
0x72 0_11100_10 = +0b1.10*2^13  = 12288.0
\n", + "
\n", + "
0xb2 1_01100_10 = -0b1.10*2^-3  = -0.1875
\n", + "
\n", + "
0xf2 1_11100_10 = -0b1.10*2^13  = -12288.0
\n", + "
\n", + "
0x33 0_01100_11 = +0b1.11*2^-3  = 0.21875
\n", + "
\n", + "
0x73 0_11100_11 = +0b1.11*2^13  = 14336.0
\n", + "
\n", + "
0xb3 1_01100_11 = -0b1.11*2^-3  = -0.21875
\n", + "
\n", + "
0xf3 1_11100_11 = -0b1.11*2^13  = -14336.0
\n", + "
\n", + "
0x34 0_01101_00 = +0b1.00*2^-2  = 0.25
\n", + "
\n", + "
0x74 0_11101_00 = +0b1.00*2^14  = 16384.0
\n", + "
\n", + "
0xb4 1_01101_00 = -0b1.00*2^-2  = -0.25
\n", + "
\n", + "
0xf4 1_11101_00 = -0b1.00*2^14  = -16384.0
\n", + "
\n", + "
0x35 0_01101_01 = +0b1.01*2^-2  = 0.3125
\n", + "
\n", + "
0x75 0_11101_01 = +0b1.01*2^14  = 20480.0
\n", + "
\n", + "
0xb5 1_01101_01 = -0b1.01*2^-2  = -0.3125
\n", + "
\n", + "
0xf5 1_11101_01 = -0b1.01*2^14  = -20480.0
\n", + "
\n", + "
0x36 0_01101_10 = +0b1.10*2^-2  = 0.375
\n", + "
\n", + "
0x76 0_11101_10 = +0b1.10*2^14  = 24576.0
\n", + "
\n", + "
0xb6 1_01101_10 = -0b1.10*2^-2  = -0.375
\n", + "
\n", + "
0xf6 1_11101_10 = -0b1.10*2^14  = -24576.0
\n", + "
\n", + "
0x37 0_01101_11 = +0b1.11*2^-2  = 0.4375
\n", + "
\n", + "
0x77 0_11101_11 = +0b1.11*2^14  = 28672.0
\n", + "
\n", + "
0xb7 1_01101_11 = -0b1.11*2^-2  = -0.4375
\n", + "
\n", + "
0xf7 1_11101_11 = -0b1.11*2^14  = -28672.0
\n", + "
\n", + "
0x38 0_01110_00 = +0b1.00*2^-1  = 0.5
\n", + "
\n", + "
0x78 0_11110_00 = +0b1.00*2^15  = 32768.0
\n", + "
\n", + "
0xb8 1_01110_00 = -0b1.00*2^-1  = -0.5
\n", + "
\n", + "
0xf8 1_11110_00 = -0b1.00*2^15  = -32768.0
\n", + "
\n", + "
0x39 0_01110_01 = +0b1.01*2^-1  = 0.625
\n", + "
\n", + "
0x79 0_11110_01 = +0b1.01*2^15  = 40960.0
\n", + "
\n", + "
0xb9 1_01110_01 = -0b1.01*2^-1  = -0.625
\n", + "
\n", + "
0xf9 1_11110_01 = -0b1.01*2^15  = -40960.0
\n", + "
\n", + "
0x3a 0_01110_10 = +0b1.10*2^-1  = 0.75
\n", + "
\n", + "
0x7a 0_11110_10 = +0b1.10*2^15  = 49152.0
\n", + "
\n", + "
0xba 1_01110_10 = -0b1.10*2^-1  = -0.75
\n", + "
\n", + "
0xfa 1_11110_10 = -0b1.10*2^15  = -49152.0
\n", + "
\n", + "
0x3b 0_01110_11 = +0b1.11*2^-1  = 0.875
\n", + "
\n", + "
0x7b 0_11110_11 = +0b1.11*2^15  = 57344.0
\n", + "
\n", + "
0xbb 1_01110_11 = -0b1.11*2^-1  = -0.875
\n", + "
\n", + "
0xfb 1_11110_11 = -0b1.11*2^15  = -57344.0
\n", + "
\n", + "
0x3c 0_01111_00 = +0b1.00*2^0   = 1.0
\n", + "
\n", + "
0x7c 0_11111_00 = inf
\n", + "
\n", + "
0xbc 1_01111_00 = -0b1.00*2^0   = -1.0
\n", + "
\n", + "
0xfc 1_11111_00 = -inf
\n", + "
\n", + "
0x3d 0_01111_01 = +0b1.01*2^0   = 1.25
\n", + "
\n", + "
0x7d 0_11111_01 = nan
\n", + "
\n", + "
0xbd 1_01111_01 = -0b1.01*2^0   = -1.25
\n", + "
\n", + "
0xfd 1_11111_01 = nan
\n", + "
\n", + "
0x3e 0_01111_10 = +0b1.10*2^0   = 1.5
\n", + "
\n", + "
0x7e 0_11111_10 = nan
\n", + "
\n", + "
0xbe 1_01111_10 = -0b1.10*2^0   = -1.5
\n", + "
\n", + "
0xfe 1_11111_10 = nan
\n", + "
\n", + "
0x3f 0_01111_11 = +0b1.11*2^0   = 1.75
\n", + "
\n", + "
0x7f 0_11111_11 = nan
\n", + "
\n", + "
0xbf 1_01111_11 = -0b1.11*2^0   = -1.75
\n", + "
\n", + "
0xff 1_11111_11 = nan
\n", + "
" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "HTML(mktbl(format_info_ocp_e5m2, cols=4, skip_rows=(0x10, 0x30), vs_width=8, vs_d=5))" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "

FP8 Value Table, ocp_e4m3

\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + "
0x00 0_0000_000 = 0.0
\n", + "
\n", + "
0x40 0_1000_000 = +0b1.000*2^1   = 2.0
\n", + "
\n", + "
0x80 1_0000_000 = -0.0
\n", + "
\n", + "
0xc0 1_1000_000 = -0b1.000*2^1   = -2.0
\n", + "
\n", + "
0x01 0_0000_001 = +0b0.001*2^-6  = ~0.00195
\n", + "
\n", + "
0x41 0_1000_001 = +0b1.001*2^1   = 2.25
\n", + "
\n", + "
0x81 1_0000_001 = -0b0.001*2^-6  = ~-0.00195
\n", + "
\n", + "
0xc1 1_1000_001 = -0b1.001*2^1   = -2.25
\n", + "
\n", + "
0x02 0_0000_010 = +0b0.010*2^-6  = ~0.00391
\n", + "
\n", + "
0x42 0_1000_010 = +0b1.010*2^1   = 2.5
\n", + "
\n", + "
0x82 1_0000_010 = -0b0.010*2^-6  = ~-0.00391
\n", + "
\n", + "
0xc2 1_1000_010 = -0b1.010*2^1   = -2.5
\n", + "
\n", + "
0x03 0_0000_011 = +0b0.011*2^-6  = ~0.00586
\n", + "
\n", + "
0x43 0_1000_011 = +0b1.011*2^1   = 2.75
\n", + "
\n", + "
0x83 1_0000_011 = -0b0.011*2^-6  = ~-0.00586
\n", + "
\n", + "
0xc3 1_1000_011 = -0b1.011*2^1   = -2.75
\n", + "
\n", + "
0x04 0_0000_100 = +0b0.100*2^-6  = ~0.00781
\n", + "
\n", + "
0x44 0_1000_100 = +0b1.100*2^1   = 3.0
\n", + "
\n", + "
0x84 1_0000_100 = -0b0.100*2^-6  = ~-0.00781
\n", + "
\n", + "
0xc4 1_1000_100 = -0b1.100*2^1   = -3.0
\n", + "
\n", + "
0x05 0_0000_101 = +0b0.101*2^-6  = ~0.00977
\n", + "
\n", + "
0x45 0_1000_101 = +0b1.101*2^1   = 3.25
\n", + "
\n", + "
0x85 1_0000_101 = -0b0.101*2^-6  = ~-0.00977
\n", + "
\n", + "
0xc5 1_1000_101 = -0b1.101*2^1   = -3.25
\n", + "
\n", + "
0x06 0_0000_110 = +0b0.110*2^-6  = ~0.01172
\n", + "
\n", + "
0x46 0_1000_110 = +0b1.110*2^1   = 3.5
\n", + "
\n", + "
0x86 1_0000_110 = -0b0.110*2^-6  = ~-0.01172
\n", + "
\n", + "
0xc6 1_1000_110 = -0b1.110*2^1   = -3.5
\n", + "
\n", + "
0x07 0_0000_111 = +0b0.111*2^-6  = ~0.01367
\n", + "
\n", + "
0x47 0_1000_111 = +0b1.111*2^1   = 3.75
\n", + "
\n", + "
0x87 1_0000_111 = -0b0.111*2^-6  = ~-0.01367
\n", + "
\n", + "
0xc7 1_1000_111 = -0b1.111*2^1   = -3.75
\n", + "
\n", + "
0x08 0_0001_000 = +0b1.000*2^-6  = 0.015625
\n", + "
\n", + "
0x48 0_1001_000 = +0b1.000*2^2   = 4.0
\n", + "
\n", + "
0x88 1_0001_000 = -0b1.000*2^-6  = ~-0.01562
\n", + "
\n", + "
0xc8 1_1001_000 = -0b1.000*2^2   = -4.0
\n", + "
\n", + "
0x09 0_0001_001 = +0b1.001*2^-6  = ~0.01758
\n", + "
\n", + "
0x49 0_1001_001 = +0b1.001*2^2   = 4.5
\n", + "
\n", + "
0x89 1_0001_001 = -0b1.001*2^-6  = ~-0.01758
\n", + "
\n", + "
0xc9 1_1001_001 = -0b1.001*2^2   = -4.5
\n", + "
\n", + "
0x0a 0_0001_010 = +0b1.010*2^-6  = ~0.01953
\n", + "
\n", + "
0x4a 0_1001_010 = +0b1.010*2^2   = 5.0
\n", + "
\n", + "
0x8a 1_0001_010 = -0b1.010*2^-6  = ~-0.01953
\n", + "
\n", + "
0xca 1_1001_010 = -0b1.010*2^2   = -5.0
\n", + "
\n", + "
0x0b 0_0001_011 = +0b1.011*2^-6  = ~0.02148
\n", + "
\n", + "
0x4b 0_1001_011 = +0b1.011*2^2   = 5.5
\n", + "
\n", + "
0x8b 1_0001_011 = -0b1.011*2^-6  = ~-0.02148
\n", + "
\n", + "
0xcb 1_1001_011 = -0b1.011*2^2   = -5.5
\n", + "
\n", + "
0x0c 0_0001_100 = +0b1.100*2^-6  = ~0.02344
\n", + "
\n", + "
0x4c 0_1001_100 = +0b1.100*2^2   = 6.0
\n", + "
\n", + "
0x8c 1_0001_100 = -0b1.100*2^-6  = ~-0.02344
\n", + "
\n", + "
0xcc 1_1001_100 = -0b1.100*2^2   = -6.0
\n", + "
\n", + "
0x0d 0_0001_101 = +0b1.101*2^-6  = ~0.02539
\n", + "
\n", + "
0x4d 0_1001_101 = +0b1.101*2^2   = 6.5
\n", + "
\n", + "
0x8d 1_0001_101 = -0b1.101*2^-6  = ~-0.02539
\n", + "
\n", + "
0xcd 1_1001_101 = -0b1.101*2^2   = -6.5
\n", + "
\n", + "
0x0e 0_0001_110 = +0b1.110*2^-6  = ~0.02734
\n", + "
\n", + "
0x4e 0_1001_110 = +0b1.110*2^2   = 7.0
\n", + "
\n", + "
0x8e 1_0001_110 = -0b1.110*2^-6  = ~-0.02734
\n", + "
\n", + "
0xce 1_1001_110 = -0b1.110*2^2   = -7.0
\n", + "
\n", + "
0x0f 0_0001_111 = +0b1.111*2^-6  = ~0.02930
\n", + "
\n", + "
0x4f 0_1001_111 = +0b1.111*2^2   = 7.5
\n", + "
\n", + "
0x8f 1_0001_111 = -0b1.111*2^-6  = ~-0.02930
\n", + "
\n", + "
0xcf 1_1001_111 = -0b1.111*2^2   = -7.5
\n", + "
\n", + "
0x30 0_0110_000 = +0b1.000*2^-1  = 0.5
\n", + "
\n", + "
0x70 0_1110_000 = +0b1.000*2^7   = 128.0
\n", + "
\n", + "
0xb0 1_0110_000 = -0b1.000*2^-1  = -0.5
\n", + "
\n", + "
0xf0 1_1110_000 = -0b1.000*2^7   = -128.0
\n", + "
\n", + "
0x31 0_0110_001 = +0b1.001*2^-1  = 0.5625
\n", + "
\n", + "
0x71 0_1110_001 = +0b1.001*2^7   = 144.0
\n", + "
\n", + "
0xb1 1_0110_001 = -0b1.001*2^-1  = -0.5625
\n", + "
\n", + "
0xf1 1_1110_001 = -0b1.001*2^7   = -144.0
\n", + "
\n", + "
0x32 0_0110_010 = +0b1.010*2^-1  = 0.625
\n", + "
\n", + "
0x72 0_1110_010 = +0b1.010*2^7   = 160.0
\n", + "
\n", + "
0xb2 1_0110_010 = -0b1.010*2^-1  = -0.625
\n", + "
\n", + "
0xf2 1_1110_010 = -0b1.010*2^7   = -160.0
\n", + "
\n", + "
0x33 0_0110_011 = +0b1.011*2^-1  = 0.6875
\n", + "
\n", + "
0x73 0_1110_011 = +0b1.011*2^7   = 176.0
\n", + "
\n", + "
0xb3 1_0110_011 = -0b1.011*2^-1  = -0.6875
\n", + "
\n", + "
0xf3 1_1110_011 = -0b1.011*2^7   = -176.0
\n", + "
\n", + "
0x34 0_0110_100 = +0b1.100*2^-1  = 0.75
\n", + "
\n", + "
0x74 0_1110_100 = +0b1.100*2^7   = 192.0
\n", + "
\n", + "
0xb4 1_0110_100 = -0b1.100*2^-1  = -0.75
\n", + "
\n", + "
0xf4 1_1110_100 = -0b1.100*2^7   = -192.0
\n", + "
\n", + "
0x35 0_0110_101 = +0b1.101*2^-1  = 0.8125
\n", + "
\n", + "
0x75 0_1110_101 = +0b1.101*2^7   = 208.0
\n", + "
\n", + "
0xb5 1_0110_101 = -0b1.101*2^-1  = -0.8125
\n", + "
\n", + "
0xf5 1_1110_101 = -0b1.101*2^7   = -208.0
\n", + "
\n", + "
0x36 0_0110_110 = +0b1.110*2^-1  = 0.875
\n", + "
\n", + "
0x76 0_1110_110 = +0b1.110*2^7   = 224.0
\n", + "
\n", + "
0xb6 1_0110_110 = -0b1.110*2^-1  = -0.875
\n", + "
\n", + "
0xf6 1_1110_110 = -0b1.110*2^7   = -224.0
\n", + "
\n", + "
0x37 0_0110_111 = +0b1.111*2^-1  = 0.9375
\n", + "
\n", + "
0x77 0_1110_111 = +0b1.111*2^7   = 240.0
\n", + "
\n", + "
0xb7 1_0110_111 = -0b1.111*2^-1  = -0.9375
\n", + "
\n", + "
0xf7 1_1110_111 = -0b1.111*2^7   = -240.0
\n", + "
\n", + "
0x38 0_0111_000 = +0b1.000*2^0   = 1.0
\n", + "
\n", + "
0x78 0_1111_000 = +0b1.000*2^8   = 256.0
\n", + "
\n", + "
0xb8 1_0111_000 = -0b1.000*2^0   = -1.0
\n", + "
\n", + "
0xf8 1_1111_000 = -0b1.000*2^8   = -256.0
\n", + "
\n", + "
0x39 0_0111_001 = +0b1.001*2^0   = 1.125
\n", + "
\n", + "
0x79 0_1111_001 = +0b1.001*2^8   = 288.0
\n", + "
\n", + "
0xb9 1_0111_001 = -0b1.001*2^0   = -1.125
\n", + "
\n", + "
0xf9 1_1111_001 = -0b1.001*2^8   = -288.0
\n", + "
\n", + "
0x3a 0_0111_010 = +0b1.010*2^0   = 1.25
\n", + "
\n", + "
0x7a 0_1111_010 = +0b1.010*2^8   = 320.0
\n", + "
\n", + "
0xba 1_0111_010 = -0b1.010*2^0   = -1.25
\n", + "
\n", + "
0xfa 1_1111_010 = -0b1.010*2^8   = -320.0
\n", + "
\n", + "
0x3b 0_0111_011 = +0b1.011*2^0   = 1.375
\n", + "
\n", + "
0x7b 0_1111_011 = +0b1.011*2^8   = 352.0
\n", + "
\n", + "
0xbb 1_0111_011 = -0b1.011*2^0   = -1.375
\n", + "
\n", + "
0xfb 1_1111_011 = -0b1.011*2^8   = -352.0
\n", + "
\n", + "
0x3c 0_0111_100 = +0b1.100*2^0   = 1.5
\n", + "
\n", + "
0x7c 0_1111_100 = +0b1.100*2^8   = 384.0
\n", + "
\n", + "
0xbc 1_0111_100 = -0b1.100*2^0   = -1.5
\n", + "
\n", + "
0xfc 1_1111_100 = -0b1.100*2^8   = -384.0
\n", + "
\n", + "
0x3d 0_0111_101 = +0b1.101*2^0   = 1.625
\n", + "
\n", + "
0x7d 0_1111_101 = +0b1.101*2^8   = 416.0
\n", + "
\n", + "
0xbd 1_0111_101 = -0b1.101*2^0   = -1.625
\n", + "
\n", + "
0xfd 1_1111_101 = -0b1.101*2^8   = -416.0
\n", + "
\n", + "
0x3e 0_0111_110 = +0b1.110*2^0   = 1.75
\n", + "
\n", + "
0x7e 0_1111_110 = +0b1.110*2^8   = 448.0
\n", + "
\n", + "
0xbe 1_0111_110 = -0b1.110*2^0   = -1.75
\n", + "
\n", + "
0xfe 1_1111_110 = -0b1.110*2^8   = -448.0
\n", + "
\n", + "
0x3f 0_0111_111 = +0b1.111*2^0   = 1.875
\n", + "
\n", + "
0x7f 0_1111_111 = nan
\n", + "
\n", + "
0xbf 1_0111_111 = -0b1.111*2^0   = -1.875
\n", + "
\n", + "
0xff 1_1111_111 = nan
\n", + "
" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "HTML(mktbl(format_info_ocp_e4m3, cols=4, skip_rows=(0x10, 0x30), vs_width=8, vs_d=5))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### IEEE WG P3109 {P} formats\n", + "\n", + "We choose just one example: `p3109(p=3)`, which has the same number of exponent bits as OCP E5 " + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "

FP8 Value Table, p3109_p3

\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + "
0x00 0_00000_00 = 0.0
\n", + "
\n", + "
0x40 0_10000_00 = +0b1.00*2^0   = 1.0
\n", + "
\n", + "
0x80 1_00000_00 = nan
\n", + "
\n", + "
0xc0 1_10000_00 = -0b1.00*2^0   = -1.0
\n", + "
\n", + "
0x01 0_00000_01 = +0b0.01*2^-15 = ~7.6294e-06
\n", + "
\n", + "
0x41 0_10000_01 = +0b1.01*2^0   = 1.25
\n", + "
\n", + "
0x81 1_00000_01 = -0b0.01*2^-15 = ~-7.6294e-06
\n", + "
\n", + "
0xc1 1_10000_01 = -0b1.01*2^0   = -1.25
\n", + "
\n", + "
0x02 0_00000_10 = +0b0.10*2^-15 = ~1.5259e-05
\n", + "
\n", + "
0x42 0_10000_10 = +0b1.10*2^0   = 1.5
\n", + "
\n", + "
0x82 1_00000_10 = -0b0.10*2^-15 = ~-1.5259e-05
\n", + "
\n", + "
0xc2 1_10000_10 = -0b1.10*2^0   = -1.5
\n", + "
\n", + "
0x03 0_00000_11 = +0b0.11*2^-15 = ~2.2888e-05
\n", + "
\n", + "
0x43 0_10000_11 = +0b1.11*2^0   = 1.75
\n", + "
\n", + "
0x83 1_00000_11 = -0b0.11*2^-15 = ~-2.2888e-05
\n", + "
\n", + "
0xc3 1_10000_11 = -0b1.11*2^0   = -1.75
\n", + "
\n", + "
0x04 0_00001_00 = +0b1.00*2^-15 = ~3.0518e-05
\n", + "
\n", + "
0x44 0_10001_00 = +0b1.00*2^1   = 2.0
\n", + "
\n", + "
0x84 1_00001_00 = -0b1.00*2^-15 = ~-3.0518e-05
\n", + "
\n", + "
0xc4 1_10001_00 = -0b1.00*2^1   = -2.0
\n", + "
\n", + "
0x05 0_00001_01 = +0b1.01*2^-15 = ~3.8147e-05
\n", + "
\n", + "
0x45 0_10001_01 = +0b1.01*2^1   = 2.5
\n", + "
\n", + "
0x85 1_00001_01 = -0b1.01*2^-15 = ~-3.8147e-05
\n", + "
\n", + "
0xc5 1_10001_01 = -0b1.01*2^1   = -2.5
\n", + "
\n", + "
0x06 0_00001_10 = +0b1.10*2^-15 = ~4.5776e-05
\n", + "
\n", + "
0x46 0_10001_10 = +0b1.10*2^1   = 3.0
\n", + "
\n", + "
0x86 1_00001_10 = -0b1.10*2^-15 = ~-4.5776e-05
\n", + "
\n", + "
0xc6 1_10001_10 = -0b1.10*2^1   = -3.0
\n", + "
\n", + "
0x07 0_00001_11 = +0b1.11*2^-15 = ~5.3406e-05
\n", + "
\n", + "
0x47 0_10001_11 = +0b1.11*2^1   = 3.5
\n", + "
\n", + "
0x87 1_00001_11 = -0b1.11*2^-15 = ~-5.3406e-05
\n", + "
\n", + "
0xc7 1_10001_11 = -0b1.11*2^1   = -3.5
\n", + "
\n", + "
0x08 0_00010_00 = +0b1.00*2^-14 = ~6.1035e-05
\n", + "
\n", + "
0x48 0_10010_00 = +0b1.00*2^2   = 4.0
\n", + "
\n", + "
0x88 1_00010_00 = -0b1.00*2^-14 = ~-6.1035e-05
\n", + "
\n", + "
0xc8 1_10010_00 = -0b1.00*2^2   = -4.0
\n", + "
\n", + "
0x09 0_00010_01 = +0b1.01*2^-14 = ~7.6294e-05
\n", + "
\n", + "
0x49 0_10010_01 = +0b1.01*2^2   = 5.0
\n", + "
\n", + "
0x89 1_00010_01 = -0b1.01*2^-14 = ~-7.6294e-05
\n", + "
\n", + "
0xc9 1_10010_01 = -0b1.01*2^2   = -5.0
\n", + "
\n", + "
0x0a 0_00010_10 = +0b1.10*2^-14 = ~9.1553e-05
\n", + "
\n", + "
0x4a 0_10010_10 = +0b1.10*2^2   = 6.0
\n", + "
\n", + "
0x8a 1_00010_10 = -0b1.10*2^-14 = ~-9.1553e-05
\n", + "
\n", + "
0xca 1_10010_10 = -0b1.10*2^2   = -6.0
\n", + "
\n", + "
0x0b 0_00010_11 = +0b1.11*2^-14 = ~0.00011
\n", + "
\n", + "
0x4b 0_10010_11 = +0b1.11*2^2   = 7.0
\n", + "
\n", + "
0x8b 1_00010_11 = -0b1.11*2^-14 = ~-0.00011
\n", + "
\n", + "
0xcb 1_10010_11 = -0b1.11*2^2   = -7.0
\n", + "
\n", + "
0x0c 0_00011_00 = +0b1.00*2^-13 = ~0.00012
\n", + "
\n", + "
0x4c 0_10011_00 = +0b1.00*2^3   = 8.0
\n", + "
\n", + "
0x8c 1_00011_00 = -0b1.00*2^-13 = ~-0.00012
\n", + "
\n", + "
0xcc 1_10011_00 = -0b1.00*2^3   = -8.0
\n", + "
\n", + "
0x0d 0_00011_01 = +0b1.01*2^-13 = ~0.00015
\n", + "
\n", + "
0x4d 0_10011_01 = +0b1.01*2^3   = 10.0
\n", + "
\n", + "
0x8d 1_00011_01 = -0b1.01*2^-13 = ~-0.00015
\n", + "
\n", + "
0xcd 1_10011_01 = -0b1.01*2^3   = -10.0
\n", + "
\n", + "
0x0e 0_00011_10 = +0b1.10*2^-13 = ~0.00018
\n", + "
\n", + "
0x4e 0_10011_10 = +0b1.10*2^3   = 12.0
\n", + "
\n", + "
0x8e 1_00011_10 = -0b1.10*2^-13 = ~-0.00018
\n", + "
\n", + "
0xce 1_10011_10 = -0b1.10*2^3   = -12.0
\n", + "
\n", + "
0x0f 0_00011_11 = +0b1.11*2^-13 = ~0.00021
\n", + "
\n", + "
0x4f 0_10011_11 = +0b1.11*2^3   = 14.0
\n", + "
\n", + "
0x8f 1_00011_11 = -0b1.11*2^-13 = ~-0.00021
\n", + "
\n", + "
0xcf 1_10011_11 = -0b1.11*2^3   = -14.0
\n", + "
\n", + "
0x30 0_01100_00 = +0b1.00*2^-4  = 0.0625
\n", + "
\n", + "
0x70 0_11100_00 = +0b1.00*2^12  = 4096.0
\n", + "
\n", + "
0xb0 1_01100_00 = -0b1.00*2^-4  = -0.0625
\n", + "
\n", + "
0xf0 1_11100_00 = -0b1.00*2^12  = -4096.0
\n", + "
\n", + "
0x31 0_01100_01 = +0b1.01*2^-4  = 0.078125
\n", + "
\n", + "
0x71 0_11100_01 = +0b1.01*2^12  = 5120.0
\n", + "
\n", + "
0xb1 1_01100_01 = -0b1.01*2^-4  = ~-0.07812
\n", + "
\n", + "
0xf1 1_11100_01 = -0b1.01*2^12  = -5120.0
\n", + "
\n", + "
0x32 0_01100_10 = +0b1.10*2^-4  = 0.09375
\n", + "
\n", + "
0x72 0_11100_10 = +0b1.10*2^12  = 6144.0
\n", + "
\n", + "
0xb2 1_01100_10 = -0b1.10*2^-4  = -0.09375
\n", + "
\n", + "
0xf2 1_11100_10 = -0b1.10*2^12  = -6144.0
\n", + "
\n", + "
0x33 0_01100_11 = +0b1.11*2^-4  = 0.109375
\n", + "
\n", + "
0x73 0_11100_11 = +0b1.11*2^12  = 7168.0
\n", + "
\n", + "
0xb3 1_01100_11 = -0b1.11*2^-4  = ~-0.10938
\n", + "
\n", + "
0xf3 1_11100_11 = -0b1.11*2^12  = -7168.0
\n", + "
\n", + "
0x34 0_01101_00 = +0b1.00*2^-3  = 0.125
\n", + "
\n", + "
0x74 0_11101_00 = +0b1.00*2^13  = 8192.0
\n", + "
\n", + "
0xb4 1_01101_00 = -0b1.00*2^-3  = -0.125
\n", + "
\n", + "
0xf4 1_11101_00 = -0b1.00*2^13  = -8192.0
\n", + "
\n", + "
0x35 0_01101_01 = +0b1.01*2^-3  = 0.15625
\n", + "
\n", + "
0x75 0_11101_01 = +0b1.01*2^13  = 10240.0
\n", + "
\n", + "
0xb5 1_01101_01 = -0b1.01*2^-3  = -0.15625
\n", + "
\n", + "
0xf5 1_11101_01 = -0b1.01*2^13  = -10240.0
\n", + "
\n", + "
0x36 0_01101_10 = +0b1.10*2^-3  = 0.1875
\n", + "
\n", + "
0x76 0_11101_10 = +0b1.10*2^13  = 12288.0
\n", + "
\n", + "
0xb6 1_01101_10 = -0b1.10*2^-3  = -0.1875
\n", + "
\n", + "
0xf6 1_11101_10 = -0b1.10*2^13  = -12288.0
\n", + "
\n", + "
0x37 0_01101_11 = +0b1.11*2^-3  = 0.21875
\n", + "
\n", + "
0x77 0_11101_11 = +0b1.11*2^13  = 14336.0
\n", + "
\n", + "
0xb7 1_01101_11 = -0b1.11*2^-3  = -0.21875
\n", + "
\n", + "
0xf7 1_11101_11 = -0b1.11*2^13  = -14336.0
\n", + "
\n", + "
0x38 0_01110_00 = +0b1.00*2^-2  = 0.25
\n", + "
\n", + "
0x78 0_11110_00 = +0b1.00*2^14  = 16384.0
\n", + "
\n", + "
0xb8 1_01110_00 = -0b1.00*2^-2  = -0.25
\n", + "
\n", + "
0xf8 1_11110_00 = -0b1.00*2^14  = -16384.0
\n", + "
\n", + "
0x39 0_01110_01 = +0b1.01*2^-2  = 0.3125
\n", + "
\n", + "
0x79 0_11110_01 = +0b1.01*2^14  = 20480.0
\n", + "
\n", + "
0xb9 1_01110_01 = -0b1.01*2^-2  = -0.3125
\n", + "
\n", + "
0xf9 1_11110_01 = -0b1.01*2^14  = -20480.0
\n", + "
\n", + "
0x3a 0_01110_10 = +0b1.10*2^-2  = 0.375
\n", + "
\n", + "
0x7a 0_11110_10 = +0b1.10*2^14  = 24576.0
\n", + "
\n", + "
0xba 1_01110_10 = -0b1.10*2^-2  = -0.375
\n", + "
\n", + "
0xfa 1_11110_10 = -0b1.10*2^14  = -24576.0
\n", + "
\n", + "
0x3b 0_01110_11 = +0b1.11*2^-2  = 0.4375
\n", + "
\n", + "
0x7b 0_11110_11 = +0b1.11*2^14  = 28672.0
\n", + "
\n", + "
0xbb 1_01110_11 = -0b1.11*2^-2  = -0.4375
\n", + "
\n", + "
0xfb 1_11110_11 = -0b1.11*2^14  = -28672.0
\n", + "
\n", + "
0x3c 0_01111_00 = +0b1.00*2^-1  = 0.5
\n", + "
\n", + "
0x7c 0_11111_00 = +0b1.00*2^15  = 32768.0
\n", + "
\n", + "
0xbc 1_01111_00 = -0b1.00*2^-1  = -0.5
\n", + "
\n", + "
0xfc 1_11111_00 = -0b1.00*2^15  = -32768.0
\n", + "
\n", + "
0x3d 0_01111_01 = +0b1.01*2^-1  = 0.625
\n", + "
\n", + "
0x7d 0_11111_01 = +0b1.01*2^15  = 40960.0
\n", + "
\n", + "
0xbd 1_01111_01 = -0b1.01*2^-1  = -0.625
\n", + "
\n", + "
0xfd 1_11111_01 = -0b1.01*2^15  = -40960.0
\n", + "
\n", + "
0x3e 0_01111_10 = +0b1.10*2^-1  = 0.75
\n", + "
\n", + "
0x7e 0_11111_10 = +0b1.10*2^15  = 49152.0
\n", + "
\n", + "
0xbe 1_01111_10 = -0b1.10*2^-1  = -0.75
\n", + "
\n", + "
0xfe 1_11111_10 = -0b1.10*2^15  = -49152.0
\n", + "
\n", + "
0x3f 0_01111_11 = +0b1.11*2^-1  = 0.875
\n", + "
\n", + "
0x7f 0_11111_11 = inf
\n", + "
\n", + "
0xbf 1_01111_11 = -0b1.11*2^-1  = -0.875
\n", + "
\n", + "
0xff 1_11111_11 = -inf
\n", + "
" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "HTML(mktbl(format_info_p3109(3), cols=4, skip_rows=(0x10, 0x30), vs_width=8, vs_d=5))" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "

FP8 Value Table, p3109_p4

\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + "
0x00 0_0000_000 = 0.0
\n", + "
\n", + "
0x40 0_1000_000 = +0b1.000*2^0   = 1.0
\n", + "
\n", + "
0x80 1_0000_000 = nan
\n", + "
\n", + "
0xc0 1_1000_000 = -0b1.000*2^0   = -1.0
\n", + "
\n", + "
0x01 0_0000_001 = +0b0.001*2^-7  = ~0.00098
\n", + "
\n", + "
0x41 0_1000_001 = +0b1.001*2^0   = 1.125
\n", + "
\n", + "
0x81 1_0000_001 = -0b0.001*2^-7  = ~-0.00098
\n", + "
\n", + "
0xc1 1_1000_001 = -0b1.001*2^0   = -1.125
\n", + "
\n", + "
0x02 0_0000_010 = +0b0.010*2^-7  = ~0.00195
\n", + "
\n", + "
0x42 0_1000_010 = +0b1.010*2^0   = 1.25
\n", + "
\n", + "
0x82 1_0000_010 = -0b0.010*2^-7  = ~-0.00195
\n", + "
\n", + "
0xc2 1_1000_010 = -0b1.010*2^0   = -1.25
\n", + "
\n", + "
0x03 0_0000_011 = +0b0.011*2^-7  = ~0.00293
\n", + "
\n", + "
0x43 0_1000_011 = +0b1.011*2^0   = 1.375
\n", + "
\n", + "
0x83 1_0000_011 = -0b0.011*2^-7  = ~-0.00293
\n", + "
\n", + "
0xc3 1_1000_011 = -0b1.011*2^0   = -1.375
\n", + "
\n", + "
0x04 0_0000_100 = +0b0.100*2^-7  = ~0.00391
\n", + "
\n", + "
0x44 0_1000_100 = +0b1.100*2^0   = 1.5
\n", + "
\n", + "
0x84 1_0000_100 = -0b0.100*2^-7  = ~-0.00391
\n", + "
\n", + "
0xc4 1_1000_100 = -0b1.100*2^0   = -1.5
\n", + "
\n", + "
0x05 0_0000_101 = +0b0.101*2^-7  = ~0.00488
\n", + "
\n", + "
0x45 0_1000_101 = +0b1.101*2^0   = 1.625
\n", + "
\n", + "
0x85 1_0000_101 = -0b0.101*2^-7  = ~-0.00488
\n", + "
\n", + "
0xc5 1_1000_101 = -0b1.101*2^0   = -1.625
\n", + "
\n", + "
0x06 0_0000_110 = +0b0.110*2^-7  = ~0.00586
\n", + "
\n", + "
0x46 0_1000_110 = +0b1.110*2^0   = 1.75
\n", + "
\n", + "
0x86 1_0000_110 = -0b0.110*2^-7  = ~-0.00586
\n", + "
\n", + "
0xc6 1_1000_110 = -0b1.110*2^0   = -1.75
\n", + "
\n", + "
0x07 0_0000_111 = +0b0.111*2^-7  = ~0.00684
\n", + "
\n", + "
0x47 0_1000_111 = +0b1.111*2^0   = 1.875
\n", + "
\n", + "
0x87 1_0000_111 = -0b0.111*2^-7  = ~-0.00684
\n", + "
\n", + "
0xc7 1_1000_111 = -0b1.111*2^0   = -1.875
\n", + "
\n", + "
0x08 0_0001_000 = +0b1.000*2^-7  = ~0.00781
\n", + "
\n", + "
0x48 0_1001_000 = +0b1.000*2^1   = 2.0
\n", + "
\n", + "
0x88 1_0001_000 = -0b1.000*2^-7  = ~-0.00781
\n", + "
\n", + "
0xc8 1_1001_000 = -0b1.000*2^1   = -2.0
\n", + "
\n", + "
0x09 0_0001_001 = +0b1.001*2^-7  = ~0.00879
\n", + "
\n", + "
0x49 0_1001_001 = +0b1.001*2^1   = 2.25
\n", + "
\n", + "
0x89 1_0001_001 = -0b1.001*2^-7  = ~-0.00879
\n", + "
\n", + "
0xc9 1_1001_001 = -0b1.001*2^1   = -2.25
\n", + "
\n", + "
0x0a 0_0001_010 = +0b1.010*2^-7  = ~0.00977
\n", + "
\n", + "
0x4a 0_1001_010 = +0b1.010*2^1   = 2.5
\n", + "
\n", + "
0x8a 1_0001_010 = -0b1.010*2^-7  = ~-0.00977
\n", + "
\n", + "
0xca 1_1001_010 = -0b1.010*2^1   = -2.5
\n", + "
\n", + "
0x0b 0_0001_011 = +0b1.011*2^-7  = ~0.01074
\n", + "
\n", + "
0x4b 0_1001_011 = +0b1.011*2^1   = 2.75
\n", + "
\n", + "
0x8b 1_0001_011 = -0b1.011*2^-7  = ~-0.01074
\n", + "
\n", + "
0xcb 1_1001_011 = -0b1.011*2^1   = -2.75
\n", + "
\n", + "
0x0c 0_0001_100 = +0b1.100*2^-7  = ~0.01172
\n", + "
\n", + "
0x4c 0_1001_100 = +0b1.100*2^1   = 3.0
\n", + "
\n", + "
0x8c 1_0001_100 = -0b1.100*2^-7  = ~-0.01172
\n", + "
\n", + "
0xcc 1_1001_100 = -0b1.100*2^1   = -3.0
\n", + "
\n", + "
0x0d 0_0001_101 = +0b1.101*2^-7  = ~0.01270
\n", + "
\n", + "
0x4d 0_1001_101 = +0b1.101*2^1   = 3.25
\n", + "
\n", + "
0x8d 1_0001_101 = -0b1.101*2^-7  = ~-0.01270
\n", + "
\n", + "
0xcd 1_1001_101 = -0b1.101*2^1   = -3.25
\n", + "
\n", + "
0x0e 0_0001_110 = +0b1.110*2^-7  = ~0.01367
\n", + "
\n", + "
0x4e 0_1001_110 = +0b1.110*2^1   = 3.5
\n", + "
\n", + "
0x8e 1_0001_110 = -0b1.110*2^-7  = ~-0.01367
\n", + "
\n", + "
0xce 1_1001_110 = -0b1.110*2^1   = -3.5
\n", + "
\n", + "
0x0f 0_0001_111 = +0b1.111*2^-7  = ~0.01465
\n", + "
\n", + "
0x4f 0_1001_111 = +0b1.111*2^1   = 3.75
\n", + "
\n", + "
0x8f 1_0001_111 = -0b1.111*2^-7  = ~-0.01465
\n", + "
\n", + "
0xcf 1_1001_111 = -0b1.111*2^1   = -3.75
\n", + "
\n", + "
0x30 0_0110_000 = +0b1.000*2^-2  = 0.25
\n", + "
\n", + "
0x70 0_1110_000 = +0b1.000*2^6   = 64.0
\n", + "
\n", + "
0xb0 1_0110_000 = -0b1.000*2^-2  = -0.25
\n", + "
\n", + "
0xf0 1_1110_000 = -0b1.000*2^6   = -64.0
\n", + "
\n", + "
0x31 0_0110_001 = +0b1.001*2^-2  = 0.28125
\n", + "
\n", + "
0x71 0_1110_001 = +0b1.001*2^6   = 72.0
\n", + "
\n", + "
0xb1 1_0110_001 = -0b1.001*2^-2  = -0.28125
\n", + "
\n", + "
0xf1 1_1110_001 = -0b1.001*2^6   = -72.0
\n", + "
\n", + "
0x32 0_0110_010 = +0b1.010*2^-2  = 0.3125
\n", + "
\n", + "
0x72 0_1110_010 = +0b1.010*2^6   = 80.0
\n", + "
\n", + "
0xb2 1_0110_010 = -0b1.010*2^-2  = -0.3125
\n", + "
\n", + "
0xf2 1_1110_010 = -0b1.010*2^6   = -80.0
\n", + "
\n", + "
0x33 0_0110_011 = +0b1.011*2^-2  = 0.34375
\n", + "
\n", + "
0x73 0_1110_011 = +0b1.011*2^6   = 88.0
\n", + "
\n", + "
0xb3 1_0110_011 = -0b1.011*2^-2  = -0.34375
\n", + "
\n", + "
0xf3 1_1110_011 = -0b1.011*2^6   = -88.0
\n", + "
\n", + "
0x34 0_0110_100 = +0b1.100*2^-2  = 0.375
\n", + "
\n", + "
0x74 0_1110_100 = +0b1.100*2^6   = 96.0
\n", + "
\n", + "
0xb4 1_0110_100 = -0b1.100*2^-2  = -0.375
\n", + "
\n", + "
0xf4 1_1110_100 = -0b1.100*2^6   = -96.0
\n", + "
\n", + "
0x35 0_0110_101 = +0b1.101*2^-2  = 0.40625
\n", + "
\n", + "
0x75 0_1110_101 = +0b1.101*2^6   = 104.0
\n", + "
\n", + "
0xb5 1_0110_101 = -0b1.101*2^-2  = -0.40625
\n", + "
\n", + "
0xf5 1_1110_101 = -0b1.101*2^6   = -104.0
\n", + "
\n", + "
0x36 0_0110_110 = +0b1.110*2^-2  = 0.4375
\n", + "
\n", + "
0x76 0_1110_110 = +0b1.110*2^6   = 112.0
\n", + "
\n", + "
0xb6 1_0110_110 = -0b1.110*2^-2  = -0.4375
\n", + "
\n", + "
0xf6 1_1110_110 = -0b1.110*2^6   = -112.0
\n", + "
\n", + "
0x37 0_0110_111 = +0b1.111*2^-2  = 0.46875
\n", + "
\n", + "
0x77 0_1110_111 = +0b1.111*2^6   = 120.0
\n", + "
\n", + "
0xb7 1_0110_111 = -0b1.111*2^-2  = -0.46875
\n", + "
\n", + "
0xf7 1_1110_111 = -0b1.111*2^6   = -120.0
\n", + "
\n", + "
0x38 0_0111_000 = +0b1.000*2^-1  = 0.5
\n", + "
\n", + "
0x78 0_1111_000 = +0b1.000*2^7   = 128.0
\n", + "
\n", + "
0xb8 1_0111_000 = -0b1.000*2^-1  = -0.5
\n", + "
\n", + "
0xf8 1_1111_000 = -0b1.000*2^7   = -128.0
\n", + "
\n", + "
0x39 0_0111_001 = +0b1.001*2^-1  = 0.5625
\n", + "
\n", + "
0x79 0_1111_001 = +0b1.001*2^7   = 144.0
\n", + "
\n", + "
0xb9 1_0111_001 = -0b1.001*2^-1  = -0.5625
\n", + "
\n", + "
0xf9 1_1111_001 = -0b1.001*2^7   = -144.0
\n", + "
\n", + "
0x3a 0_0111_010 = +0b1.010*2^-1  = 0.625
\n", + "
\n", + "
0x7a 0_1111_010 = +0b1.010*2^7   = 160.0
\n", + "
\n", + "
0xba 1_0111_010 = -0b1.010*2^-1  = -0.625
\n", + "
\n", + "
0xfa 1_1111_010 = -0b1.010*2^7   = -160.0
\n", + "
\n", + "
0x3b 0_0111_011 = +0b1.011*2^-1  = 0.6875
\n", + "
\n", + "
0x7b 0_1111_011 = +0b1.011*2^7   = 176.0
\n", + "
\n", + "
0xbb 1_0111_011 = -0b1.011*2^-1  = -0.6875
\n", + "
\n", + "
0xfb 1_1111_011 = -0b1.011*2^7   = -176.0
\n", + "
\n", + "
0x3c 0_0111_100 = +0b1.100*2^-1  = 0.75
\n", + "
\n", + "
0x7c 0_1111_100 = +0b1.100*2^7   = 192.0
\n", + "
\n", + "
0xbc 1_0111_100 = -0b1.100*2^-1  = -0.75
\n", + "
\n", + "
0xfc 1_1111_100 = -0b1.100*2^7   = -192.0
\n", + "
\n", + "
0x3d 0_0111_101 = +0b1.101*2^-1  = 0.8125
\n", + "
\n", + "
0x7d 0_1111_101 = +0b1.101*2^7   = 208.0
\n", + "
\n", + "
0xbd 1_0111_101 = -0b1.101*2^-1  = -0.8125
\n", + "
\n", + "
0xfd 1_1111_101 = -0b1.101*2^7   = -208.0
\n", + "
\n", + "
0x3e 0_0111_110 = +0b1.110*2^-1  = 0.875
\n", + "
\n", + "
0x7e 0_1111_110 = +0b1.110*2^7   = 224.0
\n", + "
\n", + "
0xbe 1_0111_110 = -0b1.110*2^-1  = -0.875
\n", + "
\n", + "
0xfe 1_1111_110 = -0b1.110*2^7   = -224.0
\n", + "
\n", + "
0x3f 0_0111_111 = +0b1.111*2^-1  = 0.9375
\n", + "
\n", + "
0x7f 0_1111_111 = inf
\n", + "
\n", + "
0xbf 1_0111_111 = -0b1.111*2^-1  = -0.9375
\n", + "
\n", + "
0xff 1_1111_111 = -inf
\n", + "
" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "HTML(mktbl(format_info_p3109(4), cols=4, skip_rows=(0x10, 0x30), vs_width=8, vs_d=5))" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.0" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/docs/source/api.rst b/docs/source/api.rst new file mode 100644 index 0000000..5a9b682 --- /dev/null +++ b/docs/source/api.rst @@ -0,0 +1,43 @@ +.. Copyright (c) 2024 Graphcore Ltd. All rights reserved. + +API +=== + +.. module:: gfloat + +Scalar Functions +---------------- + +.. autofunction:: decode_float +.. autofunction:: round_float +.. autofunction:: encode_float + +Block format functions +---------------------- + +.. autofunction:: decode_block +.. autofunction:: encode_block +.. autofunction:: quantize_block + +.. autofunction:: compute_scale_amax + + +Classes +------- + +.. autoclass:: FormatInfo() + :members: +.. autoclass:: FloatClass() + :members: +.. autoclass:: RoundMode() + :members: +.. autoclass:: FloatValue() + :members: +.. autoclass:: BlockFormatInfo() + :members: + +Pretty printers +--------------- + +.. autofunction:: float_pow2str +.. autofunction:: float_tilde_unless_roundtrip_str diff --git a/docs/source/conf.py b/docs/source/conf.py new file mode 100644 index 0000000..4d732bc --- /dev/null +++ b/docs/source/conf.py @@ -0,0 +1,54 @@ +# Copyright (c) 2024 Graphcore Ltd. All rights reserved. + +# Configuration file for the Sphinx documentation builder. 
+ +# -- Project information + +project = "GFloat" +copyright = "2024, Graphcore Ltd" +author = "Andrew Fitzgibbon" +release = "0.3" # Set version in package.sh +version = "0.3" # Set version in package.sh + +# -- General configuration + +extensions = [ + "sphinx.ext.duration", + "sphinx.ext.doctest", + "sphinx.ext.autodoc", + "sphinx.ext.autosummary", + "sphinx.ext.intersphinx", + "sphinx.ext.viewcode", + "sphinx.ext.napoleon", + "sphinx_paramlinks", + "myst_nb", +] + +autodoc_typehints = "none" # We have them in the parameter descriptors +autodoc_typehints_format = "short" +python_use_unqualified_type_names = True + +autodoc_type_aliases = { + "Iterable": "Iterable", + "npt.ArrayLike": "ArrayLike", + "npt.NDArray": "NDArray", +} + +autodoc_default_options = { + "member-order": "bysource", +} + +intersphinx_mapping = { + "python": ("https://docs.python.org/3/", None), + "sphinx": ("https://www.sphinx-doc.org/en/master/", None), +} +intersphinx_disabled_domains = ["std"] + +templates_path = ["_templates"] + +# -- Options for HTML output + +html_theme = "sphinx_rtd_theme" + +# -- Options for EPUB output +epub_show_urls = "footnote" diff --git a/docs/source/formats.rst b/docs/source/formats.rst new file mode 100644 index 0000000..d6820e2 --- /dev/null +++ b/docs/source/formats.rst @@ -0,0 +1,44 @@ +.. Copyright (c) 2024 Graphcore Ltd. All rights reserved. + +Defined Formats +=============== + +.. module:: gfloat.formats + +IEEE 754 Formats +---------------- + +.. autodata:: format_info_binary16 +.. autodata:: format_info_binary32 +.. autodata:: format_info_binary64 + +BFloat16 +---------------- + +.. autodata:: format_info_bfloat16 + +Open Compute Platform (OCP) Formats +----------------------------------- + +.. autodata:: format_info_ocp_e5m2 +.. autodata:: format_info_ocp_e4m3 +.. autodata:: format_info_ocp_e3m2 +.. autodata:: format_info_ocp_e2m3 +.. autodata:: format_info_ocp_e2m1 +.. autodata:: format_info_ocp_e8m0 +.. 
autodata:: format_info_ocp_int8 + +IEEE WG P3109 Formats +--------------------- + +.. autofunction:: format_info_p3109 + +Block Formats +--------------------- + +.. autodata:: format_info_mxfp8_e5m2 +.. autodata:: format_info_mxfp8_e4m3 +.. autodata:: format_info_mxfp6_e3m2 +.. autodata:: format_info_mxfp6_e2m3 +.. autodata:: format_info_mxfp4_e2m1 +.. autodata:: format_info_mxint8 diff --git a/docs/source/index.rst b/docs/source/index.rst new file mode 100644 index 0000000..a74659d --- /dev/null +++ b/docs/source/index.rst @@ -0,0 +1,55 @@ +.. Copyright (c) 2024 Graphcore Ltd. All rights reserved. + +.. note:: + + Check the version number of this documentation against the `gfloat` version + you are using. "Latest" refers to the head on https://github.com/graphcore-research/gfloat, + while pypi versions installed using `pip install` will have corresponding `vX.Y.Z` tags. + +GFloat: Generic floating point formats in Python +================================================ + +GFloat is designed to allow experimentation with a variety of floating-point +formats in Python. Formats are parameterized by the primary IEEE-754 parameters +of: + + * Width in bits (k) + * Precision (p) + * Maximum exponent (emax) + +with additional fields defining the encoding of infinities, Not-a-number (NaN) values, +and negative zero, among others (see :class:`gfloat.FormatInfo`). + +This allows an implementation of generic floating point encode/decode logic, +handling various current and proposed floating point types: + + - `IEEE 754 <https://en.wikipedia.org/wiki/IEEE_754>`_: Binary16, Binary32 + - `OCP Float8 <https://www.opencompute.org/documents/ocp-8-bit-floating-point-specification-ofp8-revision-1-0-2023-06-20-pdf>`_: E5M2, E4M3, and MX formats + - `IEEE WG P3109 <https://github.com/awf/P3109-Public/blob/main/Shared%20Reports/P3109%20WG%20Interim%20report.pdf>`_: P{p} for p in 1..7 + +The library favours readability and extensibility over speed - for fast +implementations of these datatypes see, for example, +`ml_dtypes <https://github.com/jax-ml/ml_dtypes>`_, +`bitstring <https://github.com/scott-griffiths/bitstring>`_, +`MX PyTorch Emulation Library <https://github.com/microsoft/microxcaling>`_. + +To get started with the library, we recommend perusing the notebooks, +otherwise you may wish to jump straight into the API. + +.. 
toctree:: + :hidden: + + self + +.. toctree:: + + notebooks + api + formats + + +Index and Search +================ + +* :ref:`genindex` +* :ref:`search` diff --git a/docs/source/notebooks.rst b/docs/source/notebooks.rst new file mode 100644 index 0000000..c46e624 --- /dev/null +++ b/docs/source/notebooks.rst @@ -0,0 +1,13 @@ +.. Copyright (c) 2024 Graphcore Ltd. All rights reserved. + +Notebooks +========= + +Some notebooks to illustrate uses of the library + +.. toctree:: + :maxdepth: 1 + + 01-decode.ipynb + 02-value-stats.ipynb + 03-value-tables.ipynb diff --git a/etc/check-copyright.sh b/etc/check-copyright.sh new file mode 100755 index 0000000..0ebcd30 --- /dev/null +++ b/etc/check-copyright.sh @@ -0,0 +1,12 @@ +#!/usr/bin/bash +# Copyright (c) 2024 Graphcore Ltd. All rights reserved. + +PATTERN='Copyright \(c\) 202[0-9] Graphcore Ltd\. +All rights reserved\.' + +# We "grep ." so the exit code signals that the first grep generated output +if grep -L -E "$PATTERN" "$@" | grep . +then + # There was output, signal unsuccessful + exit 1 +fi +# Normal exit, signalling success diff --git a/etc/package.sh b/etc/package.sh new file mode 100644 index 0000000..f64e7c1 --- /dev/null +++ b/etc/package.sh @@ -0,0 +1,22 @@ +# Copyright (c) 2024 Graphcore Ltd. All rights reserved. 
+ +# Set version numbers, make package, and publish + +set -o errexit + +# This is the master location at which to change version number +VERSION="0.3" + +# Run the script to change the version elsewhere +perl -pi -e 's/^(release|version) = "([\d.]+)"/$1 = "'$VERSION'"/' docs/source/conf.py +perl -pi -e 's/^version = "([\d.]+)"/version = "'$VERSION'"/' pyproject.toml + +# Build docs to embed version +( cd docs && make html ) + +# Build distribution +rm -rf dist +pip install build twine +python -m build +echo "Enter PyPI API Token" +echo __token__ | twine upload --repository pypi dist/* --verbose diff --git a/etc/test-check-copyright.sh b/etc/test-check-copyright.sh new file mode 100644 index 0000000..d93a1b5 --- /dev/null +++ b/etc/test-check-copyright.sh @@ -0,0 +1,30 @@ +# Copyright (c) 2024 Graphcore Ltd. All rights reserved. + +tmpdir=$(mktemp -d) +test -d $tmpdir || exit -1 + +cleanup () { + echo "Removing $tmpdir" + rm $tmpdir/t.sh + rmdir $tmpdir +} + +trap cleanup EXIT + +# Passing case +echo "Copyright (c) 2024 Graphcore Ltd. All rights reserved." > $tmpdir/t.sh +if sh etc/check-copyright.sh $tmpdir/t.sh +then + echo Pass: Should have passed +else + echo FAIL: Should have passed +fi + +# Failing case +echo "Copyright (c) 2024 Graphcore Ltd. All rights xreserved." > $tmpdir/t.sh +if sh etc/check-copyright.sh $tmpdir/t.sh +then + echo FAIL: Should have failed, but passed +else + echo Pass: Should have failed +fi diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..f40e7b7 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,40 @@ +# Copyright (c) 2024 Graphcore Ltd. All rights reserved. 
+ +[build-system] +requires = ["setuptools", "setuptools-scm"] +build-backend = "setuptools.build_meta" + +[tool.setuptools] +packages = ['gfloat'] +package-dir = {"" = "src"} + +[project] +name = "gfloat" +version = "0.3" # Set version in package.sh +authors = [ + {name = "Andrew Fitzgibbon", email = "awf@fitzgibbon.ie"}, +] +description = "Generic floating point handling in Python" +readme = "README.md" +classifiers = [ + "Intended Audience :: Developers", + "License :: OSI Approved :: MIT License", + "Programming Language :: Python :: 3", + "Development Status :: 3 - Alpha", +] +requires-python = ">=3.8.1" +dynamic = ["dependencies", "optional-dependencies"] + +[tool.setuptools.dynamic] +# version = {attr = "gfloat.VERSION"} # Wow: https://github.com/pypa/setuptools/issues/1724 +dependencies = {file = ["requirements.txt"]} +optional-dependencies = {dev = {file = ["requirements-dev.txt"]}} + +[tool.black] +line-length = 88 +fast = true + +[tool.mypy] +[[tool.mypy.overrides]] +module = "mx.*" +ignore_missing_imports = true diff --git a/requirements-dev.txt b/requirements-dev.txt new file mode 100644 index 0000000..22bc22b --- /dev/null +++ b/requirements-dev.txt @@ -0,0 +1,21 @@ +# Requirements for tests +pytest +ml_dtypes +# See requirements-tests also for direct dependencies + +# Requirements for development +pre-commit +black +mypy +black[jupyter] +isort + +# Requirements for docs +sphinx==7.1.2 +sphinx-rtd-theme==1.3.0rc1 +sphinx_paramlinks +myst_nb + +# Requirements for notebooks +airium +pandas diff --git a/requirements-test.txt b/requirements-test.txt new file mode 100644 index 0000000..e685591 --- /dev/null +++ b/requirements-test.txt @@ -0,0 +1,3 @@ +# PyPI doesn't like direct dependencies - see https://github.com/microsoft/microxcaling/issues/22 + +mx @ git+https://github.com/microsoft/microxcaling diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..24ce15a --- /dev/null +++ b/requirements.txt @@ -0,0 +1 @@ +numpy diff 
--git a/setup.cfg b/setup.cfg new file mode 100644 index 0000000..8bfd5a1 --- /dev/null +++ b/setup.cfg @@ -0,0 +1,4 @@ +[egg_info] +tag_build = +tag_date = 0 + diff --git a/src/gfloat.egg-info/PKG-INFO b/src/gfloat.egg-info/PKG-INFO new file mode 100644 index 0000000..0ecc625 --- /dev/null +++ b/src/gfloat.egg-info/PKG-INFO @@ -0,0 +1,70 @@ +Metadata-Version: 2.1 +Name: gfloat +Version: 0.3 +Summary: Generic floating point handling in Python +Author-email: Andrew Fitzgibbon +Classifier: Intended Audience :: Developers +Classifier: License :: OSI Approved :: MIT License +Classifier: Programming Language :: Python :: 3 +Classifier: Development Status :: 3 - Alpha +Requires-Python: >=3.8.1 +Description-Content-Type: text/markdown +License-File: LICENSE +Requires-Dist: numpy +Provides-Extra: dev +Requires-Dist: pytest; extra == "dev" +Requires-Dist: ml_dtypes; extra == "dev" +Requires-Dist: pre-commit; extra == "dev" +Requires-Dist: black; extra == "dev" +Requires-Dist: mypy; extra == "dev" +Requires-Dist: black[jupyter]; extra == "dev" +Requires-Dist: isort; extra == "dev" +Requires-Dist: sphinx==7.1.2; extra == "dev" +Requires-Dist: sphinx-rtd-theme==1.3.0rc1; extra == "dev" +Requires-Dist: sphinx_paramlinks; extra == "dev" +Requires-Dist: myst_nb; extra == "dev" +Requires-Dist: airium; extra == "dev" +Requires-Dist: pandas; extra == "dev" + + + +# gfloat: Generic floating-point types in Python + +An implementation of generic floating point encode/decode logic, +handling various current and proposed floating point types: + + - [IEEE 754](https://en.wikipedia.org/wiki/IEEE_754): Binary16, Binary32 + - [OCP Float8](https://www.opencompute.org/documents/ocp-8-bit-floating-point-specification-ofp8-revision-1-0-2023-06-20-pdf): E5M2, E4M3 + - [IEEE WG P3109](https://github.com/awf/P3109-Public/blob/main/Shared%20Reports/P3109%20WG%20Interim%20report.pdf): P{p} for p in 1..7 + - [OCP MX 
Formats](https://www.opencompute.org/documents/ocp-microscaling-formats-mx-v1-0-spec-final-pdf): E2M1, M2M3, E3M2, E8M0, INT8, and the MX block formats. + +The library favours readability and extensibility over speed - for fast implementations of these datatypes see, for example, [ml_dtypes](https://github.com/jax-ml/ml_dtypes), +[bitstring](https://github.com/scott-griffiths/bitstring), +[MX PyTorch Emulation Library](https://github.com/microsoft/microxcaling). + +See https://gfloat.readthedocs.io for documentation, or dive into the notebooks to explore the formats. + +For example, here's a table from the [02-value-stats](docs/source/02-value-stats.ipynb) notebook: + +|name|B: Bits in the format|P: Precision in bits|E: Exponent field width in bits|0 int: + """The number of bits in each element, d""" + return self.etype.k + + @property + def scale_bits(self) -> int: + """The number of bits in the scale, w""" + return self.stype.k + + @property + def block_size_bytes(self) -> int: + """The number of bytes in a block""" + bits = self.element_bits * self.k + self.scale_bits + assert bits % 8 == 0 + return bits // 8 + + @property + def __name__(self) -> str: + return self.name + + def __str__(self) -> str: + return f"BlockFormatInfo:{self.name})" + + +def decode_block(fi: BlockFormatInfo, block: Iterable[int]) -> Iterable[float]: + """ + Decode a :paramref:`block` of integer codepoints in Block Format :paramref:`fi` + + The scale is encoded in the first value of :paramref:`block`, + with the remaining values encoding the block elements. + + The size of the iterable is not checked against the format descriptor. + + Args: + fi (BlockFormatInfo): Describes the block format + block (Iterable[int]): Input block + + Returns: + A sequence of floats representing the encoded values. 
+ """ + it = iter(block) + + scale_encoding = next(it) + scale = decode_float(fi.stype, scale_encoding).fval + + for val_encoding in it: + val = scale * decode_float(fi.etype, val_encoding).fval + yield val + + # TODO: Assert length of block was k+1? Messy unless block is len()able + + +def encode_block( + fi: BlockFormatInfo, + scale: float, + vals: Iterable[float], + round: RoundMode = RoundMode.TiesToEven, +) -> Iterable[int]: + """ + Encode float :paramref:`vals` into block Format described by :paramref:`fi` + + The :paramref:`scale` is explicitly passed, and the :paramref:`vals` are + assumed to already be multiplied by `1/scale`. + That is, this is pure encoding, scaling is computed and applied elsewhere + (see e.g. :func:`quantize_block`). + + The :paramref:`scale` is checked against the range of the scale format, + and a `ValueError` is raised if it lies outside that range. + + Args: + fi (BlockFormatInfo): Describes the target block format + scale (float): Scale to be recorded in the block + vals (Iterable[float]): Input block + round (RoundMode): Rounding mode to use, defaults to `TiesToEven` + + Returns: + A sequence of ints representing the encoded values. + + Raises: + ValueError: The scale overflows the target scale encoding format. + """ + + if scale > fi.stype.max or scale < fi.stype.min: + raise ValueError(f"Scaled {scale} out of range for {fi.stype}") + + sat = True # Saturate elements if out of range + + def enc(ty: FormatInfo, x: float) -> int: + return encode_float(ty, round_float(ty, x, round, sat)) + + yield enc(fi.stype, scale) + + for val in vals: + yield enc(fi.etype, val) + + +ComputeScaleCallable = Callable[[float, npt.ArrayLike], float] + + +def compute_scale_amax(emax: float, vals: npt.ArrayLike) -> float: + """ + Compute a scale factor such that :paramref:`vals` can be scaled to the + range [0, 2**emax]. That is, `scale` is computed such that the largest + exponent in the array `vals / scale` will be `emax`. + + The scale is clipped to the range 2**[-127, 127]. 
+ + If all values are zero, any scale value smaller than emax would be accurate, + but returning the smallest possible means that quick checks on the magnitude + to identify near-zero blocks will also find the all-zero blocks. + + Args: + emax (float): Maximum exponent to appear in `vals / scale` + vals (ArrayLike): Input block + + Returns: + A float such that `vals / scale` has exponents less than or equal to `emax`. + + Note: + If all vals are zero, the smallest scale, 2**-127, is returned. + """ + amax = np.max(np.abs(vals)) + if amax == 0.0: + q_log2scale = -127.0 + else: + q_log2scale = np.floor(np.log2(amax)) - emax + q_log2scale = np.clip(q_log2scale, -127.0, 127.0) + return 2.0**q_log2scale + + +def quantize_block( + fi: BlockFormatInfo, + vals: npt.NDArray[np.float64], + compute_scale: ComputeScaleCallable, + round: RoundMode = RoundMode.TiesToEven, +) -> npt.NDArray[np.float64]: + """ + Encode and decode a block of float :paramref:`vals` using the + block format described by :paramref:`fi` + + Args: + fi (BlockFormatInfo): Describes the target block format + vals (numpy.array): Input block + compute_scale ((float, ArrayLike) -> float): + Callable to compute the scale, e.g. :func:`compute_scale_amax` + round (RoundMode): Rounding mode to use, defaults to `TiesToEven` + + Returns: + An array of floats representing the quantized values. + + Raises: + ValueError: The scale overflows the target scale encoding format. + """ + + q_scale = compute_scale(fi.etype.emax, vals) + scaled_vals = vals / q_scale + enc = encode_block(fi, q_scale, scaled_vals, round) + return np.fromiter(decode_block(fi, enc), float) diff --git a/src/gfloat/decode.py b/src/gfloat/decode.py new file mode 100644 index 0000000..ffb0093 --- /dev/null +++ b/src/gfloat/decode.py @@ -0,0 +1,98 @@ +# Copyright (c) 2024 Graphcore Ltd. All rights reserved. 
import numpy as np

from .types import FloatClass, FloatValue, FormatInfo


def decode_float(fi: FormatInfo, i: int) -> FloatValue:
    r"""
    Given :py:class:`FormatInfo` and integer code point, decode to a :py:class:`FloatValue`

    The code point is split into sign, exponent and trailing-significand
    fields using the widths derived from :paramref:`fi`; special encodings
    (NaN, infinities, negative zero) are then recognised, and any remaining
    code point is evaluated as ``sign * significand * 2**exponent``.

    Args:
      fi (FormatInfo): Floating point format descriptor.
      i (int): Integer code point, in the range :math:`0 \le i < 2^{k}`,
               where :math:`k` = ``fi.k``

    Returns:
      Decoded float value

    Raises:
      ValueError:
        If :paramref:`i` is outside the range of valid code points in :paramref:`fi`.
    """
    assert isinstance(i, int)

    k = fi.k
    p = fi.precision
    t = p - 1  # Trailing significand field width
    num_signbits = 1 if fi.is_signed else 0
    w = k - t - num_signbits  # Exponent field width

    if i < 0 or i >= 2**k:
        raise ValueError(f"Code point {i} not in range [0, 2**{k})")

    if fi.is_signed:
        signmask = 1 << (k - 1)
        signbit = 1 if i & signmask else 0
        sign = -1 if signbit else 1
    else:
        # Unsigned format: no sign field, so `i == signmask` below never matches
        signmask = None
        signbit = 0
        sign = 1

    exp = (i >> t) & ((1 << w) - 1)
    significand = i & ((1 << t) - 1)
    if fi.is_twos_complement and signbit:
        # Negative two's-complement codes store 2^t minus the magnitude
        significand = (1 << t) - significand

    expBias = fi.expBias

    iszero = exp == 0 and significand == 0 and fi.has_zero
    issubnormal = fi.has_subnormals and (exp == 0) and (significand != 0)
    isnormal = not iszero and not issubnormal
    if iszero or issubnormal:
        # No implicit leading one; exponent is pinned to the subnormal binade
        expval = 1 - expBias
        fsignificand = significand * 2**-t
    else:
        expval = exp - expBias
        fsignificand = 1.0 + significand * 2**-t

    # Handle specials: Infs, NaN, -0, NaN_0
    signed_infinity = -np.inf if signbit else np.inf

    fval = None
    # All-bits-special exponent (ABSE)
    if w > 0 and exp == 2**w - 1:
        # The top `num_high_nans` significands of this binade are NaN,
        # and the one just below them is Inf when the format has infinities.
        min_i_with_nan = 2 ** (p - 1) - fi.num_high_nans
        if significand >= min_i_with_nan:
            fval = np.nan
        if fi.has_infs and significand == min_i_with_nan - 1:
            fval = signed_infinity

    # Negative zero or NaN
    if iszero and i == signmask and not fi.is_twos_complement:
        if fi.has_nz:
            fval = -0.0
        else:
            fval = np.nan

    # In range - compute value
    if fval is None:
        fval = sign * fsignificand * 2.0**expval

    # Compute FloatClass
    fclass = None
    if fval == 0:
        fclass = FloatClass.ZERO
    elif np.isnan(fval):
        fclass = FloatClass.NAN
    elif np.isfinite(fval):
        if isnormal:
            fclass = FloatClass.NORMAL
        else:
            fclass = FloatClass.SUBNORMAL
    else:
        fclass = FloatClass.INFINITE

    return FloatValue(i, fval, exp, expval, significand, fsignificand, signbit, fclass)


# Patch scaffolding preserved from the original text:
# diff --git a/src/gfloat/formats.py b/src/gfloat/formats.py new file mode 100644 index 0000000..2c9d026 --- /dev/null +++ b/src/gfloat/formats.py @@ -0,0 +1,262 @@
# Copyright (c) 2024 Graphcore Ltd. All rights reserved.

from .block import BlockFormatInfo
from .types import FormatInfo

#: FormatInfo for IEEE-754 Binary64 format
format_info_binary64 = FormatInfo(
    name="binary64",
    k=64,
    precision=53,
    emax=1023,
    has_nz=True,
    has_infs=True,
    num_high_nans=2**52 - 1,
    has_subnormals=True,
    is_signed=True,
    is_twos_complement=False,
)

#: FormatInfo for IEEE-754 Binary32 format
format_info_binary32 = FormatInfo(
    name="binary32",
    k=32,
    precision=24,
    emax=127,
    has_nz=True,
    has_infs=True,
    num_high_nans=2**23 - 1,
    has_subnormals=True,
    is_signed=True,
    is_twos_complement=False,
)

#: FormatInfo for IEEE-754 Binary16 format
format_info_binary16 = FormatInfo(
    name="binary16",
    k=16,
    precision=11,
    emax=15,
    has_nz=True,
    has_infs=True,
    num_high_nans=2**10 - 1,
    has_subnormals=True,
    is_signed=True,
    is_twos_complement=False,
)

#: FormatInfo for Google BFloat16 format
format_info_bfloat16 = FormatInfo(
    name="bfloat16",
    k=16,
    precision=8,
    emax=127,
    has_nz=True,
    has_infs=True,
    num_high_nans=2**7 - 1,
    has_subnormals=True,
    is_signed=True,
    is_twos_complement=False,
)

#: FormatInfo for OCP E5M2 format
format_info_ocp_e5m2 = FormatInfo(
    name="ocp_e5m2",
    k=8,
    precision=3,
    emax=15,
    has_nz=True,
    has_infs=True,
    num_high_nans=2**2 - 1,
    has_subnormals=True,
    is_signed=True,
    is_twos_complement=False,
)

#: FormatInfo for OCP E4M3 format
format_info_ocp_e4m3 = FormatInfo(
    name="ocp_e4m3",
    k=8,
    precision=4,
    emax=8,
    has_nz=True,
    has_infs=False,
    num_high_nans=1,
    has_subnormals=True,
    is_signed=True,
    is_twos_complement=False,
)

#: FormatInfo for OCP MX E2M3 format
format_info_ocp_e2m3 = FormatInfo(
    name="ocp_e2m3",
    k=6,
    precision=4,
    emax=2,
    has_nz=True,
    has_infs=False,
    num_high_nans=0,
    has_subnormals=True,
    is_signed=True,
    is_twos_complement=False,
)

#: FormatInfo for OCP MX E3M2 format
format_info_ocp_e3m2 = FormatInfo(
    name="ocp_e3m2",
    k=6,
    precision=3,
    emax=4,
    has_nz=True,
    has_infs=False,
    num_high_nans=0,
    has_subnormals=True,
    is_signed=True,
    is_twos_complement=False,
)

#: FormatInfo for OCP MX E2M1 format
format_info_ocp_e2m1 = FormatInfo(
    name="ocp_e2m1",
    k=4,
    precision=2,
    emax=2,
    has_nz=True,
    has_infs=False,
    num_high_nans=0,
    has_subnormals=True,
    is_signed=True,
    is_twos_complement=False,
)

#: FormatInfo for OCP MX E8M0 format
format_info_ocp_e8m0 = FormatInfo(
    name="ocp_e8m0",
    k=8,
    precision=1,
    emax=127,
    has_nz=False,
    has_infs=False,
    num_high_nans=1,
    has_subnormals=False,
    is_signed=False,
    is_twos_complement=False,
)

#: FormatInfo for OCP MX INT8 format
format_info_ocp_int8 = FormatInfo(
    name="ocp_int8",
    k=8,
    precision=8,
    emax=0,
    has_nz=False,
    has_infs=False,
    num_high_nans=0,
    has_subnormals=True,
    is_signed=True,
    is_twos_complement=True,
)


def format_info_p3109(precision: int) -> FormatInfo:
    """
    FormatInfo for P3109 P{p} formats

    The result is an 8-bit signed format named ``p3109_p{precision}`` with
    ``emax = 2**(7 - precision) - 1``, infinities, subnormals, no negative
    zero and no high NaNs.

    Args:
      precision (int): Precision in bits

    Returns:
      FormatInfo class describing the format

    Raises:
      ValueError: If precision is not in 1..7
    """
    if precision < 1 or precision > 7:
        raise ValueError(f"P3109 format not defined for p={precision}")

    name = f"p3109_p{precision}"
    emax = 2 ** (7 - precision) - 1

    return FormatInfo(
        name,
        k=8,
        precision=precision,
        emax=emax,
        has_nz=False,
        has_infs=True,
        num_high_nans=0,
        has_subnormals=True,
        is_signed=True,
        is_twos_complement=False,
    )


# Collections of formats
_tiny_formats = [
    format_info_ocp_e2m1,
    format_info_ocp_e2m3,
    format_info_ocp_e3m2,
]

# NOTE(review): range(1, 7) covers p = 1..6 although format_info_p3109 also
# accepts p = 7 -- confirm whether p = 7 is excluded on purpose.
p3109_formats = [format_info_p3109(p) for p in range(1, 7)]

_fp8_formats = [
    format_info_ocp_e4m3,
    format_info_ocp_e5m2,
    *p3109_formats,
]

_fp16_formats = [
    format_info_binary16,
    format_info_bfloat16,
]

all_formats = [
    format_info_ocp_e8m0,
    format_info_ocp_int8,
    *_tiny_formats,
    *_fp8_formats,
    *_fp16_formats,
    format_info_binary32,
    format_info_binary64,
]

# ------
# Block formats
# Each block format pairs an element format with a block size of 32 and
# the E8M0 scale format.

format_info_mxfp8_e5m2 = BlockFormatInfo(
    "mxfp8_e5m2", format_info_ocp_e5m2, 32, format_info_ocp_e8m0
)

format_info_mxfp8_e4m3 = BlockFormatInfo(
    "mxfp8_e4m3", format_info_ocp_e4m3, 32, format_info_ocp_e8m0
)

format_info_mxfp6_e3m2 = BlockFormatInfo(
    "mxfp6_e3m2", format_info_ocp_e3m2, 32, format_info_ocp_e8m0
)

format_info_mxfp6_e2m3 = BlockFormatInfo(
    "mxfp6_e2m3", format_info_ocp_e2m3, 32, format_info_ocp_e8m0
)

format_info_mxfp4_e2m1 = BlockFormatInfo(
    "mxfp4_e2m1", format_info_ocp_e2m1, 32, format_info_ocp_e8m0
)

format_info_mxint8 = BlockFormatInfo(
    "mxint8", format_info_ocp_int8, 32, format_info_ocp_e8m0
)

all_block_formats = [
    format_info_mxfp8_e5m2,
    format_info_mxfp8_e4m3,
    format_info_mxfp6_e3m2,
    format_info_mxfp6_e2m3,
    format_info_mxfp4_e2m1,
    format_info_mxint8,
]

# Patch scaffolding preserved from the original text:
# diff --git a/src/gfloat/printing.py b/src/gfloat/printing.py new file mode 100644 index 0000000..3968520 --- /dev/null +++ b/src/gfloat/printing.py @@ -0,0 +1,55 @@
# Copyright (c) 2024 Graphcore Ltd. All rights reserved.
import fractions

import numpy as np


def float_pow2str(v: float, min_exponent: float = -np.inf) -> str:
    """
    Render floating point values as exact fractions times a power of two.

    Example: float_pow2str(127.0) is "127/64*2^6",

    That is (a significand between 1 and 2) times (a power of two).

    If `min_exponent` is supplied, then values with exponent below `min_exponent`,
    are printed as fractions less than 1, with exponent set to `min_exponent`.
    This is typically used to represent subnormal values.

    Non-finite values are rendered with ``str(v)``; zeros are rendered as
    "0.0" or "-0.0" since they have no power-of-two exponent.

    Args:
      v (float): Value to render
      min_exponent (float): Smallest exponent to print, defaults to no limit

    Returns:
      A string of the form "[-]<fraction>*2^<exponent>"
    """
    if not np.isfinite(v):
        return str(v)

    negative = bool(np.signbit(v))
    if v == 0:
        # log2(0) is -inf, so zero has to be handled before exponent extraction
        return "-0.0" if negative else "0.0"

    # frexp is exact (no log2 rounding) and cannot overflow for tiny inputs:
    # x == mantissa * 2**exponent with mantissa in [0.5, 1)
    mantissa, exponent = np.frexp(np.abs(v))
    sig = float(mantissa) * 2.0  # now in [1, 2)
    e = int(exponent) - 1
    if e < min_exponent:
        # Shift the significand down so that the printed exponent is min_exponent
        sig *= 2.0 ** (e - min_exponent)
        e = min_exponent

    significand = fractions.Fraction(sig)
    return ("-" if negative else "") + f"{significand}*2^{int(e):d}"


def float_tilde_unless_roundtrip_str(v: float, width: int = 14, d: int = 8) -> str:
    """
    Return a string representation of :paramref:`v`, in base 10,
    with maximum width :paramref:`width` and decimal digits :paramref:`d`

    If the default representation is longer than :paramref:`width` it is
    shortened to :paramref:`d` digits, and prefixed with "~" when the
    shortened text no longer parses back to exactly :paramref:`v`.

    Args:
      v (float): Value to render
      width (int): Longest default representation printed unchanged
      d (int): Number of digits used when shortening

    Returns:
      The (possibly "~"-prefixed) string
    """
    # valstr: string representation of value in base 10
    # If the representation does not roundtrip to the value,
    # it is preceded by a "~" to indicate "approximately equal to"
    s = f"{v}"
    if len(s) > width:
        if abs(v) < 1 and "e" not in s:
            # Small value printed without exponent: keep fixed-point notation
            s = f"{v:.{d}f}"
        else:
            s = f"{v:.{d}}"
        if np.isfinite(v) and float(s) != v:
            s = "~" + s

    return s


# Patch scaffolding preserved from the original text:
# diff --git a/src/gfloat/round.py b/src/gfloat/round.py new file mode 100644 index 0000000..4fe5ce7 --- /dev/null +++ b/src/gfloat/round.py @@ -0,0 +1,231 @@
# Copyright (c) 2024 Graphcore Ltd. All rights reserved.
import math

import numpy as np

from .types import FormatInfo, RoundMode


def _isodd(v: int) -> bool:
    """Return True if the least significant bit of :paramref:`v` is set."""
    return v & 0x1 == 1


def round_float(
    fi: FormatInfo, v: float, rnd: RoundMode = RoundMode.TiesToEven, sat: bool = False
) -> float:
    """
    Round input to the given :py:class:`FormatInfo`, given rounding mode and saturation flag

    An input NaN will convert to a NaN in the target.
    An input Infinity will convert to the largest float if :paramref:`sat`,
    otherwise to an Inf, if present, otherwise to a NaN.
    Negative zero will be returned if the format has negative zero, otherwise zero.

    Args:
      fi (FormatInfo): Describes the target format
      v (float): Input value to be rounded
      rnd (RoundMode): Rounding mode to use
      sat (bool): Saturation flag: if True, round overflowed values to `fi.max`

    Returns:
      A float which is one of the values in the format.

    Raises:
       ValueError: The target format cannot represent the input
             (e.g. converting a `NaN`, or an `Inf` when the target has no
             `NaN` or `Inf`, and :paramref:`sat` is false)
    """

    # Constants
    p = fi.precision
    bias = fi.expBias

    if np.isnan(v):
        if fi.num_nans == 0:
            raise ValueError(f"No NaN in format {fi}")

        # Note that this does not preserve the NaN payload
        return np.nan

    # Extract sign
    # NOTE(review): for an unsigned format `sign` is always False, so a
    # negative `v` reaches np.log2 below unchanged -- confirm intended handling.
    sign = np.signbit(v) and fi.is_signed
    vpos = -v if sign else v

    if np.isinf(vpos):
        result = np.inf

    elif vpos == 0:
        result = 0

    else:
        # Extract exponent
        expval = int(np.floor(np.log2(vpos)))

        # Effective precision, accounting for right shift for subnormal values
        if fi.has_subnormals:
            expval = max(expval, 1 - bias)

        # Lift to "integer * 2^e"
        expval = expval - p + 1

        # use ldexp instead of vpos*2**-expval to avoid overflow
        fsignificand = math.ldexp(vpos, -expval)

        # Round
        # Start from the floor; the condition below decides whether the
        # magnitude is bumped up by one unit in the last place.
        isignificand = math.floor(fsignificand)
        delta = fsignificand - isignificand
        if (
            (rnd == RoundMode.TowardPositive and not sign and delta > 0)
            or (rnd == RoundMode.TowardNegative and sign and delta > 0)
            or (rnd == RoundMode.TiesToAway and delta >= 0.5)
            or (rnd == RoundMode.TiesToEven and delta > 0.5)
            or (rnd == RoundMode.TiesToEven and delta == 0.5 and _isodd(isignificand))
        ):
            isignificand += 1

        ## Special case for Precision=1, all-log format with zero.
        #  The logic is simply duplicated (and isignificand overwritten) for clarity.
        if fi.precision == 1:
            isignificand = math.floor(fsignificand)
            # With no significand bits, evenness is judged on the biased exponent
            code_is_odd = isignificand != 0 and _isodd(expval + bias)
            if (
                (rnd == RoundMode.TowardPositive and not sign and delta > 0)
                or (rnd == RoundMode.TowardNegative and sign and delta > 0)
                or (rnd == RoundMode.TiesToAway and delta >= 0.5)
                or (rnd == RoundMode.TiesToEven and delta > 0.5)
                or (rnd == RoundMode.TiesToEven and delta == 0.5 and code_is_odd)
            ):
                # Go to nextUp.
                # Increment isignificand if zero,
                # else increment exponent
                if isignificand == 0:
                    isignificand = 1
                else:
                    assert isignificand == 1
                    expval += 1
        ## End special case for Precision=1.

        # Reconstruct rounded result to float
        result = isignificand * (2.0**expval)

    if result == 0:
        if sign and fi.has_nz:
            return -0.0
        else:
            return 0.0

    # Overflow
    # `result` is a magnitude here, so compare against the largest magnitude
    # available on this side of zero.
    amax = -fi.min if sign else fi.max
    if result > amax:
        if (
            sat
            or (rnd == RoundMode.TowardNegative and not sign and np.isfinite(v))
            or (rnd == RoundMode.TowardPositive and sign and np.isfinite(v))
            or (rnd == RoundMode.TowardZero and np.isfinite(v))
        ):
            result = amax
        else:
            if fi.has_infs:
                result = np.inf
            elif fi.num_nans > 0:
                result = np.nan
            else:
                raise ValueError(f"No Infs or NaNs in format {fi}, and sat=False")

    # Set sign
    if sign:
        result = -result

    return result


def encode_float(fi: FormatInfo, v: float) -> int:
    """
    Encode input to the given :py:class:`FormatInfo`.

    Will round toward zero if :paramref:`v` is not in the value set.
    Will saturate to `Inf`, `NaN`, `fi.max` in order of precedence.
    Encode -0 to 0 if not `fi.has_nz`

    For other roundings and saturations, call :func:`round_float` first.

    Args:
      fi (FormatInfo): Describes the target format
      v (float): The value to be encoded.

    Returns:
      The integer code point
    """

    # Format Constants
    k = fi.bits
    p = fi.precision
    t = p - 1

    # Encode
    if np.isnan(v):
        return fi.code_of_nan

    # Overflow/underflow
    if v > fi.max:
        if fi.has_infs:
            return fi.code_of_posinf
        if fi.num_nans > 0:
            return fi.code_of_nan
        return fi.code_of_max

    if v < fi.min:
        if fi.has_infs:
            return fi.code_of_neginf
        if fi.num_nans > 0:
            return fi.code_of_nan
        return fi.code_of_min

    # Finite values
    sign = fi.is_signed and np.signbit(v)
    vpos = -v if sign else v

    if fi.has_subnormals and vpos <= fi.smallest_subnormal / 2:
        # At or below half the smallest subnormal: encode as zero
        isig = 0
        biased_exp = 0
    else:
        sig, exp = np.frexp(vpos)
        exp = int(exp)  # All calculations in Python ints

        # sig in range [0.5, 1)
        sig *= 2
        exp -= 1
        # now sig in range [1, 2)

        biased_exp = exp + fi.expBias
        if biased_exp < 1 and fi.has_subnormals:
            # subnormal
            sig *= 2.0 ** (biased_exp - 1)
            biased_exp = 0
            assert vpos == sig * 2 ** (1 - fi.expBias)
        else:
            # Normal value: drop the implicit leading one
            if sig > 0:
                sig -= 1.0

        # floor() truncates any bits beyond the t stored significand bits
        isig = math.floor(sig * 2**t)

    # Zero
    if isig == 0 and biased_exp == 0 and fi.has_zero:
        if sign and fi.has_nz:
            return fi.code_of_negzero
        else:
            return fi.code_of_zero

    # Nonzero
    assert isig < 2**t
    assert biased_exp < 2**fi.expBits or fi.is_twos_complement

    # Handle two's complement encoding
    if fi.is_twos_complement and sign:
        isig = (1 << t) - isig

    # Pack values into a single integer
    code = (int(sign) << (k - 1)) | (biased_exp << t) | (isig << 0)

    return code


# Patch scaffolding preserved from the original text:
# diff --git a/src/gfloat/types.py b/src/gfloat/types.py new file mode 100644 index 0000000..3739bcd --- /dev/null +++ b/src/gfloat/types.py @@ -0,0 +1,408 @@
# Copyright (c) 2024 Graphcore Ltd. All rights reserved.
from dataclasses import dataclass
from enum import Enum


class RoundMode(Enum):
    """
    Enum for IEEE-754 rounding modes.

    Result r is obtained from input v depending on rounding mode as follows
    """

    TowardZero = 1  #: :math:`\max \{ r ~ s.t. ~ |r| \le |v| \}`
    TowardNegative = 2  #: :math:`\max \{ r ~ s.t. ~ r \le v \}`
    TowardPositive = 3  #: :math:`\min \{ r ~ s.t. ~ r \ge v \}`
    TiesToEven = 4  #: Round to nearest, ties to even
    TiesToAway = 5  #: Round to nearest, ties away from zero


class FloatClass(Enum):
    """
    Enum for the classification of a FloatValue.
    """

    NORMAL = 1  #: A positive or negative normalized non-zero value
    SUBNORMAL = 2  #: A positive or negative subnormal value
    ZERO = 3  #: A positive or negative zero value
    INFINITE = 4  #: A positive or negative infinity (+/-Inf)
    NAN = 5  #: Not a Number (NaN)


@dataclass
class FloatValue:
    """
    A floating-point value decoded in great detail.
    """

    code: int  #: Integer code point

    #: Value. Assumed to be exactly round-trippable to python float.
    #: This is true for all <64bit formats known in 2023.
    fval: float

    exp: int  #: Raw exponent without bias
    expval: int  #: Exponent, bias subtracted
    significand: int  #: Significand as an integer
    fsignificand: float  #: Significand as a float in the range [0,2)
    signbit: int  #: Sign bit: 1 => negative, 0 => positive
    fclass: FloatClass  #: See FloatClass


@dataclass
class FormatInfo:
    """
    Class describing a floating-point format, parametrized
    by width, precision, and special value encoding rules.

    """

    #: Short name for the format, e.g. binary32, bfloat16
    name: str

    #: Number of bits in the format
    k: int

    #: Number of significand bits (including implicit leading bit)
    precision: int

    #: Largest exponent, emax, which shall equal floor(log_2(maxFinite))
    emax: int

    #: Set if format encodes -0 at (sgn=1,exp=0,significand=0).
    #: If False, that encoding decodes to a NaN labelled NaN_0
    has_nz: bool

    #: Set if format includes +/- Infinity.
    #: If set, the non-nan value with the highest encoding for each sign (s)
    #: is replaced by (s)Inf.
    has_infs: bool

    #: Number of NaNs that are encoded in the highest encodings for each sign
    num_high_nans: int

    #: Set if format encodes subnormals
    has_subnormals: bool

    #: Set if the format has a sign bit
    is_signed: bool

    #: Set if the format uses two's complement encoding for the significand
    is_twos_complement: bool

    #: ## Derived values

    @property
    def tSignificandBits(self) -> int:
        """The number of trailing significand bits, t"""
        return self.precision - 1

    @property
    def expBits(self) -> int:
        """The number of exponent bits, w"""
        return self.k - self.precision + (0 if self.is_signed else 1)

    @property
    def signBits(self) -> int:
        """The number of sign bits, s"""
        return 1 if self.is_signed else 0

    @property
    def expBias(self) -> int:
        """The exponent bias derived from (p,emax)

        This is the bias that should be applied so that
           :math:`floor(log_2(maxFinite)) = emax`
        """
        # Calculate whether all of the all-bits-one-exponent values contain specials.
        # If so, emax will be obtained for exponent value 2^w-2, otherwise it is 2^w-1
        t = self.tSignificandBits
        num_posinfs = 1 if self.has_infs else 0
        all_bits_one_full = (self.num_high_nans + num_posinfs == 2**t) or (
            self.expBits == 0 and self.has_infs
        )

        # Compute exponent bias.
        exp_for_emax = 2**self.expBits - (2 if all_bits_one_full else 1)
        return exp_for_emax - self.emax

    # numpy finfo properties
    @property
    def bits(self) -> int:
        """
        The number of bits occupied by the type.
        """
        return self.k

    # @property
    # def dtype(self) -> np.dtype:
    #     """
    #     Returns the dtype for which `finfo` returns information. For complex
    #     input, the returned dtype is the associated ``float*`` dtype for its
    #     real and complex components.
    #     """

    @property
    def eps(self) -> float:
        """
        The difference between 1.0 and the smallest representable float
        larger than 1.0. For example, for 64-bit binary floats in the IEEE-754
        standard, ``eps = 2**-52``, approximately 2.22e-16.
        """
        # TODO: Check if 1.0 is subnormal for any reasonable format, e.g. p3109(7)?
        return 2**self.machep

    @property
    def epsneg(self) -> float:
        """
        The difference between 1.0 and the largest representable float
        less than 1.0. For example, for 64-bit binary floats in the IEEE-754
        standard, ``epsneg = 2**-53``, approximately 1.11e-16.
        """
        return self.eps / 2

    @property
    def iexp(self) -> int:
        """
        The number of bits in the exponent portion of the floating point
        representation.
        """
        return self.expBits

    @property
    def machep(self) -> int:
        """
        The exponent that yields `eps`.
        """
        return -self.tSignificandBits

    @property
    def max(self) -> float:
        """
        The largest representable number.
        """
        num_posinfs = 1 if self.has_infs else 0
        num_non_finites = self.num_high_nans + num_posinfs
        if num_non_finites == 2**self.tSignificandBits:
            # All-bits-one exponent field is full, value is in the
            # binade below, so significand is 0xFFF..F
            isig = 2**self.tSignificandBits - 1
        else:
            # All-bits-one exponent field is not full, value is in the
            # final binade, so significand is 0xFFF..F - num_non_finites
            isig = 2**self.tSignificandBits - 1 - num_non_finites

        if self.is_all_subnormal:
            # No implicit leading one when every value is subnormal
            return 2**self.emax * (isig * 2 ** (1 - self.tSignificandBits))
        else:
            return 2**self.emax * (1.0 + isig * 2**-self.tSignificandBits)

    @property
    def maxexp(self) -> int:
        """
        The smallest positive power of the base (2) that causes overflow.
        """
        return self.emax + 1

    @property
    def min(self) -> float:
        """
        The smallest representable number, typically ``-max``.
        """
        if self.is_signed:
            if not self.is_twos_complement:
                return -self.max
            else:
                assert not self.has_infs and self.num_high_nans == 0 and not self.has_nz
                return -(2 ** (self.emax + 1))
        elif self.has_zero:
            return 0.0
        else:
            return 2**-self.expBias

    @property
    def num_nans(self) -> int:
        """
        The number of code points which decode to NaN
        """
        if not self.is_signed:
            return self.num_high_nans

        # Signed
        if self.is_twos_complement:
            assert not self.has_infs and self.num_high_nans == 0 and not self.has_nz
            return 0

        return (0 if self.has_nz else 1) + 2 * self.num_high_nans

    @property
    def code_of_nan(self) -> int:
        """
        Return a codepoint for a NaN

        Raises:
          ValueError: The format has no NaN encoding.
        """
        if self.num_high_nans > 0:
            return 2 ** (self.k) - 1
        # The (sgn=1,exp=0,significand=0) code point is a NaN only for signed,
        # sign-magnitude formats without negative zero.  In two's complement
        # or unsigned formats that code point is an ordinary value, so it must
        # not be reported as a NaN (this mirrors `num_nans`).
        if self.is_signed and not self.is_twos_complement and not self.has_nz:
            return 2 ** (self.k - 1)
        raise ValueError(f"No NaN in {self}")

    @property
    def code_of_posinf(self) -> int:
        """
        Return a codepoint for positive infinity

        Raises:
          ValueError: The format has no infinities.
        """
        if not self.has_infs:
            raise ValueError(f"No Inf in {self}")

        return 2 ** (self.k - 1) - 1 - self.num_high_nans

    @property
    def code_of_neginf(self) -> int:
        """
        Return a codepoint for negative infinity

        Raises:
          ValueError: The format has no infinities.
        """
        if not self.has_infs:
            raise ValueError(f"No Inf in {self}")

        return 2**self.k - 1 - self.num_high_nans

    @property
    def code_of_zero(self) -> int:
        """
        Return a codepoint for (non-negative) zero
        """
        assert self.has_zero
        return 0

    @property
    def has_zero(self) -> bool:
        """
        Does the format have zero?

        This is false if the mantissa is 0 width and we don't have subnormals -
        essentially the mantissa is always decoded as 1.
        If we have subnormals, the only subnormal is zero, and the mantissa is
        always decoded as 0.
        """
        return self.precision > 1 or self.has_subnormals

    @property
    def code_of_negzero(self) -> int:
        """
        Return a codepoint for negative zero

        Raises:
          ValueError: The format has no negative zero.
        """
        if not self.has_nz:
            raise ValueError(f"No negative zero in {self}")

        return 2 ** (self.k - 1)

    @property
    def code_of_max(self) -> int:
        """
        Return a codepoint for fi.max
        """
        return 2 ** (self.k - self.signBits) - self.num_high_nans - self.has_infs - 1

    @property
    def code_of_min(self) -> int:
        """
        Return a codepoint for fi.min
        """
        if self.is_signed and not self.is_twos_complement:
            return 2**self.k - self.num_high_nans - self.has_infs - 1
        elif self.is_signed and self.is_twos_complement:
            return 2 ** (self.k - 1)
        else:
            return 0  # codepoint of smallest value, whether 0 or 2^-expBias

    # @property
    # def minexp(self) -> int:
    #     """
    #     The most negative power of the base (2) consistent with there
    #     being no leading 0's in the mantissa.
    #     """

    # @property
    # def negep(self) -> int:
    #     """
    #     The exponent that yields `epsneg`.
    #     """

    # @property
    # def nexp(self) -> int:
    #     """
    #     The number of bits in the exponent including its sign and bias.
    #     """

    # @property
    # def nmant(self) -> int:
    #     """
    #     The number of bits in the mantissa.
    #     """

    # @property
    # def precision(self) -> int:
    #     """
    #     The approximate number of decimal digits to which this kind of
    #     float is precise.
    #     """

    # @property
    # def resolution(self) -> float:
    #     """
    #     The approximate decimal resolution of this type, i.e.,
    #     ``10**-precision``.
    #     """

    # @property
    # def tiny(self) -> float:
    #     """
    #     An alias for `smallest_normal`, kept for backwards compatibility.
    #     """

    @property
    def smallest_normal(self) -> float:
        """
        The smallest positive floating point number with 1 as leading bit in
        the significand following IEEE-754.
        """
        if self.has_subnormals:
            return 2 ** (1 - self.expBias)
        elif self.has_zero:
            return 2**-self.expBias + 2 ** (-self.expBias - self.tSignificandBits)
        else:
            return 2**-self.expBias

    @property
    def smallest_subnormal(self) -> float:
        """
        The smallest positive floating point number with 0 as leading bit in
        the significand following IEEE-754.
        """
        assert self.has_subnormals, "not implemented"
        return 2 ** -(self.expBias + self.tSignificandBits - 1)

    @property
    def smallest(self) -> float:
        """
        The smallest positive floating point number.
        """
        if self.has_subnormals:
            return self.smallest_subnormal
        else:
            return self.smallest_normal

    @property
    def is_all_subnormal(self) -> bool:
        """
        Are all encoded values subnormal?
        """
        return (self.expBits == 0) and self.has_subnormals

    @property
    def __name__(self) -> str:
        return self.name

    def __str__(self) -> str:
        return f"{self.name}"


# Patch scaffolding preserved from the original text:
# diff --git a/test/test_block.py b/test/test_block.py new file mode 100644 index 0000000..d51c431 --- /dev/null +++ b/test/test_block.py @@ -0,0 +1,30 @@
# Copyright (c) 2024 Graphcore Ltd. All rights reserved.
import numpy as np
import pytest

from gfloat import (
    decode_float,
    decode_block,
    quantize_block,
    encode_block,
    compute_scale_amax,
)
from gfloat.formats import *


@pytest.mark.parametrize("fi", all_block_formats)
def test_blocks(fi: BlockFormatInfo) -> None:
    """Round-trip a 32-element ramp through each block format."""
    element_format = fi.etype
    inputs = np.linspace(-37.0, 42.0, 32)

    # Explicit path: choose the scale, encode the pre-scaled values, decode.
    block_scale = compute_scale_amax(element_format.emax, inputs)
    codes = list(encode_block(fi, block_scale, inputs / block_scale))
    roundtrip = list(decode_block(fi, codes))

    # Tolerance is half the gap between the two largest element values,
    # expressed in the units of the unscaled inputs.
    below_max = decode_float(element_format, element_format.code_of_max - 1).fval
    tolerance = (element_format.max - below_max) * block_scale / 2
    np.testing.assert_allclose(roundtrip, inputs, atol=tolerance)

    # The one-call path must reproduce the explicit path exactly.
    quantized = quantize_block(fi, inputs, compute_scale_amax)
    np.testing.assert_allclose(quantized, roundtrip, atol=0.0)


# Patch scaffolding preserved from the original text:
# diff --git a/test/test_decode.py b/test/test_decode.py new file mode 100644 index 0000000..c356c47 --- /dev/null +++ b/test/test_decode.py @@ -0,0 +1,259 @@
# Copyright (c) 2024 Graphcore Ltd. All rights reserved.
import ml_dtypes
import numpy as np
import pytest

from gfloat import FloatClass, decode_float
from gfloat.formats import *


def _isnegzero(x: float) -> bool:
    """Return True if :paramref:`x` is zero with its sign bit set."""
    return (x == 0) and (np.signbit(x) == 1)


def test_spot_check_ocp_e5m2() -> None:
    """Decode selected OCP E5M2 code points and compare with known values."""
    fi = format_info_ocp_e5m2
    dec = lambda code: decode_float(fi, code).fval
    fclass = lambda code: decode_float(fi, code).fclass
    assert dec(0x01) == 2.0**-16
    assert dec(0x40) == 2.0
    assert _isnegzero(dec(0x80))
    assert dec(0x7B) == 57344.0
    assert dec(0x7C) == np.inf
    assert np.floor(np.log2(dec(0x7B))) == fi.emax
    assert dec(0xFC) == -np.inf
    assert np.isnan(dec(0x7F))
    assert fclass(0x80) == FloatClass.ZERO
    assert fclass(0x00) == FloatClass.ZERO


def test_spot_check_ocp_e4m3() -> None:
    """Decode selected OCP E4M3 code points and compare with known values."""
    fi = format_info_ocp_e4m3
    dec = lambda code: decode_float(fi, code).fval

    assert dec(0x40) == 2.0
    assert dec(0x01) == 2.0**-9
    assert _isnegzero(dec(0x80))
    assert np.isnan(dec(0x7F))
    assert dec(0x7E) == 448.0
    assert np.floor(np.log2(dec(0x7E))) == fi.emax


def test_spot_check_p3109_p3() -> None:
    """Decode selected P3109 precision-3 code points."""
    fi = format_info_p3109(3)
    dec = lambda code: decode_float(fi, code).fval

    assert dec(0x01) == 2.0**-17
    assert dec(0x40) == 1.0
    assert np.isnan(dec(0x80))
    assert dec(0xFF) == -np.inf
    assert np.floor(np.log2(dec(0x7E))) == fi.emax


def test_spot_check_p3109_p1() -> None:
    """Decode selected P3109 precision-1 code points."""
    fi = format_info_p3109(1)
    dec = lambda code: decode_float(fi, code).fval

    assert dec(0x01) == 2.0**-62
    assert dec(0x40) == 2.0
    assert np.isnan(dec(0x80))
    assert dec(0xFF) == -np.inf
    assert np.floor(np.log2(dec(0x7E))) == fi.emax


def test_spot_check_binary16() -> None:
    """Decode selected IEEE-754 binary16 code points."""
    fi = format_info_binary16
    dec = lambda code: decode_float(fi, code).fval

    assert dec(0x3C00) == 1.0
    assert dec(0x3C01) == 1.0 + 2**-10
    assert dec(0x4000) == 2.0
    assert dec(0x0001) == 2**-24
    assert dec(0x7BFF) == 65504.0
    assert np.isinf(dec(0x7C00))
    assert np.isnan(dec(0x7C01))
    assert np.isnan(dec(0x7FFF))


def test_spot_check_bfloat16() -> None:
    """Decode selected bfloat16 code points."""
    fi = format_info_bfloat16
    dec = lambda code: decode_float(fi, code).fval

    assert dec(0x3F80) == 1
    assert dec(0x4000) == 2
    assert dec(0x0001) == 2**-133
    assert dec(0x4780) == 65536.0
    assert np.isinf(dec(0x7F80))
    assert np.isnan(dec(0x7F81))
    assert np.isnan(dec(0x7FFF))


def test_spot_check_ocp_e2m3() -> None:
    """Check OCP E2M3 format properties and selected code points."""
    # Test against Table 4 in "OCP Microscaling Formats (MX) v1.0 Spec"
    fi = format_info_ocp_e2m3
    dec = lambda code: decode_float(fi, code).fval

    assert fi.max == 7.5
    assert fi.smallest_subnormal == 0.125
    assert fi.smallest_normal == 1.0
    assert not fi.has_infs
    assert fi.num_nans == 0
    assert fi.has_nz

    assert dec(0b000000) == 0
    assert dec(0b011111) == 7.5
    assert _isnegzero(dec(0b100000))


def test_spot_check_ocp_e3m2() -> None:
    """Check OCP E3M2 format properties and selected code points."""
    # Test against Table 4 in "OCP Microscaling Formats (MX) v1.0 Spec"
    fi = format_info_ocp_e3m2
    dec = lambda code: decode_float(fi, code).fval

    assert fi.max == 28.0
    assert fi.smallest_subnormal == 0.0625
    assert fi.smallest_normal == 0.25
    assert not fi.has_infs
    assert fi.num_nans == 0
    assert fi.has_nz

    assert dec(0b000000) == 0
    assert dec(0b011111) == 28.0
    assert _isnegzero(dec(0b100000))


def test_spot_check_ocp_e2m1() -> None:
    """Check OCP E2M1 format properties and every non-negative code point."""
    # Test against Table 5 in "OCP Microscaling Formats (MX) v1.0 Spec"
    fi = format_info_ocp_e2m1
    dec = lambda code: decode_float(fi, code).fval

    assert fi.max == 6.0
    assert fi.smallest_subnormal == 0.5
    assert fi.smallest_normal == 1.0
    assert not fi.has_infs
    assert fi.num_nans == 0
    assert fi.has_nz

    assert dec(0b0000) == 0
    assert dec(0b0001) == 0.5
    assert dec(0b0010) == 1.0
    assert dec(0b0011) == 1.5
    assert dec(0b0100) == 2.0
    assert dec(0b0101) == 3.0
    assert dec(0b0110) == 4.0
    assert dec(0b0111) == 6.0
    assert _isnegzero(dec(0b1000))


def test_spot_check_ocp_e8m0() -> None:
    """Check OCP E8M0 format properties and selected code points."""
    # Test against Table 7 in "OCP Microscaling Formats (MX) v1.0 Spec"
    fi = format_info_ocp_e8m0
    dec = lambda code: decode_float(fi, code).fval
    fclass = lambda code: decode_float(fi, code).fclass
    assert fi.expBias == 127
    assert fi.max == 2.0**127
    assert fi.smallest == 2.0**-127
    assert not fi.has_infs
    assert fi.num_nans == 1

    assert dec(0x00) == 2.0**-127
    assert dec(0x01) == 2.0**-126
    assert dec(0x7F) == 1.0
    assert np.isnan(dec(0xFF))
    assert fclass(0x80) == FloatClass.NORMAL
    assert fclass(0x00) == FloatClass.NORMAL


def test_spot_check_ocp_int8() -> None:
    """Check OCP INT8 format properties and selected code points."""
    # Test against Table TODO in "OCP Microscaling Formats (MX) v1.0 Spec"
    fi = format_info_ocp_int8
    dec = lambda code: decode_float(fi, code).fval

    assert fi.max == 1.0 + 63.0 / 64
    assert fi.smallest == 2.0**-6
    assert not fi.has_infs
    assert fi.num_nans == 0

    assert dec(0x00) == 0.0
    assert dec(0x01) == fi.smallest
    assert dec(0x7F) == fi.max
    assert dec(0x80) == -2.0
    assert dec(0x80) == fi.min
    assert dec(0xFF) == -fi.smallest


@pytest.mark.parametrize("fi", p3109_formats)
def test_specials(fi: FormatInfo) -> None:
    """Check the special-value code points of every format in `p3109_formats`."""
    assert fi.code_of_nan == 0x80
    assert fi.code_of_zero == 0x00
    assert fi.code_of_posinf == 0x7F
    assert fi.code_of_neginf == 0xFF


@pytest.mark.parametrize("fi", all_formats)
def test_specials_decode(fi: FormatInfo) -> None:
    """Check that each `code_of_*` property decodes to the value it names."""
    dec = lambda v: decode_float(fi, v).fval

    if fi.has_zero:
        assert dec(fi.code_of_zero) == 0

    # code_of_nan is only queried for formats that report at least one NaN
    if fi.num_nans > 0:
        assert np.isnan(dec(fi.code_of_nan))

    if fi.has_infs:
        assert dec(fi.code_of_posinf) == np.inf
        assert dec(fi.code_of_neginf) == -np.inf

    assert dec(fi.code_of_max) == fi.max
    assert dec(fi.code_of_min) == fi.min

    # The smallest positive value is code 1 when code 0 is zero, else code 0
    if fi.has_zero:
        assert dec(1) == fi.smallest
    else:
        assert dec(0) == fi.smallest


@pytest.mark.parametrize(
    "fmt,npfmt,int_dtype",
    [
        (format_info_binary16, np.float16, np.uint16),
        (format_info_bfloat16, ml_dtypes.bfloat16, np.uint16),
        (format_info_ocp_e4m3, ml_dtypes.float8_e4m3fn, np.uint8),
    ],
)
def test_consistent_decodes_all_values(
    fmt: FormatInfo, npfmt: np.dtype, int_dtype: np.dtype
) -> None:
    """Compare decode_float with the numpy/ml_dtypes view of every code point."""
    npivals = np.arange(
        np.iinfo(int_dtype).min, int(np.iinfo(int_dtype).max) + 1, dtype=int_dtype
    )
    # Reinterpret the integer codes as the reference floating-point dtype
    npfvals = npivals.view(dtype=npfmt)
    for i, npfval in zip(npivals, npfvals):
        val = decode_float(fmt, int(i))
        np.testing.assert_equal(val.fval, npfval)


@pytest.mark.parametrize("v", [-1, 0x10000])
def test_except(v: int) -> None:
    """Out-of-range binary16 code points must raise ValueError."""
    with pytest.raises(ValueError):
        decode_float(format_info_binary16, v)


@pytest.mark.parametrize("fi", [fi for fi in all_formats if fi.bits <= 8])
def test_dense(fi: FormatInfo) -> None:
    """Decode every code point of the formats up to 8 bits and check extremes."""
    fvs = [decode_float(fi, i) for i in range(0, 2**fi.bits)]

    vals = np.array([fv.fval for fv in fvs])

    assert np.min(vals[np.isfinite(vals)]) == fi.min
    assert np.max(vals[np.isfinite(vals)]) == fi.max
    assert np.min(vals[np.isfinite(vals) & (vals > 0)]) == fi.smallest

    if fi.has_subnormals:
        vals_subnormal = np.array(
            [fv.fval for fv in fvs if fv.fclass == FloatClass.SUBNORMAL and fv.fval > 0]
        )
        if len(vals_subnormal):
            # In some formats, zero is the only "subnormal"
            assert np.min(vals_subnormal) == fi.smallest_subnormal


# Patch scaffolding preserved from the original text:
# diff --git a/test/test_encode.py b/test/test_encode.py new file mode 100644 index 0000000..ea5bf68 --- /dev/null +++ b/test/test_encode.py @@ -0,0 +1,47 @@
# Copyright (c) 2024 Graphcore Ltd. All rights reserved.
+ +import ml_dtypes +import numpy as np +import pytest + +from gfloat import decode_float, encode_float +from gfloat.formats import * + + +@pytest.mark.parametrize("fi", all_formats) +def test_encode(fi: FormatInfo) -> None: + dec = lambda v: decode_float(fi, v).fval + + if fi.bits <= 8: + step = 1 + elif fi.bits <= 16: + step = 13 + elif fi.bits <= 32: + step = 73013 + elif fi.bits <= 64: + step = (73013 << 32) + 39 + + for i in range(0, 2**fi.bits, step): + fv = decode_float(fi, i) + code = encode_float(fi, fv.fval) + assert (i == code) or np.isnan(fv.fval) + fv2 = decode_float(fi, code) + np.testing.assert_equal(fv2.fval, fv.fval) + + +@pytest.mark.parametrize("fi", all_formats) +def test_encode_edges(fi: FormatInfo) -> None: + assert encode_float(fi, fi.max) == fi.code_of_max + + assert encode_float(fi, fi.max * 1.25) == ( + fi.code_of_posinf + if fi.has_infs + else fi.code_of_nan if fi.num_nans > 0 else fi.code_of_max + ) + + if fi.is_signed: + assert encode_float(fi, fi.min * 1.25) == ( + fi.code_of_neginf + if fi.has_infs + else fi.code_of_nan if fi.num_nans > 0 else fi.code_of_min + ) diff --git a/test/test_finfo.py b/test/test_finfo.py new file mode 100644 index 0000000..c8b2862 --- /dev/null +++ b/test/test_finfo.py @@ -0,0 +1,32 @@ +# Copyright (c) 2024 Graphcore Ltd. All rights reserved. 
+ +# Test that finfo methods on FloatFormat agree with numpy/ml_dtypes + +import ml_dtypes +import numpy as np +import pytest + +from gfloat import decode_float, round_float +from gfloat.formats import * + + +@pytest.mark.parametrize( + "fmt,npfmt", + [ + (format_info_ocp_e5m2, ml_dtypes.float8_e5m2), + (format_info_ocp_e4m3, ml_dtypes.float8_e4m3fn), + (format_info_binary16, np.float16), + (format_info_bfloat16, ml_dtypes.bfloat16), + ], +) +def test_finfo(fmt: FormatInfo, npfmt: np.dtype) -> None: + assert fmt.eps == ml_dtypes.finfo(npfmt).eps + assert fmt.epsneg == ml_dtypes.finfo(npfmt).epsneg + assert fmt.max == ml_dtypes.finfo(npfmt).max + assert fmt.maxexp == ml_dtypes.finfo(npfmt).maxexp + + +def test_constants() -> None: + assert format_info_p3109(1).smallest_subnormal == 2.0**-62 + assert format_info_p3109(4).smallest_subnormal == 2.0**-10 + assert format_info_p3109(7).smallest_subnormal == 2.0**-6 diff --git a/test/test_microxcaling.py b/test/test_microxcaling.py new file mode 100644 index 0000000..fc287fa --- /dev/null +++ b/test/test_microxcaling.py @@ -0,0 +1,91 @@ +# Copyright (c) 2024 Graphcore Ltd. All rights reserved. 
+ +import pytest + +import numpy as np +from numpy.typing import NDArray + +import torch + +from mx.mx_ops import quantize_mx_op +from mx.formats import ElemFormat + + +from gfloat import ( + BlockFormatInfo, + RoundMode, + quantize_block, + compute_scale_amax, + encode_block, +) +from gfloat.formats import * + + +@pytest.mark.parametrize( + ("mx_round,gf_round"), + [("even", RoundMode.TiesToEven), ("nearest", RoundMode.TiesToAway)], +) +@pytest.mark.parametrize( + ("mx_etype,gf_etype"), + [ + (ElemFormat.int8, format_info_ocp_int8), + (ElemFormat.fp6_e3m2, format_info_ocp_e3m2), + (ElemFormat.fp4_e2m1, format_info_ocp_e2m1), + ], +) +@pytest.mark.parametrize( + "A", + [ + np.arange(32) / 2 - 5, + np.zeros(32), + ], + ids=[ + "tennish", + "zeros", + ], +) +def test_mx( + mx_etype: ElemFormat, + gf_etype: FormatInfo, + mx_round: str, + gf_round: RoundMode, + A: NDArray[np.float64], +) -> None: + # MX: Declare block format + mx_specs = dict( + block_size=32, + scale_bits=8, + shared_exp_method="max", + mx_flush_fp32_subnorms=False, + custom_cuda=False, + ) + + # MX: Quantize + mx_dq = quantize_mx_op(torch.tensor(A), mx_specs, mx_etype, axes=0, round=mx_round) + + # GFloat: Declare block format + fi = BlockFormatInfo("test", gf_etype, 32, format_info_ocp_e8m0) + + # GFloat: Quantize + gf_dq = quantize_block(fi, A, compute_scale_amax, gf_round) + + # Compare + np.testing.assert_allclose(gf_dq, mx_dq) + + +def test_mx_exceptions() -> None: + fi = BlockFormatInfo("test", format_info_ocp_e2m1, 32, format_info_ocp_e8m0) + + A = np.ones(32) * 2.0**-139 + + s = compute_scale_amax(fi.etype.emax, A) + assert s == 2.0**-127 + + with pytest.raises(ValueError, match="out of range"): + list(encode_block(fi, fi.stype.max * 2, A)) + + assert not fi.stype.is_signed + scale = fi.stype.min / 2 + assert scale != 0 + with pytest.raises(ValueError, match="out of range"): + list(encode_block(fi, scale, A)) diff --git a/test/test_printing.py b/test/test_printing.py new file mode 100644 
index 0000000..c97c5d9 --- /dev/null +++ b/test/test_printing.py @@ -0,0 +1,23 @@ +# Copyright (c) 2024 Graphcore Ltd. All rights reserved. + +import numpy as np + +from gfloat import float_pow2str, float_tilde_unless_roundtrip_str + + +def test_pow2str() -> None: + assert float_pow2str(127) == "127/64*2^6" + assert float_pow2str(1.0625 * 2.0**-12) == "17/16*2^-12" + assert float_pow2str(3.0 * 2.0**-12) == "3/2*2^-11" + assert float_pow2str(3.0 / 16 * 2.0**-8) == "3/2*2^-11" + assert float_pow2str(3.0 / 16 * 2.0**-8, min_exponent=-8) == "3/16*2^-8" + + +def test_tilde_unless_roundtrip() -> None: + assert float_tilde_unless_roundtrip_str(1.52587892525e-05) == "~1.5258789e-05" + assert float_tilde_unless_roundtrip_str(28672.0) == "28672.0" + assert float_tilde_unless_roundtrip_str(0.0009765625) == "0.0009765625" + assert float_tilde_unless_roundtrip_str(120.0) == "120.0" + assert float_tilde_unless_roundtrip_str(0.0010001, width=7, d=4) == "~0.0010" + assert float_tilde_unless_roundtrip_str(np.inf, width=7, d=4) == "inf" + assert float_tilde_unless_roundtrip_str(np.nan, width=7, d=4) == "nan" diff --git a/test/test_round.py b/test/test_round.py new file mode 100644 index 0000000..2c6beb8 --- /dev/null +++ b/test/test_round.py @@ -0,0 +1,455 @@ +# Copyright (c) 2024 Graphcore Ltd. All rights reserved. 
+ +from typing import Type + +import ml_dtypes +import numpy as np +import pytest + +from gfloat import RoundMode, decode_float, round_float +from gfloat.formats import * + + +def test_round_p3109() -> None: + fi = format_info_p3109(4) + assert round_float(fi, 0.0068359375) == 0.0068359375 + assert round_float(fi, 0.0029296875) == 0.0029296875 + assert round_float(fi, 0.0078125) == 0.0078125 + assert round_float(fi, 0.017578125) == 0.017578125 + assert round_float(fi, 224.0) == 224.0 + assert round_float(fi, 240.0) == np.inf + + assert round_float(fi, 224.1, RoundMode.TowardPositive) == np.inf + + assert round_float(fi, 232.0) == 224.0 + assert round_float(fi, 232.0, RoundMode.TiesToAway) == np.inf + assert round_float(fi, 232.0, RoundMode.TowardZero) == 224.0 + assert round_float(fi, 232.0, RoundMode.TowardNegative) == 224.0 + assert round_float(fi, 232.0, RoundMode.TowardPositive) == np.inf + + assert round_float(fi, -232.0) == -224.0 + assert round_float(fi, -232.0, RoundMode.TiesToAway) == -np.inf + assert round_float(fi, -232.0, RoundMode.TowardZero) == -224.0 + assert round_float(fi, -232.0, RoundMode.TowardNegative) == -np.inf + assert round_float(fi, -232.0, RoundMode.TowardPositive) == -224.0 + + assert round_float(fi, 232.1) == np.inf + + +p4min = 2**-10 # smallest subnormal in p4 + + +@pytest.mark.parametrize( + "mode, vals", + ( + ( + RoundMode.TowardZero, + ( + (p4min, p4min), + (p4min / 4, 0.0), + (p4min / 2, 0.0), + (-p4min, -p4min), + (-p4min / 4, 0.0), + (-p4min / 2, 0.0), + (64.0, 64.0), + (63.0, 60.0), + (62.0, 60.0), + (-64.0, -64.0), + (-63.0, -60.0), + (-62.0, -60.0), + ), + ), + ( + RoundMode.TowardPositive, + ( + (p4min, p4min), + (p4min / 4, p4min), + (p4min / 2, p4min), + (-p4min, -p4min), + (-p4min / 4, 0.0), + (-p4min / 2, 0.0), + (64.0, 64.0), + (63.0, 64.0), + (62.0, 64.0), + (-64.0, -64.0), + (-63.0, -60.0), + (-62.0, -60.0), + ), + ), + ( + RoundMode.TowardNegative, + ( + (p4min, p4min), + (p4min / 4, 0.0), + (p4min / 2, 0.0), + 
(-p4min, -p4min), + (-p4min / 4, -p4min), + (-p4min / 2, -p4min), + (64.0, 64.0), + (63.0, 60.0), + (62.0, 60.0), + (-64.0, -64.0), + (-63.0, -64.0), + (-62.0, -64.0), + ), + ), + ( + RoundMode.TiesToEven, + ( + (p4min, p4min), + (p4min / 4, 0.0), + (p4min / 2, 0.0), + (-p4min, -p4min), + (-p4min / 4, 0.0), + (-p4min / 2, 0.0), + (64.0, 64.0), + (63.0, 64.0), + (62.0, 64.0), + (61.0, 60.0), + (-64.0, -64.0), + (-63.0, -64.0), + (-62.0, -64.0), + (-61.0, -60.0), + (-58.0, -56.0), + ), + ), + ( + RoundMode.TiesToAway, + ( + (p4min, p4min), + (p4min / 4, 0.0), + (p4min / 2, p4min), + (-p4min, -p4min), + (-p4min / 4, 0.0), + (-p4min / 2, -p4min), + (64.0, 64.0), + (63.0, 64.0), + (62.0, 64.0), + (61.0, 60.0), + (-64.0, -64.0), + (-63.0, -64.0), + (-62.0, -64.0), + (-61.0, -60.0), + (-58.0, -60.0), + ), + ), + ), +) +def test_round_p3109b(mode: RoundMode, vals: list) -> None: + fi = format_info_p3109(4) + + for sat in (True, False): + for val, expected in vals: + assert round_float(fi, val, mode, sat) == expected + + +p4max = 224.0 +p4maxup = 240.0 +p4maxhalfup = (p4max + p4maxup) / 2 + + +@pytest.mark.parametrize( + "modesat, vals", + ( + ( + (RoundMode.TowardZero, True), + ( + (p4max, p4max), + (p4maxhalfup, p4max), + (p4maxup, p4max), + (np.inf, p4max), + (-p4max, -p4max), + (-p4maxhalfup, -p4max), + (-p4maxup, -p4max), + (-np.inf, -p4max), + ), + ), + ( + (RoundMode.TowardZero, False), + ( + (p4max, p4max), + (p4maxhalfup, p4max), + (p4maxup, p4max), + (np.inf, np.inf), + (-p4max, -p4max), + (-p4maxhalfup, -p4max), + (-p4maxup, -p4max), + (-np.inf, -np.inf), + ), + ), + ( + (RoundMode.TowardPositive, True), + ( + (p4max, p4max), + (p4maxhalfup, p4max), + (p4maxup, p4max), + (np.inf, p4max), + (-p4max, -p4max), + (-p4maxhalfup, -p4max), + (-p4maxup, -p4max), + (-np.inf, -p4max), + ), + ), + ( + (RoundMode.TowardPositive, False), + ( + (p4max, p4max), + (p4maxhalfup, np.inf), + (p4maxup, np.inf), + (np.inf, np.inf), + (-p4max, -p4max), + (-p4maxhalfup, -p4max), + 
(-p4maxup, -p4max), + (-np.inf, -np.inf), + ), + ), + ( + (RoundMode.TowardNegative, True), + ( + (p4max, p4max), + (p4maxhalfup, p4max), + (p4maxup, p4max), + (np.inf, p4max), + (-p4max, -p4max), + (-p4maxhalfup, -p4max), + (-p4maxup, -p4max), + (-np.inf, -p4max), + ), + ), + ( + (RoundMode.TowardNegative, False), + ( + (p4max, p4max), + (p4maxhalfup, p4max), + (p4maxup, p4max), + (np.inf, np.inf), + (-p4max, -p4max), + (-p4maxhalfup, -np.inf), + (-p4maxup, -np.inf), + (-np.inf, -np.inf), + ), + ), + ( + (RoundMode.TiesToEven, True), + ( + (p4max, p4max), + (p4maxhalfup, p4max), + (p4maxup, p4max), + (np.inf, p4max), + (-p4max, -p4max), + (-p4maxhalfup, -p4max), + (-p4maxup, -p4max), + (-np.inf, -p4max), + ), + ), + ( + (RoundMode.TiesToEven, False), + ( + (p4max, p4max), + (p4maxhalfup, p4max), + (p4maxup, np.inf), + (np.inf, np.inf), + (-p4max, -p4max), + (-p4maxhalfup, -p4max), + (-p4maxup, -np.inf), + (-np.inf, -np.inf), + ), + ), + ( + (RoundMode.TiesToAway, True), + ( + (p4max, p4max), + (p4maxhalfup, p4max), + (p4maxup, p4max), + (np.inf, p4max), + (-p4max, -p4max), + (-p4maxhalfup, -p4max), + (-p4maxup, -p4max), + (-np.inf, -p4max), + ), + ), + ( + (RoundMode.TiesToAway, False), + ( + (p4max, p4max), + (p4maxhalfup, np.inf), + (p4maxup, np.inf), + (np.inf, np.inf), + (-p4max, -p4max), + (-p4maxhalfup, -np.inf), + (-p4maxup, -np.inf), + (-np.inf, -np.inf), + ), + ), + ), + ids=lambda x: f"{str(x[0])}-{'Sat' if x[1] else 'Inf'}" if len(x) == 2 else None, +) +def test_round_p3109_sat(modesat: tuple[RoundMode, bool], vals: list) -> None: + fi = format_info_p3109(4) + + for val, expected in vals: + assert round_float(fi, val, *modesat) == expected + + +def test_round_e5m2() -> None: + fi = format_info_ocp_e5m2 + + assert fi.max == 57344 + + assert round_float(fi, 1.5258789e-05) == 2**-16 + + # Default NONSAT rounding + assert round_float(fi, 57344.0) == 57344 + assert round_float(fi, 57344.1) == 57344 + assert round_float(fi, 61439.9) == 57344 + assert 
round_float(fi, 61440.0) == np.inf + assert round_float(fi, np.inf, sat=False) == np.inf + assert round_float(fi, -np.inf, sat=False) == -np.inf + assert np.isnan(round_float(fi, np.nan, sat=False)) + + # SAT rounding + assert round_float(fi, 57344.0, sat=True) == 57344 + assert round_float(fi, 57344.1, sat=True) == 57344 + assert round_float(fi, 61439.9, sat=True) == 57344 + assert round_float(fi, 61440.0, sat=True) == 57344 + assert round_float(fi, np.inf, sat=True) == 57344 + assert round_float(fi, -np.inf, sat=True) == -57344 + assert np.isnan(round_float(fi, np.nan, sat=True)) + + +def test_round_e4m3() -> None: + fi = format_info_ocp_e4m3 + + assert fi.max == 448 + + # Default NONSAT rounding + assert round_float(fi, 448.0) == 448 + assert round_float(fi, 448.1) == 448 + assert round_float(fi, 464.0) == 448 + assert np.isnan(round_float(fi, 464.01)) + assert np.isnan(round_float(fi, np.inf, sat=False)) + assert np.isnan(round_float(fi, -np.inf, sat=False)) + assert np.isnan(round_float(fi, np.nan, sat=False)) + + # SAT rounding + assert round_float(fi, 448.0, sat=True) == 448 + assert round_float(fi, 448.1, sat=True) == 448 + assert round_float(fi, 464.0, sat=True) == 448 + assert round_float(fi, 464.01, sat=True) == 448 + assert round_float(fi, np.inf, sat=True) == 448 + assert round_float(fi, -np.inf, sat=True) == -448 + assert np.isnan(round_float(fi, np.nan, sat=True)) + + +some_positive_codepoints = ( + 0x00, + 0x01, + 0x02, + 0x03, + 0x07, + 0x0F, + 0x17, + 0x21, + 0x33, + 0x40, + 0x53, + 0x65, + 0x70, +) + + +@pytest.mark.parametrize( + "fi", + [ + format_info_ocp_e5m2, + format_info_ocp_e4m3, + *p3109_formats, + ], +) +def test_round(fi: FormatInfo) -> None: + """ + Test rounding from values between exact binary8 values + For integer code point i, let + v0 = the float value at i + v1 = the float value at i+1, i.e. 
nextUp(v0) + dv = v1 - v0 + Then check that: + round(v0) == v0 + round(v0 + 0.3*dv) == v0 + round(v0 + 0.6*dv) == v1 + """ + for i in some_positive_codepoints: + v0 = decode_float(fi, i + 0).fval + v1 = decode_float(fi, i + 1).fval + if np.isfinite([v0, v1]).all(): + dv = v1 - v0 + np.testing.assert_equal(round_float(fi, v0), v0) + np.testing.assert_equal(round_float(fi, v0 + 0.3 * dv), v0) + np.testing.assert_equal(round_float(fi, v0 + 0.49 * dv), v0) + np.testing.assert_equal(round_float(fi, v0 + 0.51 * dv), v1) + np.testing.assert_equal(round_float(fi, v0 + 0.99 * dv), v1) + nearest_even = v0 if (i & 1 == 0) else v1 + np.testing.assert_equal(round_float(fi, v0 + 0.50 * dv), nearest_even) + + +test_formats = [ + (format_info_ocp_e5m2, ml_dtypes.float8_e5m2), + (format_info_ocp_e4m3, ml_dtypes.float8_e4m3fn), +] + + +def _linterp(a: float, b: float, t: float) -> float: + return a * (1 - t) + b * t + + +def _mlround(v: float, dty: Type) -> float: + """ + Round `v` using ml_dtypes library + """ + return np.array([v]).astype(dty).astype(float).item() + + +@pytest.mark.parametrize("fi,mldtype", test_formats) +def test_ml_dtype_compatible(fi: FormatInfo, mldtype: Type) -> None: + """ + Test that rounding is compatible with ml_dtypes + """ + for i in range(255): + # For each float v, check values at various interpolations + # between v and nextUp(v) + v0 = decode_float(fi, i + 0).fval + v1 = decode_float(fi, i + 1).fval + + for alpha in (0, 0.3, 0.5, 0.6, 0.9, 1.25): + v = _linterp(v0, v1, alpha) + if np.isfinite(v): + val = round_float(fi, v, RoundMode.TiesToEven) + + mlval = _mlround(v, mldtype) + np.testing.assert_equal(val, mlval) + + +@pytest.mark.parametrize("fi,mldtype", test_formats) +def test_round_ints(fi: FormatInfo, mldtype: Type) -> None: + for v in np.arange(289).astype(float): + val = round_float(fi, v) + + mlval = _mlround(v, mldtype) + np.testing.assert_equal(val, mlval) + + +@pytest.mark.parametrize("fi", all_formats) +def test_round_roundtrip(fi: 
FormatInfo) -> None: + if fi.bits <= 8: + step = 1 + elif fi.bits <= 16: + step = 13 + elif fi.bits <= 32: + step = 73013 + elif fi.bits <= 64: + step = (73013 << 32) + 39 + + for i in range(0, 2**fi.bits, step): + fv = decode_float(fi, i) + fval2 = round_float(fi, fv.fval) + np.testing.assert_equal(fval2, fv.fval)