diff --git a/.gitignore b/.gitignore
index 078e14b..84d8e21 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,7 +3,9 @@
/venv*
.jupyter_cache
jupyter_execute
+__pycache__
# pixi environments
.pixi
*.egg-info
+_ipython-input*-profile
diff --git a/Makefile b/Makefile
index 46f35b9..7b1b39a 100644
--- a/Makefile
+++ b/Makefile
@@ -22,3 +22,12 @@ help:
# Live reload site documents for local development
livehtml:
sphinx-autobuild "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
+
+INTERSPHINX_TARGETS := \
+ intersphinx-python \
+ intersphinx-numpy \
+ intersphinx-ipython
+
+.PHONY: $(INTERSPHINX_TARGETS)
+$(INTERSPHINX_TARGETS): intersphinx-%:
+ cd content && python3 ls_intersphinx_targets.py $(subst intersphinx-,,$@) | less
diff --git a/content/conf.py b/content/conf.py
index 283ddf0..86a9452 100644
--- a/content/conf.py
+++ b/content/conf.py
@@ -37,6 +37,7 @@
# remove once sphinx_rtd_theme updated for contrast and accessibility:
"sphinx_rtd_theme_ext_color_contrast",
"sphinx.ext.todo",
+ "IPython.sphinxext.ipython_console_highlighting",
]
# Settings for myst_nb:
@@ -101,16 +102,31 @@
# :py:mod:`multiprocessing` to link straight to the Python docs of that module.
# List all available references:
# python -msphinx.ext.intersphinx https://docs.python.org/3/objects.inv
-# extensions.append('sphinx.ext.intersphinx')
-# intersphinx_mapping = {
-# #'python': ('https://docs.python.org/3', None),
+extensions.append('sphinx.ext.intersphinx')
+intersphinx_mapping = {
+ 'python': ('https://docs.python.org/3', None),
# #'sphinx': ('https://www.sphinx-doc.org/', None),
-# #'numpy': ('https://numpy.org/doc/stable/', None),
+ 'numpy': ('https://numpy.org/doc/stable/', None),
# #'scipy': ('https://docs.scipy.org/doc/scipy/reference/', None),
# #'pandas': ('https://pandas.pydata.org/docs/', None),
# #'matplotlib': ('https://matplotlib.org/', None),
# 'seaborn': ('https://seaborn.pydata.org/', None),
-# }
+ 'ipython': ('https://ipython.readthedocs.io/en/stable/', None),
+}
+
+# sphinx-hoverxref
+extensions.append("hoverxref.extension")
+hoverxref_auto_ref = True
+hoverxref_domains = ["py"]
+hoverxref_role_types = {
+ 'hoverxref': 'modal',
+ 'ref': 'modal', # for hoverxref_auto_ref config
+ 'func': 'modal',
+ 'meth': 'modal',
+ 'mod': 'tooltip', # for Python Sphinx Domain
+ 'class': 'tooltip', # for Python Sphinx Domain
+}
+
# add few new directives
from sphinx_lesson.directives import _BaseCRDirective
diff --git a/content/example/scalene_web.png b/content/example/scalene_web.png
new file mode 100644
index 0000000..ff71b32
Binary files /dev/null and b/content/example/scalene_web.png differ
diff --git a/content/example/walk.py b/content/example/walk.py
index 6fbf286..c7cbd89 100644
--- a/content/example/walk.py
+++ b/content/example/walk.py
@@ -1,20 +1,56 @@
+"""A 1-D random walk.
+
+See also:
+- https://lectures.scientific-python.org/intro/numpy/auto_examples/plot_randomwalk.html
+
+"""
import numpy as np
+
def step():
import random
- return 1. if random.random() > .5 else -1.
+ return 1.0 if random.random() > 0.5 else -1.0
+
+
+def walk(n: int, dx: float = 1.0):
+ """The for-loop version.
+
+ Parameters
+ ----------
+ n: int
+ Number of time steps
+
+ dx: float
+ Step size. Default step size is unity.
+
+ """
+ xs = np.zeros(n)
-def walk(n):
- x = np.zeros(n)
- dx = 1. / n
for i in range(n - 1):
- x_new = x[i] + dx * step()
- if x_new > 5e-3:
- x[i + 1] = 0.
- else:
- x[i + 1] = x_new
- return x
+ x_new = xs[i] + dx * step()
+ xs[i + 1] = x_new
+
+ return xs
+
+
+def walk_vec(n: int, dx: float = 1.0):
+ """The vectorized version of :func:`walk` using numpy functions."""
+ import random
+ steps = np.array(random.sample([1, -1], k=n, counts=[10 * n, 10 * n]))
+
+ # steps = np.random.choice([1, -1], size=n)
+
+ dx_steps = dx * steps
+
+ # set initial condition to zero
+ dx_steps[0] = 0
+ # use cumulative sum to replicate time evolution of position x
+ xs = np.cumsum(dx_steps)
+
+ return xs
+
if __name__ == "__main__":
- n = 100000
- x = walk(n)
+ n = 1_000_000
+ _ = walk(n)
+ _ = walk_vec(n)
diff --git a/content/example/walk_ensemble_plot.py b/content/example/walk_ensemble_plot.py
new file mode 100644
index 0000000..8b09efa
--- /dev/null
+++ b/content/example/walk_ensemble_plot.py
@@ -0,0 +1,28 @@
+import numpy as np
+import matplotlib.pyplot as plt
+# See walk.py for the implementation
+import walk
+
+
+n_stories = 1000
+t_max = 200
+positions = np.empty((n_stories, t_max))
+
+for i_story in range(n_stories):
+ positions[i_story, :] = walk.walk(t_max, 1)
+
+# Determine the time evolution of the root-mean-square distance.
+sq_distance = positions**2
+# Root mean square distance
+rms_distance = np.sqrt(np.mean(sq_distance, axis=0))
+
+t = np.arange(t_max)
+
+fig, ax = plt.subplots()
+ax.plot(t, rms_distance, "g", label="ensemble RMS distance")
+ax.plot(t, np.sqrt(t), "k--", label=r"theoretical $\sqrt{\langle (\delta x)^2 \rangle}$")
+ax.set(xlabel="time", ylabel="x")
+ax.legend()
+
+plt.tight_layout()
+plt.show()
diff --git a/content/index.rst b/content/index.rst
index 807fe4f..dc572f2 100644
--- a/content/index.rst
+++ b/content/index.rst
@@ -68,6 +68,7 @@ and distributed computing.
pandas-extra
GPU-computing
parallel-computing_opt
+ optimization_opt
.. toctree::
@@ -136,11 +137,11 @@ Several examples and formulations are inspired by other open source educational
- `Python for Data Analysis `__
- `GTC2017-numba `__
- `IPython Cookbook `__
-- `Scipy Lecture Notes `__
+- `Scientific Python Lectures `__ (*previously known as, Scipy Lecture Notes*)
- `Machine Learning and Data Science Notebooks `__
- `Elegant SciPy `__
-- `A Comprehensive Guide to NumPy Data Types `__
-
+- `A Comprehensive Guide to NumPy Data Types `__
+- `Python performance workshop `__
Instructional Material
^^^^^^^^^^^^^^^^^^^^^^
diff --git a/content/ls_intersphinx_targets.py b/content/ls_intersphinx_targets.py
new file mode 100644
index 0000000..c222edc
--- /dev/null
+++ b/content/ls_intersphinx_targets.py
@@ -0,0 +1,9 @@
+import sys
+from sphinx.ext.intersphinx import inspect_main
+
+from conf import intersphinx_mapping
+
+
+library = sys.argv[1]
+url = intersphinx_mapping[library][0] + "/objects.inv"
+inspect_main([url])
diff --git a/content/optimization.rst b/content/optimization.rst
index f09ced9..fe12ca4 100644
--- a/content/optimization.rst
+++ b/content/optimization.rst
@@ -1,8 +1,7 @@
.. _performance:
-
-Profiling and Optimizing
-========================
+Benchmarking, profiling and optimizing
+======================================
.. objectives::
@@ -15,69 +14,271 @@ Profiling and Optimizing
- 30 min exercises
-Once your code is working reliably, you can start thinking of optimizing it.
-.. warning::
+.. keypoints::
+
+ - Once your code is working reliably, you can start thinking of optimizing it.
- Always measure the code before you start optimization. Don't base your optimization
- on theoretical consideration, otherwise you'll have surprises.
+ - Always measure the code before you start optimization. Don't base your optimization
+ on assumptions, otherwise you'll have surprises.
-Profilers
----------
+Benchmarking
+------------
+Benchmarking is a method of doing performance analysis for either the end-to-end execution of a whole
+program or a part of a program.
time
^^^^
-One of the easy way to profile the program is to use the time function:
+One easy way to benchmark is to use the time function:
.. code-block:: python
+ :emphasize-lines: 1, 8, 12-13
import time
+
+
+ def some_function():
+ ...
+
# start the timer
- start_time=time.time()
- # here are the code you would like to profile
- a = np.arange(1000)
- a = a ** 2
- # stop the timer
- end_time=time.time()
+ start_time = time.time()
+ # here are the code you would like to measure
+ result = some_function()
+    # stop the timer
+ end_time = time.time()
print("Runtime: {:.4f} seconds".format(end_time - start_time))
- # Runtime: 0.0001 seconds
-Timeit
+The IPython "magic" command
+:py:meth:`%time `
+can also be used to make a similar benchmark with less effort as follows:
+
+.. code-block:: ipython
+
+ %time some_function()
+
+
+timeit
^^^^^^
If you're using a Jupyter notebook, the best choice will be to use
-`%timeit `__ to time a small piece of code:
+the :py:mod:`timeit` module or the
+IPython "magic" command
+:py:meth:`%timeit `
+to repeatedly time a small piece of code:
.. code-block:: ipython
+ :emphasize-lines: 5
import numpy as np
a = np.arange(1000)
%timeit a ** 2
- # 1.4 µs ± 25.1 ns per loop
-One can also use the cell magic ``%%timeit`` to benchmark a full cell.
+We will shortly see an example of this in the exercise below.
+One can also use the cell magic
+:py:meth:`%timeit `
+to benchmark a full cell containing a block of code.
+
+Exercise 1
+----------
+
+.. exercise::
+
+ Start with the following code::
+
+ import numpy as np
+
+
+ a = np.arange(1000)
+
+ def square_sum(array):
+          return (array ** 2).sum()
+
+ #. Run ``%time square_sum(a)`` a couple of times. Do you get the same result?
+ #. Run ``%timeit square_sum(a)`` a couple of times. Do you get the same result?
+ #. (optional) execute the following benchmark and
+ compare it with output of question number 1.
+
+ .. code-block:: ipython
+
+ from urllib.request import urlopen
+
+ %time urlopen("https://raw.githubusercontent.com/ENCCS/hpda-python/refs/heads/main/content/data/tas1840.nc")
+
+.. solution::
+
+ 1. Run ``%time square_sum(a)`` a couple of times.
+
+ .. code-block:: ipython
+
+ In [1]: import numpy as np
+ ...:
+ ...:
+ ...: a = np.arange(1000)
+ ...:
+ ...: def square_sum(array):
+         ...:     return (array ** 2).sum()
+ ...:
+
+ In [2]: %time square_sum(a)
+ CPU times: user 184 μs, sys: 5 μs, total: 189 μs
+ Wall time: 155 μs
+ Out[2]: np.int64(332833500)
+
+ In [3]: %time square_sum(a)
+ CPU times: user 74 μs, sys: 0 ns, total: 74 μs
+ Wall time: 77.7 μs
+ Out[3]: np.int64(332833500)
+
+ We get a rough estimate of how long it takes to execute a function for a given
+ input value. While useful, a few sample timings of the function ``square_sum()``,
+ does not represent a reproducible benchmark.
+ Subsequent measurements can result in different runtimes, due to the state of the
+ computer such as:
+
+ - what background processes are running,
+ - hyperthreading,
+ - memory and cache usage,
+ - CPU's temperature,
+
+ and many more factors, also collectively known as *system jitter*.
+
+ 2. Run ``%timeit square_sum(a)`` a couple of times.
+
+ .. code-block:: ipython
+
+ In [4]: %timeit square_sum(a)
+ 1.62 μs ± 55.4 ns per loop (mean ± std. dev. of 7 runs, 1,000,000 loops each)
+
+ In [5]: %timeit square_sum(a)
+ 1.6 μs ± 46.6 ns per loop (mean ± std. dev. of 7 runs, 1,000,000 loops each)
+
+ By making several measurements, we manage to reduce jitter and the measurement is more
+ reliable
+
+ .. note::
+
+      For long running calls, use ``%time`` instead of ``%timeit``; it is
+      less precise but faster.
+
+ 3. (optional) Comparing benchmarks of ``%time square_sum(a)`` and
+ ``%time urlopen(...)``.
+
+ .. code-block:: ipython
+
+ In [6]: from urllib.request import urlopen
+
+ In [7]: %time urlopen("https://raw.githubusercontent.com/ENCCS/hpda-python/refs/heads/main/content/data/tas1840.nc")
+ CPU times: user 4.66 ms, sys: 974 μs, total: 5.63 ms
+ Wall time: 21.4 ms
+ Out[7]:
+
+   In (1) we see that the *CPU time* and *Wall time* are comparable, which indicates that the
+ operation is CPU bound.
+
+ However in (3) we clearly see that **CPU time is lower
+ than wall-time**, from which we can deduce that it is not a CPU-bound operation.
+ In this particular case, the operation was I/O bound.
+ Some common I/O bound operations are network related, or due to
+ latency in filesystems or use of inefficient file storage formats.
+
+
+Profiling
+---------
+Profilers are applications which attach to the execution of the program, which in our case is done
+by the CPython interpreter and analyze the time taken for different portions of the code.
+Profilers help to identify performance bottlenecks in the code by showing
+
+- wall-time (*start-to-end time that the user observes*),
+- CPU and GPU time, and
+- memory usage patterns
+
+in **function/method/line of code** level granularity.
+
+Deterministic profilers vs. sampling profilers
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
.. note::
- For long running calls, using ``%time`` instead of ``%timeit``; it is
- less precise but faster
+ *Deterministic profilers* are also called *tracing profilers*.
+**Deterministic profilers** record every function call and event in the program,
+logging the exact sequence and duration of events.
+ 👍 **Pros:**
+ - Provides detailed information on the program's execution.
+ - Deterministic: Captures exact call sequences and timings.
+ 👎 **Cons:**
+ - Higher overhead, slowing down the program.
+ - Can generate larger amount of data.
+
+**Sampling profilers** periodically samples the program's state (where it is
+and how much memory is used), providing a statistical view of where time is
+spent.
+
+ 👍 **Pros:**
+ - Lower overhead, as it doesn't track every event.
+ - Scales better with larger programs.
+
+ 👎 **Cons:**
+ - Less precise, potentially missing infrequent or short calls.
+ - Provides an approximation rather than exact timing.
+
+
+.. discussion::
+
+ *Analogy*: Imagine we want to optimize the Stockholm Länstrafik (SL) metro system
+ We wish to detect bottlenecks in the system to improve the service and for this we have
+ asked few passengers to help us by tracking their journey.
+
+ - **Deterministic**:
+ We follow every train and passenger, recording every stop
+ and delay. When passengers enter and exit the train, we record the exact time
+ and location.
+ - **Sampling**:
+ Every 5 minutes the phone notifies the passenger to note
+ down their current location. We then use this information to estimate
+ the most crowded stations and trains.
+
+In addition to the above distinctions, profilers also differ in what they measure, as the examples below show.
+
+.. callout:: Examples of some profilers
+ :class: dropdown
+
+ CPU profilers:
+
+ - `cProfile and profile `__
+ - `line_profiler `__
+ - `py-spy `__
+
+ Memory profilers:
+
+ - `tracemalloc `__
+ - `memray `__
+
+ Both CPU and memory:
+
+ - `Scalene `__ (see optional course material on :ref:`scalene`)
+
+In the following sections, we will use :ref:`cProfile` and :ref:`line-profiler` to profile a Python program.
+cProfile is a deterministic (tracing) profiler built into the Python standard library
+and gives timings in function-level granularity.
+Line profiler is also deterministic and it provides timings in line-of-code granularity for few selected
+functions.
+
+.. _cProfile:
cProfile
^^^^^^^^
-For more complex code, one can use the `built-in python profilers
-`_, ``cProfile`` or ``profile``.
-
As a demo, let us consider the following code which simulates a random walk in one dimension
(we can save it as ``walk.py`` or download from :download:`here `):
+.. _walk-py-script:
.. literalinclude:: example/walk.py
We can profile it with ``cProfile``:
@@ -99,14 +300,14 @@ to a file with the ``-o`` flag and view it with `profile pstats module
`__
or profile visualisation tools like
`Snakeviz `__
-or `profile-viewer `__.
+or `tuna `__.
.. note::
Similar functionality is available in interactive IPython or Jupyter sessions with the
magic command `%%prun `__.
-
+.. _line-profiler:
Line-profiler
^^^^^^^^^^^^^
@@ -141,9 +342,10 @@ line-by-line breakdown of where time is being spent. For this information, we ca
- Based on the output, can you spot a mistake which is affecting performance?
- .. solution:: Line-profiling output
+ .. callout:: Line-profiling output
+ :class: dropdown
- .. code-block:: console
+ ::
Wrote profile results to walk.py.lprof
Timer unit: 1e-06 s
@@ -183,11 +385,133 @@ line-by-line breakdown of where time is being spent. For this information, we ca
which is called thousands of times! Moving the module import to the top level saves
considerable time.
+Exercise 2
+----------
+
+.. exercise::
+
+ Start by copying in the script :ref:`walk.py ` into Jupyter or
+ :download:`download it ` and execute ``%run walk.py``
+
+ #. Use ``%timeit`` magic command in Jupyter
+ to benchmark the functions ``walk(1_000_000)`` and ``walk_vec(1_000_000)``.
+ Which is faster?
+ #. Use::
+
+ %load_ext line_profiler
+ %lprun -f walk_vec walk_vec(1_000_000)
+
+ to apply line-profiler on ``walk_vec`` function. What is the bottleneck?
+   #. Modify the following highlighted lines to change how the ``steps`` array is initialized.
+ Execute it to re-initialize ``walk_vec``.
+ Redo the above benchmark and profiling. Does it improve the performance?
+
+ .. literalinclude:: example/walk.py
+ :pyobject: walk_vec
+ :emphasize-lines: 3-6
+
+.. solution::
+
+ #. Benchmarking ``walk`` and ``walk_vec``
+
+ .. code-block:: ipython
+
+ In [1]: %run walk.py
+
+ In [2]: %timeit walk(1_000_000)
+ 246 ms ± 2.59 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
+
+ In [3]: %timeit walk_vec(1_000_000)
+ 397 ms ± 6.38 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
+
+      This shows that ``walk_vec``, although it uses vectorised operations, is slower,
+      although it is not directly obvious why.
+
+ #. Profiling ``walk_vec``
+
+ .. code-block:: ipython
+
+ In [4]: %load_ext line_profiler
+
+ In [5]: %lprun -f walk_vec walk_vec(1_000_000)
+
+ illustrates how creating the ``steps`` array consumes more than 99% of the total time::
+
+ Timer unit: 1e-09 s
+
+ Total time: 1.14911 s
+ File: /home/ashwinmo/Sources/enccs/hpda-python/content/example/walk.py
+ Function: walk_vec at line 36
+
+ Line # Hits Time Per Hit % Time Line Contents
+ ==============================================================
+ 36 def walk_vec(n: int, dx: float = 1.0):
+ 37 """The vectorized version of :func:`walk` using numpy functions."""
+ 38 1 3374.0 3374.0 0.0 import random
+ 39 1 1145604166.0 1e+09 99.7 steps = np.array(random.sample([1, -1], k=n, counts=[10 * n, 10 * n]))
+ 40
+ 41 # steps = np.random.choice([1, -1], size=n)
+ 42
+ 43 1 1085779.0 1e+06 0.1 dx_steps = dx * steps
+ 44
+ 45 # set initial condition to zero
+ 46 1 3666.0 3666.0 0.0 dx_steps[0] = 0
+ 47 # use cumulative sum to replicate time evolution of position x
+ 48 1 2408374.0 2e+06 0.2 xs = np.cumsum(dx_steps)
+ 49
+ 50 1 507.0 507.0 0.0 return xs
+
+   This is because ``random.sample`` is part of the standard library and produces a plain Python list, which must then be converted to a NumPy array.
+
+ #. After modifying ``walk_vec`` function, we see it is much faster.
+
+ .. code-block:: ipython
+
+ In [7]: %ed walk.py
+ Editing... done. Executing edited code...
+
+ In [8]: %timeit walk(1_000_000)
+ 259 ms ± 14.5 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
+
+ In [9]: %timeit walk_vec(1_000_000)
+ 6.49 ms ± 90.4 μs per loop (mean ± std. dev. of 7 runs, 100 loops each)
+
+ In [10]: %lprun -f walk_vec walk_vec(1_000_000)
+
+ Line-profiler output::
+
+ Timer unit: 1e-09 s
+
+ Total time: 0.0078706 s
+ File: /home/ashwinmo/Sources/enccs/hpda-python/content/example/walk.py
+ Function: walk_vec at line 36
+
+ Line # Hits Time Per Hit % Time Line Contents
+ ==============================================================
+ 36 def walk_vec(n: int, dx: float = 1.0):
+ 37 """The vectorized version of :func:`walk` using numpy functions."""
+ 38 # import random
+ 39 # steps = np.array(random.sample([1, -1], k=n, counts=[10 * n, 10 * n]))
+ 40
+ 41 1 4323251.0 4e+06 54.9 steps = np.random.choice([1, -1], size=n)
+ 42
+ 43 1 1142060.0 1e+06 14.5 dx_steps = dx * steps
+ 44
+ 45 # set initial condition to zero
+ 46 1 2907.0 2907.0 0.0 dx_steps[0] = 0
+ 47 # use cumulative sum to replicate time evolution of position x
+ 48 1 2401730.0 2e+06 30.5 xs = np.cumsum(dx_steps)
+ 49
+ 50 1 649.0 649.0 0.0 return xs
Performance optimization
------------------------
Once we have identified the bottlenecks, we need to make the corresponding code go faster.
+The specific optimization can vary widely based on the computational load
+(how big or small the data is, and how frequently a function is executed)
+and particular problem at hand. Nevertheless, we present some common methods which can be
+handy to know.
Algorithm optimization
diff --git a/content/optimization_opt.rst b/content/optimization_opt.rst
new file mode 100644
index 0000000..73e7449
--- /dev/null
+++ b/content/optimization_opt.rst
@@ -0,0 +1,132 @@
+Benchmarking, profiling and optimizing (II)
+===========================================
+
+Profiling
+---------
+
+.. _scalene:
+Scalene
+~~~~~~~
+
+Scalene is a sampling profiler. In addition to timings, it can also give insight into:
+
+- CPU time spent in Python (interpreted), native (compiled) and system function calls
+- Memory usage and copy
+- GPU utilization
+- Memory leak detection
+
+Moreover, it adds minimal overhead due to profiling. The downside is the results are
+less reproducible, because it is a sampling profiler.
+
+Scalene can be used as a :ref:`scalene-cli`, or using :ref:`scalene-ipy` or in
+:ref:`scalene-web` as an interactive widget. Here are some examples profiling
+:download:`walk.py ` with Scalene.
+
+.. _scalene-cli:
+CLI tool
+^^^^^^^^
+
+.. code-block:: console
+
+ $ scalene --cli walk.py
+
+.. _scalene-ipy:
+IPython magic
+^^^^^^^^^^^^^
+This allows for profiling a specific function. For example, to profile just ``walk``, we do as follows:
+
+.. code-block:: ipython
+
+ In [1]: %load_ext scalene
+
+ In [2]: %run walk.py
+
+ In [3]: %scrun --cli walk(n)
+
+Gives the following output::
+
+ SCRUN MAGIC
+ /home/ashwinmo/Sources/enccs/hpda-python/content/example/walk.py: % of time = 100.00% (1.933s) out of 1.933s.
+ ╷ ╷ ╷ ╷ ╷
+ │Time │–––––– │–––––– │–––––– │
+ Line │Python │native │system │GPU │/home/ashwinmo/Sources/enccs/hpda-python/content/example/walk.py
+ ╺━━━━━━┿━━━━━━━┿━━━━━━━┿━━━━━━━┿━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸
+ 1 │ │ │ │ │"""A 1-D random walk.
+ 2 │ │ │ │ │
+ 3 │ │ │ │ │See also:
+ 4 │ │ │ │ │- https://lectures.scientific-python.org/intro/numpy/auto_examples/plot_randomwalk.html
+ 5 │ │ │ │ │
+ 6 │ │ │ │ │"""
+ 7 │ │ │ │ │import numpy as np
+ 8 │ │ │ │ │
+ 9 │ │ │ │ │
+ 10 │ 6% │ │ │ │def step():
+ 11 │ │ │ │ │ import random
+ 12 │ 7% │ 64% │ 13% │ │ return 1.0 if random.random() > 0.5 else -1.0
+ 13 │ │ │ │ │
+ 14 │ │ │ │ │
+ 15 │ │ │ │ │def walk(n: int, dx: float = 1.0):
+ 16 │ │ │ │ │ """The for-loop version.
+ 17 │ │ │ │ │
+ 18 │ │ │ │ │ Parameters
+ 19 │ │ │ │ │ ----------
+ 20 │ │ │ │ │ n: int
+ 21 │ │ │ │ │ Number of time steps
+ 22 │ │ │ │ │
+ 23 │ │ │ │ │ dx: float
+ 24 │ │ │ │ │ Step size. Default step size is unity.
+ 25 │ │ │ │ │
+ 26 │ │ │ │ │ """
+ 27 │ │ │ │ │ xs = np.zeros(n)
+ 28 │ │ │ │ │
+ 29 │ │ │ │ │ for i in range(n - 1):
+ 30 │ │ │ │ │ x_new = xs[i] + dx * step()
+ 31 │ 7% │ │ │ │ xs[i + 1] = x_new
+ 32 │ │ │ │ │
+ 33 │ │ │ │ │ return xs
+ 34 │ │ │ │ │
+ 35 │ │ │ │ │
+ 36 │ │ │ │ │def walk_vec(n: int, dx: float = 1.0):
+ 37 │ │ │ │ │ """The vectorized version of :func:`walk` using numpy functions."""
+ 38 │ │ │ │ │ import random
+ 39 │ │ │ │ │ steps = np.array(random.sample([1, -1], k=n, counts=[10 * n, 10 * n]))
+ 40 │ │ │ │ │
+ 41 │ │ │ │ │ # steps = np.random.choice([1, -1], size=n)
+ 42 │ │ │ │ │
+ 43 │ │ │ │ │ dx_steps = dx * steps
+ 44 │ │ │ │ │
+ 45 │ │ │ │ │ # set initial condition to zero
+ 46 │ │ │ │ │ dx_steps[0] = 0
+ 47 │ │ │ │ │ # use cumulative sum to replicate time evolution of position x
+ 48 │ │ │ │ │ xs = np.cumsum(dx_steps)
+ 49 │ │ │ │ │
+ 50 │ │ │ │ │ return xs
+ 51 │ │ │ │ │
+ 52 │ │ │ │ │
+ 53 │ │ │ │ │if __name__ == "__main__":
+ 54 │ │ │ │ │ n = 1_000_000
+ 55 │ │ │ │ │ _ = walk(n)
+ 56 │ │ │ │ │ _ = walk_vec(n)
+ 57 │ │ │ │ │
+ │ │ │ │ │
+ ╶──────┼───────┼───────┼───────┼───────┼─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╴
+ │ │ │ │ │function summary for /home/ashwinmo/Sources/enccs/hpda-python/content/example/walk.py
+ 10 │ 14% │ 69% │ 9% │ │step
+ 15 │ 7% │ │ │ │walk
+ ╵ ╵ ╵ ╵
+
+If you run the magic command in Jupyter you can use ``%scrun walk(n)`` instead and it should show an output similar to the :ref:`scalene-web` below.
+
+.. _scalene-web:
+Web interface
+^^^^^^^^^^^^^
+
+Running
+
+.. code-block:: console
+
+ $ scalene walk.py
+
+opens up the following web app:
+
+.. figure:: example/scalene_web.png
\ No newline at end of file
diff --git a/requirements.in b/requirements.in
index e49ae8a..2843de0 100644
--- a/requirements.in
+++ b/requirements.in
@@ -4,3 +4,4 @@ sphinx_rtd_theme_ext_color_contrast
myst_nb
sphinx-lesson>=0.8.19
pillow
+sphinx-hoverxref
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
index 6e084f6..4ef4c53 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -176,6 +176,7 @@ sphinx==7.4.7
# myst-parser
# sphinx-autobuild
# sphinx-copybutton
+ # sphinx-hoverxref
# sphinx-lesson
# sphinx-rtd-theme
# sphinx-rtd-theme-ext-color-contrast
@@ -186,6 +187,8 @@ sphinx-autobuild==2024.10.3
# via sphinx-lesson
sphinx-copybutton==0.5.2
# via sphinx-lesson
+sphinx-hoverxref==1.4.2
+ # via -r requirements.in
sphinx-lesson==0.8.19
# via -r requirements.in
sphinx-minipres==0.2.1
@@ -209,7 +212,9 @@ sphinxcontrib-devhelp==2.0.0
sphinxcontrib-htmlhelp==2.1.0
# via sphinx
sphinxcontrib-jquery==4.1
- # via sphinx-rtd-theme
+ # via
+ # sphinx-hoverxref
+ # sphinx-rtd-theme
sphinxcontrib-jsmath==1.0.1
# via sphinx
sphinxcontrib-qthelp==2.0.0