From e5d3fbee19d5211cc81c6cdeb350b0d2aded47d5 Mon Sep 17 00:00:00 2001
From: Mark <mark@betalupi.com>
Date: Wed, 22 Jan 2025 21:50:31 -0800
Subject: [PATCH] FIR draft

---
 src/Advanced/Fast Inverse Root/main.typ       |  21 ++
 src/Advanced/Fast Inverse Root/meta.toml      |   7 +
 .../Fast Inverse Root/parts/00 int.typ        |  89 ++++++++
 .../Fast Inverse Root/parts/01 float.typ      | 200 ++++++++++++++++++
 .../Fast Inverse Root/parts/02 approx.typ     |  87 ++++++++
 .../Fast Inverse Root/parts/03 quake.typ      | 119 +++++++++++
 6 files changed, 523 insertions(+)
 create mode 100644 src/Advanced/Fast Inverse Root/main.typ
 create mode 100644 src/Advanced/Fast Inverse Root/meta.toml
 create mode 100644 src/Advanced/Fast Inverse Root/parts/00 int.typ
 create mode 100644 src/Advanced/Fast Inverse Root/parts/01 float.typ
 create mode 100644 src/Advanced/Fast Inverse Root/parts/02 approx.typ
 create mode 100644 src/Advanced/Fast Inverse Root/parts/03 quake.typ

diff --git a/src/Advanced/Fast Inverse Root/main.typ b/src/Advanced/Fast Inverse Root/main.typ
new file mode 100644
index 0000000..cd0ad22
--- /dev/null
+++ b/src/Advanced/Fast Inverse Root/main.typ	
@@ -0,0 +1,21 @@
+#import "@local/handout:0.1.0": *
+
+#show: doc => handout(
+  doc,
+  group: "Advanced 2",
+
+  title: [Fast Inverse Root],
+  by: "Mark",
+  subtitle: "Based on a handout by Bryant Mathews",
+)
+
+#include "parts/00 int.typ"
+#pagebreak()
+
+#include "parts/01 float.typ"
+#pagebreak()
+
+#include "parts/02 approx.typ"
+#pagebreak()
+
+#include "parts/03 quake.typ"
diff --git a/src/Advanced/Fast Inverse Root/meta.toml b/src/Advanced/Fast Inverse Root/meta.toml
new file mode 100644
index 0000000..5a1bdbd
--- /dev/null
+++ b/src/Advanced/Fast Inverse Root/meta.toml	
@@ -0,0 +1,7 @@
+[metadata]
+title = "Fast Inverse Square Root"
+
+
+[publish]
+handout = true
+solutions = true
diff --git a/src/Advanced/Fast Inverse Root/parts/00 int.typ b/src/Advanced/Fast Inverse Root/parts/00 int.typ
new file mode 100644
index 0000000..209179d
--- /dev/null
+++ b/src/Advanced/Fast Inverse Root/parts/00 int.typ	
@@ -0,0 +1,89 @@
+#import "@local/handout:0.1.0": *
+
+= Integers
+
+#definition()
+A _bit string_ is a string of binary digits. \
+In this handout, we'll denote bit strings with the prefix `0b`. \
+That is, $1001 =$ "one thousand and one," while $#text([`0b1001`]) = 2^3 + 2^0 = 9$
+
+#v(2mm)
+We will separate long bit strings with underscores for readability. \
+Underscores have no meaning: $#text([`0b1111_0000`]) = #text([`0b11110000`])$.
+
+#problem()
+What is the value of the following bit strings, if we interpret them as integers in base 2?
+- `0b0001_1010`
+- `0b0110_0001`
+
+#solution([
+  - $#text([`0b0001_1010`]) = 2 + 8 + 16 = 26$
+  - $#text([`0b0110_0001`]) = 1 + 32 + 64 = 97$
+])
+
+#v(1fr)
+#pagebreak()
+
+#definition()
+We can interpret a bit string in any number of ways. \
+One such interpretation is the _signed integer_, or `int` for short. \
+`int`s allow us to represent negative and positive integers using 32-bit strings.
+
+#v(2mm)
+
+The first bit of an `int` tells us its sign:
+- if the first bit is `1`, the _int_ represents a negative number;
+- if the first bit is `0`, it represents a positive number.
+
+We do not need negative numbers today, so we will assume that the first bit is always zero. \
+#note([If you'd like to know how negative integers are written, look up "two's complement" after class.])
+
+#v(2mm)
+
+The value of a positive signed `int` is simply the value of its binary digits:
+- $#text([`0b00000000_00000000_00000000_00000000`]) = 0$
+- $#text([`0b00000000_00000000_00000000_00000011`]) = 3$
+- $#text([`0b00000000_00000000_00000000_00100000`]) = 32$
+- $#text([`0b00000000_00000000_00000000_10000010`]) = 130$
+
+#problem()
+What is the largest number we can represent with a 32-bit `int`?
+
+#solution([
+  $#text([`0b01111111_11111111_11111111_11111111`]) = 2^(31) - 1$
+])
+
+#v(1fr)
+
+#problem()
+What is the smallest possible number we can represent with a 32-bit `int`? \
+#hint([
+  You do not need to know _how_ negative numbers are represented. \
+  Assume that we do not skip any integers, and don't forget about zero.
+])
+
+#solution([
+  There are $2^(32)$ possible 32-bit patterns,
+  of which 1 represents zero and $2^(31) - 1$ represent positive numbers.
+  We therefore have access to $2^(32) - 1 - (2^(31) - 1) = 2^(31)$ negative numbers,
+  giving us a minimum representable value of $-2^(31)$.
+])
+
+#v(1fr)
+
+#problem()
+Find the value of each of the following 32-bit `int`s:
+- `0b00000000_00000000_00000101_00111001`
+- `0b00000000_00000000_00000001_00101100`
+- `0b00000000_00000000_00000100_10110000`
+#hint([The third conversion is easy---look carefully at the second.])
+
+#solution([
+  - $#text([`0b00000000_00000000_00000101_00111001`]) = 1337$
+  - $#text([`0b00000000_00000000_00000001_00101100`]) = 300$
+  - $#text([`0b00000000_00000000_00000100_10110000`]) = 1200$
+
+  Notice that the third int is the second shifted left twice (i.e., multiplied by 4).
+])
+
+#v(2fr)
diff --git a/src/Advanced/Fast Inverse Root/parts/01 float.typ b/src/Advanced/Fast Inverse Root/parts/01 float.typ
new file mode 100644
index 0000000..0d9ec5e
--- /dev/null
+++ b/src/Advanced/Fast Inverse Root/parts/01 float.typ	
@@ -0,0 +1,200 @@
+#import "@local/handout:0.1.0": *
+#import "@preview/cetz:0.3.1"
+
+= Floats
+#definition()
+_Binary decimals_#footnote["decimal" is a misnomer, but that's ok.] are very similar to base-10 decimals. \
+In base 10, we interpret place value as follows:
+- $0.1 = 10^(-1)$
+- $0.03 = 3 times 10^(-2)$
+- $0.0208 = 2 times 10^(-2) + 8 times 10^(-4)$
+
+#v(5mm)
+
+We can do the same in base 2:
+- $#text([`0.1`]) = 2^(-1) = 0.5$
+- $#text([`0.011`]) = 2^(-2) + 2^(-3) = 0.375$
+- $#text([`101.01`]) = 5.25$
+
+#v(5mm)
+
+#problem()
+Rewrite the following binary decimals in base 10: \
+#note([You may leave your answer as a fraction.])
+- `1011.101`
+- `110.1101`
+
+
+#v(1fr)
+#pagebreak()
+
+#definition()
+Another way we can interpret a bit string is as a _signed floating-point decimal_, or a `float` for short. \
+Floats represent a subset of the real numbers, and are interpreted as follows: \
+#note([The following only applies to floats that consist of 32 bits. We won't encounter any others today.])
+
+#align(
+  center,
+  box(
+    inset: 2mm,
+    cetz.canvas({
+      import cetz.draw: *
+      // Diagram of a 32-bit float: each character is drawn in its own cell, then the fields are underlined and labeled.
+      let chars = ( // characters of the example bit string, underscores separating the three fields
+        `0`, // "0b" prefix
+        `b`,
+        `0`, // sign bit (1 bit)
+        `_`,
+        `0`, // exponent (8 bits)
+        `0`,
+        `0`,
+        `0`,
+        `0`,
+        `0`,
+        `0`,
+        `0`,
+        `_`,
+        `0`, // fraction (23 bits: 7 + 8 + 8)
+        `0`,
+        `0`,
+        `0`,
+        `0`,
+        `0`,
+        `0`,
+        `_`,
+        `0`,
+        `0`,
+        `0`,
+        `0`,
+        `0`,
+        `0`,
+        `0`,
+        `0`,
+        `_`,
+        `0`,
+        `0`,
+        `0`,
+        `0`,
+        `0`,
+        `0`,
+        `0`,
+        `0`,
+      )
+
+      let x = 0 // horizontal position of the next character
+      for c in chars {
+        content((x, 0), c)
+        x += 0.25 // characters are spaced 0.25 canvas units apart
+      }
+      // Underline each field at height y and put its label 0.2 units below the line.
+      let y = -0.4
+      line((0.3, y), (0.65, y)) // under the sign bit (drawn at x = 0.5)
+      content((0.45, y - 0.2), [s])
+
+      line((0.85, y), (2.9, y)) // under the exponent bits (drawn at x = 1.0 to 2.75)
+      content((1.9, y - 0.2), [exponent])
+
+      line((3.10, y), (9.4, y)) // under the fraction bits (drawn at x = 3.25 to 9.25)
+      content((6.3, y - 0.2), [fraction])
+    }),
+  ),
+)
+
+- The first bit denotes the sign of the float's value.
+  We'll label it $s$. \
+  If $s = #text([`1`])$, this float is negative; if $s = #text([`0`])$, it is positive.
+
+- The next eight bits represent the _exponent_ of this float.
+  #note([(we'll see what that means soon)]) \
+  We'll call the value of this eight-bit binary integer $E$. \
+  Naturally, $0 <= E <= 255$ #note([(since $E$ consists of eight bits.)])
+
+- The remaining 23 bits represent the _fraction_ of this float, which we'll call $F$. \
+  These 23 bits are interpreted as the fractional part of a binary decimal. \
+  For example, the bits `0b1010000_00000000_00000000` represent $0.5 + 0.125 = 0.625$.
+
+
+#problem(label: "floata")
+Consider `0b01000001_10101000_00000000_00000000`. \
+Find the $s$, $E$, and $F$ we get if we interpret this bit string as a `float`. \
+#note([Leave $F$ as a sum of powers of two.])
+
+#solution([
+  $s = 0$ \
+  $E = 131$ \
+  $F = 2^21+2^19 = 2,621,440$
+])
+
+#v(1fr)
+
+
+#definition()
+The final value of a float with sign $s$, exponent $E$, and fraction $F$ is
+
+$
+  (-1)^s times 2^(E - 127) times (1 + F / (2^(23)))
+$
+
+Notice that this is very similar to decimal scientific notation, which is written as
+
+$
+  (-1)^s times 10^(e) times (f)
+$
+
+#problem()
+Consider `0b01000001_10101000_00000000_00000000`. \
+This is the same bit string we used in @floata. \
+
+#v(2mm)
+
+What value do we get if we interpret this bit string as a float? \
+#hint([$21 div 16 = 1.3125$])
+
+#solution([
+  This is 21:
+  $
+    2^(131 - 127) times (1 + (2^(21) + 2^(19)) / (2^(23)))
+    = 2^(4) times (1 + 0.25 + 0.0625)
+    = 16 times (1.3125)
+    = 21
+  $
+])
+
+#v(1fr)
+#pagebreak()
+
+#problem()
+Encode $12.5$ as a float. \
+#hint([$12.5 div 8 = 1.5625$])
+
+#solution([
+  $
+    12.5
+    = 8 times 1.5625
+    = 2^(3) times (1 + (0.5 + 0.0625))
+    = 2^(130 - 127) times (1 + (2^(22) + 2^(19)) / (2^(23)))
+  $
+
+  which is `0b01000001_01001000_00000000_00000000`. \
+])
+
+
+#v(1fr)
+
+#definition()
+Say we have a bit string $x$. \
+We'll let $x_f$ denote the value we get if we interpret $x$ as a float, \
+and we'll let $x_i$ denote the value we get if we interpret $x$ as an integer.
+
+#problem()
+Let $x = #text[`0b01000001_01001000_00000000_00000000`]$. \
+What are $x_f$ and $x_i$? #note([As always, you may leave big numbers as powers of two.])
+#solution([
+  $x_f = 12.5$
+
+  #v(2mm)
+
+  $x_i = 2^30 + 2^24 + 2^22 + 2^19 = 1,095,237,632$
+])
+
+#v(1fr)
diff --git a/src/Advanced/Fast Inverse Root/parts/02 approx.typ b/src/Advanced/Fast Inverse Root/parts/02 approx.typ
new file mode 100644
index 0000000..0ed9211
--- /dev/null
+++ b/src/Advanced/Fast Inverse Root/parts/02 approx.typ	
@@ -0,0 +1,87 @@
+#import "@local/handout:0.1.0": *
+#import "@preview/cetz:0.3.1"
+#import "@preview/cetz-plot:0.1.0": plot, chart
+
+= Integers and Floats
+
+#generic("Observation:")
+For small values of $a$, $log_2(1 + a)$ is approximately equal to $a$. \
+Note that this equality is exact for $a = 0$ and $a = 1$, since $log_2(1) = 0$ and $log_2(2) = 1$.
+
+#v(2mm)
+
+We'll add a "correction term" $epsilon$ to this approximation, so that $log_2(1 + a) approx a + epsilon$.
+
+#cetz.canvas({
+  import cetz.draw: *
+  // The two curves being compared: log_2(1 + x) and its linear approximation x.
+  let f1(x) = calc.log(x + 1, base: 2)
+  let f2(x) = x
+
+  // Set-up a thin axis style
+  set-style(axes: (stroke: .5pt, tick: (stroke: .5pt)))
+
+  // Draw both curves on the unit square and shade the gap between them.
+  plot.plot(
+    size: (8, 8),
+    x-tick-step: 0.2,
+    y-tick-step: 0.2,
+    y-min: 0,
+    y-max: 1,
+    x-min: 0,
+    x-max: 1,
+    legend: none,
+    axis-style: "scientific-auto",
+
+    {
+      // Sample only the visible range, so every sample point lands inside the axes.
+      let domain = (0, 1)
+      plot.add-fill-between( // shaded region = error of the approximation
+        f1,
+        f2,
+        domain: domain,
+        style: (stroke: none, fill: luma(75%)),
+      )
+
+      plot.add(
+        f1,
+        domain: domain,
+        label: $log_2(1+x)$,
+        style: (stroke: black),
+      )
+      plot.add(f2, domain: domain, label: $x$, style: (stroke: black))
+    },
+  )
+})
+
+TODO: why? Graphs.
+
+#problem(label: "convert")
+Use the fact that $log_2(1 + a) approx a + epsilon$ to approximate $log_2(x_f)$ in terms of $x_i$. \
+
+#v(5mm)
+
+Namely, show that
+$
+  log_2(x_f) approx (x_i) / (2^23) - 127 + epsilon
+$
+for some correction term $epsilon$. \
+#note([
+  In other words, we're finding an expression for $x$ as a float
+  in terms of $x$ as an int.
+])
+
+#solution([
+  Let $E$ and $F$ be the exponent and fraction bits of $x_f$. \
+  We then have:
+  $
+    log_2(x_f)
+    &= log_2 ( 2^(E-127) times (1 + (F) / (2^23)) ) \
+    &= E - 127 + log_2(1 + F / (2^23)) \
+    & approx E-127 + F / (2^23) + epsilon \
+    &= 1 / (2^23)(2^23 E + F) - 127 + epsilon \
+    &= 1 / (2^23)(x_i) - 127 + epsilon
+  $
+])
+
+#v(1fr)
diff --git a/src/Advanced/Fast Inverse Root/parts/03 quake.typ b/src/Advanced/Fast Inverse Root/parts/03 quake.typ
new file mode 100644
index 0000000..5689863
--- /dev/null
+++ b/src/Advanced/Fast Inverse Root/parts/03 quake.typ	
@@ -0,0 +1,119 @@
+#import "@local/handout:0.1.0": *
+
+= The Fast Inverse Square Root
+
+
+The following code is present in _Quake III Arena_ (1999):
+```c
+float Q_rsqrt( float number ) {
+    long i = * ( long * ) &number;
+    i = 0x5f3759df - ( i >> 1 );
+    return * ( float * ) &i;
+}
+```
+
+This code defines a function `Q_rsqrt` that consumes a float `number` and approximates its inverse square root.
+If we rewrite this using notation we're familiar with, we get the following:
+$
+  #text[`Q_rsqrt`] (n_f) = 1597463007 - (n_i div 2)
+  approx 1 / sqrt(n_f)
+$
+
+#note([
+  `0x5f3759df` is $1597463007$ written in hexadecimal. \
+  It is a magic number hard-coded into `Q_rsqrt`.
+])
+
+#v(2mm)
+
+Our goal in this section is to understand why this works:
+- How does Quake approximate $1 / sqrt(x)$ by simply subtracting and dividing by two?
+- What's special about $1597463007$?
+
+
+#problem()
+Using basic log rules, rewrite $log_2(1 / sqrt(x))$ in terms of $log_2(x)$.
+
+#solution([
+  $
+    log_2(1 / sqrt(x)) = (-1) / (2)log_2(x)
+  $
+])
+
+#v(1fr)
+#pagebreak()
+
+#generic("Setup:")
+We are now ready to show that $#text[`Q_rsqrt`] (x) approx 1/sqrt(x)$. \
+For convenience, let's call the bit string of the inverse square root $r$. \
+In other words,
+$
+  r_f := 1 / (sqrt(n_f))
+$
+This is the value we want to approximate.
+
+#problem(label: "finala")
+Find an approximation for $log_2(r_f)$ in terms of $n_i$ and $epsilon$ \
+#note[Remember, $epsilon$ is the correction constant in our approximation of $log_2(1 + a)$.]
+
+#solution[
+  $
+    log_2(r_f)
+    = log_2(1 / sqrt(n_f))
+    = (-1) / 2 log_2(n_f)
+    approx (-1) / 2 ( (n_i) / (2^23) + epsilon - 127 )
+  $
+]
+
+#v(1fr)
+
+#problem(label: "finalb")
+Let's call the "magic number" in the code above $kappa$, so that
+$
+  #text[`Q_rsqrt`] (n_f) = kappa - (n_i div 2)
+$
+Use @convert and @finala to show that $#text[`Q_rsqrt`] (n_f) approx r_i$.
+
+#solution[
+  From @convert, we know that
+  $
+    log_2(r_f) approx (r_i) / (2^23) + epsilon - 127
+  $
+
+  #note[
+    Our approximation of $log_2(1+a)$ uses a fixed correction constant, \
+    so the $epsilon$ here is equivalent to the $epsilon$ in @finala.
+  ]
+
+  Combining this with the result from @finala, we get:
+  $
+    (r_i) / (2^23) + epsilon - 127
+    &approx (-1) / (2) ( (n_i) / (2^23) + epsilon - 127) \
+    (r_i) / (2^23)
+    &approx (-1) / (2) ( (n_i) / (2^23)) + 3 / 2 (127 - epsilon) \
+    r_i
+    &approx (-1) / 2 (n_i) + 2^23 times 3 / 2 times (127 - epsilon)
+    = 2^23 times 3 / 2 times (127 - epsilon) - (n_i) / 2
+  $
+
+  #v(2mm)
+
+  This is exactly what we need! If we set $kappa$ to $2^23 times 3 / 2 times (127 - epsilon)$, then
+  $
+    r_i approx kappa - (n_i div 2) = #text[`Q_rsqrt`] (n_f)
+  $
+]
+
+#v(1fr)
+
+#problem()
+What is the exact value of $kappa$ in terms of $epsilon$? \
+#hint[Look at @finalb. We already found it!]
+
+#solution[
+  This problem makes sure our students see that
+  $kappa = 2^23 times 3 / 2 times (127 - epsilon) = 3 times 2^22 times (127 - epsilon)$. \
+  See the solution to @finalb.
+]
+
+#if_no_solutions(v(2cm))