From cf60d6734a74ee670084a490db99004fce112503 Mon Sep 17 00:00:00 2001
From: Mark <mark@betalupi.com>
Date: Tue, 4 Mar 2025 19:18:21 -0800
Subject: [PATCH] Comments

---
 Makefile                           |  48 +++---------
 README.md                          |   5 +-
 bios/main.asm                      |  59 +++++++-------
 bios/print.asm                     |  29 +++----
 bios/protected_mode.asm            |  46 -----------
 bios/stage1.asm                    | 121 +++++++++++++++--------------
 bios/stage2.asm                    |  50 ++++++++++--
 bios/thunk.asm                     |   7 ++
 tetros/Cargo.toml                  |   2 +-
 tetros/linkers/x86-unknown-none.ld |   5 +-
 tetros/src/drivers/pic.rs          |  30 +++++--
 tetros/src/drivers/serial.rs       |   7 ++
 tetros/src/drivers/vga.rs          |   4 +-
 tetros/src/game/board.rs           |  46 ++++++-----
 tetros/src/game/falling.rs         |  22 ++----
 tetros/src/game/mod.rs             |   3 +
 tetros/src/idt/stackframe.rs       |   8 +-
 tetros/src/idt/table.rs            |  20 ++---
 tetros/src/idt/virtaddr.rs         |  42 +---------
 tetros/src/lib.rs                  |  51 +++++++++---
 tetros/src/os/eflags.rs            |   1 +
 tetros/src/os/panic.rs             |   6 +-
 tetros/src/os/util.rs              |   8 +-
 23 files changed, 303 insertions(+), 317 deletions(-)
 delete mode 100644 bios/protected_mode.asm

diff --git a/Makefile b/Makefile
index 45f5b6c..c10cdec 100644
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@ BUILD=./build
 
 # Default rule
 .PHONY: default
-default: all
+default: $(BUILD)/disk.img
 
 # Remove all build files
 .PHONY: clean
@@ -10,23 +10,18 @@ clean:
 	rm -drf $(BUILD)
 	cd tetros; cargo clean
 
-# Make everything
-# (but don't run qemu)
-.PHONY: all
-all: img
-
 #
-# MARK: boot
+# MARK: disk
 #
 
-# Compile tetros as library
+# Compile tetros as a library
+# (so that we can link it with a custom linker script)
 LIB_SRC = ./tetros/Cargo.toml ./tetros/Cargo.lock $(shell find ./tetros/src -type f)
 $(BUILD)/tetros.lib: $(LIB_SRC)
 	@mkdir -p $(BUILD)
 	cd tetros && \
 		env RUSTFLAGS="-C soft-float" \
 		cargo rustc \
-			--manifest-path="./Cargo.toml" \
 			-Z build-std=core \
 			-Z build-std-features=compiler-builtins-mem \
 			--target "./targets/x86-unknown-none.json" \
@@ -35,7 +30,7 @@ $(BUILD)/tetros.lib: $(LIB_SRC)
 			-- \
 			--emit link="$(CURDIR)/$@"
 
-# Link tetros
+# Link tetros using custom linker script
 BIOS_LD = ./tetros/linkers/x86-unknown-none.ld
 $(BUILD)/tetros.elf: $(BUILD)/tetros.lib $(BIOS_LD)
 	ld \
@@ -49,13 +44,13 @@ $(BUILD)/tetros.elf: $(BUILD)/tetros.lib $(BIOS_LD)
 	objcopy --only-keep-debug "$@" "$@.sym"
 	objcopy --strip-debug "$@"
 
-# Wrap tetros in three-stage BIOS loader
+# Wrap tetros in BIOS loader
 # Parameters:
 # - BIOS_SRC: source directory of bios assembly
 # - STAGE2_SECTOR: the index of the first sector of the stage 2 binary on the disk
 BIOS_SRC = ./bios
 STAGE2_SECTOR = 1
-$(BUILD)/bios.bin: $(wildcard $(BIOS_SRC)/*.asm) $(BUILD)/tetros.elf 
+$(BUILD)/disk.img: $(wildcard $(BIOS_SRC)/*.asm) $(BUILD)/tetros.elf 
 	@mkdir -p "$(BUILD)"
 	nasm \
 		-f bin \
@@ -66,30 +61,12 @@ $(BUILD)/bios.bin: $(wildcard $(BIOS_SRC)/*.asm) $(BUILD)/tetros.elf
 		-i "$(BIOS_SRC)" \
 		"$(BIOS_SRC)/main.asm"
 
-# Extract full mbr (first 512 bytes)
-$(BUILD)/mbr.bin: $(BUILD)/bios.bin
-	@mkdir -p "$(BUILD)"
-	@echo ""
-	dd if="$<" bs=512 count=1 of="$@"
-
-# Extract stage 2 (rest of file)
-$(BUILD)/stage2.bin: $(BUILD)/bios.bin
-	@mkdir -p "$(BUILD)"
-	@echo ""
-	dd if="$<" bs=512 skip=1 of="$@"
-
 #
-# MARK: bundle
+# MARK: qemu
+#
+# Do not use `-enable-kvm` or `-cpu host`,
+# this confuses gdb.
 #
-
-# Make full disk image
-.PHONY: img
-img: $(BUILD)/disk.img
-$(BUILD)/disk.img: $(BUILD)/mbr.bin $(BUILD)/stage2.bin
-	@mkdir -p $(BUILD)
-	@echo ""
-	dd if="$(BUILD)/mbr.bin" of=$@ conv=notrunc bs=512
-	dd if="$(BUILD)/stage2.bin" of=$@ conv=notrunc seek=$(STAGE2_SECTOR) bs=512
 
 .PHONY: qemu
 qemu: $(BUILD)/disk.img
@@ -128,6 +105,3 @@ qemu-gdb: $(BUILD)/disk.img
 		-fda "$<" \
 		-gdb tcp::26000 \
 		-S
-
-# Do not use `-enable-kvm` or `-cpu host`,
-# this confuses gdb.
\ No newline at end of file
diff --git a/README.md b/README.md
index 0b0b32b..35c50ef 100644
--- a/README.md
+++ b/README.md
@@ -1,14 +1,11 @@
 # TetrOS: bare-metal tetris
 
-## TODO:
-- Fix stage 1 loader
-
 ## Features
 - Compiles to a standalone disk image
 - Written from scratch using only Nasm and Rust
 - Custom BIOS bootloader
 - 32-bit x86 OS
-- 🌟 Detailed comments. Read the [makefile](./Makefile), then start in [`./bios/main.asm`](./bios/main.asm).
+- Detailed comments. Read the [makefile](./Makefile), then start in [`./bios/main.asm`](./bios/main.asm).
 
 
 ## Non-Features
diff --git a/bios/main.asm b/bios/main.asm
index 50c62ba..3193bb7 100644
--- a/bios/main.asm
+++ b/bios/main.asm
@@ -1,30 +1,25 @@
 sectalign off
 
-; This program expects two external macros:
+; The following code expects two external macros:
 ; STAGE3, a path to the stage3 binary
 ; STAGE2_SECTOR, the location of stage 2
 ;   on the disk, in 512-byte sectors.
-; On a gpt disk, this is probably 34.
-
-; Stage 1 is MBR code, and should fit in LBA 0
-; (512 bytes). Layout is as follows:
-; (Format is `offset, length: purpose`)
-; 0, 424: x86 boot code
-; 440, 4: Unique disk signature
-; 444, 2: unknown
-; 446, 16*4: Array of four legacy MBR records
-; 510, 2: signature 0x55 0xAA
-; 512 to end  of logical block: reserved
-; 
-; See https://uefi.org/specs/UEFI/2.10/05_GUID_Partition_Table_Format.html
+;
+; Both of these are set in the makefile.
 
+; BIOS loads stage 1 at 0x7C00
 ORG 0x7C00
 SECTION .text
 
-; stage 1 is sector 0, loaded into memory at 0x7C00
+; Stage 1 is MBR code, and should fit in LBA 0
+; (i.e, in the first 512 bytes).
 %include "stage1.asm"
 
 ; Stage 1 is at most 440 bytes
+; This limit is set by the GPT spec.
+; See https://uefi.org/specs/UEFI/2.10/05_GUID_Partition_Table_Format.html
+;
+; This `times` will throw an error if the subtraction is negative.
 times 440-($-$$) db 0
 db 0xee
 
@@ -32,30 +27,38 @@ db 0xee
 times 510-($-$$) db 0
 
 ; MBR signature.
-; This isn't loaded into memory, it's
-; only here for debugging.
+; This tells the BIOS that this disk is bootable.
 db 0x55
 db 0xaa
 
+
+; Include stage 2. This is loaded into memory by stage 1.
+; (stage 1 loads both stage 2 and stage 3)
+;
+; Stage 2 sets up protected mode, sets up the GDT,
+; and initializes a minimal environment for stage 3.
+;
+; On a "real" boot disk, this data will not immediately follow stage 1.
+; It would be stored in a special disk partition.
+;
+; We don't need this kind of complexity here, though, so we store
+; stage 2 right after stage 1. (This is why STAGE2_SECTOR is 1.)
+;
+; This is nice, because the layout of the code on our boot disk
+; matches the layout of the code in memory. THIS IS NOT USUALLY THE CASE.
 stage2:
     %include "stage2.asm"
     align 512, db 0
 stage2.end:
 
-; The maximum size of stage2 is 4 KiB,
-; This fill will throw an error if the subtraction is negative.
-times (4*1024)-($-stage2) db 0
-
-; Pad to 0x9000.
-; This needs to match the value configured in the stage3 linker script
-times (0x9000 - 0x7c00)-($-$$) db 0
+; Pad to 0x3000.
+; This makes sure that state3 is loaded at the address
+; the linker expects. Must match the value in `tetros/linkers/x86-unknown-none.ld`.
+times (0x8000 - 0x7c00)-($-$$) db 0
 
+; Include stage 3, the binary compiled from Rust sources.
 stage3:
     %defstr STAGE3_STR %[STAGE3]
     incbin STAGE3_STR
     align 512, db 0
 .end:
-
-; TODO: why? Of the disk, or of memory?
-; the maximum size of the boot loader portion is 384 KiB
-times (384*1024)-($-$$) db 0
diff --git a/bios/print.asm b/bios/print.asm
index 5e958a3..ac8e111 100644
--- a/bios/print.asm
+++ b/bios/print.asm
@@ -1,22 +1,21 @@
 SECTION .text
 USE16
 
-; provide function for printing in x86 real mode
-
-; print a string and a newline
-; CLOBBER
-;   ax
+; Print a string and a newline
+;
+; Clobbers ax
 print_line:
     mov al, 13
     call print_char
     mov al, 10
     jmp print_char
 
-; print a string
-; IN
+; Print a string
+;
+; Input:
 ;   si: points at zero-terminated String
-; CLOBBER
-;   si, ax
+;
+; Clobbers si, ax
 print:
     pushf
     cld
@@ -30,8 +29,9 @@ print:
     popf
     ret
 
-; print a character
-; IN
+; Print a character
+;
+; Input:
 ;   al: character to print
 print_char:
     pusha
@@ -42,10 +42,11 @@ print_char:
     ret
 
 ; print a number in hex
-; IN
+;
+; Input:
 ;   bx: the number
-; CLOBBER
-;   al, cx
+;
+; Clobbers al, cx
 print_hex:
     mov cx, 4
 .lp:
diff --git a/bios/protected_mode.asm b/bios/protected_mode.asm
deleted file mode 100644
index 1df111f..0000000
--- a/bios/protected_mode.asm
+++ /dev/null
@@ -1,46 +0,0 @@
-SECTION .text
-USE16
-
-protected_mode:
-
-.func: dd 0
-
-.entry:
-    ; disable interrupts
-    cli
-
-    ; load protected mode GDT
-    lgdt [gdtr]
-
-    ; set protected mode bit of cr0
-    mov eax, cr0
-    or eax, 1
-    mov cr0, eax
-
-    ; far jump to load CS with 32 bit segment
-    ; (we are in 32-bit mode, but instruction pipeline
-    ; has 16-bit instructions.
-    jmp gdt.pm32_code:.inner
-
-    ; gdt.pm32_code is a multiple of 8, so it always ends with three zero bits.
-    ; The GDT spec abuses this fact, and uses these last three bits to store other
-    ; data (table type and privilege). In this case, 000 is what we need anyway.
-    ;
-    ; Also note that CS isn't an address in protected mode---it's a GDT descriptor.
- 
-
-
-USE32
-
-.inner:
-    ; load all the other segments with 32 bit data segments
-    mov eax, gdt.pm32_data
-    mov ds, eax
-    mov es, eax
-    mov fs, eax
-    mov gs, eax
-    mov ss, eax
-
-    ; jump to specified function
-    mov eax, [.func]
-    jmp eax
diff --git a/bios/stage1.asm b/bios/stage1.asm
index 5452f8e..7cec0e4 100644
--- a/bios/stage1.asm
+++ b/bios/stage1.asm
@@ -1,37 +1,39 @@
 USE16
 
-stage1: ; dl comes with disk
-    ; initialize segment registers
-    xor ax, ax
+stage1:
+    ; Initialize segment registers
+    xor ax, ax ; Set ax to 0
     mov ds, ax
     mov es, ax
     mov ss, ax
 
-    ; initialize stack
+    ; Initialize stack pointer
+    ; (stack grows up)
     mov sp, 0x7C00
 
-    ; initialize CS
-    ; far jump sets both CS and IP to a known-good state,
-    ; we don't know where the BIOS put us at startup.
-    ; (could be 0x00:0x7C00, could be 0x7C00:0x00.
-    ; Not everybody follows spec.)
-    push ax
+    ; Initialize CS
+    ;
+    ; `retf` sets both CS and IP to a known-good state.
+    ; This is necessary because we don't know where the BIOS put us at startup.
+    ; (could be 0x00:0x7C00, could be 0x7C00:0x00. Not everybody follows spec.)
+    push ax ; `ax` is still 0
     push word .set_cs
     retf
 
 .set_cs:
-    ; save disk number
+    ; Save disk number.
+    ; BIOS sets `dl` to the number of
+    ; the disk we're booting from.
     mov [disk], dl
 
+    ; Print "Stage 1"
     mov si, stage_msg
     call print
     mov al, '1'
     call print_char
     call print_line
 
-
-
-    ; read CHS gemotry
+    ; read CHS gemotry, save into [chs]
     ;  CL (bits 0-5) = maximum sector number
     ;  CL (bits 6-7) = high bits of max cylinder number
     ;  CH = low bits of maximum cylinder number
@@ -51,11 +53,10 @@ stage1: ; dl comes with disk
     and cl, 0x3f
     mov [chs.s], cl
 
-    ; disk address of stage 2
-    ; (start sector)
+    ; First sector of stage 2
     mov eax, STAGE2_SECTOR
 
-    ; where to load stage 2
+    ; Where to load stage 2
     mov bx, stage2
 
     ; length of stage2 + stage3
@@ -63,36 +64,40 @@ stage1: ; dl comes with disk
     mov cx, (stage3.end - stage2) / 512
     mov dx, 0
 
+    ; Consume eax, bx, cx, dx
+    ; and load code from disk.
     call load
 
     jmp stage2.entry
 
-; load some sectors from disk to a buffer in memory
-; buffer has to be below 1MiB
-; IN
+; Load sectors from disk to memory.
+; Cannot load more than 1MiB.
+;
+; Input:
 ;   ax: start sector
 ;   bx: offset of buffer
 ;   cx: number of sectors (512 Bytes each)
 ;   dx: segment of buffer
-; CLOBBER
-;   ax, bx, cx, dx, si
-; TODO rewrite to (eventually) move larger parts at once
-; if that is done increase buffer_size_sectors in startup-common to that (max 0x80000 - startup_end)
+;
+; Clobbers ax, bx, cx, dx, si
 load:
-    ; replaced 127 with 1.
-    ; see https://stackoverflow.com/questions/58564895/problem-with-bios-int-13h-read-sectors-from-drive
-    ; TODO: fix later
+    ; Every "replace 1" comment means that the `1`
+    ; on that line could be bigger.
+    ;
+    ; See https://stackoverflow.com/questions/58564895/problem-with-bios-int-13h-read-sectors-from-drive
+    ; We have to load one sector at a time to avoid the 1K boundary error.
+    ; Would be nice to read more sectors at a time, though, that's faster.
 
-    cmp cx, 1 ;127
+    cmp cx, 1 ; replace 1
     jbe .good_size
 
     pusha
-    mov cx, 1; 127
+    mov cx, 1 ; replace 1
     call load
     popa
-    add eax, 1; 127
-    add dx, 1 * 512 / 16 ; 127
-    sub cx, 1;127
+    add eax, 1 ; replace 1
+    add dx, 1 * 512 / 16 ; replace 1
+    sub cx, 1 ; replace 1
 
     jmp load
 .good_size:
@@ -101,44 +106,37 @@ load:
     mov [DAPACK.count], cx
     mov [DAPACK.seg], dx
 
-; This should be a subroutine, 
-; but we don't call/ret to save a few bytes.
-; (we only use this once)
-;
-;call print_dapack
-;print_dapack:
-    mov bx, [DAPACK.addr + 2]
+    ; Print the data we're reading
+    ; Prints AAAAAAAA#BBBB CCCC:DDDD, where:
+    ; - A..A is the lba we're reading (printed in two parts)
+    ; - BBBB is the number of sectors we're reading
+    ; - CCCC is the index we're writing to
+    ; - DDDD is the buffer we're writing to
+    mov bx, [DAPACK.addr + 2] ; last two bytes
     call print_hex
-
-    mov bx, [DAPACK.addr]
+    mov bx, [DAPACK.addr] ; first two bytes
     call print_hex
-
     mov al, '#'
     call print_char
-
     mov bx, [DAPACK.count]
     call print_hex
-
     mov al, ' '
     call print_char
-
     mov bx, [DAPACK.seg]
     call print_hex
-
     mov al, ':'
     call print_char
-
     mov bx, [DAPACK.buf]
     call print_hex
-
     call print_line
-    ;ret
-    ; End of print_dapack
 
 
+    ; Read from disk.
+    ; int13h, ah=0x42 does not work on some disks.
+    ; use int13h, ah=0x02 in this case.
     cmp byte [chs.s], 0
     jne .chs
-    ;INT 0x13 extended read does not work on CDROM!
+    
     mov dl, [disk]
     mov si, DAPACK
     mov ah, 0x42
@@ -188,6 +186,10 @@ load:
     jc error ; carry flag set on error
     ret
 
+;
+; MARK: errors
+;
+
 error_chs:
     mov ah, 0
 
@@ -200,13 +202,18 @@ error:
 
     mov si, stage1_error_msg
     call print
-    call print_line
+    call print_line 
 
+; halt after printing error details
 .halt:
     cli
     hlt
     jmp .halt
 
+;
+; MARK: data
+;
+
 %include "print.asm"
 
 stage_msg: db "Stage ",0
@@ -215,9 +222,9 @@ stage1_error_msg: db " ERROR",0
 disk: db 0
 
 chs:
-.c: dd 0
-.h: dd 0
-.s: dd 0
+    .c: dd 0
+    .h: dd 0
+    .s: dd 0
 
 DAPACK:
         db 0x10
@@ -225,6 +232,4 @@ DAPACK:
 .count: dw 0 ; int 13 resets this to # of blocks actually read/written
 .buf:   dw 0 ; memory buffer destination address (0:7c00)
 .seg:   dw 0 ; in memory page zero
-.addr:  dq 0 ; put the lba to read in this spot
-
-db 0xff
\ No newline at end of file
+.addr:  dq 0 ; put the lba to read in this spot
\ No newline at end of file
diff --git a/bios/stage2.asm b/bios/stage2.asm
index f964884..2675ab5 100644
--- a/bios/stage2.asm
+++ b/bios/stage2.asm
@@ -1,6 +1,9 @@
 SECTION .text
 USE16
 
+%include "gdt.asm"
+%include "thunk.asm"
+
 stage2.entry:
     mov si, stage_msg
     call print
@@ -13,26 +16,57 @@ stage2.entry:
     or al, 2
     out 0x92, al
 
-    mov dword [protected_mode.func], stage3.entry
-    jmp protected_mode.entry
+protected_mode:
+    ; disable interrupts
+    cli
 
-%include "gdt.asm"
-%include "protected_mode.asm"
-%include "thunk.asm"
+    ; load protected mode GDT
+    lgdt [gdtr]
 
+    ; set protected mode bit of cr0
+    mov eax, cr0
+    or eax, 1
+    mov cr0, eax
+
+    ; far jump to load CS with 32 bit segment
+    ; We need to do this because we are entering 32-bit mode,
+    ; but the instruction pipeline still has 16-bit instructions.
+    ;
+    ; gdt.pm32_code is a multiple of 8, so it always ends with three zero bits.
+    ; The GDT spec abuses this fact, and uses these last three bits to store other
+    ; data (table type and privilege). In this case, 000 is what we need anyway.
+    ;
+    ; Also note that CS isn't an address in protected mode---it's a GDT descriptor.
+    jmp gdt.pm32_code:protected_mode_inner
+
+; We can now use 32-bit instructions!
 USE32
 
-stage3.entry:
-    ; stage3 stack at 448 KiB (512KiB minus 64KiB disk buffer)
+protected_mode_inner:
+    ; load all the other segments with 32 bit data segments
+    mov eax, gdt.pm32_data
+    mov ds, eax
+    mov es, eax
+    mov fs, eax
+    mov gs, eax
+    mov ss, eax
+
+    ; Place stage 3 stack at 448 KiB
+    ; (512KiB minus 64KiB disk buffer)
     mov esp, 0x70000
 
     ; push arguments to `start()`
     mov eax, thunk.int10
     push eax
+
+    ; Call `start()`.
+    ; 0x18 skips ELF headers.
     mov eax, [stage3 + 0x18]
     call eax
-    
+
 .halt:
+    ; Halt if `start()` ever returns (it shouldn't, but just in case)
+    ; Without this, we'll try to execute whatever comes next in memory.
     cli
     hlt
     jmp .halt
diff --git a/bios/thunk.asm b/bios/thunk.asm
index 4a115ed..0ef5e08 100644
--- a/bios/thunk.asm
+++ b/bios/thunk.asm
@@ -1,3 +1,10 @@
+; Thunk allows stage 3 (rust code)
+; to use interrupts that are not 
+; usually available in protected mode.
+;
+; "thunk": a subroutine used to inject
+; a calculation into another subroutine.
+
 SECTION .text
 USE32
 
diff --git a/tetros/Cargo.toml b/tetros/Cargo.toml
index 219cd2b..505856d 100644
--- a/tetros/Cargo.toml
+++ b/tetros/Cargo.toml
@@ -29,7 +29,7 @@ absolute_paths_not_starting_with_crate = "deny"
 explicit_outlives_requirements = "warn"
 unused_crate_dependencies = "warn"
 redundant_lifetimes = "warn"
-missing_docs = "allow"
+missing_docs = "warn"
 
 [lints.clippy]
 needless_return = "allow"
diff --git a/tetros/linkers/x86-unknown-none.ld b/tetros/linkers/x86-unknown-none.ld
index dc7e852..f3eaeb1 100644
--- a/tetros/linkers/x86-unknown-none.ld
+++ b/tetros/linkers/x86-unknown-none.ld
@@ -1,9 +1,10 @@
+/* This is the name of the Rust function we start in */
 ENTRY(start)
 OUTPUT_FORMAT(elf32-i386)
 
 SECTIONS {
-    /* The start address must match bootloader.asm */
-    . = 0x9000;
+    /* The start address must match main.asm */
+    . = 0x8000;
 
     . += SIZEOF_HEADERS;
     . = ALIGN(4096);
diff --git a/tetros/src/drivers/pic.rs b/tetros/src/drivers/pic.rs
index afd74ac..c75b96e 100644
--- a/tetros/src/drivers/pic.rs
+++ b/tetros/src/drivers/pic.rs
@@ -1,18 +1,25 @@
+//! Control routines for the x86
+//! 8259 Programmable Interrupt Controller
+//!
+//! This helps us configure interrupts that receive
+//! keyboard input and timer pulses.
+
 use crate::os::util::outb;
 
 /// IO base address for master PIC
 const PIC_A: u32 = 0x20;
+/// Command address for master PIC
 const PIC_A_COMMAND: u32 = PIC_A;
+/// Data address for master PIC
 const PIC_A_DATA: u32 = PIC_A + 1;
 
 /// IO base address for slave PIC
 const PIC_B: u32 = 0xA0;
+/// Command address for slave PIC
 const PIC_B_COMMAND: u32 = PIC_B;
+/// Data address for slave PIC
 const PIC_B_DATA: u32 = PIC_B + 1;
 
-/// PIC `EOI` command
-const CMD_EOI: u8 = 0x20;
-
 /// A driver for the PIC
 ///
 /// Reference:
@@ -24,6 +31,7 @@ pub struct PICDriver {
 }
 
 impl PICDriver {
+	/// Create a PIC driver with the given offsets
 	pub const fn new(offset_pic_a: u8, offset_pic_b: u8) -> Self {
 		Self {
 			offset_pic_a,
@@ -47,14 +55,20 @@ impl PICDriver {
 		unsafe { outb(PIC_B_DATA, cmd) }
 	}
 
-	pub fn send_eoi(&self, irq: u8) {
-		if irq > 8 {
-			self.send_b_cmd(CMD_EOI);
+	/// Send an EOI for the given IRQ.
+	///
+	/// This needs to be called at the end of each interrupt handler.
+	/// If `both` is true, reset both PICs. This is only necessary
+	/// when we handle interrupts from PIC_B.
+	pub fn send_eoi(&self, both: bool) {
+		if both {
+			self.send_b_cmd(0x20);
 		}
-
-		self.send_a_cmd(CMD_EOI);
+		self.send_a_cmd(0x20);
 	}
 
+	/// Initialize this PIC driver.
+	/// This should be called as early as possible.
 	pub fn init(&mut self) {
 		const ICW1_ICW4: u8 = 0x01; /* Indicates that ICW4 will be present */
 		const ICW1_INIT: u8 = 0x10; /* Initialization - required! */
diff --git a/tetros/src/drivers/serial.rs b/tetros/src/drivers/serial.rs
index cb5bad5..161e877 100644
--- a/tetros/src/drivers/serial.rs
+++ b/tetros/src/drivers/serial.rs
@@ -1,3 +1,10 @@
+//! Serial port driver, for debug.
+//!
+//! This file provides the usual `print`
+//! and `println` macros (which are usually
+//! provided by `std`) that send messages out
+//! of the serial port.
+
 use lazy_static::lazy_static;
 use spin::Mutex;
 use uart_16550::SerialPort;
diff --git a/tetros/src/drivers/vga.rs b/tetros/src/drivers/vga.rs
index 194882e..320a3b1 100644
--- a/tetros/src/drivers/vga.rs
+++ b/tetros/src/drivers/vga.rs
@@ -3,7 +3,6 @@ use rand::seq::IndexedRandom;
 
 use crate::RNG;
 
-#[repr(u8)]
 #[derive(Debug, Clone, Copy, PartialEq, Eq)]
 pub enum VgaColor {
 	Black,
@@ -73,6 +72,9 @@ impl Vga13h {
 	pub const HEIGHT: usize = 200;
 	pub const ADDR: usize = 0xA0000;
 
+	/// Initialize a new VGA driver.
+	///
+	/// Only one of these should exist.
 	pub const unsafe fn new() -> Self {
 		Self {
 			fb_a: [0; Vga13h::WIDTH * Vga13h::HEIGHT],
diff --git a/tetros/src/game/board.rs b/tetros/src/game/board.rs
index 885a138..daa96d6 100644
--- a/tetros/src/game/board.rs
+++ b/tetros/src/game/board.rs
@@ -2,21 +2,26 @@ use crate::drivers::vga::{Vga13h, VgaColor};
 
 use super::FallingTetromino;
 
-#[repr(u8)]
+/// The state of a cell in the game board
 #[derive(Debug, Clone, Copy, PartialEq, Eq)]
 pub enum TetrisCell {
 	Empty,
 	Filled { color: VgaColor },
 }
 
+/// The tetris board
 pub struct TetrisBoard {
 	board: [TetrisCell; TetrisBoard::BOARD_WIDTH * TetrisBoard::BOARD_HEIGHT],
 }
 
 impl TetrisBoard {
+	/// The width of this board, in cells
 	const BOARD_WIDTH: usize = 10;
+
+	/// The height of this board, in cells
 	const BOARD_HEIGHT: usize = 20;
 
+	/// The side length of a (square) cell, in pixels
 	const CELL_SIZE: usize = 9;
 
 	pub const fn new() -> Self {
@@ -25,6 +30,8 @@ impl TetrisBoard {
 		}
 	}
 
+	/// Find and remove all filled rows,
+	/// shifting upper rows down.
 	pub fn collapse(&mut self) {
 		let mut y = Self::BOARD_HEIGHT - 1;
 		'outer: loop {
@@ -58,6 +65,11 @@ impl TetrisBoard {
 		}
 	}
 
+	/// Place the given tetromino on the board,
+	/// filling the cells it occupies.
+	///
+	/// If the tetromino cells that overlap
+	/// non-empty board cells are ignored.
 	pub fn place_tetromino(&mut self, tetromino: FallingTetromino) {
 		for (x, y) in tetromino.tiles() {
 			let cell = self.get_cell_mut(x, y);
@@ -82,32 +94,26 @@ impl TetrisBoard {
 		return true;
 	}
 
+	/// Get the value of the cell at the given position.
+	/// Returns [`None`] if (x, y) exceeds the board's bounds.
 	pub fn get_cell(&self, x: usize, y: usize) -> Option<&TetrisCell> {
-		if y >= TetrisBoard::BOARD_HEIGHT {
-			return None;
-		}
-
-		if x >= TetrisBoard::BOARD_WIDTH {
-			return None;
-		}
-
-		return Some(&self.board[y * TetrisBoard::BOARD_WIDTH + x]);
+		return self.board.get(y * TetrisBoard::BOARD_WIDTH + x);
 	}
 
+	/// Get a mutable reference to the cell at the given position.
+	/// Returns [`None`] if (x, y) exceeds the board's bounds.
 	pub fn get_cell_mut(&mut self, x: usize, y: usize) -> Option<&mut TetrisCell> {
-		if y >= TetrisBoard::BOARD_HEIGHT {
-			return None;
-		}
-
-		if x >= TetrisBoard::BOARD_WIDTH {
-			return None;
-		}
-
-		return Some(&mut self.board[y * TetrisBoard::BOARD_WIDTH + x]);
+		return self.board.get_mut(y * TetrisBoard::BOARD_WIDTH + x);
 	}
 }
 
+//
+// MARK: draw routines
+//
+
 impl TetrisBoard {
+	/// Draw a cell of the given color on `fb`.
+	/// (x, y) is the pixel position of the cell (NOT board coordinates).
 	fn draw_cell(&self, fb: &mut [u8], color: VgaColor, x: usize, y: usize) {
 		let color = color.as_u8();
 		for yo in 0..TetrisBoard::CELL_SIZE {
@@ -117,6 +123,7 @@ impl TetrisBoard {
 		}
 	}
 
+	/// Draw the tetris board's frame
 	fn draw_frame(&self, fb: &mut [u8], x: usize, y: usize) {
 		let color = VgaColor::Gray.as_u8();
 		for yo in 0..TetrisBoard::CELL_SIZE {
@@ -126,6 +133,7 @@ impl TetrisBoard {
 		}
 	}
 
+	/// Draw this tetris board using the given VGA driver.
 	pub fn draw(&self, vga: &mut Vga13h, falling: Option<&FallingTetromino>) {
 		let fb = vga.get_fb();
 
diff --git a/tetros/src/game/falling.rs b/tetros/src/game/falling.rs
index b0e664b..d603d79 100644
--- a/tetros/src/game/falling.rs
+++ b/tetros/src/game/falling.rs
@@ -53,6 +53,7 @@ pub enum Direction {
 }
 
 impl Direction {
+	/// Rotate this direction clockwise
 	pub fn rot_cw(self) -> Self {
 		match self {
 			Self::North => Self::East,
@@ -61,17 +62,6 @@ impl Direction {
 			Self::West => Self::North,
 		}
 	}
-
-	/*
-	pub fn rot_ccw(self) -> Self {
-		match self {
-			Self::North => Self::West,
-			Self::West => Self::South,
-			Self::South => Self::East,
-			Self::East => Self::North,
-		}
-	}
-	*/
 }
 
 #[derive(Debug, Clone)]
@@ -85,6 +75,7 @@ pub struct FallingTetromino {
 }
 
 impl FallingTetromino {
+	/// Make a new falling tetromino
 	pub fn new(tetromino: Tetromino, color: VgaColor, center_x: usize, center_y: usize) -> Self {
 		Self {
 			tetromino,
@@ -95,6 +86,7 @@ impl FallingTetromino {
 		}
 	}
 
+	/// Generate a random tetromino at the given position
 	pub fn random(center_x: usize, center_y: usize) -> Self {
 		Self::new(
 			Tetromino::choose_rand(),
@@ -104,6 +96,7 @@ impl FallingTetromino {
 		)
 	}
 
+	// Move this tetromino
 	pub fn translate(&mut self, x: i16, y: i16) {
 		if x > 0 {
 			let x = usize::try_from(x).unwrap();
@@ -122,16 +115,11 @@ impl FallingTetromino {
 		}
 	}
 
+	/// Rotate this tetromino clockwise
 	pub fn rotate_cw(&mut self) {
 		self.direction = self.direction.rot_cw()
 	}
 
-	/*
-	pub fn rotate_ccw(&mut self) {
-		self.direction = self.direction.rot_ccw()
-	}
-	*/
-
 	/// Returns the positions of this falling tetromino's tiles.
 	pub fn tiles(&self) -> [(usize, usize); 4] {
 		match (&self.tetromino, self.direction) {
diff --git a/tetros/src/game/mod.rs b/tetros/src/game/mod.rs
index e92dd16..bb13eb6 100644
--- a/tetros/src/game/mod.rs
+++ b/tetros/src/game/mod.rs
@@ -1,3 +1,6 @@
+//! This crate contains all tetris game logic.
+//! No low-level magic here.
+
 mod board;
 pub use board::*;
 
diff --git a/tetros/src/idt/stackframe.rs b/tetros/src/idt/stackframe.rs
index 0360f0c..966ef86 100644
--- a/tetros/src/idt/stackframe.rs
+++ b/tetros/src/idt/stackframe.rs
@@ -1,16 +1,14 @@
 use core::{fmt, ops::Deref};
 
-use crate::os::EFlags;
-
 use super::VirtAddr;
+use crate::os::EFlags;
 
 /// Wrapper type for the interrupt stack frame pushed by the CPU.
 ///
 /// This type derefs to an [`InterruptStackFrameValue`], which allows reading the actual values.
 ///
-/// This wrapper type ensures that no accidental modification of the interrupt stack frame
-/// occurs, which can cause undefined behavior (see the [`as_mut`](InterruptStackFrame::as_mut)
-/// method for more information).
+/// This wrapper ensures that the stack frame cannot be modified.
+/// This prevents undefined behavior.
 #[repr(transparent)]
 pub struct InterruptStackFrame(InterruptStackFrameValue);
 
diff --git a/tetros/src/idt/table.rs b/tetros/src/idt/table.rs
index 7f520e8..5fd50d9 100644
--- a/tetros/src/idt/table.rs
+++ b/tetros/src/idt/table.rs
@@ -5,17 +5,6 @@ use super::{
 	HandlerFuncWithErrCode, PageFaultHandlerFunc,
 };
 
-// TODO: comments
-#[repr(C, packed(2))]
-struct Idtr {
-	size: u16,
-	offset: u32,
-}
-
-//
-// MARK: idt
-//
-
 // spell:off
 #[derive(Clone, Debug)]
 #[repr(C)]
@@ -467,12 +456,17 @@ impl InterruptDescriptorTable {
 	/// # Safety
 	///
 	/// As long as it is the active IDT, you must ensure that:
-	///
 	/// - `self` is never destroyed.
 	/// - `self` always stays at the same memory location.
-	///   It is recommended to wrap it in a `Box`.
 	#[inline]
 	pub unsafe fn load_unsafe(&self) {
+		/// The data we push to the IDTR register
+		#[repr(C, packed(2))]
+		struct Idtr {
+			size: u16,
+			offset: u32,
+		}
+
 		let idtr = {
 			Idtr {
 				size: (size_of::<InterruptDescriptorTable>() - 1) as u16,
diff --git a/tetros/src/idt/virtaddr.rs b/tetros/src/idt/virtaddr.rs
index 1cc4cd7..43b90dc 100644
--- a/tetros/src/idt/virtaddr.rs
+++ b/tetros/src/idt/virtaddr.rs
@@ -1,7 +1,4 @@
-use core::{
-	fmt::{self},
-	ops::{Add, AddAssign, Sub, SubAssign},
-};
+use core::fmt::{self};
 
 /// A canonical 32-bit virtual memory address.
 #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
@@ -57,40 +54,3 @@ impl fmt::UpperHex for VirtAddr {
 		fmt::UpperHex::fmt(&self.0, f)
 	}
 }
-
-impl fmt::Pointer for VirtAddr {
-	#[inline]
-	fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-		fmt::Pointer::fmt(&(self.0 as *const ()), f)
-	}
-}
-
-impl Add<u32> for VirtAddr {
-	type Output = Self;
-	#[inline]
-	fn add(self, rhs: u32) -> Self::Output {
-		VirtAddr(self.0.checked_add(rhs).unwrap())
-	}
-}
-
-impl AddAssign<u32> for VirtAddr {
-	#[inline]
-	fn add_assign(&mut self, rhs: u32) {
-		*self = *self + rhs;
-	}
-}
-
-impl Sub<u32> for VirtAddr {
-	type Output = Self;
-	#[inline]
-	fn sub(self, rhs: u32) -> Self::Output {
-		VirtAddr(self.0.checked_sub(rhs).unwrap())
-	}
-}
-
-impl SubAssign<u32> for VirtAddr {
-	#[inline]
-	fn sub_assign(&mut self, rhs: u32) {
-		*self = *self - rhs;
-	}
-}
diff --git a/tetros/src/lib.rs b/tetros/src/lib.rs
index d8c66b6..919e6ef 100644
--- a/tetros/src/lib.rs
+++ b/tetros/src/lib.rs
@@ -1,3 +1,5 @@
+//! The main code of tetris
+
 #![no_std]
 #![feature(int_roundings)]
 #![feature(lang_items)]
@@ -23,12 +25,18 @@ mod os;
 #[macro_use]
 mod drivers;
 
-const PIC_OFFSET: u8 = 32;
-
 //
 // MARK: globals
 //
+// This code has no parallelism, so we don't _really_
+// need locks. The Mutexes here satisfy Rust's
+// "no mutable global state" rule.
+//
+// They also help prevent bugs, since we get deadlocks
+// instead of hard-to-debug surprising behavior.
+//
 
+const PIC_OFFSET: u8 = 32;
 static VGA: Mutex<Vga13h> = Mutex::new(unsafe { Vga13h::new() });
 static PIC: Mutex<PICDriver> = Mutex::new(PICDriver::new(PIC_OFFSET, PIC_OFFSET + 8));
 static TICK_COUNTER: Mutex<u32> = Mutex::new(0);
@@ -36,6 +44,8 @@ static BOARD: Mutex<TetrisBoard> = Mutex::new(TetrisBoard::new());
 static FALLING: Mutex<Option<FallingTetromino>> = Mutex::new(None);
 static LAST_INPUT: Mutex<Option<InputKey>> = Mutex::new(None);
 
+// These values can't be initialized statically,
+// so we cheat with `lazy_static`
 lazy_static! {
 	static ref RNG: Mutex<SmallRng> = Mutex::new(SmallRng::seed_from_u64(1337));
 	static ref IDT: InterruptDescriptorTable = {
@@ -50,10 +60,26 @@ lazy_static! {
 	};
 }
 
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+enum InputKey {
+	Left,
+	Right,
+	Up,
+	Down,
+}
+
 //
 // MARK: interrupts
 //
+// These functions are called when we receive interrupts.
+// This can occur between ANY two instructions---which is
+// why we use `without_interrupts` when acquiring locks.
+//
+// Notice how we do as little work as possible in our
+// interrupt handlers. All our business logic goes into
+// the main loop.
 
+#[expect(missing_docs)]
 #[derive(Debug, Clone, Copy)]
 #[repr(u8)]
 pub enum InterruptIndex {
@@ -72,22 +98,20 @@ impl InterruptIndex {
 }
 
 extern "x86-interrupt" fn divide_handler(stack_frame: InterruptStackFrame) {
+	// Simple interrupt handler, as an example.
+	// This can be triggered manually using `asm!("int 0")`,
+	// even if interrupts are disabled.
 	println!("DIVIDE ERROR {:?}", stack_frame);
 }
 
-#[derive(Debug, Clone, Copy, PartialEq, Eq)]
-enum InputKey {
-	Left,
-	Right,
-	Up,
-	Down,
-}
-
 extern "x86-interrupt" fn keyboard_handler(_stack_frame: InterruptStackFrame) {
 	{
 		// Re-seed our rng using user input.
 		// This is a simple hack that makes our
 		// "random" tile selector less deterministic.
+		//
+		// Getting random seeds from hardware is
+		// more trouble than its worth.
 		let mut rng = RNG.lock();
 		let past: u64 = rng.random();
 		let tcr = u64::from(*TICK_COUNTER.lock());
@@ -125,13 +149,13 @@ extern "x86-interrupt" fn keyboard_handler(_stack_frame: InterruptStackFrame) {
 		*LAST_INPUT.lock() = key;
 	}
 
-	PIC.lock().send_eoi(InterruptIndex::Keyboard.as_u8());
+	PIC.lock().send_eoi(false);
 }
 
 extern "x86-interrupt" fn timer_handler(_stack_frame: InterruptStackFrame) {
 	let mut t = TICK_COUNTER.lock();
 	*t = (*t).wrapping_add(1);
-	PIC.lock().send_eoi(InterruptIndex::Timer.as_u8());
+	PIC.lock().send_eoi(false);
 }
 
 extern "x86-interrupt" fn double_fault_handler(
@@ -145,6 +169,7 @@ extern "x86-interrupt" fn double_fault_handler(
 // MARK: main
 //
 
+#[expect(missing_docs)]
 #[no_mangle]
 pub unsafe extern "C" fn start(thunk10: extern "C" fn()) -> ! {
 	println!("Entered Rust, serial ready.");
@@ -190,6 +215,7 @@ pub unsafe extern "C" fn start(thunk10: extern "C" fn()) -> ! {
 		}
 		last_t = t;
 
+		// MARK: input
 		// Handle user input
 		without_interrupts(|| {
 			if let Some(fall) = &mut *FALLING.lock() {
@@ -236,6 +262,7 @@ pub unsafe extern "C" fn start(thunk10: extern "C" fn()) -> ! {
 			}
 		});
 
+		// MARK: update board
 		// Update board
 		without_interrupts(|| {
 			let mut v = VGA.lock();
diff --git a/tetros/src/os/eflags.rs b/tetros/src/os/eflags.rs
index 2395a0b..d14fabf 100644
--- a/tetros/src/os/eflags.rs
+++ b/tetros/src/os/eflags.rs
@@ -80,6 +80,7 @@ bitflags! {
 }
 
 impl EFlags {
+	/// Read the EFLAGS register
 	#[inline]
 	pub fn read() -> EFlags {
 		EFlags::from_bits_truncate(EFlags::read_raw())
diff --git a/tetros/src/os/panic.rs b/tetros/src/os/panic.rs
index 1a0a2de..2dcf8ca 100644
--- a/tetros/src/os/panic.rs
+++ b/tetros/src/os/panic.rs
@@ -1,8 +1,12 @@
-//! Intrinsics for panic handling
+//! Rust intrinsics for panic handling.
+//!
+//! These are usually provided by `std`,
+//! but we don't have that luxury!
 
 use core::arch::asm;
 use core::panic::PanicInfo;
 
+// Use serial println
 use crate::println;
 
 #[lang = "eh_personality"]
diff --git a/tetros/src/os/util.rs b/tetros/src/os/util.rs
index 31a4cff..b82cec8 100644
--- a/tetros/src/os/util.rs
+++ b/tetros/src/os/util.rs
@@ -18,11 +18,13 @@ pub fn sti() {
 	}
 }
 
-/// Run a closure with disabled interrupts.
-///
 /// Run the given closure, disabling interrupts before running it (if they aren't already disabled).
 /// Afterwards, interrupts are enabling again if they were enabled before.
 ///
+/// This helps us prevent deadlocks, which can occur if
+/// an interrupt handler tries to acquire a lock that was
+/// locked at the time of the interrupt.
+///
 /// If you have other `enable` and `disable` calls _within_ the closure, things may not work as expected.
 #[inline]
 pub fn without_interrupts<F, R>(f: F) -> R
@@ -46,6 +48,7 @@ where
 	ret
 }
 
+/// Wraps the `in` instruction
 pub unsafe fn inb(port: u32) -> u8 {
 	let mut out;
 
@@ -58,6 +61,7 @@ pub unsafe fn inb(port: u32) -> u8 {
 	return out;
 }
 
+/// Wraps the `out` instruction
 pub unsafe fn outb(port: u32, value: u8) {
 	asm!(
 		"out dx, al",