Added autocorrect feature

master
Mark 2022-11-20 20:45:36 -08:00
parent afa07ce83c
commit f4a7960eed
Signed by: Mark
GPG Key ID: AD62BB059C2AAEE4
7 changed files with 1044 additions and 2 deletions


@@ -0,0 +1,184 @@
#include "autocorrect.h"
#include <string.h>
#include "autocorrect_data.h"
#if AUTOCORRECTION_MIN_LENGTH < 4
// Odd output or hard locks on the board have been observed when the min typo
// length is 3 or lower (https://github.com/getreuer/qmk-keymap/issues/2).
// Additionally, autocorrection entries for short typos are more likely to
// trigger falsely, so it is suggested that typos be at least 5 characters.
#error "Min typo length is less than 4. Autocorrection may behave poorly."
#endif
bool process_autocorrection(uint16_t keycode, keyrecord_t* record) {
static uint8_t typo_buffer[AUTOCORRECTION_MAX_LENGTH] = {0};
static uint8_t typo_buffer_size = 0;
// Ignore key release; we only process key presses.
if (!record->event.pressed) {
return true;
}
#ifndef NO_ACTION_ONESHOT
const uint8_t mods = get_mods() | get_oneshot_mods();
#else
const uint8_t mods = get_mods();
#endif
// Disable autocorrection while a mod other than shift is active.
if ((mods & ~MOD_MASK_SHIFT) != 0) {
typo_buffer_size = 0;
return true;
}
// The following switch cases address various kinds of keycodes. This logic is
// split over two switches rather than merged into one. The first switch may
// extract a basic keycode which is then further handled by the second switch,
// e.g. a layer-tap key with Caps Lock `LT(layer, KC_CAPS)`.
switch (keycode) {
#ifndef NO_ACTION_TAPPING
case QK_MOD_TAP ... QK_MOD_TAP_MAX: // Tap-hold keys.
#ifndef NO_ACTION_LAYER
case QK_LAYER_TAP ... QK_LAYER_TAP_MAX:
#endif
if (record->tap.count == 0) {
return true;
}
// Otherwise when tapped, get the basic keycode.
// Fallthrough intended.
#endif
// Handle shifted keys, e.g. symbols like KC_EXLM = S(KC_1).
case QK_LSFT ... QK_LSFT + 255:
case QK_RSFT ... QK_RSFT + 255:
keycode &= 0xff; // Get the basic keycode.
break;
// NOTE: Space Cadet keys expose no info to check whether they are being
// tapped vs. held. This makes autocorrection ambiguous, e.g. KC_LCPO
// might be '(', which we would treat as a word break, or it might be
// shift, which we would treat as having no effect. To behave cautiously,
// we allow Space Cadet keycodes to fall to the logic below and clear
// autocorrection state.
}
switch (keycode) {
// Ignore shifts, Caps Lock, one-shot mods, and layer switch keys.
case KC_NO:
case KC_LSFT:
case KC_RSFT:
case KC_CAPS:
case QK_ONE_SHOT_MOD ... QK_ONE_SHOT_MOD_MAX:
case QK_TO ... QK_TO_MAX:
case QK_MOMENTARY ... QK_MOMENTARY_MAX:
case QK_DEF_LAYER ... QK_DEF_LAYER_MAX:
case QK_TOGGLE_LAYER ... QK_TOGGLE_LAYER_MAX:
case QK_ONE_SHOT_LAYER ... QK_ONE_SHOT_LAYER_MAX:
case QK_LAYER_TAP_TOGGLE ... QK_LAYER_TAP_TOGGLE_MAX:
case QK_LAYER_MOD ... QK_LAYER_MOD_MAX:
return true; // Ignore these keys.
}
if (keycode == KC_QUOT) {
// Treat " (shifted ') as a word boundary.
if ((mods & MOD_MASK_SHIFT) != 0) {
keycode = KC_SPC;
}
} else if (!(KC_A <= keycode && keycode <= KC_Z)) {
if (keycode == KC_BSPC) {
// Remove last character from the buffer.
if (typo_buffer_size > 0) {
--typo_buffer_size;
}
return true;
} else if (KC_1 <= keycode && keycode <= KC_SLSH && keycode != KC_ESC) {
// Set a word boundary if space, period, digit, etc. is pressed.
// Behave more conservatively for the enter key. Reset, so that enter
// can't be used on a word ending.
if (keycode == KC_ENT) {
typo_buffer_size = 0;
}
keycode = KC_SPC;
} else {
// Clear state if some other non-alpha key is pressed.
typo_buffer_size = 0;
return true;
}
}
// If the buffer is full, rotate it to discard the oldest character.
if (typo_buffer_size >= AUTOCORRECTION_MAX_LENGTH) {
memmove(typo_buffer, typo_buffer + 1, AUTOCORRECTION_MAX_LENGTH - 1);
typo_buffer_size = AUTOCORRECTION_MAX_LENGTH - 1;
}
// Append `keycode` to the buffer.
// NOTE: `keycode` must be a basic keycode (0-255) by this point.
typo_buffer[typo_buffer_size++] = (uint8_t)keycode;
// Early return if not many characters have been buffered so far.
if (typo_buffer_size < AUTOCORRECTION_MIN_LENGTH) {
return true;
}
// Check whether the buffer ends in a typo. This is done using a trie
// stored in `autocorrection_data`.
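// Trie node encoding (as produced by serialize_trie() in
// make_autocorrection_data.py):
//  * Leaf: bit 7 of the first byte is set; its low 6 bits give the number of
//    backspaces to send, and a null-terminated correction string follows.
//  * Branch (multiple children): bit 6 of the first byte is set; the node is a
//    list of {keycode, 16-bit little-endian link to child} entries ending in 0.
//  * Chain (single child): a run of plain keycode bytes ending in 0, with the
//    child node stored immediately after the terminating 0.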
uint16_t state = 0;
uint8_t code = pgm_read_byte(autocorrection_data + state);
for (int i = typo_buffer_size - 1; i >= 0; --i) {
const uint8_t key_i = typo_buffer[i];
if (code & 64) { // Check for match in node with multiple children.
code &= 63;
for (; code != key_i; code = pgm_read_byte(autocorrection_data + (state += 3))) {
if (!code) {
return true;
}
}
// Follow link to child node.
state = (uint16_t)(
(uint_fast16_t) pgm_read_byte(
autocorrection_data + state + 1
) |
(uint_fast16_t) pgm_read_byte(
autocorrection_data + state + 2
) << 8
);
// Otherwise check for match in node with a single child.
} else if (code != key_i) {
return true;
} else if (!(code = pgm_read_byte(autocorrection_data + (++state)))) {
++state;
}
// Stop if `state` becomes an invalid index. This should not normally
// happen; it is a safeguard in case of a bug or data corruption.
if (state >= sizeof(autocorrection_data)) {
return true;
}
// Read first byte of the next node.
code = pgm_read_byte(autocorrection_data + state);
if (code & 128) { // A typo was found! Apply autocorrection.
const int backspaces = code & 63;
for (int i = 0; i < backspaces; ++i) {
tap_code(KC_BSPC);
}
send_string_P((char const*)(autocorrection_data + state + 1));
if (keycode == KC_SPC) {
typo_buffer[0] = KC_SPC;
typo_buffer_size = 1;
return true;
} else {
typo_buffer_size = 0;
return false;
}
}
}
return true;
}


@@ -0,0 +1,111 @@
// Copyright 2021-2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
/**
* @file autocorrect.h
* @brief Autocorrection on your keyboard.
*
* Overview
* --------
*
* Some words are more prone to typos than others. This userspace QMK library
* implements rudimentary autocorrection, automatically detecting and fixing
* some misspellings.
*
* Features:
*
* It runs on your keyboard, so it is always active no matter what software you use.
* * Low resource cost.
* * It is case insensitive.
* It works within words, which is useful in programming for catching typos
* within longer identifiers.
*
* Limitations:
*
* It is limited to alphabet characters a-z, apostrophes ', and word breaks.
* I'm sorry this probably isn't useful for languages besides English.
* * It does not follow mouse or hotkey driven cursor movement.
*
* Changing the autocorrection dictionary
* --------------------------------------
*
* The file autocorrect_data.h encodes the typos to correct. While you could
* simply use the version of this file provided here as a practical
* configuration, you can make your own to personalize the autocorrection to
* your most troublesome typos:
*
* Step 1: First, create an autocorrection dictionary autocorrection_dict.txt,
* in a form like
*
* :thier -> their
* dosen't -> doesn't
* fitler -> filter
* ouput -> output
* widht -> width
*
* For a practical 71-entry example, see autocorrection_dict.txt, and for a
* larger 400-entry example, see autocorrection_dict_extra.txt.
*
* The syntax is `typo -> correction`. Typos and corrections are case
* insensitive, and any whitespace before or after the typo and correction is
* ignored. The typo must be only the characters a-z, ', or the special
* character : representing a word break. The correction may have just about any
* printable ASCII characters.
*
* Step 2: Use the make_autocorrection_data.py Python script to process the
* dictionary. Put autocorrection_dict.txt in the same directory as the Python
* script and run
*
* $ python3 make_autocorrection_data.py
* Processed 71 autocorrection entries to table with 1120 bytes.
*
* The script arranges the entries in autocorrection_dict.txt into a trie and
* generates autocorrect_data.h with the serialized trie embedded as an
* array. The .h file will be written in the same directory.
*
* Step 3: Finally, recompile and flash your keymap.
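*
* For example, with the QMK CLI (the keyboard and keymap names below are
* placeholders; substitute your own):
*
* $ qmk flash -kb <your_keyboard> -km <your_keymap>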
*
* For full documentation, see
* <https://getreuer.info/posts/keyboards/autocorrection>
*
* @author Pascal Getreuer
*/
#pragma once
#include "quantum.h"
#ifdef __cplusplus
extern "C" {
#endif
/**
* Handler function for autocorrection.
*
* Call this function in keymap.c from `process_record_user()` like
*
* #include "features/autocorrect/autocorrect.h"
*
* bool process_record_user(uint16_t keycode, keyrecord_t* record) {
* if (!process_autocorrection(keycode, record)) { return false; }
* // Your macros...
*
* return true;
* }
*/
bool process_autocorrection(uint16_t keycode, keyrecord_t* record);
#ifdef __cplusplus
}
#endif


@@ -0,0 +1,238 @@
// Generated code.
// Autocorrection dictionary (132 entries):
// :agian -> again
// :agred -> agreed
// :ajust -> adjust
// :alot: -> a lot
// :andteh -> and the
// :andthe -> and the
// :anual -> annual
// :asign -> assign
// :asthe -> as the
// :atthe -> at the
// :casue -> cause
// :eveyr -> every
// :foudn -> found
// :gaurd -> guard
// :goign -> going
// :gonig -> going
// :graet -> great
// :haev -> have
// :htere -> there
// :htikn -> think
// :htink -> think
// :hwihc -> which
// :hwile -> while
// :idaes -> ideas
// :jstu: -> just
// :jsut: -> just
// :knwo -> know
// :konw -> know
// :kwno -> know
// :moeny -> money
// :olther -> other
// :otehr -> other
// :owudl -> would
// :sicne -> since
// :the:the: -> the
// :theri -> their
// :thier -> their
// :thsoe -> those
// :tothe -> to the
// :yaers -> years
// :yeasr -> years
// abbout -> about
// abotu -> about
// abouta -> about a
// aboutit -> about it
// aboutthe -> about the
// almsot -> almost
// alomst -> almost
// alwasy -> always
// alwyas -> always
// anohter -> another
// aroud -> around
// arround -> around
// arund -> around
// baceause -> because
// baout -> about
// beacuse -> because
// becasue -> because
// beccause -> because
// becouse -> because
// becuase -> because
// bedore -> before
// beeing -> being
// befoer -> before
// beteen -> between
// beween -> between
// bewteen -> between
// chekc -> check
// childen -> children
// chnage -> change
// claer -> clear
// comapny -> company
// contian -> contain
// coudl -> could
// didnot -> did not
// elasped -> elapsed
// firend -> friend
// firts -> first
// fitler -> filter
// follwo -> follow
// freind -> friend
// frequecy -> frequency
// fromthe -> from the
// heigth -> height
// iamge -> image
// inital -> initial
// intput -> input
// laguage -> language
// lenght -> length
// levle -> level
// libary -> library
// littel -> little
// mysefl -> myself
// ouptut -> output
// ouput -> output
// peaple -> people
// peolpe -> people
// peopel -> people
// poeople -> people
// poeple -> people
// probaly -> probably
// probelm -> problem
// raelly -> really
// realy -> really
// recrod -> record
// relaly -> really
// reponse -> response
// reprot -> report
// shoudl -> should
// singel -> single
// stregth -> strength
// strengh -> strength
// theese -> these
// therfore -> therefore
// thign -> thing
// thigsn -> things
// thikn -> think
// thiunk -> think
// thnigs -> things
// throught -> thought
// tihkn -> think
// tkaes -> takes
// todya -> today
// toghether -> together
// unkown -> unknown
// unqiue -> unique
// whcih -> which
// whihc -> which
// whlch -> which
// widht -> width
// wihch -> which
// woudl -> would
#define AUTOCORRECTION_MIN_LENGTH 5 // "abotu"
#define AUTOCORRECTION_MAX_LENGTH 9 // "toghether"
static const uint8_t autocorrection_data[1881] PROGMEM = {108, 61, 0, 4, 117, 0,
6, 141, 0, 7, 181, 0, 8, 52, 1, 10, 237, 2, 11, 8, 3, 12, 105, 3, 14, 115, 3,
15, 143, 3, 16, 12, 4, 17, 24, 4, 18, 234, 4, 21, 16, 5, 22, 152, 5, 23, 233,
5, 24, 190, 6, 25, 199, 6, 26, 208, 6, 28, 218, 6, 0, 72, 71, 0, 23, 81, 0,
24, 107, 0, 0, 11, 23, 44, 8, 11, 23, 44, 0, 132, 0, 82, 88, 0, 24, 98, 0, 0,
15, 4, 44, 0, 131, 32, 108, 111, 116, 0, 22, 13, 44, 0, 131, 117, 115, 116, 0,
23, 22, 13, 44, 0, 131, 117, 115, 116, 0, 87, 124, 0, 28, 133, 0, 0, 24, 18,
5, 4, 0, 128, 32, 97, 0, 7, 18, 23, 0, 129, 97, 121, 0, 75, 148, 0, 14, 173,
0, 0, 12, 0, 75, 157, 0, 26, 163, 0, 0, 26, 0, 129, 99, 104, 0, 11, 44, 0,
132, 119, 104, 105, 99, 104, 0, 8, 11, 6, 0, 129, 99, 107, 0, 72, 197, 0, 17,
223, 0, 18, 23, 1, 21, 33, 1, 24, 44, 1, 0, 83, 204, 0, 21, 215, 0, 0, 22, 4,
15, 8, 0, 131, 112, 115, 101, 100, 0, 10, 4, 44, 0, 128, 101, 100, 0, 72, 233,
0, 12, 244, 0, 24, 254, 0, 0, 21, 12, 9, 0, 132, 114, 105, 101, 110, 100, 0,
8, 21, 9, 0, 131, 105, 101, 110, 100, 0, 82, 5, 1, 21, 15, 1, 0, 21, 21, 4, 0,
132, 111, 117, 110, 100, 0, 4, 0, 130, 111, 117, 110, 100, 0, 21, 6, 8, 21, 0,
130, 111, 114, 100, 0, 24, 4, 10, 44, 0, 131, 117, 97, 114, 100, 0, 18, 21, 4,
0, 128, 110, 100, 0, 74, 80, 1, 11, 124, 1, 15, 205, 1, 17, 17, 2, 18, 27, 2,
19, 37, 2, 21, 47, 2, 22, 93, 2, 24, 198, 2, 0, 68, 87, 1, 16, 115, 1, 0, 81,
94, 1, 24, 103, 1, 0, 11, 6, 0, 131, 97, 110, 103, 101, 0, 10, 4, 15, 0, 132,
110, 103, 117, 97, 103, 101, 0, 4, 12, 0, 131, 109, 97, 103, 101, 0, 23, 0,
71, 142, 1, 16, 152, 1, 18, 162, 1, 22, 171, 1, 23, 180, 1, 0, 17, 4, 44, 0,
130, 32, 116, 104, 101, 0, 18, 21, 9, 0, 130, 32, 116, 104, 101, 0, 23, 44, 0,
130, 32, 116, 104, 101, 0, 4, 44, 0, 130, 32, 116, 104, 101, 0, 68, 187, 1,
24, 195, 1, 0, 44, 0, 130, 32, 116, 104, 101, 0, 18, 5, 4, 0, 130, 32, 116,
104, 101, 0, 76, 215, 1, 19, 226, 1, 25, 10, 2, 0, 26, 11, 44, 0, 132, 119,
104, 105, 108, 101, 0, 68, 236, 1, 8, 245, 1, 18, 255, 1, 0, 8, 19, 0, 131,
111, 112, 108, 101, 0, 18, 19, 0, 132, 101, 111, 112, 108, 101, 0, 8, 18, 19,
0, 133, 101, 111, 112, 108, 101, 0, 8, 15, 0, 129, 101, 108, 0, 6, 12, 22, 44,
0, 130, 110, 99, 101, 0, 22, 11, 23, 44, 0, 130, 111, 115, 101, 0, 15, 18, 8,
19, 0, 130, 112, 108, 101, 0, 72, 54, 2, 18, 65, 2, 0, 23, 11, 44, 0, 132,
116, 104, 101, 114, 101, 0, 71, 72, 2, 9, 81, 2, 0, 8, 5, 0, 131, 102, 111,
114, 101, 0, 21, 8, 11, 23, 0, 131, 101, 102, 111, 114, 101, 0, 68, 106, 2, 8,
117, 2, 17, 125, 2, 24, 138, 2, 0, 24, 6, 8, 5, 0, 131, 97, 117, 115, 101, 0,
8, 11, 23, 0, 130, 115, 101, 0, 18, 19, 8, 21, 0, 132, 115, 112, 111, 110,
115, 101, 0, 68, 148, 2, 6, 177, 2, 18, 188, 2, 0, 70, 155, 2, 8, 165, 2, 0,
6, 8, 5, 0, 132, 97, 117, 115, 101, 0, 6, 4, 5, 0, 134, 101, 99, 97, 117, 115,
101, 0, 4, 8, 5, 0, 132, 99, 97, 117, 115, 101, 0, 6, 8, 5, 0, 131, 97, 117,
115, 101, 0, 76, 205, 2, 22, 215, 2, 0, 20, 17, 24, 0, 131, 105, 113, 117,
101, 0, 4, 6, 0, 108, 225, 2, 8, 230, 2, 0, 130, 117, 115, 101, 0, 5, 0, 130,
117, 115, 101, 0, 76, 244, 2, 17, 254, 2, 0, 17, 18, 10, 44, 0, 130, 105, 110,
103, 0, 12, 8, 8, 5, 0, 131, 105, 110, 103, 0, 70, 24, 3, 8, 48, 3, 10, 60, 3,
12, 70, 3, 23, 79, 3, 0, 75, 31, 3, 15, 40, 3, 0, 12, 26, 0, 131, 104, 105,
99, 104, 0, 11, 26, 0, 130, 105, 99, 104, 0, 23, 7, 17, 4, 44, 0, 130, 32,
116, 104, 101, 0, 17, 8, 21, 23, 22, 0, 128, 116, 104, 0, 6, 11, 26, 0, 130,
105, 99, 104, 0, 10, 0, 72, 88, 3, 12, 98, 3, 0, 21, 23, 22, 0, 130, 110, 103,
116, 104, 0, 8, 11, 0, 129, 104, 116, 0, 21, 8, 11, 23, 44, 0, 129, 105, 114,
0, 17, 0, 76, 124, 3, 24, 135, 3, 0, 23, 11, 44, 0, 132, 116, 104, 105, 110,
107, 0, 12, 11, 23, 0, 130, 110, 107, 0, 68, 156, 3, 7, 182, 3, 8, 225, 3, 9,
3, 4, 0, 87, 163, 3, 24, 172, 3, 0, 12, 17, 12, 0, 129, 105, 97, 108, 0, 17,
4, 44, 0, 130, 110, 117, 97, 108, 0, 24, 0, 82, 191, 3, 26, 215, 3, 0, 70,
201, 3, 11, 205, 3, 26, 211, 3, 0, 129, 108, 100, 0, 22, 0, 129, 108, 100, 0,
129, 108, 100, 0, 18, 44, 0, 132, 119, 111, 117, 108, 100, 0, 74, 235, 3, 19,
243, 3, 23, 251, 3, 0, 17, 12, 22, 0, 129, 108, 101, 0, 18, 8, 19, 0, 129,
108, 101, 0, 23, 12, 15, 0, 129, 108, 101, 0, 8, 22, 28, 16, 0, 129, 108, 102,
0, 15, 8, 5, 18, 21, 19, 0, 130, 108, 101, 109, 0, 68, 46, 4, 7, 72, 4, 8, 81,
4, 10, 140, 4, 14, 174, 4, 22, 213, 4, 26, 223, 4, 0, 12, 0, 74, 55, 4, 23,
63, 4, 0, 4, 44, 0, 130, 97, 105, 110, 0, 17, 18, 6, 0, 130, 97, 105, 110, 0,
24, 18, 9, 44, 0, 129, 110, 100, 0, 71, 88, 4, 8, 98, 4, 0, 15, 12, 11, 6, 0,
129, 114, 101, 110, 0, 87, 105, 4, 26, 130, 4, 0, 72, 112, 4, 26, 120, 4, 0,
5, 0, 130, 119, 101, 101, 110, 0, 8, 5, 0, 132, 116, 119, 101, 101, 110, 0, 8,
5, 0, 131, 116, 119, 101, 101, 110, 0, 12, 0, 75, 152, 4, 18, 158, 4, 22, 165,
4, 0, 23, 0, 129, 110, 103, 0, 10, 44, 0, 129, 110, 103, 0, 4, 44, 0, 130,
115, 105, 103, 110, 0, 75, 181, 4, 12, 190, 4, 0, 12, 23, 0, 131, 104, 105,
110, 107, 0, 75, 197, 4, 23, 203, 4, 0, 23, 0, 129, 110, 107, 0, 11, 44, 0,
132, 116, 104, 105, 110, 107, 0, 10, 12, 11, 23, 0, 130, 110, 103, 115, 0, 18,
14, 17, 24, 0, 130, 110, 111, 119, 110, 0, 81, 241, 4, 26, 250, 4, 0, 26, 14,
44, 0, 130, 110, 111, 119, 0, 79, 1, 5, 17, 9, 5, 0, 15, 18, 9, 0, 129, 111,
119, 0, 14, 44, 0, 129, 111, 119, 0, 72, 29, 5, 11, 124, 5, 22, 134, 5, 28,
143, 5, 0, 68, 48, 5, 11, 56, 5, 12, 86, 5, 15, 95, 5, 18, 105, 5, 23, 113, 5,
0, 15, 6, 0, 130, 101, 97, 114, 0, 23, 0, 72, 65, 5, 15, 77, 5, 0, 11, 10, 18,
23, 0, 133, 101, 116, 104, 101, 114, 0, 18, 44, 0, 132, 116, 104, 101, 114, 0,
11, 23, 44, 0, 130, 101, 105, 114, 0, 23, 12, 9, 0, 131, 108, 116, 101, 114,
0, 9, 8, 5, 0, 129, 114, 101, 0, 11, 18, 17, 4, 0, 131, 116, 104, 101, 114, 0,
8, 23, 18, 44, 0, 130, 104, 101, 114, 0, 4, 8, 28, 44, 0, 129, 114, 115, 0, 8,
25, 8, 44, 0, 129, 114, 121, 0, 68, 168, 5, 8, 178, 5, 10, 203, 5, 21, 214, 5,
23, 225, 5, 0, 28, 26, 15, 4, 0, 130, 97, 121, 115, 0, 4, 0, 71, 187, 5, 14,
195, 5, 0, 12, 44, 0, 130, 101, 97, 115, 0, 23, 0, 131, 97, 107, 101, 115, 0,
12, 17, 11, 23, 0, 131, 105, 110, 103, 115, 0, 8, 4, 28, 44, 0, 131, 101, 97,
114, 115, 0, 21, 12, 9, 0, 129, 115, 116, 0, 72, 252, 5, 11, 6, 6, 12, 46, 6,
18, 57, 6, 22, 95, 6, 24, 123, 6, 0, 4, 21, 10, 44, 0, 130, 101, 97, 116, 0,
71, 13, 6, 10, 20, 6, 0, 12, 26, 0, 129, 116, 104, 0, 81, 27, 6, 24, 34, 6, 0,
8, 15, 0, 129, 116, 104, 0, 18, 21, 11, 23, 0, 133, 111, 117, 103, 104, 116,
0, 23, 24, 18, 5, 4, 0, 129, 32, 105, 116, 0, 81, 67, 6, 21, 77, 6, 22, 86, 6,
0, 7, 12, 7, 0, 130, 32, 110, 111, 116, 0, 19, 8, 21, 0, 130, 111, 114, 116,
0, 16, 15, 4, 0, 130, 111, 115, 116, 0, 80, 102, 6, 24, 112, 6, 0, 18, 15, 4,
0, 131, 109, 111, 115, 116, 0, 13, 4, 44, 0, 131, 100, 106, 117, 115, 116, 0,
82, 133, 6, 19, 157, 6, 23, 180, 6, 0, 68, 140, 6, 5, 149, 6, 0, 5, 0, 132,
97, 98, 111, 117, 116, 0, 5, 4, 0, 131, 111, 117, 116, 0, 87, 164, 6, 24, 172,
6, 0, 17, 12, 0, 131, 112, 117, 116, 0, 18, 0, 130, 116, 112, 117, 116, 0, 19,
24, 18, 0, 131, 116, 112, 117, 116, 0, 23, 18, 5, 4, 0, 129, 117, 116, 0, 8,
4, 11, 44, 0, 129, 118, 101, 0, 17, 18, 14, 44, 0, 130, 110, 111, 119, 0, 70,
234, 6, 15, 246, 6, 17, 42, 7, 21, 69, 7, 22, 80, 7, 0, 8, 24, 20, 8, 21, 9,
0, 129, 110, 99, 121, 0, 68, 253, 6, 15, 31, 7, 0, 69, 7, 7, 8, 16, 7, 15, 22,
7, 0, 18, 21, 19, 0, 129, 98, 108, 121, 0, 21, 0, 128, 108, 121, 0, 8, 21, 0,
131, 97, 108, 108, 121, 0, 8, 4, 21, 0, 132, 101, 97, 108, 108, 121, 0, 72,
49, 7, 19, 58, 7, 0, 18, 16, 44, 0, 130, 110, 101, 121, 0, 4, 16, 18, 6, 0,
131, 112, 97, 110, 121, 0, 4, 5, 12, 15, 0, 130, 114, 97, 114, 121, 0, 4, 26,
15, 4, 0, 129, 121, 115, 0};


@@ -0,0 +1,148 @@
# Dictionary syntax:
# Each line of this file defines one typo correction entry with the syntax
# "typo -> correction". Typos and corrections are case insensitive, and any
# whitespace before or after the typo and correction is ignored. The typo must be
# only the letters a-z, or the special character : representing a word break.
:htere -> there
abbout -> about
abotu -> about
baout -> about
:theri -> their
:thier -> their
:owudl -> would
woudl -> would
peaple -> people
peolpe -> people
peopel -> people
poeple -> people
poeople -> people
:hwihc -> which
whcih -> which
whihc -> which
whlch -> which
wihch -> which
coudl -> could
:htikn -> think
:htink -> think
thikn -> think
thiunk -> think
tihkn -> think
:olther -> other
:otehr -> other
baceause -> because
beacuse -> because
becasue -> because
beccause -> because
becouse -> because
becuase -> because
theese -> these
:goign -> going
:gonig -> going
:yaers -> years
:yeasr -> years
:thsoe -> those
shoudl -> should
raelly -> really
realy -> really
relaly -> really
bedore -> before
befoer -> before
littel -> little
beeing -> being
:hwile -> while
aroud -> around
arround -> around
arund -> around
thign -> thing
thigsn -> things
thnigs -> things
anohter -> another
beteen -> between
beween -> between
bewteen -> between
:eveyr -> every
:graet -> great
:agian -> again
:sicne -> since
alwasy -> always
alwyas -> always
throught -> thought
almsot -> almost
alomst -> almost
chnage -> change
chekc -> check
childen -> children
claer -> clear
comapny -> company
contian -> contain
elasped -> elapsed
fitler -> filter
firts -> first
follwo -> follow
:foudn -> found
frequecy -> frequency
firend -> friend
freind -> friend
heigth -> height
iamge -> image
inital -> initial
intput -> input
laguage -> language
lenght -> length
levle -> level
libary -> library
:moeny -> money
mysefl -> myself
ouptut -> output
ouput -> output
probaly -> probably
probelm -> problem
recrod -> record
reponse -> response
reprot -> report
singel -> single
stregth -> strength
strengh -> strength
tkaes -> takes
therfore -> therefore
todya -> today
toghether -> together
unkown -> unknown
unqiue -> unique
widht -> width
## Catch skipped spaces between common words.
:alot: -> a lot
:andteh -> and the
:andthe -> and the
:asthe -> as the
:atthe -> at the
abouta -> about a
aboutit -> about it
aboutthe -> about the
:tothe -> to the
didnot -> did not
fromthe -> from the
:the:the: -> the
## Various additional entries.
:agred -> agreed
:ajust -> adjust
:anual -> annual
:asign -> assign
:casue -> cause
:gaurd -> guard
:haev -> have
:idaes -> ideas
:jsut: -> just
:jstu: -> just
:knwo -> know
:konw -> know
:kwno -> know


@@ -0,0 +1,352 @@
"""Python program to make autocorrection_data.h.
This program reads "autocorrection_dict.txt" from the current directory and
generates a C source file "autocorrection_data.h" with a serialized trie
embedded as an array. Run this program without arguments like
$ python3 make_autocorrection_data.py
Or specify a dict file as the first argument like
$ python3 make_autocorrection_data.py mykeymap/dict.txt
The output is written to "autocorrect_data.h" in the same directory as the
dictionary. Or optionally specify the output .h file as well like
$ python3 make_autocorrection_data.py dict.txt somewhere/out.h
Each line of the dict file defines one typo and its correction with the syntax
"typo -> correction". Blank lines or lines starting with '#' are ignored.
Example:
:thier -> their
dosen't -> doesn't
fitler -> filter
lenght -> length
ouput -> output
widht -> width
See autocorrection_dict_extra.txt for a larger example.
For full documentation, see
https://getreuer.info/posts/keyboards/autocorrection
"""
import os.path
import sys
import textwrap
from typing import Any, Dict, Iterator, List, Tuple
try:
from english_words import english_words_lower_alpha_set as CORRECT_WORDS
except ImportError:
print(
"Autocorrection will falsely trigger when a typo is a substring of a "
"correctly spelled word. To check for this, install the english_words "
"package and rerun this script:\n\n pip install english_words\n"
)
# Use a minimal word list as a fallback.
CORRECT_WORDS = (
"apparent",
"association",
"available",
"classification",
"effect",
"entertainment",
"fantastic",
"information",
"integrate",
"international",
"language",
"loosest",
"manual",
"nothing",
"provides",
"reference",
"statehood",
"technology",
"virtually",
"wealthier",
"wonderful",
)
KC_A = 4
KC_SPC = 0x2C
KC_QUOT = 0x34
TYPO_CHARS = dict(
[
("'", KC_QUOT),
(":", KC_SPC), # "Word break" character.
]
+
# Characters a-z.
[(chr(c), c + KC_A - ord("a")) for c in range(ord("a"), ord("z") + 1)]
)
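# For example, TYPO_CHARS["a"] == 4 (KC_A), TYPO_CHARS["z"] == 29, and
# TYPO_CHARS[":"] == KC_SPC; these are the keycode values that autocorrect.c
# buffers while you type.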
def parse_file(file_name: str) -> List[Tuple[str, str]]:
"""Parses autocorrections dictionary file.
Each line of the file defines one typo and its correction with the syntax
"typo -> correction". Blank lines or lines starting with '#' are ignored. The
function validates that typos only have characters in TYPO_CHARS, that
typos are not substrings of other typos, and that typos don't trigger
on CORRECT_WORDS.
Args:
file_name: String, path of the autocorrections dictionary.
Returns:
List of (typo, correction) tuples.
"""
autocorrections = []
typos = set()
for line_number, typo, correction in parse_file_lines(file_name):
if typo in typos:
print(f'Warning:{line_number}: Ignoring duplicate typo: "{typo}"')
continue
# Check that `typo` is valid.
if not all(c in TYPO_CHARS for c in typo):
print(
f'Error:{line_number}: Typo "{typo}" has '
"characters other than " + "".join(TYPO_CHARS.keys())
)
sys.exit(1)
for other_typo in typos:
if typo in other_typo or other_typo in typo:
print(
f"Error:{line_number}: Typos may not be substrings of one "
f"another, otherwise the longer typo would never trigger: "
f'"{typo}" vs. "{other_typo}".'
)
sys.exit(1)
if len(typo) < 5:
print(
f"Warning:{line_number}: It is suggested that typos are at "
f'least 5 characters long to avoid false triggers: "{typo}"'
)
check_typo_against_dictionary(line_number, typo)
autocorrections.append((typo, correction))
typos.add(typo)
return autocorrections
def make_trie(autocorrections: List[Tuple[str, str]]) -> Dict[str, Any]:
"""Makes a trie from the the typos, writing in reverse.
Args:
autocorrections: List of (typo, correction) tuples.
Returns:
Dict of dict, representing the trie.
"""
trie = {}
for typo, correction in autocorrections:
node = trie
for letter in typo[::-1]:
node = node.setdefault(letter, {})
node["LEAF"] = (typo, correction)
return trie
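# Illustration (comment only): for the single entry (":thier", "their"), the
# typo is stored reversed, so make_trie returns
#   {"r": {"e": {"i": {"h": {"t": {":": {"LEAF": (":thier", "their")}}}}}}}
# Because typos are reversed, entries that share a suffix share trie nodes.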
def parse_file_lines(file_name: str) -> Iterator[Tuple[int, str, str]]:
"""Parses lines read from `file_name` into typo-correction pairs."""
line_number = 0
for line in open(file_name, "rt"):
line_number += 1
line = line.strip()
if line and line[0] != "#":
# Parse syntax "typo -> correction", using strip to ignore indenting.
tokens = [token.strip() for token in line.split("->", 1)]
if len(tokens) != 2 or not tokens[0]:
print(f'Error:{line_number}: Invalid syntax: "{line}"')
sys.exit(1)
typo, correction = tokens
typo = typo.lower() # Force typos to lowercase.
typo = typo.replace(" ", ":")
yield line_number, typo, correction
def check_typo_against_dictionary(line_number: int, typo: str) -> None:
"""Checks `typo` against English dictionary words."""
if typo.startswith(":") and typo.endswith(":"):
if typo[1:-1] in CORRECT_WORDS:
print(
f'Warning:{line_number}: Typo "{typo}" is a correctly spelled '
"dictionary word."
)
elif typo.startswith(":") and not typo.endswith(":"):
for word in CORRECT_WORDS:
if word.startswith(typo[1:]):
print(
f'Warning:{line_number}: Typo "{typo}" would falsely trigger '
f'on correctly spelled word "{word}".'
)
elif not typo.startswith(":") and typo.endswith(":"):
for word in CORRECT_WORDS:
if word.endswith(typo[:-1]):
print(
f'Warning:{line_number}: Typo "{typo}" would falsely trigger '
f'on correctly spelled word "{word}".'
)
elif not typo.startswith(":") and not typo.endswith(":"):
for word in CORRECT_WORDS:
if typo in word:
print(
f'Warning:{line_number}: Typo "{typo}" would falsely trigger '
f'on correctly spelled word "{word}".'
)
def serialize_trie(
autocorrections: List[Tuple[str, str]], trie: Dict[str, Any]
) -> List[int]:
"""Serializes trie and correction data in a form readable by the C code.
Args:
autocorrections: List of (typo, correction) tuples.
trie: Dict of dicts.
Returns:
List of ints in the range 0-255.
"""
table = []
# Traverse trie in depth first order.
def traverse(trie_node: Dict[str, Any]) -> Dict[str, Any]:
if "LEAF" in trie_node: # Handle a leaf trie node.
typo, correction = trie_node["LEAF"]
word_boundary_ending = typo[-1] == ":"
typo = typo.strip(":")
i = 0 # Make the autocorrection data for this entry and serialize it.
while i < min(len(typo), len(correction)) and typo[i] == correction[i]:
i += 1
backspaces = len(typo) - i - 1 + word_boundary_ending
assert 0 <= backspaces <= 63
correction = correction[i:]
data = [backspaces + 128] + list(bytes(correction, "ascii")) + [0]
entry = {"data": data, "links": [], "byte_offset": 0}
table.append(entry)
elif len(trie_node) == 1: # Handle trie node with a single child.
c, trie_node = next(iter(trie_node.items()))
entry = {"chars": c, "byte_offset": 0}
# It's common for a trie to have long chains of single-child nodes. We
# find the whole chain so that we can serialize it more efficiently.
while len(trie_node) == 1 and "LEAF" not in trie_node:
c, trie_node = next(iter(trie_node.items()))
entry["chars"] += c
table.append(entry)
entry["links"] = [traverse(trie_node)]
else: # Handle trie node with multiple children.
entry = {"chars": "".join(sorted(trie_node.keys())), "byte_offset": 0}
table.append(entry)
entry["links"] = [traverse(trie_node[c]) for c in entry["chars"]]
return entry
traverse(trie)
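# Each table entry is serialized into one of three byte patterns; these must
# stay in sync with the reader in process_autocorrection() in autocorrect.c:
#   leaf:   (128 | backspaces), correction string bytes, 0
#   chain:  the chain's keycode bytes, 0   (child node follows immediately)
#   branch: (64 | keycode), link_lo, link_hi, [keycode, link_lo, link_hi]..., 0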
def serialize(e: Dict[str, Any]) -> List[int]:
if not e["links"]: # Handle a leaf table entry.
return e["data"]
elif len(e["links"]) == 1: # Handle a chain table entry.
return [TYPO_CHARS[c] for c in e["chars"]] + [0]
else: # Handle a branch table entry.
data = []
for c, link in zip(e["chars"], e["links"]):
data += [TYPO_CHARS[c] | (0 if data else 64)] + encode_link(link)
return data + [0]
byte_offset = 0
for e in table: # To encode links, first compute byte offset of each entry.
e["byte_offset"] = byte_offset
byte_offset += len(serialize(e))
return [b for e in table for b in serialize(e)] # Serialize final table.
def encode_link(link: Dict[str, Any]) -> List[int]:
"""Encodes a node link as two bytes."""
byte_offset = link["byte_offset"]
if not (0 <= byte_offset <= 0xFFFF):
print(
"Error: The autocorrection table is too large, a node link exceeds "
"64KB limit. Try reducing the autocorrection dict to fewer entries."
)
sys.exit(1)
return [byte_offset & 255, byte_offset >> 8]
def write_generated_code(
autocorrections: List[Tuple[str, str]], data: List[int], file_name: str
) -> None:
"""Writes autocorrection data as generated C code to `file_name`.
Args:
autocorrections: List of (typo, correction) tuples.
data: List of ints in 0-255, the serialized trie.
file_name: String, path of the output C file.
"""
assert all(0 <= b <= 255 for b in data)
def typo_len(e: Tuple[str, str]) -> int:
return len(e[0])
min_typo = min(autocorrections, key=typo_len)[0]
max_typo = max(autocorrections, key=typo_len)[0]
generated_code = "".join(
[
"// Generated code.\n\n",
f"// Autocorrection dictionary ({len(autocorrections)} entries):\n",
"".join(
sorted(
f"// {typo:<{len(max_typo)}} -> {correction}\n"
for typo, correction in autocorrections
)
),
f'\n#define AUTOCORRECTION_MIN_LENGTH {len(min_typo)} // "{min_typo}"\n',
f'#define AUTOCORRECTION_MAX_LENGTH {len(max_typo)} // "{max_typo}"\n\n',
textwrap.fill(
"static const uint8_t autocorrection_data[%d] PROGMEM = {%s};"
% (len(data), ", ".join(map(str, data))),
width=80,
subsequent_indent=" ",
),
"\n\n",
]
)
with open(file_name, "wt") as f:
f.write(generated_code)
def get_default_h_file(dict_file: str) -> str:
return os.path.join(os.path.dirname(dict_file), "autocorrect_data.h")
def main(argv):
dict_file = argv[1] if len(argv) > 1 else "dict.txt"
h_file = argv[2] if len(argv) > 2 else get_default_h_file(dict_file)
autocorrections = parse_file(dict_file)
trie = make_trie(autocorrections)
data = serialize_trie(autocorrections, trie)
print(
f"Processed %d autocorrection entries to table with %d bytes."
% (len(autocorrections), len(data))
)
write_generated_code(autocorrections, data, h_file)
if __name__ == "__main__":
main(sys.argv)


@@ -8,7 +8,9 @@
 // Enable spellcheck over HID
-#define ENABLE_HID_SPELLCHECK
+//#define ENABLE_HID_SPELLCHECK
+#define ENABLE_AUTOCORRECT
 // RGBLight effects
 // Static color is always enabled.


@@ -7,6 +7,10 @@
 #include "features/beta_rawhid.h"
+#ifdef ENABLE_AUTOCORRECT
+#include "features/autocorrect/autocorrect.h"
+#endif
 // Values that should not be saved to git.
 // Create a `secrets.h` in the keymap directory.
 //
@@ -27,7 +31,6 @@ bool send_special_character(uint16_t keycode) {
 return true;
 }
 LEADER_EXTERNS();
 void leader_start(void) { ergodox_right_led_3_on(); }
 void leader_end(void) { ergodox_right_led_3_off(); }
@@ -67,6 +70,10 @@ bool process_record_user(uint16_t keycode, keyrecord_t *record) {
 if (!process_spellcheck(keycode, record)) { return false; }
 #endif
+#ifdef ENABLE_AUTOCORRECT
+if (!process_autocorrection(keycode, record)) { return false; }
+#endif
 // Handle special chars
 if (record->event.pressed) {
 if (!send_special_character(keycode)) { return false; }