From f4a7960eedc7c6da393797b30d1083aae044180d Mon Sep 17 00:00:00 2001
From: Mark
Date: Sun, 20 Nov 2022 20:45:36 -0800
Subject: [PATCH] Added autocorrect feature

---
 .../features/autocorrect/autocorrect.c | 184 +++++++++
 .../features/autocorrect/autocorrect.h | 111 ++++++
 .../features/autocorrect/autocorrect_data.h | 238 ++++++++++++
 .../features/autocorrect/dict.txt | 148 ++++++++
 .../features/autocorrect/gen_autocorrect.py | 352 ++++++++++++++++++
 .../betalupi_ergodox/keymaps/default/config.h | 4 +-
 .../betalupi_ergodox/keymaps/default/keymap.c | 9 +-
 7 files changed, 1044 insertions(+), 2 deletions(-)
 create mode 100644 keyboards/betalupi_ergodox/features/autocorrect/autocorrect.c
 create mode 100644 keyboards/betalupi_ergodox/features/autocorrect/autocorrect.h
 create mode 100644 keyboards/betalupi_ergodox/features/autocorrect/autocorrect_data.h
 create mode 100644 keyboards/betalupi_ergodox/features/autocorrect/dict.txt
 create mode 100644 keyboards/betalupi_ergodox/features/autocorrect/gen_autocorrect.py

diff --git a/keyboards/betalupi_ergodox/features/autocorrect/autocorrect.c b/keyboards/betalupi_ergodox/features/autocorrect/autocorrect.c
new file mode 100644
index 0000000..45a16ab
--- /dev/null
+++ b/keyboards/betalupi_ergodox/features/autocorrect/autocorrect.c
@@ -0,0 +1,184 @@
+#include "autocorrect.h"
+#include <string.h>  // memmove()
+#include "autocorrect_data.h"
+
+#if AUTOCORRECTION_MIN_LENGTH < 4
+// Odd output or hard locks on the board have been observed when the min typo
+// length is 3 or lower (https://github.com/getreuer/qmk-keymap/issues/2).
+// Additionally, autocorrection entries for short typos are more likely to false
+// trigger, so it is suggested that typos be at least 5 characters.
+#error "Min typo length is less than 4. Autocorrection may behave poorly."
+#endif
+
+bool process_autocorrection(uint16_t keycode, keyrecord_t* record) {
+    static uint8_t typo_buffer[AUTOCORRECTION_MAX_LENGTH] = {0};  // Recent basic keycodes, oldest first.
+    static uint8_t typo_buffer_size = 0;  // Number of valid entries in typo_buffer.
+
+    // Ignore key release; we only process key presses.
+    if (!record->event.pressed) {
+        return true;
+    }
+
+    #ifndef NO_ACTION_ONESHOT
+    const uint8_t mods = get_mods() | get_oneshot_mods();
+    #else
+    const uint8_t mods = get_mods();
+    #endif
+
+    // Disable autocorrection while a mod other than shift is active.
+    if ((mods & ~MOD_MASK_SHIFT) != 0) {
+        typo_buffer_size = 0;
+        return true;
+    }
+
+    // The following switch cases address various kinds of keycodes. This logic is
+    // split over two switches rather than merged into one. The first switch may
+    // extract a basic keycode which is then further handled by the second switch,
+    // e.g. a layer-tap key with Caps Lock `LT(layer, KC_CAPS)`.
+    switch (keycode) {
+        #ifndef NO_ACTION_TAPPING
+        case QK_MOD_TAP ... QK_MOD_TAP_MAX:  // Tap-hold keys.
+        #ifndef NO_ACTION_LAYER
+        case QK_LAYER_TAP ... QK_LAYER_TAP_MAX:
+        #endif
+
+            if (record->tap.count == 0) {  // Not a tap: leave the buffer untouched.
+                return true;
+            }
+            // Otherwise when tapped, get the basic keycode.
+            // Fallthrough intended.
+        #endif
+
+        // Handle shifted keys, e.g. symbols like KC_EXLM = S(KC_1).
+        case QK_LSFT ... QK_LSFT + 255:
+        case QK_RSFT ... QK_RSFT + 255:
+            keycode &= 0xff;  // Get the basic keycode.
+            break;
+
+        // NOTE: Space Cadet keys expose no info to check whether they are being
+        // tapped vs. held. This makes autocorrection ambiguous, e.g. KC_LCPO
+        // might be '(', which we would treat as a word break, or it might be
+        // shift, which we would treat as having no effect. To behave cautiously,
+        // we allow Space Cadet keycodes to fall to the logic below and clear
+        // autocorrection state.
+    }
+
+    switch (keycode) {
+        // Ignore shifts, Caps Lock, one-shot mods, and layer switch keys.
+        case KC_NO:
+        case KC_LSFT:
+        case KC_RSFT:
+        case KC_CAPS:
+        case QK_ONE_SHOT_MOD ... QK_ONE_SHOT_MOD_MAX:
+        case QK_TO ... QK_TO_MAX:
+        case QK_MOMENTARY ... QK_MOMENTARY_MAX:
+        case QK_DEF_LAYER ... QK_DEF_LAYER_MAX:
+        case QK_TOGGLE_LAYER ... QK_TOGGLE_LAYER_MAX:
+        case QK_ONE_SHOT_LAYER ... QK_ONE_SHOT_LAYER_MAX:
+        case QK_LAYER_TAP_TOGGLE ... QK_LAYER_TAP_TOGGLE_MAX:
+        case QK_LAYER_MOD ... QK_LAYER_MOD_MAX:
+            return true;  // Ignore these keys.
+    }
+
+    if (keycode == KC_QUOT) {
+        // Treat " (shifted ') as a word boundary.
+        if ((mods & MOD_MASK_SHIFT) != 0) {
+            keycode = KC_SPC;
+        }
+    } else if (!(KC_A <= keycode && keycode <= KC_Z)) {
+        if (keycode == KC_BSPC) {
+            // Remove last character from the buffer.
+            if (typo_buffer_size > 0) {
+                --typo_buffer_size;
+            }
+            return true;
+        } else if (KC_1 <= keycode && keycode <= KC_SLSH && keycode != KC_ESC) {
+            // Set a word boundary if space, period, digit, etc. is pressed.
+            // Behave more conservatively for the enter key. Reset, so that enter
+            // can't be used on a word ending.
+            if (keycode == KC_ENT) {
+                typo_buffer_size = 0;
+            }
+            keycode = KC_SPC;  // Every such key is buffered as a word break.
+        } else {
+            // Clear state if some other non-alpha key is pressed.
+            typo_buffer_size = 0;
+            return true;
+        }
+    }
+
+    // If the buffer is full, rotate it to discard the oldest character.
+    if (typo_buffer_size >= AUTOCORRECTION_MAX_LENGTH) {
+        memmove(typo_buffer, typo_buffer + 1, AUTOCORRECTION_MAX_LENGTH - 1);
+        typo_buffer_size = AUTOCORRECTION_MAX_LENGTH - 1;
+    }
+
+    // Append `keycode` to the buffer.
+    // NOTE: `keycode` must be a basic keycode (0-255) by this point.
+    typo_buffer[typo_buffer_size++] = (uint8_t)keycode;
+    // Early return if not many characters have been buffered so far.
+    if (typo_buffer_size < AUTOCORRECTION_MIN_LENGTH) {
+        return true;
+    }
+
+    // Check whether the buffer ends in a typo. This is done using a trie
+    // stored in `autocorrection_data`; typos are stored reversed, last key first.
+    uint16_t state = 0;  // Byte offset of the current trie node.
+    uint8_t code = pgm_read_byte(autocorrection_data + state);
+    for (int i = typo_buffer_size - 1; i >= 0; --i) {  // Newest key first.
+        const uint8_t key_i = typo_buffer[i];
+
+        if (code & 64) {  // Check for match in node with multiple children.
+            code &= 63;  // Children are 3-byte entries: keycode, link low byte, link high byte.
+            for (; code != key_i; code = pgm_read_byte(autocorrection_data + (state += 3))) {
+                if (!code) {  // 0 terminates the child list: no typo matches.
+                    return true;
+                }
+            }
+
+            // Follow link to child node.
+            state = (uint16_t)(
+                (uint_fast16_t) pgm_read_byte(
+                    autocorrection_data + state + 1
+                ) |
+                (uint_fast16_t) pgm_read_byte(
+                    autocorrection_data + state + 2
+                ) << 8
+            );
+
+        // Otherwise check for match in node with a single child.
+        } else if (code != key_i) {
+            return true;
+        } else if (!(code = pgm_read_byte(autocorrection_data + (++state)))) {
+            ++state;  // Skip the 0 that ends a chain of single-child keys.
+        }
+
+        // Stop if `state` becomes an invalid index. This should not normally
+        // happen, it is a safeguard in case of a bug, data corruption, etc.
+        if (state >= sizeof(autocorrection_data)) {
+            return true;
+        }
+
+        // Read first byte of the next node.
+        code = pgm_read_byte(autocorrection_data + state);
+
+        if (code & 128) {  // A typo was found! Apply autocorrection.
+            const int backspaces = code & 63;  // Low 6 bits: number of characters to erase.
+            for (int j = 0; j < backspaces; ++j) {
+                tap_code(KC_BSPC);
+            }
+            send_string_P((char const*)(autocorrection_data + state + 1));  // NUL-terminated replacement text.
+
+            if (keycode == KC_SPC) {
+                typo_buffer[0] = KC_SPC;  // Keep the word break as the start of the next word.
+                typo_buffer_size = 1;
+                return true;
+            } else {
+                typo_buffer_size = 0;
+                return false;  // Tell the caller to stop processing this key press.
+            }
+        }
+    }
+
+    return true;
+}
diff --git a/keyboards/betalupi_ergodox/features/autocorrect/autocorrect.h b/keyboards/betalupi_ergodox/features/autocorrect/autocorrect.h
new file mode 100644
index 0000000..184b2e4
--- /dev/null
+++ b/keyboards/betalupi_ergodox/features/autocorrect/autocorrect.h
@@ -0,0 +1,111 @@
+// Copyright 2021-2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+/**
+ * @file autocorrect.h
+ * @brief Autocorrection on your keyboard.
+ *
+ * Overview
+ * --------
+ *
+ * Some words are more prone to typos than others. This userspace QMK library
+ * implements rudimentary autocorrection, automatically detecting and fixing
+ * some misspellings.
+ *
+ * Features:
+ *
+ * * It runs on your keyboard, so it is always active no matter what software.
+ * * Low resource cost.
+ * * It is case insensitive.
+ * * It works within words, useful for programming to catch typos within longer
+ *   identifiers.
+ *
+ * Limitations:
+ *
+ * * It is limited to alphabet characters a–z, apostrophes ', and word breaks.
+ *   I'm sorry this probably isn't useful for languages besides English.
+ * * It does not follow mouse or hotkey driven cursor movement.
+ *
+ * Changing the autocorrection dictionary
+ * --------------------------------------
+ *
+ * The file autocorrect_data.h encodes the typos to correct. While you could
+ * simply use the version of this file provided in this directory for a practical
+ * configuration, you can make your own to personalize the autocorrection to
+ * your most troublesome typos:
+ *
+ * Step 1: First, create an autocorrection dictionary dict.txt,
+ * in a form like
+ *
+ *     :thier -> their
+ *     dosen't -> doesn't
+ *     fitler -> filter
+ *     ouput -> output
+ *     widht -> width
+ *
+ * For a practical 132-entry example, see dict.txt in this directory. The original
+ * docs also mention a larger 400-entry autocorrection_dict_extra.txt (not added here).
+ *
+ * The syntax is `typo -> correction`. Typos and corrections are case
+ * insensitive, and any whitespace before or after the typo and correction is
+ * ignored. The typo must be only the characters a-z, ', or the special
+ * character : representing a word break. The correction may have just about any
+ * printable ASCII characters.
+ *
+ * Step 2: Use the gen_autocorrect.py Python script to process the
+ * dictionary. Put dict.txt in the same directory as the Python
+ * script and run
+ *
+ *     $ python3 gen_autocorrect.py
+ *     Processed 132 autocorrection entries to table with 1881 bytes.
+ *
+ * The script arranges the entries in dict.txt into a trie and
+ * generates autocorrect_data.h with the serialized trie embedded as an
+ * array. The .h file will be written in the same directory.
+ *
+ * Step 3: Finally, recompile and flash your keymap.
+ *
+ * For full documentation, see
+ * https://getreuer.info/posts/keyboards/autocorrection
+ *
+ * @author Pascal Getreuer
+ */
+
+#pragma once
+
+#include "quantum.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Handler function for autocorrection.
+ *
+ * Call this function in keymap.c from `process_record_user()` like
+ *
+ *     #include "features/autocorrect/autocorrect.h"
+ *
+ *     bool process_record_user(uint16_t keycode, keyrecord_t* record) {
+ *       if (!process_autocorrection(keycode, record)) { return false; }
+ *       // Your macros...
+ *
+ *       return true;
+ *     }
+ */
+bool process_autocorrection(uint16_t keycode, keyrecord_t* record);
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/keyboards/betalupi_ergodox/features/autocorrect/autocorrect_data.h b/keyboards/betalupi_ergodox/features/autocorrect/autocorrect_data.h
new file mode 100644
index 0000000..17f1396
--- /dev/null
+++ b/keyboards/betalupi_ergodox/features/autocorrect/autocorrect_data.h
@@ -0,0 +1,238 @@
+// Generated code.
+ +// Autocorrection dictionary (132 entries): +// :agian -> again +// :agred -> agreed +// :ajust -> adjust +// :alot: -> a lot +// :andteh -> and the +// :andthe -> and the +// :anual -> annual +// :asign -> assign +// :asthe -> as the +// :atthe -> at the +// :casue -> cause +// :eveyr -> every +// :foudn -> found +// :gaurd -> guard +// :goign -> going +// :gonig -> going +// :graet -> great +// :haev -> have +// :htere -> there +// :htikn -> think +// :htink -> think +// :hwihc -> which +// :hwile -> while +// :idaes -> ideas +// :jstu: -> just +// :jsut: -> just +// :knwo -> know +// :konw -> know +// :kwno -> know +// :moeny -> money +// :olther -> other +// :otehr -> other +// :owudl -> would +// :sicne -> since +// :the:the: -> the +// :theri -> their +// :thier -> their +// :thsoe -> those +// :tothe -> to the +// :yaers -> years +// :yeasr -> years +// abbout -> about +// abotu -> about +// abouta -> about a +// aboutit -> about it +// aboutthe -> about the +// almsot -> almost +// alomst -> almost +// alwasy -> always +// alwyas -> always +// anohter -> another +// aroud -> around +// arround -> around +// arund -> around +// baceause -> because +// baout -> about +// beacuse -> because +// becasue -> because +// beccause -> because +// becouse -> because +// becuase -> because +// bedore -> before +// beeing -> being +// befoer -> before +// beteen -> between +// beween -> between +// bewteen -> between +// chekc -> check +// childen -> children +// chnage -> change +// claer -> clear +// comapny -> company +// contian -> contain +// coudl -> could +// didnot -> did not +// elasped -> elapsed +// firend -> friend +// firts -> first +// fitler -> filter +// follwo -> follow +// freind -> friend +// frequecy -> frequency +// fromthe -> from the +// heigth -> height +// iamge -> image +// inital -> initial +// intput -> input +// laguage -> language +// lenght -> length +// levle -> level +// libary -> library +// littel -> little +// mysefl -> myself +// 
ouptut -> output +// ouput -> output +// peaple -> people +// peolpe -> people +// peopel -> people +// poeople -> people +// poeple -> people +// probaly -> probably +// probelm -> problem +// raelly -> really +// realy -> really +// recrod -> record +// relaly -> really +// reponse -> response +// reprot -> report +// shoudl -> should +// singel -> single +// stregth -> strength +// strengh -> strength +// theese -> these +// therfore -> therefore +// thign -> thing +// thigsn -> things +// thikn -> think +// thiunk -> think +// thnigs -> things +// throught -> thought +// tihkn -> think +// tkaes -> takes +// todya -> today +// toghether -> together +// unkown -> unknown +// unqiue -> unique +// whcih -> which +// whihc -> which +// whlch -> which +// widht -> width +// wihch -> which +// woudl -> would + +#define AUTOCORRECTION_MIN_LENGTH 5 // "abotu" +#define AUTOCORRECTION_MAX_LENGTH 9 // "toghether" + +static const uint8_t autocorrection_data[1881] PROGMEM = {108, 61, 0, 4, 117, 0, + 6, 141, 0, 7, 181, 0, 8, 52, 1, 10, 237, 2, 11, 8, 3, 12, 105, 3, 14, 115, 3, + 15, 143, 3, 16, 12, 4, 17, 24, 4, 18, 234, 4, 21, 16, 5, 22, 152, 5, 23, 233, + 5, 24, 190, 6, 25, 199, 6, 26, 208, 6, 28, 218, 6, 0, 72, 71, 0, 23, 81, 0, + 24, 107, 0, 0, 11, 23, 44, 8, 11, 23, 44, 0, 132, 0, 82, 88, 0, 24, 98, 0, 0, + 15, 4, 44, 0, 131, 32, 108, 111, 116, 0, 22, 13, 44, 0, 131, 117, 115, 116, 0, + 23, 22, 13, 44, 0, 131, 117, 115, 116, 0, 87, 124, 0, 28, 133, 0, 0, 24, 18, + 5, 4, 0, 128, 32, 97, 0, 7, 18, 23, 0, 129, 97, 121, 0, 75, 148, 0, 14, 173, + 0, 0, 12, 0, 75, 157, 0, 26, 163, 0, 0, 26, 0, 129, 99, 104, 0, 11, 44, 0, + 132, 119, 104, 105, 99, 104, 0, 8, 11, 6, 0, 129, 99, 107, 0, 72, 197, 0, 17, + 223, 0, 18, 23, 1, 21, 33, 1, 24, 44, 1, 0, 83, 204, 0, 21, 215, 0, 0, 22, 4, + 15, 8, 0, 131, 112, 115, 101, 100, 0, 10, 4, 44, 0, 128, 101, 100, 0, 72, 233, + 0, 12, 244, 0, 24, 254, 0, 0, 21, 12, 9, 0, 132, 114, 105, 101, 110, 100, 0, + 8, 21, 9, 0, 131, 105, 101, 110, 100, 
0, 82, 5, 1, 21, 15, 1, 0, 21, 21, 4, 0, + 132, 111, 117, 110, 100, 0, 4, 0, 130, 111, 117, 110, 100, 0, 21, 6, 8, 21, 0, + 130, 111, 114, 100, 0, 24, 4, 10, 44, 0, 131, 117, 97, 114, 100, 0, 18, 21, 4, + 0, 128, 110, 100, 0, 74, 80, 1, 11, 124, 1, 15, 205, 1, 17, 17, 2, 18, 27, 2, + 19, 37, 2, 21, 47, 2, 22, 93, 2, 24, 198, 2, 0, 68, 87, 1, 16, 115, 1, 0, 81, + 94, 1, 24, 103, 1, 0, 11, 6, 0, 131, 97, 110, 103, 101, 0, 10, 4, 15, 0, 132, + 110, 103, 117, 97, 103, 101, 0, 4, 12, 0, 131, 109, 97, 103, 101, 0, 23, 0, + 71, 142, 1, 16, 152, 1, 18, 162, 1, 22, 171, 1, 23, 180, 1, 0, 17, 4, 44, 0, + 130, 32, 116, 104, 101, 0, 18, 21, 9, 0, 130, 32, 116, 104, 101, 0, 23, 44, 0, + 130, 32, 116, 104, 101, 0, 4, 44, 0, 130, 32, 116, 104, 101, 0, 68, 187, 1, + 24, 195, 1, 0, 44, 0, 130, 32, 116, 104, 101, 0, 18, 5, 4, 0, 130, 32, 116, + 104, 101, 0, 76, 215, 1, 19, 226, 1, 25, 10, 2, 0, 26, 11, 44, 0, 132, 119, + 104, 105, 108, 101, 0, 68, 236, 1, 8, 245, 1, 18, 255, 1, 0, 8, 19, 0, 131, + 111, 112, 108, 101, 0, 18, 19, 0, 132, 101, 111, 112, 108, 101, 0, 8, 18, 19, + 0, 133, 101, 111, 112, 108, 101, 0, 8, 15, 0, 129, 101, 108, 0, 6, 12, 22, 44, + 0, 130, 110, 99, 101, 0, 22, 11, 23, 44, 0, 130, 111, 115, 101, 0, 15, 18, 8, + 19, 0, 130, 112, 108, 101, 0, 72, 54, 2, 18, 65, 2, 0, 23, 11, 44, 0, 132, + 116, 104, 101, 114, 101, 0, 71, 72, 2, 9, 81, 2, 0, 8, 5, 0, 131, 102, 111, + 114, 101, 0, 21, 8, 11, 23, 0, 131, 101, 102, 111, 114, 101, 0, 68, 106, 2, 8, + 117, 2, 17, 125, 2, 24, 138, 2, 0, 24, 6, 8, 5, 0, 131, 97, 117, 115, 101, 0, + 8, 11, 23, 0, 130, 115, 101, 0, 18, 19, 8, 21, 0, 132, 115, 112, 111, 110, + 115, 101, 0, 68, 148, 2, 6, 177, 2, 18, 188, 2, 0, 70, 155, 2, 8, 165, 2, 0, + 6, 8, 5, 0, 132, 97, 117, 115, 101, 0, 6, 4, 5, 0, 134, 101, 99, 97, 117, 115, + 101, 0, 4, 8, 5, 0, 132, 99, 97, 117, 115, 101, 0, 6, 8, 5, 0, 131, 97, 117, + 115, 101, 0, 76, 205, 2, 22, 215, 2, 0, 20, 17, 24, 0, 131, 105, 113, 117, + 101, 0, 4, 6, 0, 108, 225, 2, 8, 230, 2, 0, 130, 117, 
115, 101, 0, 5, 0, 130, + 117, 115, 101, 0, 76, 244, 2, 17, 254, 2, 0, 17, 18, 10, 44, 0, 130, 105, 110, + 103, 0, 12, 8, 8, 5, 0, 131, 105, 110, 103, 0, 70, 24, 3, 8, 48, 3, 10, 60, 3, + 12, 70, 3, 23, 79, 3, 0, 75, 31, 3, 15, 40, 3, 0, 12, 26, 0, 131, 104, 105, + 99, 104, 0, 11, 26, 0, 130, 105, 99, 104, 0, 23, 7, 17, 4, 44, 0, 130, 32, + 116, 104, 101, 0, 17, 8, 21, 23, 22, 0, 128, 116, 104, 0, 6, 11, 26, 0, 130, + 105, 99, 104, 0, 10, 0, 72, 88, 3, 12, 98, 3, 0, 21, 23, 22, 0, 130, 110, 103, + 116, 104, 0, 8, 11, 0, 129, 104, 116, 0, 21, 8, 11, 23, 44, 0, 129, 105, 114, + 0, 17, 0, 76, 124, 3, 24, 135, 3, 0, 23, 11, 44, 0, 132, 116, 104, 105, 110, + 107, 0, 12, 11, 23, 0, 130, 110, 107, 0, 68, 156, 3, 7, 182, 3, 8, 225, 3, 9, + 3, 4, 0, 87, 163, 3, 24, 172, 3, 0, 12, 17, 12, 0, 129, 105, 97, 108, 0, 17, + 4, 44, 0, 130, 110, 117, 97, 108, 0, 24, 0, 82, 191, 3, 26, 215, 3, 0, 70, + 201, 3, 11, 205, 3, 26, 211, 3, 0, 129, 108, 100, 0, 22, 0, 129, 108, 100, 0, + 129, 108, 100, 0, 18, 44, 0, 132, 119, 111, 117, 108, 100, 0, 74, 235, 3, 19, + 243, 3, 23, 251, 3, 0, 17, 12, 22, 0, 129, 108, 101, 0, 18, 8, 19, 0, 129, + 108, 101, 0, 23, 12, 15, 0, 129, 108, 101, 0, 8, 22, 28, 16, 0, 129, 108, 102, + 0, 15, 8, 5, 18, 21, 19, 0, 130, 108, 101, 109, 0, 68, 46, 4, 7, 72, 4, 8, 81, + 4, 10, 140, 4, 14, 174, 4, 22, 213, 4, 26, 223, 4, 0, 12, 0, 74, 55, 4, 23, + 63, 4, 0, 4, 44, 0, 130, 97, 105, 110, 0, 17, 18, 6, 0, 130, 97, 105, 110, 0, + 24, 18, 9, 44, 0, 129, 110, 100, 0, 71, 88, 4, 8, 98, 4, 0, 15, 12, 11, 6, 0, + 129, 114, 101, 110, 0, 87, 105, 4, 26, 130, 4, 0, 72, 112, 4, 26, 120, 4, 0, + 5, 0, 130, 119, 101, 101, 110, 0, 8, 5, 0, 132, 116, 119, 101, 101, 110, 0, 8, + 5, 0, 131, 116, 119, 101, 101, 110, 0, 12, 0, 75, 152, 4, 18, 158, 4, 22, 165, + 4, 0, 23, 0, 129, 110, 103, 0, 10, 44, 0, 129, 110, 103, 0, 4, 44, 0, 130, + 115, 105, 103, 110, 0, 75, 181, 4, 12, 190, 4, 0, 12, 23, 0, 131, 104, 105, + 110, 107, 0, 75, 197, 4, 23, 203, 4, 0, 23, 0, 129, 110, 107, 0, 11, 
44, 0, + 132, 116, 104, 105, 110, 107, 0, 10, 12, 11, 23, 0, 130, 110, 103, 115, 0, 18, + 14, 17, 24, 0, 130, 110, 111, 119, 110, 0, 81, 241, 4, 26, 250, 4, 0, 26, 14, + 44, 0, 130, 110, 111, 119, 0, 79, 1, 5, 17, 9, 5, 0, 15, 18, 9, 0, 129, 111, + 119, 0, 14, 44, 0, 129, 111, 119, 0, 72, 29, 5, 11, 124, 5, 22, 134, 5, 28, + 143, 5, 0, 68, 48, 5, 11, 56, 5, 12, 86, 5, 15, 95, 5, 18, 105, 5, 23, 113, 5, + 0, 15, 6, 0, 130, 101, 97, 114, 0, 23, 0, 72, 65, 5, 15, 77, 5, 0, 11, 10, 18, + 23, 0, 133, 101, 116, 104, 101, 114, 0, 18, 44, 0, 132, 116, 104, 101, 114, 0, + 11, 23, 44, 0, 130, 101, 105, 114, 0, 23, 12, 9, 0, 131, 108, 116, 101, 114, + 0, 9, 8, 5, 0, 129, 114, 101, 0, 11, 18, 17, 4, 0, 131, 116, 104, 101, 114, 0, + 8, 23, 18, 44, 0, 130, 104, 101, 114, 0, 4, 8, 28, 44, 0, 129, 114, 115, 0, 8, + 25, 8, 44, 0, 129, 114, 121, 0, 68, 168, 5, 8, 178, 5, 10, 203, 5, 21, 214, 5, + 23, 225, 5, 0, 28, 26, 15, 4, 0, 130, 97, 121, 115, 0, 4, 0, 71, 187, 5, 14, + 195, 5, 0, 12, 44, 0, 130, 101, 97, 115, 0, 23, 0, 131, 97, 107, 101, 115, 0, + 12, 17, 11, 23, 0, 131, 105, 110, 103, 115, 0, 8, 4, 28, 44, 0, 131, 101, 97, + 114, 115, 0, 21, 12, 9, 0, 129, 115, 116, 0, 72, 252, 5, 11, 6, 6, 12, 46, 6, + 18, 57, 6, 22, 95, 6, 24, 123, 6, 0, 4, 21, 10, 44, 0, 130, 101, 97, 116, 0, + 71, 13, 6, 10, 20, 6, 0, 12, 26, 0, 129, 116, 104, 0, 81, 27, 6, 24, 34, 6, 0, + 8, 15, 0, 129, 116, 104, 0, 18, 21, 11, 23, 0, 133, 111, 117, 103, 104, 116, + 0, 23, 24, 18, 5, 4, 0, 129, 32, 105, 116, 0, 81, 67, 6, 21, 77, 6, 22, 86, 6, + 0, 7, 12, 7, 0, 130, 32, 110, 111, 116, 0, 19, 8, 21, 0, 130, 111, 114, 116, + 0, 16, 15, 4, 0, 130, 111, 115, 116, 0, 80, 102, 6, 24, 112, 6, 0, 18, 15, 4, + 0, 131, 109, 111, 115, 116, 0, 13, 4, 44, 0, 131, 100, 106, 117, 115, 116, 0, + 82, 133, 6, 19, 157, 6, 23, 180, 6, 0, 68, 140, 6, 5, 149, 6, 0, 5, 0, 132, + 97, 98, 111, 117, 116, 0, 5, 4, 0, 131, 111, 117, 116, 0, 87, 164, 6, 24, 172, + 6, 0, 17, 12, 0, 131, 112, 117, 116, 0, 18, 0, 130, 116, 112, 117, 
116, 0, 19, + 24, 18, 0, 131, 116, 112, 117, 116, 0, 23, 18, 5, 4, 0, 129, 117, 116, 0, 8, + 4, 11, 44, 0, 129, 118, 101, 0, 17, 18, 14, 44, 0, 130, 110, 111, 119, 0, 70, + 234, 6, 15, 246, 6, 17, 42, 7, 21, 69, 7, 22, 80, 7, 0, 8, 24, 20, 8, 21, 9, + 0, 129, 110, 99, 121, 0, 68, 253, 6, 15, 31, 7, 0, 69, 7, 7, 8, 16, 7, 15, 22, + 7, 0, 18, 21, 19, 0, 129, 98, 108, 121, 0, 21, 0, 128, 108, 121, 0, 8, 21, 0, + 131, 97, 108, 108, 121, 0, 8, 4, 21, 0, 132, 101, 97, 108, 108, 121, 0, 72, + 49, 7, 19, 58, 7, 0, 18, 16, 44, 0, 130, 110, 101, 121, 0, 4, 16, 18, 6, 0, + 131, 112, 97, 110, 121, 0, 4, 5, 12, 15, 0, 130, 114, 97, 114, 121, 0, 4, 26, + 15, 4, 0, 129, 121, 115, 0}; + diff --git a/keyboards/betalupi_ergodox/features/autocorrect/dict.txt b/keyboards/betalupi_ergodox/features/autocorrect/dict.txt new file mode 100644 index 0000000..55ca266 --- /dev/null +++ b/keyboards/betalupi_ergodox/features/autocorrect/dict.txt @@ -0,0 +1,148 @@ +# Dictionary syntax: +# Each line of this file defines one typo correction entry with the syntax +# "typo -> correction". Typos and corrections are case insensitive, and any +# whitespace before or after the typo and correction is ignored. The typo must be +# only the letters a-z, or the special character : representing a word break. 
+ +:htere -> there +abbout -> about +abotu -> about +baout -> about +:theri -> their +:thier -> their +:owudl -> would +woudl -> would +peaple -> people +peolpe -> people +peopel -> people +poeple -> people +poeople -> people +:hwihc -> which +whcih -> which +whihc -> which +whlch -> which +wihch -> which +coudl -> could +:htikn -> think +:htink -> think +thikn -> think +thiunk -> think +tihkn -> think +:olther -> other +:otehr -> other +baceause -> because +beacuse -> because +becasue -> because +beccause -> because +becouse -> because +becuase -> because + +theese -> these +:goign -> going +:gonig -> going +:yaers -> years +:yeasr -> years +:thsoe -> those +shoudl -> should +raelly -> really +realy -> really +relaly -> really +bedore -> before +befoer -> before +littel -> little +beeing -> being +:hwile -> while + +aroud -> around +arround -> around +arund -> around +thign -> thing +thigsn -> things +thnigs -> things +anohter -> another +beteen -> between +beween -> between +bewteen -> between +:eveyr -> every +:graet -> great +:agian -> again +:sicne -> since +alwasy -> always +alwyas -> always +throught -> thought + + +almsot -> almost +alomst -> almost +chnage -> change +chekc -> check +childen -> children +claer -> clear +comapny -> company +contian -> contain +elasped -> elapsed +fitler -> filter +firts -> first +follwo -> follow +:foudn -> found +frequecy -> frequency +firend -> friend +freind -> friend +heigth -> height +iamge -> image +inital -> initial +intput -> input +laguage -> language +lenght -> length +levle -> level +libary -> library +:moeny -> money +mysefl -> myself +ouptut -> output +ouput -> output +probaly -> probably +probelm -> problem +recrod -> record +reponse -> response +reprot -> report +singel -> single +stregth -> strength +strengh -> strength +tkaes -> takes +therfore -> therefore +todya -> today +toghether -> together +unkown -> unknown +unqiue -> unique +widht -> width + + +## Catch skipped spaces between common words. 
+:alot: -> a lot
+:andteh -> and the
+:andthe -> and the
+:asthe -> as the
+:atthe -> at the
+abouta -> about a
+aboutit -> about it
+aboutthe -> about the
+:tothe -> to the
+didnot -> did not
+fromthe -> from the
+:the:the: -> the
+
+
+## Various additional entries.
+:agred -> agreed
+:ajust -> adjust
+:anual -> annual
+:asign -> assign
+:casue -> cause
+:gaurd -> guard
+:haev -> have
+:idaes -> ideas
+:jsut: -> just
+:jstu: -> just
+:knwo -> know
+:konw -> know
+:kwno -> know
\ No newline at end of file
diff --git a/keyboards/betalupi_ergodox/features/autocorrect/gen_autocorrect.py b/keyboards/betalupi_ergodox/features/autocorrect/gen_autocorrect.py
new file mode 100644
index 0000000..9bc2d75
--- /dev/null
+++ b/keyboards/betalupi_ergodox/features/autocorrect/gen_autocorrect.py
@@ -0,0 +1,352 @@
+"""Python program to make autocorrect_data.h.
+
+This program reads "dict.txt" from the current directory and
+generates a C source file "autocorrect_data.h" with a serialized trie
+embedded as an array. Run this program without arguments like
+
+$ python3 gen_autocorrect.py
+
+Or specify a dict file as the first argument like
+
+$ python3 gen_autocorrect.py mykeymap/dict.txt
+
+The output is written to "autocorrect_data.h" in the same directory as the
+dictionary. Or optionally specify the output .h file as well like
+
+$ python3 gen_autocorrect.py dict.txt somewhere/out.h
+
+Each line of the dict file defines one typo and its correction with the syntax
+"typo -> correction". Blank lines or lines starting with '#' are ignored.
+Example:
+
+  :thier -> their
+  dosen't -> doesn't
+  fitler -> filter
+  lenght -> length
+  ouput -> output
+  widht -> width
+
+See autocorrection_dict_extra.txt for a larger example.
+
+For full documentation, see
+https://getreuer.info/posts/keyboards/autocorrection
+"""
+
+import os.path
+import sys
+import textwrap
+from typing import Any, Dict, Iterator, List, Tuple
+
+try:
+    from english_words import english_words_lower_alpha_set as CORRECT_WORDS
+except ImportError:
+    print(
+        "Autocorrection will falsely trigger when a typo is a substring of a "
+        "correctly spelled word. To check for this, install the english_words "
+        "package and rerun this script:\n\n pip install english_words\n"
+    )
+    # Use a minimal word list as a fallback.
+    CORRECT_WORDS = (
+        "apparent",
+        "association",
+        "available",
+        "classification",
+        "effect",
+        "entertainment",
+        "fantastic",
+        "information",
+        "integrate",
+        "international",
+        "language",
+        "loosest",
+        "manual",
+        "nothing",
+        "provides",
+        "reference",
+        "statehood",
+        "technology",
+        "virtually",
+        "wealthier",
+        "wonderful",
+    )
+
+KC_A = 4
+KC_SPC = 0x2C
+KC_QUOT = 0x34
+
+TYPO_CHARS = dict(
+    [
+        ("'", KC_QUOT),
+        (":", KC_SPC),  # "Word break" character.
+    ]
+    +
+    # Characters a-z.
+    [(chr(c), c + KC_A - ord("a")) for c in range(ord("a"), ord("z") + 1)]
+)
+
+
+def parse_file(file_name: str) -> List[Tuple[str, str]]:
+    """Parses autocorrections dictionary file.
+
+    Each line of the file defines one typo and its correction with the syntax
+    "typo -> correction". Blank lines or lines starting with '#' are ignored. The
+    function validates that typos only have characters in TYPO_CHARS and that
+    typos are not substrings of other typos (exiting with status 1 otherwise),
+    and warns about duplicates, short typos and typos that hit CORRECT_WORDS.
+
+    Args:
+      file_name: String, path of the autocorrections dictionary.
+    Returns:
+      List of (typo, correction) tuples.
+    """
+
+    autocorrections = []
+    typos = set()
+    for line_number, typo, correction in parse_file_lines(file_name):
+        if typo in typos:
+            print(f'Warning:{line_number}: Ignoring duplicate typo: "{typo}"')
+            continue
+
+        # Check that `typo` is valid.
+        if not all(c in TYPO_CHARS for c in typo):
+            print(
+                f'Error:{line_number}: Typo "{typo}" has '
+                "characters other than " + "".join(TYPO_CHARS.keys())
+            )
+            sys.exit(1)
+        for other_typo in typos:
+            if typo in other_typo or other_typo in typo:
+                print(
+                    f"Error:{line_number}: Typos may not be substrings of one "
+                    f"another, otherwise the longer typo would never trigger: "
+                    f'"{typo}" vs. "{other_typo}".'
+                )
+                sys.exit(1)
+        if len(typo) < 5:
+            print(
+                f"Warning:{line_number}: It is suggested that typos are at "
+                f'least 5 characters long to avoid false triggers: "{typo}"'
+            )
+
+        check_typo_against_dictionary(line_number, typo)
+
+        autocorrections.append((typo, correction))
+        typos.add(typo)
+
+    return autocorrections
+
+
+def make_trie(autocorrections: List[Tuple[str, str]]) -> Dict[str, Any]:
+    """Makes a trie from the typos, inserting each typo in reverse (last letter first).
+
+    Args:
+      autocorrections: List of (typo, correction) tuples.
+    Returns:
+      Dict of dict, representing the trie; "LEAF" keys hold (typo, correction).
+    """
+    trie = {}
+    for typo, correction in autocorrections:
+        node = trie
+        for letter in typo[::-1]:
+            node = node.setdefault(letter, {})
+        node["LEAF"] = (typo, correction)
+
+    return trie
+
+
+def parse_file_lines(file_name: str) -> Iterator[Tuple[int, str, str]]:
+    """Yields (line_number, typo, correction) for each entry line of `file_name`."""
+
+    # `with` closes the dictionary file even when parsing exits early.
+    with open(file_name, "rt") as dict_file:
+        for line_number, line in enumerate(dict_file, 1):
+            line = line.strip()
+            if line and line[0] != "#":
+                # Parse syntax "typo -> correction", using strip to ignore indenting.
+                tokens = [token.strip() for token in line.split("->", 1)]
+                if len(tokens) != 2 or not tokens[0]:
+                    print(f'Error:{line_number}: Invalid syntax: "{line}"')
+                    sys.exit(1)
+
+                typo, correction = tokens
+                typo = typo.lower()  # Force typos to lowercase.
+                typo = typo.replace(" ", ":")  # Spaces inside a typo are word breaks.
+
+                yield line_number, typo, correction
+
+
+def check_typo_against_dictionary(line_number: int, typo: str) -> None:
+    """Prints a warning for each CORRECT_WORDS entry that `typo` would trigger on."""
+
+    if typo.startswith(":") and typo.endswith(":"):
+        if typo[1:-1] in CORRECT_WORDS:
+            print(
+                f'Warning:{line_number}: Typo "{typo}" is a correctly spelled '
+                "dictionary word."
+            )
+    elif typo.startswith(":") and not typo.endswith(":"):
+        for word in CORRECT_WORDS:
+            if word.startswith(typo[1:]):
+                print(
+                    f'Warning:{line_number}: Typo "{typo}" would falsely trigger '
+                    f'on correctly spelled word "{word}".'
+                )
+    elif not typo.startswith(":") and typo.endswith(":"):
+        for word in CORRECT_WORDS:
+            if word.endswith(typo[:-1]):
+                print(
+                    f'Warning:{line_number}: Typo "{typo}" would falsely trigger '
+                    f'on correctly spelled word "{word}".'
+                )
+    elif not typo.startswith(":") and not typo.endswith(":"):
+        for word in CORRECT_WORDS:
+            if typo in word:
+                print(
+                    f'Warning:{line_number}: Typo "{typo}" would falsely trigger '
+                    f'on correctly spelled word "{word}".'
+                )
+
+
+def serialize_trie(
+    autocorrections: List[Tuple[str, str]], trie: Dict[str, Any]
+) -> List[int]:
+    """Serializes trie and correction data in a form readable by the C code.
+
+    Args:
+      autocorrections: List of (typo, correction) tuples (not read by this function).
+      trie: Dict of dicts.
+    Returns:
+      List of ints in the range 0-255.
+    """
+    table = []
+
+    # Traverse trie in depth first order.
+    def traverse(trie_node: Dict[str, Any]) -> Dict[str, Any]:
+        if "LEAF" in trie_node:  # Handle a leaf trie node.
+            typo, correction = trie_node["LEAF"]
+            word_boundary_ending = typo[-1] == ":"
+            typo = typo.strip(":")
+            i = 0  # Make the autocorrection data for this entry and serialize it.
+            while i < min(len(typo), len(correction)) and typo[i] == correction[i]:
+                i += 1
+            backspaces = len(typo) - i - 1 + word_boundary_ending
+            assert 0 <= backspaces <= 63
+            correction = correction[i:]
+            data = [backspaces + 128] + list(bytes(correction, "ascii")) + [0]
+
+            entry = {"data": data, "links": [], "byte_offset": 0}
+            table.append(entry)
+        elif len(trie_node) == 1:  # Handle trie node with a single child.
+            c, trie_node = next(iter(trie_node.items()))
+            entry = {"chars": c, "byte_offset": 0}
+
+            # It's common for a trie to have long chains of single-child nodes. We
+            # find the whole chain so that we can serialize it more efficiently.
+            while len(trie_node) == 1 and "LEAF" not in trie_node:
+                c, trie_node = next(iter(trie_node.items()))
+                entry["chars"] += c
+
+            table.append(entry)
+            entry["links"] = [traverse(trie_node)]
+        else:  # Handle trie node with multiple children.
+            entry = {"chars": "".join(sorted(trie_node.keys())), "byte_offset": 0}
+            table.append(entry)
+            entry["links"] = [traverse(trie_node[c]) for c in entry["chars"]]
+        return entry
+
+    traverse(trie)
+
+    def serialize(e: Dict[str, Any]) -> List[int]:
+        if not e["links"]:  # Handle a leaf table entry.
+            return e["data"]
+        elif len(e["links"]) == 1:  # Handle a chain table entry.
+            return [TYPO_CHARS[c] for c in e["chars"]] + [0]
+        else:  # Handle a branch table entry.
+            data = []
+            for c, link in zip(e["chars"], e["links"]):
+                data += [TYPO_CHARS[c] | (0 if data else 64)] + encode_link(link)
+            return data + [0]
+
+    byte_offset = 0
+    for e in table:  # To encode links, first compute byte offset of each entry.
+        e["byte_offset"] = byte_offset
+        byte_offset += len(serialize(e))
+
+    return [b for e in table for b in serialize(e)]  # Serialize final table.
def encode_link(link: Dict[str, Any]) -> List[int]:
    """Encodes a node link as two bytes, low byte first.

    Args:
      link: Table entry dict; only its "byte_offset" value is read.
    Returns:
      `[offset & 255, offset >> 8]`.

    Exits the process with status 1 when the offset does not fit in 16 bits.
    """
    byte_offset = link["byte_offset"]
    if not 0 <= byte_offset <= 0xFFFF:
        # Diagnostics belong on stderr so they are not mixed into (or lost
        # with) redirected standard output.
        print(
            "Error: The autocorrection table is too large, a node link exceeds "
            "64KB limit. Try reducing the autocorrection dict to fewer entries.",
            file=sys.stderr,
        )
        sys.exit(1)
    return [byte_offset & 255, byte_offset >> 8]


def write_generated_code(
    autocorrections: List[Tuple[str, str]], data: List[int], file_name: str
) -> None:
    """Writes autocorrection data as generated C code to `file_name`.

    The output lists every dictionary entry as a comment, defines
    AUTOCORRECTION_MIN_LENGTH / AUTOCORRECTION_MAX_LENGTH from the shortest and
    longest typo, and declares the `autocorrection_data` array.

    Args:
      autocorrections: List of (typo, correction) tuples.
      data: List of ints in 0-255, the serialized trie.
      file_name: String, path of the output C file.
    Raises:
      ValueError: If `autocorrections` is empty, since no min/max typo length
        can be derived.
    """
    assert all(0 <= b <= 255 for b in data)
    if not autocorrections:
        # Previously this surfaced as an opaque "min() arg is an empty
        # sequence"; same exception type, clearer message.
        raise ValueError(
            "Cannot generate autocorrection data: the dictionary has no entries."
        )

    typos = [entry[0] for entry in autocorrections]
    min_typo = min(typos, key=len)
    max_typo = max(typos, key=len)
    column_width = len(max_typo)

    dictionary_comment = "".join(
        sorted(
            f"// {typo:<{column_width}} -> {correction}\n"
            for typo, correction in autocorrections
        )
    )
    array_definition = textwrap.fill(
        "static const uint8_t autocorrection_data[%d] PROGMEM = {%s};"
        % (len(data), ", ".join(map(str, data))),
        width=80,
        # NOTE(review): continuation indent copied as seen in the reviewed
        # text, where whitespace runs were collapsed; confirm intended width.
        subsequent_indent=" ",
    )
    generated_code = "".join(
        [
            "// Generated code.\n\n",
            f"// Autocorrection dictionary ({len(autocorrections)} entries):\n",
            dictionary_comment,
            f'\n#define AUTOCORRECTION_MIN_LENGTH {len(min_typo)} // "{min_typo}"\n',
            f'#define AUTOCORRECTION_MAX_LENGTH {len(max_typo)} // "{max_typo}"\n\n',
            array_definition,
            "\n\n",
        ]
    )

    # Explicit encoding keeps the output independent of the platform locale.
    with open(file_name, "wt", encoding="utf-8") as f:
        f.write(generated_code)


def get_default_h_file(dict_file: str) -> str:
    """Returns the path of `autocorrect_data.h` next to `dict_file`."""
    return os.path.join(os.path.dirname(dict_file), "autocorrect_data.h")


def main(argv):
    """Generates the C data header from an autocorrection dictionary.

    Args:
      argv: Command line; `argv[1]` is the dictionary file (default
        "dict.txt") and `argv[2]` the output header (default:
        `autocorrect_data.h` in the dictionary's directory).
    """
    dict_file = argv[1] if len(argv) > 1 else "dict.txt"
    h_file = argv[2] if len(argv) > 2 else get_default_h_file(dict_file)

    autocorrections = parse_file(dict_file)
    trie = make_trie(autocorrections)
    data = serialize_trie(autocorrections, trie)
    print(
        f"Processed {len(autocorrections)} autocorrection entries to table "
        f"with {len(data)} bytes."
    )
    write_generated_code(autocorrections, data, h_file)


if __name__ == "__main__":
    main(sys.argv)