Added autocorrect feature
parent
afa07ce83c
commit
f4a7960eed
|
@ -0,0 +1,184 @@
|
||||||
|
#include "autocorrect.h"
|
||||||
|
#include <string.h>
|
||||||
|
#include "autocorrect_data.h"
|
||||||
|
|
||||||
|
#if AUTOCORRECTION_MIN_LENGTH < 4
|
||||||
|
// Odd output or hard locks on the board have been observed when the min typo
|
||||||
|
// length is 3 or lower (https://github.com/getreuer/qmk-keymap/issues/2).
|
||||||
|
// Additionally, autocorrection entries for short typos are more likely to false
|
||||||
|
// trigger, so it is suggested that typos be at least 5 characters.
|
||||||
|
#error "Min typo length is less than 4. Autocorrection may behave poorly."
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/**
 * @brief Handles one key event for autocorrection.
 *
 * Keeps a small static rolling buffer of recently typed basic keycodes and,
 * on each key press, searches backwards from the newest key through the
 * serialized trie in `autocorrection_data`. If the buffer ends in a known
 * typo, the typo is erased with backspaces and replaced by the stored
 * correction string.
 *
 * @param keycode QMK keycode of the current event.
 * @param record  Key event record (pressed/released, tap count, etc.).
 * @return false when a correction was applied (the event is consumed);
 *         true to let QMK continue processing the event normally.
 */
bool process_autocorrection(uint16_t keycode, keyrecord_t* record) {
  // Rolling buffer of the most recent basic keycodes typed.
  static uint8_t typo_buffer[AUTOCORRECTION_MAX_LENGTH] = {0};
  // Number of valid entries currently in `typo_buffer`.
  static uint8_t typo_buffer_size = 0;

  // Ignore key release; we only process key presses.
  if (!record->event.pressed) {
    return true;
  }

#ifndef NO_ACTION_ONESHOT
  const uint8_t mods = get_mods() | get_oneshot_mods();
#else
  const uint8_t mods = get_mods();
#endif
  // Disable autocorrection while a mod other than shift is active.
  if ((mods & ~MOD_MASK_SHIFT) != 0) {
    typo_buffer_size = 0;
    return true;
  }

  // The following switch cases address various kinds of keycodes. This logic is
  // split over two switches rather than merged into one. The first switch may
  // extract a basic keycode which is then further handled by the second switch,
  // e.g. a layer-tap key with Caps Lock `LT(layer, KC_CAPS)`.
  switch (keycode) {
#ifndef NO_ACTION_TAPPING
    case QK_MOD_TAP ... QK_MOD_TAP_MAX:  // Tap-hold keys.
#ifndef NO_ACTION_LAYER
    case QK_LAYER_TAP ... QK_LAYER_TAP_MAX:
#endif
      // Ignore tap-hold keys while held; only a tap produces a character.
      if (record->tap.count == 0) {
        return true;
      }
      // Otherwise when tapped, get the basic keycode.
      // Fallthrough intended.
#endif

    // Handle shifted keys, e.g. symbols like KC_EXLM = S(KC_1).
    case QK_LSFT ... QK_LSFT + 255:
    case QK_RSFT ... QK_RSFT + 255:
      keycode &= 0xff;  // Get the basic keycode.
      break;

      // NOTE: Space Cadet keys expose no info to check whether they are being
      // tapped vs. held. This makes autocorrection ambiguous, e.g. KC_LCPO
      // might be '(', which we would treat as a word break, or it might be
      // shift, which we would treat as having no effect. To behave cautiously,
      // we allow Space Cadet keycodes to fall to the logic below and clear
      // autocorrection state.
  }

  switch (keycode) {
    // Ignore shifts, Caps Lock, one-shot mods, and layer switch keys.
    case KC_NO:
    case KC_LSFT:
    case KC_RSFT:
    case KC_CAPS:
    case QK_ONE_SHOT_MOD ... QK_ONE_SHOT_MOD_MAX:
    case QK_TO ... QK_TO_MAX:
    case QK_MOMENTARY ... QK_MOMENTARY_MAX:
    case QK_DEF_LAYER ... QK_DEF_LAYER_MAX:
    case QK_TOGGLE_LAYER ... QK_TOGGLE_LAYER_MAX:
    case QK_ONE_SHOT_LAYER ... QK_ONE_SHOT_LAYER_MAX:
    case QK_LAYER_TAP_TOGGLE ... QK_LAYER_TAP_TOGGLE_MAX:
    case QK_LAYER_MOD ... QK_LAYER_MOD_MAX:
      return true;  // Ignore these keys.
  }

  if (keycode == KC_QUOT) {
    // Treat " (shifted ') as a word boundary.
    if ((mods & MOD_MASK_SHIFT) != 0) {
      keycode = KC_SPC;
    }
  } else if (!(KC_A <= keycode && keycode <= KC_Z)) {
    if (keycode == KC_BSPC) {
      // Remove last character from the buffer.
      if (typo_buffer_size > 0) {
        --typo_buffer_size;
      }
      return true;
    } else if (KC_1 <= keycode && keycode <= KC_SLSH && keycode != KC_ESC) {
      // Set a word boundary if space, period, digit, etc. is pressed.
      // Behave more conservatively for the enter key. Reset, so that enter
      // can't be used on a word ending.
      if (keycode == KC_ENT) {
        typo_buffer_size = 0;
      }
      keycode = KC_SPC;
    } else {
      // Clear state if some other non-alpha key is pressed.
      typo_buffer_size = 0;
      return true;
    }
  }

  // If the buffer is full, rotate it to discard the oldest character.
  if (typo_buffer_size >= AUTOCORRECTION_MAX_LENGTH) {
    memmove(typo_buffer, typo_buffer + 1, AUTOCORRECTION_MAX_LENGTH - 1);
    typo_buffer_size = AUTOCORRECTION_MAX_LENGTH - 1;
  }

  // Append `keycode` to the buffer.
  // NOTE: `keycode` must be a basic keycode (0-255) by this point.
  typo_buffer[typo_buffer_size++] = (uint8_t)keycode;
  // Early return if not many characters have been buffered so far.
  if (typo_buffer_size < AUTOCORRECTION_MIN_LENGTH) {
    return true;
  }

  // Check whether the buffer ends in a typo. This is done using a trie
  // stored in `autocorrection_data`. The walk proceeds backwards, from the
  // most recently typed key toward older keys.
  uint16_t state = 0;  // Byte offset of the current trie node.
  uint8_t code = pgm_read_byte(autocorrection_data + state);
  for (int i = typo_buffer_size - 1; i >= 0; --i) {
    const uint8_t key_i = typo_buffer[i];

    // Node encoding (inferred from this walk): bit 6 (64) marks a node with
    // multiple children stored as 3-byte (key, link-lo, link-hi) entries;
    // bit 7 (128) marks a correction leaf; the low 6 bits hold the key or,
    // on a leaf, the backspace count.
    if (code & 64) {  // Check for match in node with multiple children.
      code &= 63;
      // Scan the child entries (3 bytes apart) for `key_i`; a zero key
      // terminates the list, meaning no match.
      for (; code != key_i; code = pgm_read_byte(autocorrection_data + (state += 3))) {
        if (!code) {
          return true;
        }
      }

      // Follow link to child node (16-bit little-endian offset).
      state = (uint16_t)(
          (uint_fast16_t) pgm_read_byte(
              autocorrection_data + state + 1
          ) |
          (uint_fast16_t) pgm_read_byte(
              autocorrection_data + state + 2
          ) << 8
      );

      // Otherwise check for match in node with a single child.
    } else if (code != key_i) {
      return true;
    } else if (!(code = pgm_read_byte(autocorrection_data + (++state)))) {
      // A zero byte after the matched key means the child starts one byte
      // further on; skip over the terminator.
      ++state;
    }

    // Stop if `state` becomes an invalid index. This should not normally
    // happen, it is a safeguard in case of a bug, data corruption, etc.
    if (state >= sizeof(autocorrection_data)) {
      return true;
    }

    // Read first byte of the next node.
    code = pgm_read_byte(autocorrection_data + state);

    if (code & 128) {  // A typo was found! Apply autocorrection.
      // Low 6 bits of a leaf byte hold how many typed keys to erase.
      const int backspaces = code & 63;
      for (int i = 0; i < backspaces; ++i) {
        tap_code(KC_BSPC);
      }
      // The correction string follows the leaf byte in program memory.
      send_string_P((char const*)(autocorrection_data + state + 1));

      if (keycode == KC_SPC) {
        // Keep the word boundary so a correction can trigger right after.
        typo_buffer[0] = KC_SPC;
        typo_buffer_size = 1;
        return true;
      } else {
        typo_buffer_size = 0;
        return false;
      }
    }
  }

  return true;
}
|
|
@ -0,0 +1,111 @@
|
||||||
|
// Copyright 2021-2022 Google LLC
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// https://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @file autocorrection.h
|
||||||
|
* @brief Autocorrection on your keyboard.
|
||||||
|
*
|
||||||
|
* Overview
|
||||||
|
* --------
|
||||||
|
*
|
||||||
|
* Some words are more prone to typos than others. This userspace QMK library
|
||||||
|
* implements rudimentary autocorrection, automatically detecting and fixing
|
||||||
|
* some misspellings.
|
||||||
|
*
|
||||||
|
* Features:
|
||||||
|
*
|
||||||
|
* * It runs on your keyboard, so it is always active no matter what software.
|
||||||
|
* * Low resource cost.
|
||||||
|
* * It is case insensitive.
|
||||||
|
* * It works within words, useful for programming to catch typos within longer
|
||||||
|
* identifiers.
|
||||||
|
*
|
||||||
|
* Limitations:
|
||||||
|
*
|
||||||
|
* * It is limited to alphabet characters a–z, apostrophes ', and word breaks.
|
||||||
|
* I'm sorry this probably isn't useful for languages besides English.
|
||||||
|
* * It does not follow mouse or hotkey driven cursor movement.
|
||||||
|
*
|
||||||
|
* Changing the autocorrection dictionary
|
||||||
|
* --------------------------------------
|
||||||
|
*
|
||||||
|
* The file autocorrection_data.h encodes the typos to correct. While you could
|
||||||
|
* simply use the version of this file provided above for a practical
|
||||||
|
* configuration, you can make your own to personalize the autocorrection to
|
||||||
|
* your most troublesome typos:
|
||||||
|
*
|
||||||
|
* Step 1: First, create an autocorrection dictionary autocorrection_dict.txt,
|
||||||
|
* in a form like
|
||||||
|
*
|
||||||
|
* :thier -> their
|
||||||
|
* dosen't -> doesn't
|
||||||
|
* fitler -> filter
|
||||||
|
* ouput -> output
|
||||||
|
* widht -> width
|
||||||
|
*
|
||||||
|
* For a practical 71-entry example, see autocorrection_dict.txt. And for a yet
|
||||||
|
* larger 400-entry example, see autocorrection_dict_extra.txt.
|
||||||
|
*
|
||||||
|
* The syntax is `typo -> correction`. Typos and corrections are case
|
||||||
|
* insensitive, and any whitespace before or after the typo and correction is
|
||||||
|
* ignored. The typo must be only the characters a-z, ', or the special
|
||||||
|
* character : representing a word break. The correction may have just about any
|
||||||
|
* printable ASCII characters.
|
||||||
|
*
|
||||||
|
* Step 2: Use the make_autocorrection_data.py Python script to process the
|
||||||
|
* dictionary. Put autocorrection_dict.txt in the same directory as the Python
|
||||||
|
* script and run
|
||||||
|
*
|
||||||
|
* $ python3 make_autocorrection_data.py
|
||||||
|
* Processed 71 autocorrection entries to table with 1120 bytes.
|
||||||
|
*
|
||||||
|
* The script arranges the entries in autocorrection_dict.txt into a trie and
|
||||||
|
* generates autocorrection_data.h with the serialized trie embedded as an
|
||||||
|
* array. The .h file will be written in the same directory.
|
||||||
|
*
|
||||||
|
* Step 3: Finally, recompile and flash your keymap.
|
||||||
|
*
|
||||||
|
* For full documentation, see
|
||||||
|
* <https://getreuer.info/posts/keyboards/autocorrection>
|
||||||
|
*
|
||||||
|
* @author Pascal Getreuer
|
||||||
|
*/
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "quantum.h"
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Handler function for autocorrection.
|
||||||
|
*
|
||||||
|
* Call this function in keymap.c from `process_record_user()` like
|
||||||
|
*
|
||||||
|
* #include "features/autocorrection.h"
|
||||||
|
*
|
||||||
|
* bool process_record_user(uint16_t keycode, keyrecord_t* record) {
|
||||||
|
* if (!process_autocorrection(keycode, record)) { return false; }
|
||||||
|
* // Your macros...
|
||||||
|
*
|
||||||
|
* return true;
|
||||||
|
* }
|
||||||
|
*/
|
||||||
|
bool process_autocorrection(uint16_t keycode, keyrecord_t* record);
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
|
@ -0,0 +1,238 @@
|
||||||
|
// Generated code.
|
||||||
|
|
||||||
|
// Autocorrection dictionary (132 entries):
|
||||||
|
// :agian -> again
|
||||||
|
// :agred -> agreed
|
||||||
|
// :ajust -> adjust
|
||||||
|
// :alot: -> a lot
|
||||||
|
// :andteh -> and the
|
||||||
|
// :andthe -> and the
|
||||||
|
// :anual -> annual
|
||||||
|
// :asign -> assign
|
||||||
|
// :asthe -> as the
|
||||||
|
// :atthe -> at the
|
||||||
|
// :casue -> cause
|
||||||
|
// :eveyr -> every
|
||||||
|
// :foudn -> found
|
||||||
|
// :gaurd -> guard
|
||||||
|
// :goign -> going
|
||||||
|
// :gonig -> going
|
||||||
|
// :graet -> great
|
||||||
|
// :haev -> have
|
||||||
|
// :htere -> there
|
||||||
|
// :htikn -> think
|
||||||
|
// :htink -> think
|
||||||
|
// :hwihc -> which
|
||||||
|
// :hwile -> while
|
||||||
|
// :idaes -> ideas
|
||||||
|
// :jstu: -> just
|
||||||
|
// :jsut: -> just
|
||||||
|
// :knwo -> know
|
||||||
|
// :konw -> know
|
||||||
|
// :kwno -> know
|
||||||
|
// :moeny -> money
|
||||||
|
// :olther -> other
|
||||||
|
// :otehr -> other
|
||||||
|
// :owudl -> would
|
||||||
|
// :sicne -> since
|
||||||
|
// :the:the: -> the
|
||||||
|
// :theri -> their
|
||||||
|
// :thier -> their
|
||||||
|
// :thsoe -> those
|
||||||
|
// :tothe -> to the
|
||||||
|
// :yaers -> years
|
||||||
|
// :yeasr -> years
|
||||||
|
// abbout -> about
|
||||||
|
// abotu -> about
|
||||||
|
// abouta -> about a
|
||||||
|
// aboutit -> about it
|
||||||
|
// aboutthe -> about the
|
||||||
|
// almsot -> almost
|
||||||
|
// alomst -> almost
|
||||||
|
// alwasy -> always
|
||||||
|
// alwyas -> always
|
||||||
|
// anohter -> another
|
||||||
|
// aroud -> around
|
||||||
|
// arround -> around
|
||||||
|
// arund -> around
|
||||||
|
// baceause -> because
|
||||||
|
// baout -> about
|
||||||
|
// beacuse -> because
|
||||||
|
// becasue -> because
|
||||||
|
// beccause -> because
|
||||||
|
// becouse -> because
|
||||||
|
// becuase -> because
|
||||||
|
// bedore -> before
|
||||||
|
// beeing -> being
|
||||||
|
// befoer -> before
|
||||||
|
// beteen -> between
|
||||||
|
// beween -> between
|
||||||
|
// bewteen -> between
|
||||||
|
// chekc -> check
|
||||||
|
// childen -> children
|
||||||
|
// chnage -> change
|
||||||
|
// claer -> clear
|
||||||
|
// comapny -> company
|
||||||
|
// contian -> contain
|
||||||
|
// coudl -> could
|
||||||
|
// didnot -> did not
|
||||||
|
// elasped -> elapsed
|
||||||
|
// firend -> friend
|
||||||
|
// firts -> first
|
||||||
|
// fitler -> filter
|
||||||
|
// follwo -> follow
|
||||||
|
// freind -> friend
|
||||||
|
// frequecy -> frequency
|
||||||
|
// fromthe -> from the
|
||||||
|
// heigth -> height
|
||||||
|
// iamge -> image
|
||||||
|
// inital -> initial
|
||||||
|
// intput -> input
|
||||||
|
// laguage -> language
|
||||||
|
// lenght -> length
|
||||||
|
// levle -> level
|
||||||
|
// libary -> library
|
||||||
|
// littel -> little
|
||||||
|
// mysefl -> myself
|
||||||
|
// ouptut -> output
|
||||||
|
// ouput -> output
|
||||||
|
// peaple -> people
|
||||||
|
// peolpe -> people
|
||||||
|
// peopel -> people
|
||||||
|
// poeople -> people
|
||||||
|
// poeple -> people
|
||||||
|
// probaly -> probably
|
||||||
|
// probelm -> problem
|
||||||
|
// raelly -> really
|
||||||
|
// realy -> really
|
||||||
|
// recrod -> record
|
||||||
|
// relaly -> really
|
||||||
|
// reponse -> response
|
||||||
|
// reprot -> report
|
||||||
|
// shoudl -> should
|
||||||
|
// singel -> single
|
||||||
|
// stregth -> strength
|
||||||
|
// strengh -> strength
|
||||||
|
// theese -> these
|
||||||
|
// therfore -> therefore
|
||||||
|
// thign -> thing
|
||||||
|
// thigsn -> things
|
||||||
|
// thikn -> think
|
||||||
|
// thiunk -> think
|
||||||
|
// thnigs -> things
|
||||||
|
// throught -> thought
|
||||||
|
// tihkn -> think
|
||||||
|
// tkaes -> takes
|
||||||
|
// todya -> today
|
||||||
|
// toghether -> together
|
||||||
|
// unkown -> unknown
|
||||||
|
// unqiue -> unique
|
||||||
|
// whcih -> which
|
||||||
|
// whihc -> which
|
||||||
|
// whlch -> which
|
||||||
|
// widht -> width
|
||||||
|
// wihch -> which
|
||||||
|
// woudl -> would
|
||||||
|
|
||||||
|
#define AUTOCORRECTION_MIN_LENGTH 5 // "abotu"
|
||||||
|
#define AUTOCORRECTION_MAX_LENGTH 9 // "toghether"
|
||||||
|
|
||||||
|
static const uint8_t autocorrection_data[1881] PROGMEM = {108, 61, 0, 4, 117, 0,
|
||||||
|
6, 141, 0, 7, 181, 0, 8, 52, 1, 10, 237, 2, 11, 8, 3, 12, 105, 3, 14, 115, 3,
|
||||||
|
15, 143, 3, 16, 12, 4, 17, 24, 4, 18, 234, 4, 21, 16, 5, 22, 152, 5, 23, 233,
|
||||||
|
5, 24, 190, 6, 25, 199, 6, 26, 208, 6, 28, 218, 6, 0, 72, 71, 0, 23, 81, 0,
|
||||||
|
24, 107, 0, 0, 11, 23, 44, 8, 11, 23, 44, 0, 132, 0, 82, 88, 0, 24, 98, 0, 0,
|
||||||
|
15, 4, 44, 0, 131, 32, 108, 111, 116, 0, 22, 13, 44, 0, 131, 117, 115, 116, 0,
|
||||||
|
23, 22, 13, 44, 0, 131, 117, 115, 116, 0, 87, 124, 0, 28, 133, 0, 0, 24, 18,
|
||||||
|
5, 4, 0, 128, 32, 97, 0, 7, 18, 23, 0, 129, 97, 121, 0, 75, 148, 0, 14, 173,
|
||||||
|
0, 0, 12, 0, 75, 157, 0, 26, 163, 0, 0, 26, 0, 129, 99, 104, 0, 11, 44, 0,
|
||||||
|
132, 119, 104, 105, 99, 104, 0, 8, 11, 6, 0, 129, 99, 107, 0, 72, 197, 0, 17,
|
||||||
|
223, 0, 18, 23, 1, 21, 33, 1, 24, 44, 1, 0, 83, 204, 0, 21, 215, 0, 0, 22, 4,
|
||||||
|
15, 8, 0, 131, 112, 115, 101, 100, 0, 10, 4, 44, 0, 128, 101, 100, 0, 72, 233,
|
||||||
|
0, 12, 244, 0, 24, 254, 0, 0, 21, 12, 9, 0, 132, 114, 105, 101, 110, 100, 0,
|
||||||
|
8, 21, 9, 0, 131, 105, 101, 110, 100, 0, 82, 5, 1, 21, 15, 1, 0, 21, 21, 4, 0,
|
||||||
|
132, 111, 117, 110, 100, 0, 4, 0, 130, 111, 117, 110, 100, 0, 21, 6, 8, 21, 0,
|
||||||
|
130, 111, 114, 100, 0, 24, 4, 10, 44, 0, 131, 117, 97, 114, 100, 0, 18, 21, 4,
|
||||||
|
0, 128, 110, 100, 0, 74, 80, 1, 11, 124, 1, 15, 205, 1, 17, 17, 2, 18, 27, 2,
|
||||||
|
19, 37, 2, 21, 47, 2, 22, 93, 2, 24, 198, 2, 0, 68, 87, 1, 16, 115, 1, 0, 81,
|
||||||
|
94, 1, 24, 103, 1, 0, 11, 6, 0, 131, 97, 110, 103, 101, 0, 10, 4, 15, 0, 132,
|
||||||
|
110, 103, 117, 97, 103, 101, 0, 4, 12, 0, 131, 109, 97, 103, 101, 0, 23, 0,
|
||||||
|
71, 142, 1, 16, 152, 1, 18, 162, 1, 22, 171, 1, 23, 180, 1, 0, 17, 4, 44, 0,
|
||||||
|
130, 32, 116, 104, 101, 0, 18, 21, 9, 0, 130, 32, 116, 104, 101, 0, 23, 44, 0,
|
||||||
|
130, 32, 116, 104, 101, 0, 4, 44, 0, 130, 32, 116, 104, 101, 0, 68, 187, 1,
|
||||||
|
24, 195, 1, 0, 44, 0, 130, 32, 116, 104, 101, 0, 18, 5, 4, 0, 130, 32, 116,
|
||||||
|
104, 101, 0, 76, 215, 1, 19, 226, 1, 25, 10, 2, 0, 26, 11, 44, 0, 132, 119,
|
||||||
|
104, 105, 108, 101, 0, 68, 236, 1, 8, 245, 1, 18, 255, 1, 0, 8, 19, 0, 131,
|
||||||
|
111, 112, 108, 101, 0, 18, 19, 0, 132, 101, 111, 112, 108, 101, 0, 8, 18, 19,
|
||||||
|
0, 133, 101, 111, 112, 108, 101, 0, 8, 15, 0, 129, 101, 108, 0, 6, 12, 22, 44,
|
||||||
|
0, 130, 110, 99, 101, 0, 22, 11, 23, 44, 0, 130, 111, 115, 101, 0, 15, 18, 8,
|
||||||
|
19, 0, 130, 112, 108, 101, 0, 72, 54, 2, 18, 65, 2, 0, 23, 11, 44, 0, 132,
|
||||||
|
116, 104, 101, 114, 101, 0, 71, 72, 2, 9, 81, 2, 0, 8, 5, 0, 131, 102, 111,
|
||||||
|
114, 101, 0, 21, 8, 11, 23, 0, 131, 101, 102, 111, 114, 101, 0, 68, 106, 2, 8,
|
||||||
|
117, 2, 17, 125, 2, 24, 138, 2, 0, 24, 6, 8, 5, 0, 131, 97, 117, 115, 101, 0,
|
||||||
|
8, 11, 23, 0, 130, 115, 101, 0, 18, 19, 8, 21, 0, 132, 115, 112, 111, 110,
|
||||||
|
115, 101, 0, 68, 148, 2, 6, 177, 2, 18, 188, 2, 0, 70, 155, 2, 8, 165, 2, 0,
|
||||||
|
6, 8, 5, 0, 132, 97, 117, 115, 101, 0, 6, 4, 5, 0, 134, 101, 99, 97, 117, 115,
|
||||||
|
101, 0, 4, 8, 5, 0, 132, 99, 97, 117, 115, 101, 0, 6, 8, 5, 0, 131, 97, 117,
|
||||||
|
115, 101, 0, 76, 205, 2, 22, 215, 2, 0, 20, 17, 24, 0, 131, 105, 113, 117,
|
||||||
|
101, 0, 4, 6, 0, 108, 225, 2, 8, 230, 2, 0, 130, 117, 115, 101, 0, 5, 0, 130,
|
||||||
|
117, 115, 101, 0, 76, 244, 2, 17, 254, 2, 0, 17, 18, 10, 44, 0, 130, 105, 110,
|
||||||
|
103, 0, 12, 8, 8, 5, 0, 131, 105, 110, 103, 0, 70, 24, 3, 8, 48, 3, 10, 60, 3,
|
||||||
|
12, 70, 3, 23, 79, 3, 0, 75, 31, 3, 15, 40, 3, 0, 12, 26, 0, 131, 104, 105,
|
||||||
|
99, 104, 0, 11, 26, 0, 130, 105, 99, 104, 0, 23, 7, 17, 4, 44, 0, 130, 32,
|
||||||
|
116, 104, 101, 0, 17, 8, 21, 23, 22, 0, 128, 116, 104, 0, 6, 11, 26, 0, 130,
|
||||||
|
105, 99, 104, 0, 10, 0, 72, 88, 3, 12, 98, 3, 0, 21, 23, 22, 0, 130, 110, 103,
|
||||||
|
116, 104, 0, 8, 11, 0, 129, 104, 116, 0, 21, 8, 11, 23, 44, 0, 129, 105, 114,
|
||||||
|
0, 17, 0, 76, 124, 3, 24, 135, 3, 0, 23, 11, 44, 0, 132, 116, 104, 105, 110,
|
||||||
|
107, 0, 12, 11, 23, 0, 130, 110, 107, 0, 68, 156, 3, 7, 182, 3, 8, 225, 3, 9,
|
||||||
|
3, 4, 0, 87, 163, 3, 24, 172, 3, 0, 12, 17, 12, 0, 129, 105, 97, 108, 0, 17,
|
||||||
|
4, 44, 0, 130, 110, 117, 97, 108, 0, 24, 0, 82, 191, 3, 26, 215, 3, 0, 70,
|
||||||
|
201, 3, 11, 205, 3, 26, 211, 3, 0, 129, 108, 100, 0, 22, 0, 129, 108, 100, 0,
|
||||||
|
129, 108, 100, 0, 18, 44, 0, 132, 119, 111, 117, 108, 100, 0, 74, 235, 3, 19,
|
||||||
|
243, 3, 23, 251, 3, 0, 17, 12, 22, 0, 129, 108, 101, 0, 18, 8, 19, 0, 129,
|
||||||
|
108, 101, 0, 23, 12, 15, 0, 129, 108, 101, 0, 8, 22, 28, 16, 0, 129, 108, 102,
|
||||||
|
0, 15, 8, 5, 18, 21, 19, 0, 130, 108, 101, 109, 0, 68, 46, 4, 7, 72, 4, 8, 81,
|
||||||
|
4, 10, 140, 4, 14, 174, 4, 22, 213, 4, 26, 223, 4, 0, 12, 0, 74, 55, 4, 23,
|
||||||
|
63, 4, 0, 4, 44, 0, 130, 97, 105, 110, 0, 17, 18, 6, 0, 130, 97, 105, 110, 0,
|
||||||
|
24, 18, 9, 44, 0, 129, 110, 100, 0, 71, 88, 4, 8, 98, 4, 0, 15, 12, 11, 6, 0,
|
||||||
|
129, 114, 101, 110, 0, 87, 105, 4, 26, 130, 4, 0, 72, 112, 4, 26, 120, 4, 0,
|
||||||
|
5, 0, 130, 119, 101, 101, 110, 0, 8, 5, 0, 132, 116, 119, 101, 101, 110, 0, 8,
|
||||||
|
5, 0, 131, 116, 119, 101, 101, 110, 0, 12, 0, 75, 152, 4, 18, 158, 4, 22, 165,
|
||||||
|
4, 0, 23, 0, 129, 110, 103, 0, 10, 44, 0, 129, 110, 103, 0, 4, 44, 0, 130,
|
||||||
|
115, 105, 103, 110, 0, 75, 181, 4, 12, 190, 4, 0, 12, 23, 0, 131, 104, 105,
|
||||||
|
110, 107, 0, 75, 197, 4, 23, 203, 4, 0, 23, 0, 129, 110, 107, 0, 11, 44, 0,
|
||||||
|
132, 116, 104, 105, 110, 107, 0, 10, 12, 11, 23, 0, 130, 110, 103, 115, 0, 18,
|
||||||
|
14, 17, 24, 0, 130, 110, 111, 119, 110, 0, 81, 241, 4, 26, 250, 4, 0, 26, 14,
|
||||||
|
44, 0, 130, 110, 111, 119, 0, 79, 1, 5, 17, 9, 5, 0, 15, 18, 9, 0, 129, 111,
|
||||||
|
119, 0, 14, 44, 0, 129, 111, 119, 0, 72, 29, 5, 11, 124, 5, 22, 134, 5, 28,
|
||||||
|
143, 5, 0, 68, 48, 5, 11, 56, 5, 12, 86, 5, 15, 95, 5, 18, 105, 5, 23, 113, 5,
|
||||||
|
0, 15, 6, 0, 130, 101, 97, 114, 0, 23, 0, 72, 65, 5, 15, 77, 5, 0, 11, 10, 18,
|
||||||
|
23, 0, 133, 101, 116, 104, 101, 114, 0, 18, 44, 0, 132, 116, 104, 101, 114, 0,
|
||||||
|
11, 23, 44, 0, 130, 101, 105, 114, 0, 23, 12, 9, 0, 131, 108, 116, 101, 114,
|
||||||
|
0, 9, 8, 5, 0, 129, 114, 101, 0, 11, 18, 17, 4, 0, 131, 116, 104, 101, 114, 0,
|
||||||
|
8, 23, 18, 44, 0, 130, 104, 101, 114, 0, 4, 8, 28, 44, 0, 129, 114, 115, 0, 8,
|
||||||
|
25, 8, 44, 0, 129, 114, 121, 0, 68, 168, 5, 8, 178, 5, 10, 203, 5, 21, 214, 5,
|
||||||
|
23, 225, 5, 0, 28, 26, 15, 4, 0, 130, 97, 121, 115, 0, 4, 0, 71, 187, 5, 14,
|
||||||
|
195, 5, 0, 12, 44, 0, 130, 101, 97, 115, 0, 23, 0, 131, 97, 107, 101, 115, 0,
|
||||||
|
12, 17, 11, 23, 0, 131, 105, 110, 103, 115, 0, 8, 4, 28, 44, 0, 131, 101, 97,
|
||||||
|
114, 115, 0, 21, 12, 9, 0, 129, 115, 116, 0, 72, 252, 5, 11, 6, 6, 12, 46, 6,
|
||||||
|
18, 57, 6, 22, 95, 6, 24, 123, 6, 0, 4, 21, 10, 44, 0, 130, 101, 97, 116, 0,
|
||||||
|
71, 13, 6, 10, 20, 6, 0, 12, 26, 0, 129, 116, 104, 0, 81, 27, 6, 24, 34, 6, 0,
|
||||||
|
8, 15, 0, 129, 116, 104, 0, 18, 21, 11, 23, 0, 133, 111, 117, 103, 104, 116,
|
||||||
|
0, 23, 24, 18, 5, 4, 0, 129, 32, 105, 116, 0, 81, 67, 6, 21, 77, 6, 22, 86, 6,
|
||||||
|
0, 7, 12, 7, 0, 130, 32, 110, 111, 116, 0, 19, 8, 21, 0, 130, 111, 114, 116,
|
||||||
|
0, 16, 15, 4, 0, 130, 111, 115, 116, 0, 80, 102, 6, 24, 112, 6, 0, 18, 15, 4,
|
||||||
|
0, 131, 109, 111, 115, 116, 0, 13, 4, 44, 0, 131, 100, 106, 117, 115, 116, 0,
|
||||||
|
82, 133, 6, 19, 157, 6, 23, 180, 6, 0, 68, 140, 6, 5, 149, 6, 0, 5, 0, 132,
|
||||||
|
97, 98, 111, 117, 116, 0, 5, 4, 0, 131, 111, 117, 116, 0, 87, 164, 6, 24, 172,
|
||||||
|
6, 0, 17, 12, 0, 131, 112, 117, 116, 0, 18, 0, 130, 116, 112, 117, 116, 0, 19,
|
||||||
|
24, 18, 0, 131, 116, 112, 117, 116, 0, 23, 18, 5, 4, 0, 129, 117, 116, 0, 8,
|
||||||
|
4, 11, 44, 0, 129, 118, 101, 0, 17, 18, 14, 44, 0, 130, 110, 111, 119, 0, 70,
|
||||||
|
234, 6, 15, 246, 6, 17, 42, 7, 21, 69, 7, 22, 80, 7, 0, 8, 24, 20, 8, 21, 9,
|
||||||
|
0, 129, 110, 99, 121, 0, 68, 253, 6, 15, 31, 7, 0, 69, 7, 7, 8, 16, 7, 15, 22,
|
||||||
|
7, 0, 18, 21, 19, 0, 129, 98, 108, 121, 0, 21, 0, 128, 108, 121, 0, 8, 21, 0,
|
||||||
|
131, 97, 108, 108, 121, 0, 8, 4, 21, 0, 132, 101, 97, 108, 108, 121, 0, 72,
|
||||||
|
49, 7, 19, 58, 7, 0, 18, 16, 44, 0, 130, 110, 101, 121, 0, 4, 16, 18, 6, 0,
|
||||||
|
131, 112, 97, 110, 121, 0, 4, 5, 12, 15, 0, 130, 114, 97, 114, 121, 0, 4, 26,
|
||||||
|
15, 4, 0, 129, 121, 115, 0};
|
||||||
|
|
|
@ -0,0 +1,148 @@
|
||||||
|
# Dictionary syntax:
|
||||||
|
# Each line of this file defines one typo correction entry with the syntax
|
||||||
|
# "typo -> correction". Typos and corrections are case insensitive, and any
|
||||||
|
# whitespace before or after the typo and correction is ignored. The typo must be
|
||||||
|
# only the letters a-z, the apostrophe ', or the special character : representing a word break.
|
||||||
|
|
||||||
|
:htere -> there
|
||||||
|
abbout -> about
|
||||||
|
abotu -> about
|
||||||
|
baout -> about
|
||||||
|
:theri -> their
|
||||||
|
:thier -> their
|
||||||
|
:owudl -> would
|
||||||
|
woudl -> would
|
||||||
|
peaple -> people
|
||||||
|
peolpe -> people
|
||||||
|
peopel -> people
|
||||||
|
poeple -> people
|
||||||
|
poeople -> people
|
||||||
|
:hwihc -> which
|
||||||
|
whcih -> which
|
||||||
|
whihc -> which
|
||||||
|
whlch -> which
|
||||||
|
wihch -> which
|
||||||
|
coudl -> could
|
||||||
|
:htikn -> think
|
||||||
|
:htink -> think
|
||||||
|
thikn -> think
|
||||||
|
thiunk -> think
|
||||||
|
tihkn -> think
|
||||||
|
:olther -> other
|
||||||
|
:otehr -> other
|
||||||
|
baceause -> because
|
||||||
|
beacuse -> because
|
||||||
|
becasue -> because
|
||||||
|
beccause -> because
|
||||||
|
becouse -> because
|
||||||
|
becuase -> because
|
||||||
|
|
||||||
|
theese -> these
|
||||||
|
:goign -> going
|
||||||
|
:gonig -> going
|
||||||
|
:yaers -> years
|
||||||
|
:yeasr -> years
|
||||||
|
:thsoe -> those
|
||||||
|
shoudl -> should
|
||||||
|
raelly -> really
|
||||||
|
realy -> really
|
||||||
|
relaly -> really
|
||||||
|
bedore -> before
|
||||||
|
befoer -> before
|
||||||
|
littel -> little
|
||||||
|
beeing -> being
|
||||||
|
:hwile -> while
|
||||||
|
|
||||||
|
aroud -> around
|
||||||
|
arround -> around
|
||||||
|
arund -> around
|
||||||
|
thign -> thing
|
||||||
|
thigsn -> things
|
||||||
|
thnigs -> things
|
||||||
|
anohter -> another
|
||||||
|
beteen -> between
|
||||||
|
beween -> between
|
||||||
|
bewteen -> between
|
||||||
|
:eveyr -> every
|
||||||
|
:graet -> great
|
||||||
|
:agian -> again
|
||||||
|
:sicne -> since
|
||||||
|
alwasy -> always
|
||||||
|
alwyas -> always
|
||||||
|
throught -> thought
|
||||||
|
|
||||||
|
|
||||||
|
almsot -> almost
|
||||||
|
alomst -> almost
|
||||||
|
chnage -> change
|
||||||
|
chekc -> check
|
||||||
|
childen -> children
|
||||||
|
claer -> clear
|
||||||
|
comapny -> company
|
||||||
|
contian -> contain
|
||||||
|
elasped -> elapsed
|
||||||
|
fitler -> filter
|
||||||
|
firts -> first
|
||||||
|
follwo -> follow
|
||||||
|
:foudn -> found
|
||||||
|
frequecy -> frequency
|
||||||
|
firend -> friend
|
||||||
|
freind -> friend
|
||||||
|
heigth -> height
|
||||||
|
iamge -> image
|
||||||
|
inital -> initial
|
||||||
|
intput -> input
|
||||||
|
laguage -> language
|
||||||
|
lenght -> length
|
||||||
|
levle -> level
|
||||||
|
libary -> library
|
||||||
|
:moeny -> money
|
||||||
|
mysefl -> myself
|
||||||
|
ouptut -> output
|
||||||
|
ouput -> output
|
||||||
|
probaly -> probably
|
||||||
|
probelm -> problem
|
||||||
|
recrod -> record
|
||||||
|
reponse -> response
|
||||||
|
reprot -> report
|
||||||
|
singel -> single
|
||||||
|
stregth -> strength
|
||||||
|
strengh -> strength
|
||||||
|
tkaes -> takes
|
||||||
|
therfore -> therefore
|
||||||
|
todya -> today
|
||||||
|
toghether -> together
|
||||||
|
unkown -> unknown
|
||||||
|
unqiue -> unique
|
||||||
|
widht -> width
|
||||||
|
|
||||||
|
|
||||||
|
## Catch skipped spaces between common words.
|
||||||
|
:alot: -> a lot
|
||||||
|
:andteh -> and the
|
||||||
|
:andthe -> and the
|
||||||
|
:asthe -> as the
|
||||||
|
:atthe -> at the
|
||||||
|
abouta -> about a
|
||||||
|
aboutit -> about it
|
||||||
|
aboutthe -> about the
|
||||||
|
:tothe -> to the
|
||||||
|
didnot -> did not
|
||||||
|
fromthe -> from the
|
||||||
|
:the:the: -> the
|
||||||
|
|
||||||
|
|
||||||
|
## Various additional entries.
|
||||||
|
:agred -> agreed
|
||||||
|
:ajust -> adjust
|
||||||
|
:anual -> annual
|
||||||
|
:asign -> assign
|
||||||
|
:casue -> cause
|
||||||
|
:gaurd -> guard
|
||||||
|
:haev -> have
|
||||||
|
:idaes -> ideas
|
||||||
|
:jsut: -> just
|
||||||
|
:jstu: -> just
|
||||||
|
:knwo -> know
|
||||||
|
:konw -> know
|
||||||
|
:kwno -> know
|
|
@ -0,0 +1,352 @@
|
||||||
|
"""Python program to make autocorrection_data.h.
|
||||||
|
|
||||||
|
This program reads "autocorrection_dict.txt" from the current directory and
|
||||||
|
generates a C source file "autocorrection_data.h" with a serialized trie
|
||||||
|
embedded as an array. Run this program without arguments like
|
||||||
|
|
||||||
|
$ python3 make_autocorrection_data.py
|
||||||
|
|
||||||
|
Or specify a dict file as the first argument like
|
||||||
|
|
||||||
|
$ python3 make_autocorrection_data.py mykeymap/dict.txt
|
||||||
|
|
||||||
|
The output is written to "autocorrection_data.h" in the same directory as the
|
||||||
|
dictionary. Or optionally specify the output .h file as well like
|
||||||
|
|
||||||
|
$ python3 make_autocorrection_data.py dict.txt somewhere/out.h
|
||||||
|
|
||||||
|
Each line of the dict file defines one typo and its correction with the syntax
|
||||||
|
"typo -> correction". Blank lines or lines starting with '#' are ignored.
|
||||||
|
Example:
|
||||||
|
|
||||||
|
:thier -> their
|
||||||
|
dosen't -> doesn't
|
||||||
|
fitler -> filter
|
||||||
|
lenght -> length
|
||||||
|
ouput -> output
|
||||||
|
widht -> width
|
||||||
|
|
||||||
|
See autocorrection_dict_extra.txt for a larger example.
|
||||||
|
|
||||||
|
For full documentation, see
|
||||||
|
https://getreuer.info/posts/keyboards/autocorrection
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os.path
|
||||||
|
import sys
|
||||||
|
import textwrap
|
||||||
|
from typing import Any, Dict, Iterator, List, Tuple
|
||||||
|
|
||||||
|
# Optional dependency: the `english_words` package supplies a large set of
# correctly spelled words used to check that no typo entry is a substring of
# a real word (which would make autocorrection fire on correct input).
try:
    from english_words import english_words_lower_alpha_set as CORRECT_WORDS
except ImportError:
    print(
        "Autocorrection will falsely trigger when a typo is a substring of a "
        "correctly spelled word. To check for this, install the english_words "
        "package and rerun this script:\n\n pip install english_words\n"
    )
    # Use a minimal word list as a fallback.
    CORRECT_WORDS = (
        "apparent",
        "association",
        "available",
        "classification",
        "effect",
        "entertainment",
        "fantastic",
        "information",
        "integrate",
        "international",
        "language",
        "loosest",
        "manual",
        "nothing",
        "provides",
        "reference",
        "statehood",
        "technology",
        "virtually",
        "wealthier",
        "wonderful",
    )
|
||||||
|
|
||||||
|
# USB HID keycodes for the characters a typo may contain.
KC_A = 4
KC_SPC = 0x2C
KC_QUOT = 0x34

# Maps each character allowed in a typo to its keycode: the apostrophe, the
# word-break marker ':', and the letters a-z.
TYPO_CHARS = {
    "'": KC_QUOT,
    ":": KC_SPC,  # "Word break" character.
    # Characters a-z.
    **{chr(code) : code + KC_A - ord("a") for code in range(ord("a"), ord("z") + 1)},
}
|
||||||
|
|
||||||
|
|
||||||
|
def parse_file(file_name: str) -> List[Tuple[str, str]]:
    """Parses an autocorrections dictionary file.

    Each line of the file defines one typo and its correction with the syntax
    "typo -> correction". Blank lines or lines starting with '#' are ignored.
    The function validates that typos only have characters in TYPO_CHARS, that
    typos are not substrings of other typos, and checks that typos don't
    trigger on CORRECT_WORDS.

    Args:
      file_name: String, path of the autocorrections dictionary.
    Returns:
      List of (typo, correction) tuples.
    """
    autocorrections = []
    seen_typos = set()
    for line_number, typo, correction in parse_file_lines(file_name):
        if typo in seen_typos:
            print(f'Warning:{line_number}: Ignoring duplicate typo: "{typo}"')
            continue

        # Validate that the typo uses only permitted characters.
        if any(c not in TYPO_CHARS for c in typo):
            print(
                f'Error:{line_number}: Typo "{typo}" has '
                "characters other than " + "".join(TYPO_CHARS.keys())
            )
            sys.exit(1)

        # A typo containing (or contained in) another typo would shadow it,
        # so substring relationships between typos are rejected outright.
        for other_typo in seen_typos:
            if typo in other_typo or other_typo in typo:
                print(
                    f"Error:{line_number}: Typos may not be substrings of one "
                    f"another, otherwise the longer typo would never trigger: "
                    f'"{typo}" vs. "{other_typo}".'
                )
                sys.exit(1)

        # Short typos are allowed but likely to false trigger; warn only.
        if len(typo) < 5:
            print(
                f"Warning:{line_number}: It is suggested that typos are at "
                f'least 5 characters long to avoid false triggers: "{typo}"'
            )

        check_typo_against_dictionary(line_number, typo)

        autocorrections.append((typo, correction))
        seen_typos.add(typo)

    return autocorrections
|
||||||
|
|
||||||
|
|
||||||
|
def make_trie(autocorrections: List[Tuple[str, str]]) -> Dict[str, Any]:
    """Makes a trie from the typos, inserting each typo in reverse.

    Matching happens from the end of the input buffer backwards, which is why
    each typo's letters are inserted last-to-first.

    Args:
      autocorrections: List of (typo, correction) tuples.
    Returns:
      Dict of dicts, representing the trie.
    """
    trie: Dict[str, Any] = {}
    for typo, correction in autocorrections:
        node = trie
        for letter in reversed(typo):
            node = node.setdefault(letter, {})
        # Leaf nodes carry the original (typo, correction) pair.
        node["LEAF"] = (typo, correction)
    return trie
|
||||||
|
|
||||||
|
|
||||||
|
def parse_file_lines(file_name: str) -> Iterator[Tuple[int, str, str]]:
    """Parses lines read from `file_name` into typo-correction pairs.

    Blank lines and lines starting with '#' are skipped. Typos are forced to
    lowercase, and spaces in typos are replaced with ':' word-break markers.

    Args:
      file_name: String, path of the autocorrections dictionary.
    Yields:
      (line_number, typo, correction) tuples; line_number is 1-based.
    """
    # Fix: the original iterated over a bare open() and never closed the
    # file; a `with` block guarantees the handle is released even if the
    # caller stops iterating early or an error occurs.
    with open(file_name, "rt") as file:
        for line_number, line in enumerate(file, start=1):
            line = line.strip()
            if line and line[0] != "#":
                # Parse syntax "typo -> correction", using strip to ignore indenting.
                tokens = [token.strip() for token in line.split("->", 1)]
                if len(tokens) != 2 or not tokens[0]:
                    print(f'Error:{line_number}: Invalid syntax: "{line}"')
                    sys.exit(1)

                typo, correction = tokens
                typo = typo.lower()  # Force typos to lowercase.
                typo = typo.replace(" ", ":")

                yield line_number, typo, correction
|
||||||
|
|
||||||
|
|
||||||
|
def check_typo_against_dictionary(line_number: int, typo: str) -> None:
    """Checks `typo` against English dictionary words, printing warnings.

    A leading ':' anchors the typo to the start of a word and a trailing ':'
    anchors it to the end; an unanchored typo matches anywhere inside a word.
    """
    anchored_start = typo.startswith(":")
    anchored_end = typo.endswith(":")

    if anchored_start and anchored_end:
        # Fully anchored: the typo triggers only on the exact word.
        if typo[1:-1] in CORRECT_WORDS:
            print(
                f'Warning:{line_number}: Typo "{typo}" is a correctly spelled '
                "dictionary word."
            )
        return

    # Partially anchored or unanchored: build the appropriate match test.
    if anchored_start:
        def matches(word):
            return word.startswith(typo[1:])
    elif anchored_end:
        def matches(word):
            return word.endswith(typo[:-1])
    else:
        def matches(word):
            return typo in word

    for word in CORRECT_WORDS:
        if matches(word):
            print(
                f'Warning:{line_number}: Typo "{typo}" would falsely trigger '
                f'on correctly spelled word "{word}".'
            )
|
||||||
|
|
||||||
|
|
||||||
|
def serialize_trie(
    autocorrections: List[Tuple[str, str]], trie: Dict[str, Any]
) -> List[int]:
    """Serializes trie and correction data in a form readable by the C code.

    The trie is first flattened depth-first into a table of entries; each
    entry is then encoded as bytes, with branch entries storing two-byte
    links (by table byte offset) to their children.

    Args:
      autocorrections: List of (typo, correction) tuples.
      trie: Dict of dicts.
    Returns:
      List of ints in the range 0-255.
    """
    table: List[Dict[str, Any]] = []

    def flatten(node: Dict[str, Any]) -> Dict[str, Any]:
        """Appends `node` and its subtree to `table` in depth-first order."""
        if "LEAF" in node:  # Handle a leaf trie node.
            typo, correction = node["LEAF"]
            ends_on_boundary = typo[-1] == ":"
            typo = typo.strip(":")
            # Count the common prefix of typo and correction; those
            # characters stay on screen and need no backspacing or retyping.
            prefix_len = 0
            limit = min(len(typo), len(correction))
            while prefix_len < limit and typo[prefix_len] == correction[prefix_len]:
                prefix_len += 1
            backspaces = len(typo) - prefix_len - 1 + ends_on_boundary
            assert 0 <= backspaces <= 63
            payload = (
                [backspaces + 128]
                + list(bytes(correction[prefix_len:], "ascii"))
                + [0]
            )
            entry = {"data": payload, "links": [], "byte_offset": 0}
            table.append(entry)
        elif len(node) == 1:  # Handle a trie node with a single child.
            char, node = next(iter(node.items()))
            entry = {"chars": char, "byte_offset": 0}

            # Tries commonly contain long chains of single-child nodes;
            # collapse the whole chain into one entry for compactness.
            while len(node) == 1 and "LEAF" not in node:
                char, node = next(iter(node.items()))
                entry["chars"] += char

            table.append(entry)
            entry["links"] = [flatten(node)]
        else:  # Handle a trie node with multiple children.
            entry = {"chars": "".join(sorted(node.keys())), "byte_offset": 0}
            table.append(entry)
            entry["links"] = [flatten(node[char]) for char in entry["chars"]]
        return entry

    flatten(trie)

    def encode(entry: Dict[str, Any]) -> List[int]:
        """Encodes one table entry as a list of bytes."""
        if not entry["links"]:  # Leaf entry: emit the precomputed data.
            return entry["data"]
        if len(entry["links"]) == 1:  # Chain entry: chars then terminator.
            return [TYPO_CHARS[char] for char in entry["chars"]] + [0]
        # Branch entry: the first alternative is flagged with bit 64.
        encoded: List[int] = []
        for char, link in zip(entry["chars"], entry["links"]):
            encoded += [TYPO_CHARS[char] | (0 if encoded else 64)] + encode_link(link)
        return encoded + [0]

    # First pass: compute each entry's byte offset so links can be encoded.
    offset = 0
    for entry in table:
        entry["byte_offset"] = offset
        offset += len(encode(entry))

    # Second pass: emit the final serialized table.
    return [b for entry in table for b in encode(entry)]
|
||||||
|
|
||||||
|
|
||||||
|
def encode_link(link: Dict[str, Any]) -> List[int]:
    """Encodes a node link as two bytes (little-endian byte offset)."""
    offset = link["byte_offset"]
    # Links are 16-bit, so the serialized table cannot exceed 64KB.
    if not 0 <= offset <= 0xFFFF:
        print(
            "Error: The autocorrection table is too large, a node link exceeds "
            "64KB limit. Try reducing the autocorrection dict to fewer entries."
        )
        sys.exit(1)
    return [offset & 255, offset >> 8]
|
||||||
|
|
||||||
|
|
||||||
|
def write_generated_code(
    autocorrections: List[Tuple[str, str]], data: List[int], file_name: str
) -> None:
    """Writes autocorrection data as generated C code to `file_name`.

    Args:
      autocorrections: List of (typo, correction) tuples.
      data: List of ints in 0-255, the serialized trie.
      file_name: String, path of the output C file.
    """
    assert all(0 <= b <= 255 for b in data)

    # Shortest and longest typos determine the min/max length #defines.
    min_typo = min(autocorrections, key=lambda e: len(e[0]))[0]
    max_typo = max(autocorrections, key=lambda e: len(e[0]))[0]

    pieces = ["// Generated code.\n\n"]
    pieces.append(
        f"// Autocorrection dictionary ({len(autocorrections)} entries):\n"
    )
    # Dictionary comment lines, sorted, with typos padded to a fixed width.
    pieces.append(
        "".join(
            sorted(
                f"// {typo:<{len(max_typo)}} -> {correction}\n"
                for typo, correction in autocorrections
            )
        )
    )
    pieces.append(
        f'\n#define AUTOCORRECTION_MIN_LENGTH {len(min_typo)} // "{min_typo}"\n'
    )
    pieces.append(
        f'#define AUTOCORRECTION_MAX_LENGTH {len(max_typo)} // "{max_typo}"\n\n'
    )
    # Serialized trie, wrapped to 80 columns.
    pieces.append(
        textwrap.fill(
            "static const uint8_t autocorrection_data[%d] PROGMEM = {%s};"
            % (len(data), ", ".join(map(str, data))),
            width=80,
            subsequent_indent=" ",
        )
    )
    pieces.append("\n\n")

    with open(file_name, "wt") as f:
        f.write("".join(pieces))
|
||||||
|
|
||||||
|
|
||||||
|
def get_default_h_file(dict_file: str) -> str:
    """Returns the default output header path, next to `dict_file`."""
    directory = os.path.dirname(dict_file)
    return os.path.join(directory, "autocorrect_data.h")
|
||||||
|
|
||||||
|
|
||||||
|
def main(argv):
    """Generates autocorrection data from a dictionary file.

    Args:
      argv: Command-line arguments. argv[1] is the dictionary path (default
        "dict.txt"); argv[2] is the output header path (default is
        autocorrect_data.h next to the dictionary).
    """
    dict_file = argv[1] if len(argv) > 1 else "dict.txt"
    h_file = argv[2] if len(argv) > 2 else get_default_h_file(dict_file)

    autocorrections = parse_file(dict_file)
    trie = make_trie(autocorrections)
    data = serialize_trie(autocorrections, trie)
    # Fix: the original used %-formatting inside an f-string, making the
    # f-prefix redundant and misleading; use a plain f-string instead.
    # The printed text is unchanged.
    print(
        f"Processed {len(autocorrections)} autocorrection entries to table "
        f"with {len(data)} bytes."
    )
    write_generated_code(autocorrections, data, h_file)


if __name__ == "__main__":
    main(sys.argv)
|
|
@ -8,7 +8,9 @@
|
||||||
|
|
||||||
|
|
||||||
// Enable spellcheck over HID
|
// Enable spellcheck over HID
|
||||||
#define ENABLE_HID_SPELLCHECK
|
//#define ENABLE_HID_SPELLCHECK
|
||||||
|
|
||||||
|
#define ENABLE_AUTOCORRECT
|
||||||
|
|
||||||
// RGBLight effects
|
// RGBLight effects
|
||||||
// Static color is always enabled.
|
// Static color is always enabled.
|
||||||
|
|
|
@ -7,6 +7,10 @@
|
||||||
|
|
||||||
#include "features/beta_rawhid.h"
|
#include "features/beta_rawhid.h"
|
||||||
|
|
||||||
|
#ifdef ENABLE_AUTOCORRECT
|
||||||
|
#include "features/autocorrect/autocorrect.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
// Values that should not be saved to git.
|
// Values that should not be saved to git.
|
||||||
// Create a `secrets.h` in the keymap directory.
|
// Create a `secrets.h` in the keymap directory.
|
||||||
//
|
//
|
||||||
|
@ -27,7 +31,6 @@ bool send_special_character(uint16_t keycode) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
LEADER_EXTERNS();
|
LEADER_EXTERNS();
|
||||||
void leader_start(void) { ergodox_right_led_3_on(); }
|
void leader_start(void) { ergodox_right_led_3_on(); }
|
||||||
void leader_end(void) { ergodox_right_led_3_off(); }
|
void leader_end(void) { ergodox_right_led_3_off(); }
|
||||||
|
@ -67,6 +70,10 @@ bool process_record_user(uint16_t keycode, keyrecord_t *record) {
|
||||||
if (!process_spellcheck(keycode, record)) { return false; }
|
if (!process_spellcheck(keycode, record)) { return false; }
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef ENABLE_AUTOCORRECT
|
||||||
|
if (!process_autocorrection(keycode, record)) { return false; }
|
||||||
|
#endif
|
||||||
|
|
||||||
// Handle special chars
|
// Handle special chars
|
||||||
if (record->event.pressed) {
|
if (record->event.pressed) {
|
||||||
if (!send_special_character(keycode)) { return false; }
|
if (!send_special_character(keycode)) { return false; }
|
||||||
|
|
Loading…
Reference in New Issue