fix: log SJIS decoding errors instead of silently discarding them

Add SJISToUTF8Lossy() that wraps SJISToUTF8() and logs decode errors at
slog.Debug level. Replace all 31 call sites across 17 files that previously
discarded the error with `_, _ =`. This makes garbled text from malformed
SJIS client data debuggable without adding noise at default log levels.
This commit is contained in:
Houmgaor
2026-02-22 17:01:22 +01:00
parent 59fd722d37
commit f640cfee27
20 changed files with 63 additions and 43 deletions

View File

@@ -4,6 +4,7 @@ import (
"bytes"
"fmt"
"io"
"log/slog"
"strconv"
"strings"
@@ -40,6 +41,16 @@ func SJISToUTF8(b []byte) (string, error) {
return string(result), nil
}
// SJISToUTF8Lossy converts Shift-JIS bytes to a UTF-8 string without
// surfacing an error to the caller. If SJISToUTF8 reports a failure, the
// error and the input length are emitted through slog.Debug, and whatever
// string SJISToUTF8 produced is returned as-is.
func SJISToUTF8Lossy(b []byte) string {
	decoded, decodeErr := SJISToUTF8(b)
	if decodeErr == nil {
		return decoded
	}
	// Debug level keeps malformed client data from adding noise at default
	// log levels while still leaving a trace for troubleshooting.
	slog.Debug("SJIS decode failed", "error", decodeErr, "raw_len", len(b))
	return decoded
}
// ToNGWord converts a UTF-8 string into a slice of uint16 values in the
// Shift-JIS byte-swapped format used by the MHF NG-word (chat filter) system.
func ToNGWord(x string) []uint16 {

View File

@@ -461,6 +461,25 @@ func BenchmarkCSVElems(b *testing.B) {
}
}
// TestSJISToUTF8Lossy exercises SJISToUTF8Lossy with well-formed, truncated
// and nil input.
func TestSJISToUTF8Lossy(t *testing.T) {
	// Well-formed input from the ASCII subset must round-trip unchanged.
	if got := SJISToUTF8Lossy([]byte("Hello")); got != "Hello" {
		t.Errorf("SJISToUTF8Lossy(valid) = %q, want %q", got, "Hello")
	}

	// A lone lead byte (0x82 with no trail byte) is a truncated multi-byte
	// sequence. The result is deliberately not inspected: the call only has
	// to return without panicking.
	_ = SJISToUTF8Lossy([]byte{0x82})

	// A nil slice must yield the empty string.
	if got := SJISToUTF8Lossy(nil); got != "" {
		t.Errorf("SJISToUTF8Lossy(nil) = %q, want %q", got, "")
	}
}
func TestUTF8ToSJIS_UnsupportedCharacters(t *testing.T) {
// Regression test for PR #116: Characters outside the Shift-JIS range
// (e.g. Lenny face, cuneiform) previously caused a panic in UTF8ToSJIS,