From 08e7de2c5ed3ee42ecb7c6bd32bf7dd2dc31da56 Mon Sep 17 00:00:00 2001 From: Houmgaor Date: Thu, 19 Mar 2026 19:28:30 +0100 Subject: [PATCH] feat(savedata): recover from rotating backups on hash mismatch MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When primary savedata fails its SHA-256 integrity check, query savedata_backups in recency order and return the first slot that decompresses cleanly. Recovery is read-only — the next successful Save() overwrites the primary with fresh data and a new hash, self-healing the corruption transparently. Closes #178 --- CLAUDE.md | 2 +- server/channelserver/handlers_character.go | 61 +++++++- .../channelserver/handlers_character_test.go | 138 ++++++++++++++++++ server/channelserver/repo_character.go | 32 ++++ server/channelserver/repo_interfaces.go | 3 + server/channelserver/repo_mocks_test.go | 3 + 6 files changed, 236 insertions(+), 3 deletions(-) diff --git a/CLAUDE.md b/CLAUDE.md index 1c4bff185..f32b27c4d 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -165,4 +165,4 @@ golangci-lint run ./... # Must pass with zero errors - Branch naming: `feature/`, `fix/`, `refactor/`, `docs/` - Commit messages: conventional commits (`feat:`, `fix:`, `refactor:`, `docs:`) -- Update `CHANGELOG.md` under "Unreleased" for all changes +- Update `CHANGELOG.md` under "Unreleased" for every feature or fix — one concise line per change (two lines maximum) diff --git a/server/channelserver/handlers_character.go b/server/channelserver/handlers_character.go index b8a84bfca..63632dbd6 100644 --- a/server/channelserver/handlers_character.go +++ b/server/channelserver/handlers_character.go @@ -63,8 +63,7 @@ func GetCharacterSaveData(s *Session, charID uint32) (*CharacterSaveData, error) zap.Binary("stored_hash", storedHash), zap.Binary("computed_hash", computedHash[:]), ) - // TODO: attempt recovery from savedata_backups here - return nil, errors.New("savedata integrity check failed") + return recoverFromBackups(s, saveData, charID) } } @@ -73,6 +72,64 @@ func GetCharacterSaveData(s *Session, charID uint32) (*CharacterSaveData, error) return saveData, nil } +// recoverFromBackups is called when the primary savedata fails its integrity check. +// It queries savedata_backups in recency order and returns the first slot whose +// compressed blob decompresses cleanly. It never writes to the database — the +// next successful Save() will overwrite the primary with fresh data and a new hash, +// self-healing the corruption without any extra recovery writes. +func recoverFromBackups(s *Session, base *CharacterSaveData, charID uint32) (*CharacterSaveData, error) { + backups, err := s.server.charRepo.LoadBackupsByRecency(charID) + if err != nil { + s.logger.Error("Failed to load savedata backups during recovery", + zap.Uint32("charID", charID), + zap.Error(err), + ) + return nil, errors.New("savedata integrity check failed") + } + + if len(backups) == 0 { + s.logger.Error("Savedata corrupted and no backups available", + zap.Uint32("charID", charID), + ) + return nil, errors.New("savedata integrity check failed: no backups available") + } + + for _, backup := range backups { + candidate := &CharacterSaveData{ + CharID: base.CharID, + IsNewCharacter: base.IsNewCharacter, + Name: base.Name, + Mode: base.Mode, + Pointers: base.Pointers, + compSave: backup.Data, + } + + if err := candidate.Decompress(); err != nil { + s.logger.Warn("Backup slot decompression failed during recovery, trying next", + zap.Uint32("charID", charID), + zap.Int("slot", backup.Slot), + zap.Time("saved_at", backup.SavedAt), + zap.Error(err), + ) + continue + } + + s.logger.Warn("Savedata recovered from backup — primary was corrupt", + zap.Uint32("charID", charID), + zap.Int("slot", backup.Slot), + zap.Time("saved_at", backup.SavedAt), + ) + candidate.updateStructWithSaveData() + return candidate, nil + } + + s.logger.Error("Savedata corrupted and all backup slots failed decompression", + zap.Uint32("charID", charID), + zap.Int("backups_tried", len(backups)), + ) + return nil, errors.New("savedata integrity check failed: all backup slots exhausted") +} + func (save *CharacterSaveData) Save(s *Session) error { if save.decompSave == nil { s.logger.Warn("No decompressed save data, skipping save", diff --git a/server/channelserver/handlers_character_test.go b/server/channelserver/handlers_character_test.go index 40047c7ae..1f320e97e 100644 --- a/server/channelserver/handlers_character_test.go +++ b/server/channelserver/handlers_character_test.go @@ -446,6 +446,144 @@ func TestGetCharacterSaveData_Integration(t *testing.T) { } } +// TestGetCharacterSaveData_BackupRecovery tests that a character whose primary +// savedata has a hash mismatch is transparently recovered from the backup table. +func TestGetCharacterSaveData_BackupRecovery(t *testing.T) { + db := SetupTestDB(t) + defer TeardownTestDB(t, db) + + // Build valid compressed savedata (same layout as CreateTestCharacter). + rawSave := make([]byte, 150000) + copy(rawSave[88:], append([]byte("BackupChar"), 0x00)) + validCompressed, err := nullcomp.Compress(rawSave) + if err != nil { + t.Fatalf("compress valid savedata: %v", err) + } + + // Build a compressed blob that will fail decompression (garbage bytes). + invalidCompressed := []byte("this is not valid compressed data") + + corruptHash := make([]byte, 32) // all-zero hash is wrong for any real savedata + corruptHash[0] = 0xFF + + repo := NewCharacterRepository(db) + + t.Run("recovers_from_most_recent_backup", func(t *testing.T) { + userID := CreateTestUser(t, db, "recovery_user") + charID := CreateTestCharacter(t, db, userID, "BackupChar") + + // Store a valid backup in slot 0. + if err := repo.SaveBackup(charID, 0, validCompressed); err != nil { + t.Fatalf("SaveBackup: %v", err) + } + + // Set a wrong hash on the primary so the integrity check fails. + if _, err := db.Exec("UPDATE characters SET savedata_hash = $1 WHERE id = $2", corruptHash, charID); err != nil { + t.Fatalf("set corrupt hash: %v", err) + } + + mock := &MockCryptConn{sentPackets: make([][]byte, 0)} + s := createTestSession(mock) + s.charID = charID + SetTestDB(s.server, db) + s.server.erupeConfig.RealClientMode = cfg.Z2 + + got, err := GetCharacterSaveData(s, charID) + if err != nil { + t.Fatalf("GetCharacterSaveData() unexpected error: %v", err) + } + if got == nil { + t.Fatal("GetCharacterSaveData() returned nil") + } + if got.CharID != charID { + t.Errorf("CharID = %d, want %d", got.CharID, charID) + } + }) + + t.Run("skips_corrupt_backup_and_uses_next", func(t *testing.T) { + userID := CreateTestUser(t, db, "multibackup_user") + charID := CreateTestCharacter(t, db, userID, "BackupChar") + + // Slot 1 is newer (saved second) but has invalid compressed data. + // Slot 0 is older but valid. Recovery must skip slot 1 and use slot 0. + if err := repo.SaveBackup(charID, 0, validCompressed); err != nil { + t.Fatalf("SaveBackup slot 0: %v", err) + } + if err := repo.SaveBackup(charID, 1, invalidCompressed); err != nil { + t.Fatalf("SaveBackup slot 1: %v", err) + } + // Update slot 1's saved_at to be newer than slot 0. + if _, err := db.Exec( + "UPDATE savedata_backups SET saved_at = now() + interval '1 minute' WHERE char_id = $1 AND slot = 1", + charID, + ); err != nil { + t.Fatalf("update saved_at: %v", err) + } + + if _, err := db.Exec("UPDATE characters SET savedata_hash = $1 WHERE id = $2", corruptHash, charID); err != nil { + t.Fatalf("set corrupt hash: %v", err) + } + + mock := &MockCryptConn{sentPackets: make([][]byte, 0)} + s := createTestSession(mock) + s.charID = charID + SetTestDB(s.server, db) + s.server.erupeConfig.RealClientMode = cfg.Z2 + + got, err := GetCharacterSaveData(s, charID) + if err != nil { + t.Fatalf("GetCharacterSaveData() unexpected error: %v", err) + } + if got == nil { + t.Fatal("GetCharacterSaveData() returned nil") + } + }) + + t.Run("returns_error_when_no_backups", func(t *testing.T) { + userID := CreateTestUser(t, db, "nobackup_user") + charID := CreateTestCharacter(t, db, userID, "NoBackupChar") + + if _, err := db.Exec("UPDATE characters SET savedata_hash = $1 WHERE id = $2", corruptHash, charID); err != nil { + t.Fatalf("set corrupt hash: %v", err) + } + + mock := &MockCryptConn{sentPackets: make([][]byte, 0)} + s := createTestSession(mock) + s.charID = charID + SetTestDB(s.server, db) + s.server.erupeConfig.RealClientMode = cfg.Z2 + + _, err := GetCharacterSaveData(s, charID) + if err == nil { + t.Fatal("expected error when no backups available, got nil") + } + }) + + t.Run("returns_error_when_all_backups_corrupt", func(t *testing.T) { + userID := CreateTestUser(t, db, "allcorrupt_user") + charID := CreateTestCharacter(t, db, userID, "AllCorruptChar") + + if err := repo.SaveBackup(charID, 0, invalidCompressed); err != nil { + t.Fatalf("SaveBackup: %v", err) + } + + if _, err := db.Exec("UPDATE characters SET savedata_hash = $1 WHERE id = $2", corruptHash, charID); err != nil { + t.Fatalf("set corrupt hash: %v", err) + } + + mock := &MockCryptConn{sentPackets: make([][]byte, 0)} + s := createTestSession(mock) + s.charID = charID + SetTestDB(s.server, db) + s.server.erupeConfig.RealClientMode = cfg.Z2 + + _, err := GetCharacterSaveData(s, charID) + if err == nil { + t.Fatal("expected error when all backups corrupt, got nil") + } + }) +} + // TestCharacterSaveData_Save_Integration tests saving character data to database func TestCharacterSaveData_Save_Integration(t *testing.T) { db := SetupTestDB(t) diff --git a/server/channelserver/repo_character.go b/server/channelserver/repo_character.go index 6bc3a9b6b..2fc20cdb0 100644 --- a/server/channelserver/repo_character.go +++ b/server/channelserver/repo_character.go @@ -237,6 +237,38 @@ func (r *CharacterRepository) UpdateGCPAndPact(charID uint32, gcp uint32, pactID return err } +// SavedataBackup holds one row from the savedata_backups table. +type SavedataBackup struct { + Slot int + Data []byte + SavedAt time.Time +} + +// LoadBackupsByRecency returns all backup slots for a character, ordered +// most-recent first. Returns an empty (non-nil) slice if no backups exist. +func (r *CharacterRepository) LoadBackupsByRecency(charID uint32) ([]SavedataBackup, error) { + rows, err := r.db.Query( + `SELECT slot, savedata, saved_at FROM savedata_backups + WHERE char_id = $1 + ORDER BY saved_at DESC`, + charID, + ) + if err != nil { + return nil, err + } + defer rows.Close() //nolint:errcheck // rows.Close error is non-actionable here + + backups := make([]SavedataBackup, 0) + for rows.Next() { + var b SavedataBackup + if err := rows.Scan(&b.Slot, &b.Data, &b.SavedAt); err != nil { + return nil, err + } + backups = append(backups, b) + } + return backups, rows.Err() +} + // SaveBackup upserts a savedata snapshot into the rotating backup table. func (r *CharacterRepository) SaveBackup(charID uint32, slot int, data []byte) error { _, err := r.db.Exec(` diff --git a/server/channelserver/repo_interfaces.go b/server/channelserver/repo_interfaces.go index 7d630a0c6..1a7104396 100644 --- a/server/channelserver/repo_interfaces.go +++ b/server/channelserver/repo_interfaces.go @@ -48,6 +48,9 @@ type CharacterRepo interface { // LoadSaveDataWithHash loads savedata along with its stored SHA-256 hash. // The hash may be nil for characters saved before checksums were introduced. LoadSaveDataWithHash(charID uint32) (id uint32, savedata []byte, isNew bool, name string, hash []byte, err error) + // LoadBackupsByRecency returns all backup slots for a character ordered + // most-recent first. Returns an empty slice if no backups exist. + LoadBackupsByRecency(charID uint32) ([]SavedataBackup, error) } // GuildRepo defines the contract for guild data access. diff --git a/server/channelserver/repo_mocks_test.go b/server/channelserver/repo_mocks_test.go index dbd527669..0484ef43e 100644 --- a/server/channelserver/repo_mocks_test.go +++ b/server/channelserver/repo_mocks_test.go @@ -247,6 +247,9 @@ func (m *mockCharacterRepo) SaveCharacterDataAtomic(_ SaveAtomicParams) error { func (m *mockCharacterRepo) LoadSaveDataWithHash(_ uint32) (uint32, []byte, bool, string, []byte, error) { return m.loadSaveDataID, m.loadSaveDataData, m.loadSaveDataNew, m.loadSaveDataName, nil, m.loadSaveDataErr } +func (m *mockCharacterRepo) LoadBackupsByRecency(_ uint32) ([]SavedataBackup, error) { + return []SavedataBackup{}, nil +} // --- mockGoocooRepo ---