feat(savedata): recover from rotating backups on hash mismatch

When primary savedata fails its SHA-256 integrity check, query
savedata_backups in recency order and return the first slot that
decompresses cleanly. Recovery is read-only — the next successful
Save() overwrites the primary with fresh data and a new hash,
self-healing the corruption transparently.

Closes #178
This commit is contained in:
Houmgaor
2026-03-19 19:28:30 +01:00
parent 6139e90968
commit 08e7de2c5e
6 changed files with 236 additions and 3 deletions

View File

@@ -165,4 +165,4 @@ golangci-lint run ./... # Must pass with zero errors
- Branch naming: `feature/`, `fix/`, `refactor/`, `docs/` - Branch naming: `feature/`, `fix/`, `refactor/`, `docs/`
- Commit messages: conventional commits (`feat:`, `fix:`, `refactor:`, `docs:`) - Commit messages: conventional commits (`feat:`, `fix:`, `refactor:`, `docs:`)
- Update `CHANGELOG.md` under "Unreleased" for all changes - Update `CHANGELOG.md` under "Unreleased" for every feature or fix — one concise line per change (two lines maximum)

View File

@@ -63,8 +63,7 @@ func GetCharacterSaveData(s *Session, charID uint32) (*CharacterSaveData, error)
zap.Binary("stored_hash", storedHash), zap.Binary("stored_hash", storedHash),
zap.Binary("computed_hash", computedHash[:]), zap.Binary("computed_hash", computedHash[:]),
) )
// TODO: attempt recovery from savedata_backups here return recoverFromBackups(s, saveData, charID)
return nil, errors.New("savedata integrity check failed")
} }
} }
@@ -73,6 +72,64 @@ func GetCharacterSaveData(s *Session, charID uint32) (*CharacterSaveData, error)
return saveData, nil return saveData, nil
} }
// recoverFromBackups is the fallback used when the primary savedata blob does
// not match its stored hash. It asks the character repository for every backup
// of charID (the repository returns them newest first) and walks them in that
// order, returning the first one whose blob decompresses without error.
//
// The character metadata (ID, name, mode, pointers, new-character flag) is
// copied from base; only the compressed payload comes from the backup row.
//
// This function performs no repository writes itself. The design relies on the
// next successful Save() to overwrite the primary with fresh data and a new
// hash, so no dedicated repair write is needed.
//
// It returns an error when the backups cannot be loaded, when none exist, or
// when every slot fails to decompress.
func recoverFromBackups(s *Session, base *CharacterSaveData, charID uint32) (*CharacterSaveData, error) {
	slots, loadErr := s.server.charRepo.LoadBackupsByRecency(charID)
	switch {
	case loadErr != nil:
		s.logger.Error("Failed to load savedata backups during recovery",
			zap.Uint32("charID", charID),
			zap.Error(loadErr),
		)
		return nil, errors.New("savedata integrity check failed")
	case len(slots) == 0:
		s.logger.Error("Savedata corrupted and no backups available",
			zap.Uint32("charID", charID),
		)
		return nil, errors.New("savedata integrity check failed: no backups available")
	}

	for i := range slots {
		bk := &slots[i]

		// Fresh struct per attempt so a failed decompression cannot leak
		// partial state into the next candidate.
		restored := &CharacterSaveData{
			CharID:         base.CharID,
			IsNewCharacter: base.IsNewCharacter,
			Name:           base.Name,
			Mode:           base.Mode,
			Pointers:       base.Pointers,
			compSave:       bk.Data,
		}

		decompErr := restored.Decompress()
		if decompErr == nil {
			s.logger.Warn("Savedata recovered from backup — primary was corrupt",
				zap.Uint32("charID", charID),
				zap.Int("slot", bk.Slot),
				zap.Time("saved_at", bk.SavedAt),
			)
			restored.updateStructWithSaveData()
			return restored, nil
		}

		s.logger.Warn("Backup slot decompression failed during recovery, trying next",
			zap.Uint32("charID", charID),
			zap.Int("slot", bk.Slot),
			zap.Time("saved_at", bk.SavedAt),
			zap.Error(decompErr),
		)
	}

	s.logger.Error("Savedata corrupted and all backup slots failed decompression",
		zap.Uint32("charID", charID),
		zap.Int("backups_tried", len(slots)),
	)
	return nil, errors.New("savedata integrity check failed: all backup slots exhausted")
}
func (save *CharacterSaveData) Save(s *Session) error { func (save *CharacterSaveData) Save(s *Session) error {
if save.decompSave == nil { if save.decompSave == nil {
s.logger.Warn("No decompressed save data, skipping save", s.logger.Warn("No decompressed save data, skipping save",

View File

@@ -446,6 +446,144 @@ func TestGetCharacterSaveData_Integration(t *testing.T) {
} }
} }
// TestGetCharacterSaveData_BackupRecovery covers the recovery path of
// GetCharacterSaveData: each subtest stores a deliberately wrong hash on the
// primary row and then checks whether loading succeeds (a usable backup
// exists) or fails (no backup, or every backup is undecompressable).
func TestGetCharacterSaveData_BackupRecovery(t *testing.T) {
	db := SetupTestDB(t)
	defer TeardownTestDB(t, db)

	// Well-formed compressed savedata (same layout as CreateTestCharacter).
	plain := make([]byte, 150000)
	copy(plain[88:], append([]byte("BackupChar"), 0x00))
	goodBlob, err := nullcomp.Compress(plain)
	if err != nil {
		t.Fatalf("compress valid savedata: %v", err)
	}

	// Garbage bytes standing in for a backup that cannot be decompressed.
	badBlob := []byte("this is not valid compressed data")

	// 32-byte value used to overwrite the stored hash so the integrity check fails.
	wrongHash := make([]byte, 32)
	wrongHash[0] = 0xFF

	repo := NewCharacterRepository(db)

	// breakPrimaryHash overwrites the character's stored hash with wrongHash.
	breakPrimaryHash := func(t *testing.T, charID uint32) {
		t.Helper()
		if _, err := db.Exec("UPDATE characters SET savedata_hash = $1 WHERE id = $2", wrongHash, charID); err != nil {
			t.Fatalf("set corrupt hash: %v", err)
		}
	}

	// loadAs builds a fresh session bound to the test DB and loads the character.
	loadAs := func(charID uint32) (*CharacterSaveData, error) {
		mock := &MockCryptConn{sentPackets: make([][]byte, 0)}
		s := createTestSession(mock)
		s.charID = charID
		SetTestDB(s.server, db)
		s.server.erupeConfig.RealClientMode = cfg.Z2
		return GetCharacterSaveData(s, charID)
	}

	t.Run("recovers_from_most_recent_backup", func(t *testing.T) {
		userID := CreateTestUser(t, db, "recovery_user")
		charID := CreateTestCharacter(t, db, userID, "BackupChar")

		// A single valid backup in slot 0.
		if err := repo.SaveBackup(charID, 0, goodBlob); err != nil {
			t.Fatalf("SaveBackup: %v", err)
		}
		breakPrimaryHash(t, charID)

		got, err := loadAs(charID)
		if err != nil {
			t.Fatalf("GetCharacterSaveData() unexpected error: %v", err)
		}
		if got == nil {
			t.Fatal("GetCharacterSaveData() returned nil")
		}
		if got.CharID != charID {
			t.Errorf("CharID = %d, want %d", got.CharID, charID)
		}
	})

	t.Run("skips_corrupt_backup_and_uses_next", func(t *testing.T) {
		userID := CreateTestUser(t, db, "multibackup_user")
		charID := CreateTestCharacter(t, db, userID, "BackupChar")

		// Slot 0 is the older, valid backup; slot 1 is newer but invalid.
		// Recovery has to move past slot 1 and succeed with slot 0.
		if err := repo.SaveBackup(charID, 0, goodBlob); err != nil {
			t.Fatalf("SaveBackup slot 0: %v", err)
		}
		if err := repo.SaveBackup(charID, 1, badBlob); err != nil {
			t.Fatalf("SaveBackup slot 1: %v", err)
		}
		// Push slot 1's timestamp forward so it sorts as the most recent.
		if _, err := db.Exec(
			"UPDATE savedata_backups SET saved_at = now() + interval '1 minute' WHERE char_id = $1 AND slot = 1",
			charID,
		); err != nil {
			t.Fatalf("update saved_at: %v", err)
		}
		breakPrimaryHash(t, charID)

		got, err := loadAs(charID)
		if err != nil {
			t.Fatalf("GetCharacterSaveData() unexpected error: %v", err)
		}
		if got == nil {
			t.Fatal("GetCharacterSaveData() returned nil")
		}
	})

	t.Run("returns_error_when_no_backups", func(t *testing.T) {
		userID := CreateTestUser(t, db, "nobackup_user")
		charID := CreateTestCharacter(t, db, userID, "NoBackupChar")
		breakPrimaryHash(t, charID)

		if _, err := loadAs(charID); err == nil {
			t.Fatal("expected error when no backups available, got nil")
		}
	})

	t.Run("returns_error_when_all_backups_corrupt", func(t *testing.T) {
		userID := CreateTestUser(t, db, "allcorrupt_user")
		charID := CreateTestCharacter(t, db, userID, "AllCorruptChar")

		if err := repo.SaveBackup(charID, 0, badBlob); err != nil {
			t.Fatalf("SaveBackup: %v", err)
		}
		breakPrimaryHash(t, charID)

		if _, err := loadAs(charID); err == nil {
			t.Fatal("expected error when all backups corrupt, got nil")
		}
	})
}
// TestCharacterSaveData_Save_Integration tests saving character data to database // TestCharacterSaveData_Save_Integration tests saving character data to database
func TestCharacterSaveData_Save_Integration(t *testing.T) { func TestCharacterSaveData_Save_Integration(t *testing.T) {
db := SetupTestDB(t) db := SetupTestDB(t)

View File

@@ -237,6 +237,38 @@ func (r *CharacterRepository) UpdateGCPAndPact(charID uint32, gcp uint32, pactID
return err return err
} }
// SavedataBackup is the in-memory form of a single savedata_backups row.
type SavedataBackup struct {
	// Slot is the value of the row's slot column.
	Slot int
	// Data is the row's savedata column, stored as raw bytes.
	Data []byte
	// SavedAt is the row's saved_at timestamp.
	SavedAt time.Time
}
// LoadBackupsByRecency fetches every savedata_backups row belonging to charID,
// sorted by saved_at descending so the newest backup comes first.
//
// When the character has no backups the result is an empty, non-nil slice.
// A query or scan failure yields a nil slice and the error; an iteration error
// reported by rows.Err() is returned together with the rows read so far.
func (r *CharacterRepository) LoadBackupsByRecency(charID uint32) ([]SavedataBackup, error) {
	rows, queryErr := r.db.Query(
		`SELECT slot, savedata, saved_at FROM savedata_backups
WHERE char_id = $1
ORDER BY saved_at DESC`,
		charID,
	)
	if queryErr != nil {
		return nil, queryErr
	}
	// A Close error is non-actionable here, so it is deliberately discarded.
	defer func() { _ = rows.Close() }()

	result := []SavedataBackup{}
	for rows.Next() {
		var row SavedataBackup
		scanErr := rows.Scan(&row.Slot, &row.Data, &row.SavedAt)
		if scanErr != nil {
			return nil, scanErr
		}
		result = append(result, row)
	}
	if iterErr := rows.Err(); iterErr != nil {
		return result, iterErr
	}
	return result, nil
}
// SaveBackup upserts a savedata snapshot into the rotating backup table. // SaveBackup upserts a savedata snapshot into the rotating backup table.
func (r *CharacterRepository) SaveBackup(charID uint32, slot int, data []byte) error { func (r *CharacterRepository) SaveBackup(charID uint32, slot int, data []byte) error {
_, err := r.db.Exec(` _, err := r.db.Exec(`

View File

@@ -48,6 +48,9 @@ type CharacterRepo interface {
// LoadSaveDataWithHash loads savedata along with its stored SHA-256 hash. // LoadSaveDataWithHash loads savedata along with its stored SHA-256 hash.
// The hash may be nil for characters saved before checksums were introduced. // The hash may be nil for characters saved before checksums were introduced.
LoadSaveDataWithHash(charID uint32) (id uint32, savedata []byte, isNew bool, name string, hash []byte, err error) LoadSaveDataWithHash(charID uint32) (id uint32, savedata []byte, isNew bool, name string, hash []byte, err error)
// LoadBackupsByRecency returns all backup slots for a character ordered
// most-recent first. Returns an empty slice if no backups exist.
LoadBackupsByRecency(charID uint32) ([]SavedataBackup, error)
} }
// GuildRepo defines the contract for guild data access. // GuildRepo defines the contract for guild data access.

View File

@@ -247,6 +247,9 @@ func (m *mockCharacterRepo) SaveCharacterDataAtomic(_ SaveAtomicParams) error {
func (m *mockCharacterRepo) LoadSaveDataWithHash(_ uint32) (uint32, []byte, bool, string, []byte, error) { func (m *mockCharacterRepo) LoadSaveDataWithHash(_ uint32) (uint32, []byte, bool, string, []byte, error) {
return m.loadSaveDataID, m.loadSaveDataData, m.loadSaveDataNew, m.loadSaveDataName, nil, m.loadSaveDataErr return m.loadSaveDataID, m.loadSaveDataData, m.loadSaveDataNew, m.loadSaveDataName, nil, m.loadSaveDataErr
} }
// LoadBackupsByRecency is the mock implementation: it ignores the character ID
// and always reports an empty, non-nil backup list with no error.
func (m *mockCharacterRepo) LoadBackupsByRecency(_ uint32) ([]SavedataBackup, error) {
	none := make([]SavedataBackup, 0)
	return none, nil
}
// --- mockGoocooRepo --- // --- mockGoocooRepo ---