mirror of
https://github.com/Mezeporta/Erupe.git
synced 2026-03-22 15:43:49 +01:00
feat(savedata): recover from rotating backups on hash mismatch
When primary savedata fails its SHA-256 integrity check, query savedata_backups in recency order and return the first slot that decompresses cleanly. Recovery is read-only — the next successful Save() overwrites the primary with fresh data and a new hash, self-healing the corruption transparently. Closes #178
This commit is contained in:
@@ -63,8 +63,7 @@ func GetCharacterSaveData(s *Session, charID uint32) (*CharacterSaveData, error)
|
||||
zap.Binary("stored_hash", storedHash),
|
||||
zap.Binary("computed_hash", computedHash[:]),
|
||||
)
|
||||
// TODO: attempt recovery from savedata_backups here
|
||||
return nil, errors.New("savedata integrity check failed")
|
||||
return recoverFromBackups(s, saveData, charID)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -73,6 +72,64 @@ func GetCharacterSaveData(s *Session, charID uint32) (*CharacterSaveData, error)
|
||||
return saveData, nil
|
||||
}
|
||||
|
||||
// recoverFromBackups is called when the primary savedata fails its integrity check.
|
||||
// It queries savedata_backups in recency order and returns the first slot whose
|
||||
// compressed blob decompresses cleanly. It never writes to the database — the
|
||||
// next successful Save() will overwrite the primary with fresh data and a new hash,
|
||||
// self-healing the corruption without any extra recovery writes.
|
||||
func recoverFromBackups(s *Session, base *CharacterSaveData, charID uint32) (*CharacterSaveData, error) {
|
||||
backups, err := s.server.charRepo.LoadBackupsByRecency(charID)
|
||||
if err != nil {
|
||||
s.logger.Error("Failed to load savedata backups during recovery",
|
||||
zap.Uint32("charID", charID),
|
||||
zap.Error(err),
|
||||
)
|
||||
return nil, errors.New("savedata integrity check failed")
|
||||
}
|
||||
|
||||
if len(backups) == 0 {
|
||||
s.logger.Error("Savedata corrupted and no backups available",
|
||||
zap.Uint32("charID", charID),
|
||||
)
|
||||
return nil, errors.New("savedata integrity check failed: no backups available")
|
||||
}
|
||||
|
||||
for _, backup := range backups {
|
||||
candidate := &CharacterSaveData{
|
||||
CharID: base.CharID,
|
||||
IsNewCharacter: base.IsNewCharacter,
|
||||
Name: base.Name,
|
||||
Mode: base.Mode,
|
||||
Pointers: base.Pointers,
|
||||
compSave: backup.Data,
|
||||
}
|
||||
|
||||
if err := candidate.Decompress(); err != nil {
|
||||
s.logger.Warn("Backup slot decompression failed during recovery, trying next",
|
||||
zap.Uint32("charID", charID),
|
||||
zap.Int("slot", backup.Slot),
|
||||
zap.Time("saved_at", backup.SavedAt),
|
||||
zap.Error(err),
|
||||
)
|
||||
continue
|
||||
}
|
||||
|
||||
s.logger.Warn("Savedata recovered from backup — primary was corrupt",
|
||||
zap.Uint32("charID", charID),
|
||||
zap.Int("slot", backup.Slot),
|
||||
zap.Time("saved_at", backup.SavedAt),
|
||||
)
|
||||
candidate.updateStructWithSaveData()
|
||||
return candidate, nil
|
||||
}
|
||||
|
||||
s.logger.Error("Savedata corrupted and all backup slots failed decompression",
|
||||
zap.Uint32("charID", charID),
|
||||
zap.Int("backups_tried", len(backups)),
|
||||
)
|
||||
return nil, errors.New("savedata integrity check failed: all backup slots exhausted")
|
||||
}
|
||||
|
||||
func (save *CharacterSaveData) Save(s *Session) error {
|
||||
if save.decompSave == nil {
|
||||
s.logger.Warn("No decompressed save data, skipping save",
|
||||
|
||||
@@ -446,6 +446,144 @@ func TestGetCharacterSaveData_Integration(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// TestGetCharacterSaveData_BackupRecovery tests that a character whose primary
|
||||
// savedata has a hash mismatch is transparently recovered from the backup table.
|
||||
func TestGetCharacterSaveData_BackupRecovery(t *testing.T) {
|
||||
db := SetupTestDB(t)
|
||||
defer TeardownTestDB(t, db)
|
||||
|
||||
// Build valid compressed savedata (same layout as CreateTestCharacter).
|
||||
rawSave := make([]byte, 150000)
|
||||
copy(rawSave[88:], append([]byte("BackupChar"), 0x00))
|
||||
validCompressed, err := nullcomp.Compress(rawSave)
|
||||
if err != nil {
|
||||
t.Fatalf("compress valid savedata: %v", err)
|
||||
}
|
||||
|
||||
// Build a compressed blob that will fail decompression (garbage bytes).
|
||||
invalidCompressed := []byte("this is not valid compressed data")
|
||||
|
||||
corruptHash := make([]byte, 32) // all-zero hash is wrong for any real savedata
|
||||
corruptHash[0] = 0xFF
|
||||
|
||||
repo := NewCharacterRepository(db)
|
||||
|
||||
t.Run("recovers_from_most_recent_backup", func(t *testing.T) {
|
||||
userID := CreateTestUser(t, db, "recovery_user")
|
||||
charID := CreateTestCharacter(t, db, userID, "BackupChar")
|
||||
|
||||
// Store a valid backup in slot 0.
|
||||
if err := repo.SaveBackup(charID, 0, validCompressed); err != nil {
|
||||
t.Fatalf("SaveBackup: %v", err)
|
||||
}
|
||||
|
||||
// Set a wrong hash on the primary so the integrity check fails.
|
||||
if _, err := db.Exec("UPDATE characters SET savedata_hash = $1 WHERE id = $2", corruptHash, charID); err != nil {
|
||||
t.Fatalf("set corrupt hash: %v", err)
|
||||
}
|
||||
|
||||
mock := &MockCryptConn{sentPackets: make([][]byte, 0)}
|
||||
s := createTestSession(mock)
|
||||
s.charID = charID
|
||||
SetTestDB(s.server, db)
|
||||
s.server.erupeConfig.RealClientMode = cfg.Z2
|
||||
|
||||
got, err := GetCharacterSaveData(s, charID)
|
||||
if err != nil {
|
||||
t.Fatalf("GetCharacterSaveData() unexpected error: %v", err)
|
||||
}
|
||||
if got == nil {
|
||||
t.Fatal("GetCharacterSaveData() returned nil")
|
||||
}
|
||||
if got.CharID != charID {
|
||||
t.Errorf("CharID = %d, want %d", got.CharID, charID)
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("skips_corrupt_backup_and_uses_next", func(t *testing.T) {
|
||||
userID := CreateTestUser(t, db, "multibackup_user")
|
||||
charID := CreateTestCharacter(t, db, userID, "BackupChar")
|
||||
|
||||
// Slot 1 is newer (saved second) but has invalid compressed data.
|
||||
// Slot 0 is older but valid. Recovery must skip slot 1 and use slot 0.
|
||||
if err := repo.SaveBackup(charID, 0, validCompressed); err != nil {
|
||||
t.Fatalf("SaveBackup slot 0: %v", err)
|
||||
}
|
||||
if err := repo.SaveBackup(charID, 1, invalidCompressed); err != nil {
|
||||
t.Fatalf("SaveBackup slot 1: %v", err)
|
||||
}
|
||||
// Update slot 1's saved_at to be newer than slot 0.
|
||||
if _, err := db.Exec(
|
||||
"UPDATE savedata_backups SET saved_at = now() + interval '1 minute' WHERE char_id = $1 AND slot = 1",
|
||||
charID,
|
||||
); err != nil {
|
||||
t.Fatalf("update saved_at: %v", err)
|
||||
}
|
||||
|
||||
if _, err := db.Exec("UPDATE characters SET savedata_hash = $1 WHERE id = $2", corruptHash, charID); err != nil {
|
||||
t.Fatalf("set corrupt hash: %v", err)
|
||||
}
|
||||
|
||||
mock := &MockCryptConn{sentPackets: make([][]byte, 0)}
|
||||
s := createTestSession(mock)
|
||||
s.charID = charID
|
||||
SetTestDB(s.server, db)
|
||||
s.server.erupeConfig.RealClientMode = cfg.Z2
|
||||
|
||||
got, err := GetCharacterSaveData(s, charID)
|
||||
if err != nil {
|
||||
t.Fatalf("GetCharacterSaveData() unexpected error: %v", err)
|
||||
}
|
||||
if got == nil {
|
||||
t.Fatal("GetCharacterSaveData() returned nil")
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("returns_error_when_no_backups", func(t *testing.T) {
|
||||
userID := CreateTestUser(t, db, "nobackup_user")
|
||||
charID := CreateTestCharacter(t, db, userID, "NoBackupChar")
|
||||
|
||||
if _, err := db.Exec("UPDATE characters SET savedata_hash = $1 WHERE id = $2", corruptHash, charID); err != nil {
|
||||
t.Fatalf("set corrupt hash: %v", err)
|
||||
}
|
||||
|
||||
mock := &MockCryptConn{sentPackets: make([][]byte, 0)}
|
||||
s := createTestSession(mock)
|
||||
s.charID = charID
|
||||
SetTestDB(s.server, db)
|
||||
s.server.erupeConfig.RealClientMode = cfg.Z2
|
||||
|
||||
_, err := GetCharacterSaveData(s, charID)
|
||||
if err == nil {
|
||||
t.Fatal("expected error when no backups available, got nil")
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("returns_error_when_all_backups_corrupt", func(t *testing.T) {
|
||||
userID := CreateTestUser(t, db, "allcorrupt_user")
|
||||
charID := CreateTestCharacter(t, db, userID, "AllCorruptChar")
|
||||
|
||||
if err := repo.SaveBackup(charID, 0, invalidCompressed); err != nil {
|
||||
t.Fatalf("SaveBackup: %v", err)
|
||||
}
|
||||
|
||||
if _, err := db.Exec("UPDATE characters SET savedata_hash = $1 WHERE id = $2", corruptHash, charID); err != nil {
|
||||
t.Fatalf("set corrupt hash: %v", err)
|
||||
}
|
||||
|
||||
mock := &MockCryptConn{sentPackets: make([][]byte, 0)}
|
||||
s := createTestSession(mock)
|
||||
s.charID = charID
|
||||
SetTestDB(s.server, db)
|
||||
s.server.erupeConfig.RealClientMode = cfg.Z2
|
||||
|
||||
_, err := GetCharacterSaveData(s, charID)
|
||||
if err == nil {
|
||||
t.Fatal("expected error when all backups corrupt, got nil")
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
// TestCharacterSaveData_Save_Integration tests saving character data to database
|
||||
func TestCharacterSaveData_Save_Integration(t *testing.T) {
|
||||
db := SetupTestDB(t)
|
||||
|
||||
@@ -237,6 +237,38 @@ func (r *CharacterRepository) UpdateGCPAndPact(charID uint32, gcp uint32, pactID
|
||||
return err
|
||||
}
|
||||
|
||||
// SavedataBackup holds one row from the savedata_backups table.
|
||||
type SavedataBackup struct {
|
||||
Slot int
|
||||
Data []byte
|
||||
SavedAt time.Time
|
||||
}
|
||||
|
||||
// LoadBackupsByRecency returns all backup slots for a character, ordered
|
||||
// most-recent first. Returns an empty (non-nil) slice if no backups exist.
|
||||
func (r *CharacterRepository) LoadBackupsByRecency(charID uint32) ([]SavedataBackup, error) {
|
||||
rows, err := r.db.Query(
|
||||
`SELECT slot, savedata, saved_at FROM savedata_backups
|
||||
WHERE char_id = $1
|
||||
ORDER BY saved_at DESC`,
|
||||
charID,
|
||||
)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer rows.Close() //nolint:errcheck // rows.Close error is non-actionable here
|
||||
|
||||
backups := make([]SavedataBackup, 0)
|
||||
for rows.Next() {
|
||||
var b SavedataBackup
|
||||
if err := rows.Scan(&b.Slot, &b.Data, &b.SavedAt); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
backups = append(backups, b)
|
||||
}
|
||||
return backups, rows.Err()
|
||||
}
|
||||
|
||||
// SaveBackup upserts a savedata snapshot into the rotating backup table.
|
||||
func (r *CharacterRepository) SaveBackup(charID uint32, slot int, data []byte) error {
|
||||
_, err := r.db.Exec(`
|
||||
|
||||
@@ -48,6 +48,9 @@ type CharacterRepo interface {
|
||||
// LoadSaveDataWithHash loads savedata along with its stored SHA-256 hash.
|
||||
// The hash may be nil for characters saved before checksums were introduced.
|
||||
LoadSaveDataWithHash(charID uint32) (id uint32, savedata []byte, isNew bool, name string, hash []byte, err error)
|
||||
// LoadBackupsByRecency returns all backup slots for a character ordered
|
||||
// most-recent first. Returns an empty slice if no backups exist.
|
||||
LoadBackupsByRecency(charID uint32) ([]SavedataBackup, error)
|
||||
}
|
||||
|
||||
// GuildRepo defines the contract for guild data access.
|
||||
|
||||
@@ -247,6 +247,9 @@ func (m *mockCharacterRepo) SaveCharacterDataAtomic(_ SaveAtomicParams) error {
|
||||
func (m *mockCharacterRepo) LoadSaveDataWithHash(_ uint32) (uint32, []byte, bool, string, []byte, error) {
|
||||
return m.loadSaveDataID, m.loadSaveDataData, m.loadSaveDataNew, m.loadSaveDataName, nil, m.loadSaveDataErr
|
||||
}
|
||||
func (m *mockCharacterRepo) LoadBackupsByRecency(_ uint32) ([]SavedataBackup, error) {
|
||||
return []SavedataBackup{}, nil
|
||||
}
|
||||
|
||||
// --- mockGoocooRepo ---
|
||||
|
||||
|
||||
Reference in New Issue
Block a user