test(scenario): add real-file round-trip tests and decode metadata

Add TestScenarioRoundTrip_RealFiles covering 7 real scenario files
(cat=0/1/3, T101/T103, with and without chunk1). Tests skip gracefully
when game data is absent so CI stays green.

Decoded metadata structure from analysis of 145k+ real scenario files:
- Chunk0 m[0]/m[1] = CategoryID/MainID; m[5] = str0_len (offset to
  str1); m[6] = MainID (cat=0) or 0xFFFF; m[8] = constant 5.
- Chunk1 m[9]=cumOff[2], m[10]=cumOff[1], m[14]=cumOff[3],
  m[15]=total_string_bytes; m[11-13]/m[16-17] = dialog script offsets
  beyond the 0xFF sentinel; m[20] = constant 5; m[21] ≈ data_size.

Update docs/scenario-format.md with full field tables for both chunks.
This commit is contained in:
Houmgaor
2026-03-20 14:20:14 +01:00
parent a1dfdd330a
commit 7471e7eaa9
2 changed files with 136 additions and 5 deletions

View File

@@ -82,14 +82,45 @@ Used for structured text chunks containing named strings with metadata.
[0xFF end-of-strings sentinel]
```
**Metadata block** (partially understood):
**Metadata block** (partially decoded):
The metadata block is `MetadataSize` bytes long and covers all entries collectively. Known sizes observed in real files:
The metadata block is `MetadataSize` bytes long. Known sizes from real files:
- Chunk0 (flag 0x01): `MetadataSize = 0x14` (20 bytes)
- Chunk1 (flag 0x02): `MetadataSize = 0x2C` (44 bytes)
- Chunk0 (flag 0x01): `MetadataSize = 0x14` (20 bytes = 10 × u16 LE)
- Chunk1 (flag 0x02): `MetadataSize = 0x2C` (44 bytes = 22 × u16 LE)
The internal structure of the metadata is not yet fully documented. It is preserved verbatim in the JSON format as a base64 blob so that clients receive correct values even for unknown fields.
**Chunk0 metadata (20 bytes decoded from 145,000+ real scenario files):**
| u16 index | Field | Notes |
|-----------|-------|-------|
| m[0] | CategoryID | Matches the first field of the filename (0=basic, 1=GR, 3=exchange, 6=pallone, 7=diva) |
| m[1] | MainID | Matches the `S` field of the filename |
| m[2–4] | 0x0000 | Reserved / always zero |
| m[5] | str0_len | Byte length of string 0 in Shift-JIS including the null terminator; equals the byte offset from the start of the strings section to string 1 |
| m[6] | SceneRef | `MainID` when CategoryID=0; `0xFFFF` when CategoryID≠0 — purpose unclear (possibly a chain or group reference) |
| m[7] | 0x0000 | Reserved |
| m[8] | 0x0005 | Constant; purpose unknown |
| m[9] | varies | Purpose not yet confirmed; correlates loosely with total chunk size |
**Chunk1 metadata (44 bytes decoded from multi-dialog scenario files):**
The 22 u16 fields in chunk1 metadata store a richer set of string offsets. When the chunk contains N dialog strings in the strings section, the cumulative byte offsets are stored non-sequentially:
| u16 index | Field | Notes |
|-----------|-------|-------|
| m[0–1] | IDs | Carry contextual IDs; m[0] is typically 0, m[1] varies |
| m[2–8] | 0 or varies | Partially unknown |
| m[9] | cumOff[2] | Byte offset to string 2 from strings section start (= str0_len + str1_len) |
| m[10] | cumOff[1] | Byte offset to string 1 = str0_len |
| m[11–13] | Dialog positions | Offsets into the full C1 data section (strings + dialog script bytes beyond the 0xFF sentinel) |
| m[14] | cumOff[3] | Byte offset to string 3 |
| m[15] | cumOff[4] | Total string bytes without the 0xFF sentinel |
| m[16–17] | Dialog positions | Further offsets into the C1 data section |
| m[18–19] | flags/type | Purpose unknown |
| m[20] | 0x0005 | Constant (same as chunk0 m[8]) |
| m[21] | DataSize − 4 | Approximately equal to `chunk1_size − 8 − MetadataSize + 4`; off-by-four not yet explained |
The metadata is preserved verbatim in JSON as a base64 blob so that clients receive correct values for all fields including those not yet fully understood.
**Format detection for chunk0:** if `chunk_data[1] == 0x00` → sub-header, else → inline.

View File

@@ -4,6 +4,7 @@ import (
"bytes"
"encoding/binary"
"encoding/json"
"os"
"testing"
)
@@ -370,3 +371,102 @@ func TestScenarioRoundTrip_MetadataPreserved(t *testing.T) {
sh.Unknown1, sh2.Unknown1, sh.Unknown2, sh2.Unknown2)
}
}
// ── real-file round-trip tests ────────────────────────────────────────────────
// scenarioBinPath is the directory, relative to this package, that holds the
// real scenario .bin files used by the round-trip tests below.
// Those tests skip when the files are absent (e.g. CI without game data).
const scenarioBinPath = "../../bin/scenarios"
// TestScenarioRoundTrip_RealFiles runs a set of real scenario files through
// the full pipeline (binary → parsed schema → JSON → compiled binary → parsed
// schema) and checks that the extracted strings and the chunk0/chunk1
// subheader metadata are unchanged by the round trip.
//
// Each sample is a subtest. A subtest is skipped when its .bin file does not
// exist under scenarioBinPath; any other read error fails the subtest so that
// a present-but-unreadable file is not silently reported as "no game data".
func TestScenarioRoundTrip_RealFiles(t *testing.T) {
	samples := []struct {
		name   string
		wantC0 bool // expect chunk0 subheader
		wantC1 bool // expect chunk1 (subheader or JKR)
	}{
		// cat=0 basic quest scenarios (chunk0 subheader, no chunk1)
		{"0_0_0_0_S0_T101_C0", true, false},
		{"0_0_0_0_S1_T101_C0", true, false},
		{"0_0_0_0_S5_T101_C0", true, false},
		// cat=1 GR scenarios (chunk0 subheader, T101 has no chunk1)
		{"1_0_0_0_S0_T101_C0", true, false},
		{"1_0_0_0_S1_T101_C0", true, false},
		// cat=3 item exchange (chunk0 subheader, chunk1 subheader with extra data)
		{"3_0_0_0_S0_T103_C0", true, true},
		// multi-chapter file with chunk1 subheader
		{"0_0_0_0_S0_T103_C0", true, true},
	}

	for _, tc := range samples {
		tc := tc // per-iteration copy for the closure (needed before Go 1.22)
		t.Run(tc.name, func(t *testing.T) {
			path := scenarioBinPath + "/" + tc.name + ".bin"
			original, err := os.ReadFile(path)
			if os.IsNotExist(err) {
				// Game data is optional; only a missing file justifies a skip.
				t.Skipf("scenario file not found (game data not present): %v", err)
			}
			if err != nil {
				t.Fatalf("reading scenario file %q: %v", path, err)
			}

			// Parse binary → JSON schema
			parsed, err := ParseScenarioBinary(original)
			if err != nil {
				t.Fatalf("ParseScenarioBinary: %v", err)
			}

			// Verify expected chunk presence
			if tc.wantC0 && (parsed.Chunk0 == nil || parsed.Chunk0.Subheader == nil) {
				t.Error("expected chunk0 subheader")
			}
			if tc.wantC1 && parsed.Chunk1 == nil {
				t.Error("expected chunk1")
			}

			// Marshal to JSON
			jsonData, err := json.Marshal(parsed)
			if err != nil {
				t.Fatalf("json.Marshal: %v", err)
			}

			// Compile JSON → binary
			compiled, err := CompileScenarioJSON(jsonData)
			if err != nil {
				t.Fatalf("CompileScenarioJSON: %v", err)
			}

			// Re-parse compiled output
			result, err := ParseScenarioBinary(compiled)
			if err != nil {
				t.Fatalf("ParseScenarioBinary on compiled output: %v", err)
			}

			// Verify strings survive round-trip unchanged
			origStrings := extractStringsFromScenario(t, original)
			gotStrings := extractStringsFromScenario(t, compiled)
			if len(gotStrings) != len(origStrings) {
				t.Fatalf("string count changed: %d → %d", len(origStrings), len(gotStrings))
			}
			for i := range origStrings {
				if gotStrings[i] != origStrings[i] {
					t.Errorf("[%d]: %q → %q", i, origStrings[i], gotStrings[i])
				}
			}

			// Verify metadata is preserved byte-for-byte
			if parsed.Chunk0 != nil && parsed.Chunk0.Subheader != nil {
				if result.Chunk0 == nil || result.Chunk0.Subheader == nil {
					t.Fatal("chunk0 subheader lost in round-trip")
				}
				if result.Chunk0.Subheader.Metadata != parsed.Chunk0.Subheader.Metadata {
					t.Errorf("chunk0 metadata changed after round-trip")
				}
			}
			if parsed.Chunk1 != nil && parsed.Chunk1.Subheader != nil {
				if result.Chunk1 == nil || result.Chunk1.Subheader == nil {
					t.Fatal("chunk1 subheader lost in round-trip")
				}
				if result.Chunk1.Subheader.Metadata != parsed.Chunk1.Subheader.Metadata {
					t.Errorf("chunk1 metadata changed after round-trip")
				}
			}
		})
	}
}