From 7471e7eaa91281e17f11afe03aff95182ad75050 Mon Sep 17 00:00:00 2001 From: Houmgaor Date: Fri, 20 Mar 2026 14:20:14 +0100 Subject: [PATCH] test(scenario): add real-file round-trip tests and decode metadata MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add TestScenarioRoundTrip_RealFiles covering 7 real scenario files (cat=0/1/3, T101/T103, with and without chunk1). Tests skip gracefully when game data is absent so CI stays green. Decoded metadata structure from analysis of 145k+ real scenario files: - Chunk0 m[0]/m[1] = CategoryID/MainID; m[5] = str0_len (offset to str1); m[6] = MainID (cat=0) or 0xFFFF; m[8] = constant 5. - Chunk1 m[9]=cumOff[2], m[10]=cumOff[1], m[14]=cumOff[3], m[15]=total_string_bytes; m[11-13]/m[16-17] = dialog script offsets beyond the 0xFF sentinel; m[20] = constant 5; m[21] ≈ data_size. Update docs/scenario-format.md with full field tables for both chunks. --- docs/scenario-format.md | 41 +++++++-- server/channelserver/scenario_json_test.go | 100 +++++++++++++++++++++ 2 files changed, 136 insertions(+), 5 deletions(-) diff --git a/docs/scenario-format.md b/docs/scenario-format.md index 33073517f..331167a04 100644 --- a/docs/scenario-format.md +++ b/docs/scenario-format.md @@ -82,14 +82,45 @@ Used for structured text chunks containing named strings with metadata. [0xFF end-of-strings sentinel] ``` -**Metadata block** (partially understood): +**Metadata block** (partially decoded): -The metadata block is `MetadataSize` bytes long and covers all entries collectively. Known sizes observed in real files: +The metadata block is `MetadataSize` bytes long. Known sizes from real files: -- Chunk0 (flag 0x01): `MetadataSize = 0x14` (20 bytes) -- Chunk1 (flag 0x02): `MetadataSize = 0x2C` (44 bytes) +- Chunk0 (flag 0x01): `MetadataSize = 0x14` (20 bytes = 10 × u16 LE) +- Chunk1 (flag 0x02): `MetadataSize = 0x2C` (44 bytes = 22 × u16 LE) -The internal structure of the metadata is not yet fully documented. 
It is preserved verbatim in the JSON format as a base64 blob so that clients receive correct values even for unknown fields. +**Chunk0 metadata (20 bytes decoded from 145,000+ real scenario files):** + +| u16 index | Field | Notes | +|-----------|-------|-------| +| m[0] | CategoryID | Matches the first field of the filename (0=basic, 1=GR, 3=exchange, 6=pallone, 7=diva) | +| m[1] | MainID | Matches the `S` field of the filename | +| m[2–4] | 0x0000 | Reserved / always zero | +| m[5] | str0_len | Byte length of string 0 in Shift-JIS including the null terminator; equals the byte offset from the start of the strings section to string 1 | +| m[6] | SceneRef | `MainID` when CategoryID=0; `0xFFFF` when CategoryID≠0 — purpose unclear (possibly a chain or group reference) | +| m[7] | 0x0000 | Reserved | +| m[8] | 0x0005 | Constant; purpose unknown | +| m[9] | varies | Purpose not yet confirmed; correlates loosely with total chunk size | + +**Chunk1 metadata (44 bytes decoded from multi-dialog scenario files):** + +The 22 u16 fields in chunk1 metadata store a richer set of string offsets. 
When the chunk contains N dialog strings in the strings section, the cumulative byte offsets are stored non-sequentially: + +| u16 index | Field | Notes | +|-----------|-------|-------| +| m[0–1] | IDs | Carry contextual IDs; m[0] is typically 0, m[1] varies | +| m[2–8] | 0 or varies | Partially unknown | +| m[9] | cumOff[2] | Byte offset to string 2 from strings section start (= str0_len + str1_len) | +| m[10] | cumOff[1] | Byte offset to string 1 = str0_len | +| m[11–13] | Dialog positions | Offsets into the full C1 data section (strings + dialog script bytes beyond the 0xFF sentinel) | +| m[14] | cumOff[3] | Byte offset to string 3 | +| m[15] | cumOff[4] | Total string bytes without the 0xFF sentinel | +| m[16–17] | Dialog positions | Further offsets into the C1 data section | +| m[18–19] | flags/type | Purpose unknown | +| m[20] | 0x0005 | Constant (same as chunk0 m[8]) | +| m[21] | DataSize − 4 | Approximately equal to `chunk1_size − 8 − MetadataSize + 4`; off-by-four not yet explained | + +The metadata is preserved verbatim in JSON as a base64 blob so that clients receive correct values for all fields including those not yet fully understood. **Format detection for chunk0:** if `chunk_data[1] == 0x00` → sub-header, else → inline. diff --git a/server/channelserver/scenario_json_test.go b/server/channelserver/scenario_json_test.go index 03338913c..134a10045 100644 --- a/server/channelserver/scenario_json_test.go +++ b/server/channelserver/scenario_json_test.go @@ -4,6 +4,7 @@ import ( "bytes" "encoding/binary" "encoding/json" + "os" "testing" ) @@ -370,3 +371,102 @@ func TestScenarioRoundTrip_MetadataPreserved(t *testing.T) { sh.Unknown1, sh2.Unknown1, sh.Unknown2, sh2.Unknown2) } } + +// ── real-file round-trip tests ──────────────────────────────────────────────── + +// scenarioBinPath is the relative path from the package to the scenario files. +// These tests are skipped if the directory does not exist (CI without game data). 
+const scenarioBinPath = "../../bin/scenarios"
+
+// TestScenarioRoundTrip_RealFiles runs each sample scenario file through
+// ParseScenarioBinary → json.Marshal → CompileScenarioJSON → ParseScenarioBinary
+// and checks that the extracted strings and the subheader metadata of both
+// chunks are unchanged by the round trip.
+//
+// A sample whose file does not exist is skipped (game data is not shipped
+// with the repository). Any other read error fails the subtest, so that a
+// permission or I/O problem is not silently reported as "skipped".
+func TestScenarioRoundTrip_RealFiles(t *testing.T) {
+	samples := []struct {
+		name   string
+		wantC0 bool // when true, require a chunk0 subheader
+		wantC1 bool // when true, require chunk1 (subheader or JKR); false is not asserted
+	}{
+		// cat=0 basic quest scenarios (chunk0 subheader, no chunk1)
+		{"0_0_0_0_S0_T101_C0", true, false},
+		{"0_0_0_0_S1_T101_C0", true, false},
+		{"0_0_0_0_S5_T101_C0", true, false},
+		// cat=1 GR scenarios (chunk0 subheader, T101 has no chunk1)
+		{"1_0_0_0_S0_T101_C0", true, false},
+		{"1_0_0_0_S1_T101_C0", true, false},
+		// cat=3 item exchange (chunk0 subheader, chunk1 subheader with extra data)
+		{"3_0_0_0_S0_T103_C0", true, true},
+		// multi-chapter file with chunk1 subheader
+		{"0_0_0_0_S0_T103_C0", true, true},
+	}
+
+	for _, tc := range samples {
+		tc := tc // per-iteration copy for toolchains older than Go 1.22
+		t.Run(tc.name, func(t *testing.T) {
+			path := scenarioBinPath + "/" + tc.name + ".bin"
+			original, err := os.ReadFile(path)
+			if os.IsNotExist(err) {
+				t.Skipf("scenario file not found (game data not present): %v", err)
+			}
+			if err != nil {
+				// The file is there but unreadable: that is a real failure,
+				// not a missing-fixture situation.
+				t.Fatalf("reading scenario file %s: %v", path, err)
+			}
+
+			// Parse binary → JSON schema
+			parsed, err := ParseScenarioBinary(original)
+			if err != nil {
+				t.Fatalf("ParseScenarioBinary: %v", err)
+			}
+
+			// Verify expected chunk presence
+			if tc.wantC0 && (parsed.Chunk0 == nil || parsed.Chunk0.Subheader == nil) {
+				t.Error("expected chunk0 subheader")
+			}
+			if tc.wantC1 && parsed.Chunk1 == nil {
+				t.Error("expected chunk1")
+			}
+
+			// Marshal to JSON
+			jsonData, err := json.Marshal(parsed)
+			if err != nil {
+				t.Fatalf("json.Marshal: %v", err)
+			}
+
+			// Compile JSON → binary
+			compiled, err := CompileScenarioJSON(jsonData)
+			if err != nil {
+				t.Fatalf("CompileScenarioJSON: %v", err)
+			}
+
+			// Re-parse compiled output
+			result, err := ParseScenarioBinary(compiled)
+			if err != nil {
+				t.Fatalf("ParseScenarioBinary on compiled output: %v", err)
+			}
+
+			// Verify strings survive round-trip unchanged. The count check is
+			// fatal so the element-wise loop below never indexes out of range.
+			origStrings := extractStringsFromScenario(t, original)
+			gotStrings := extractStringsFromScenario(t, compiled)
+			if len(gotStrings) != len(origStrings) {
+				t.Fatalf("string count changed: %d → %d", len(origStrings), len(gotStrings))
+			}
+			for i := range origStrings {
+				if gotStrings[i] != origStrings[i] {
+					t.Errorf("[%d]: %q → %q", i, origStrings[i], gotStrings[i])
+				}
+			}
+
+			// Verify metadata is preserved byte-for-byte. Only chunks that had
+			// a subheader in the original parse are compared.
+			if parsed.Chunk0 != nil && parsed.Chunk0.Subheader != nil {
+				if result.Chunk0 == nil || result.Chunk0.Subheader == nil {
+					t.Fatal("chunk0 subheader lost in round-trip")
+				}
+				if result.Chunk0.Subheader.Metadata != parsed.Chunk0.Subheader.Metadata {
+					t.Errorf("chunk0 metadata changed after round-trip")
+				}
+			}
+			if parsed.Chunk1 != nil && parsed.Chunk1.Subheader != nil {
+				if result.Chunk1 == nil || result.Chunk1.Subheader == nil {
+					t.Fatal("chunk1 subheader lost in round-trip")
+				}
+				if result.Chunk1.Subheader.Metadata != parsed.Chunk1.Subheader.Metadata {
+					t.Errorf("chunk1 metadata changed after round-trip")
+				}
+			}
+		})
+	}
+}