From a68df4c15a3e6fd8108fdcd4352a0e1806cc2365 Mon Sep 17 00:00:00 2001 From: Rainchus Date: Thu, 3 Apr 2025 00:45:53 -0500 Subject: [PATCH 1/6] add shiftjis as possible data type for symbols --- Cargo.lock | 1 + Cargo.toml | 1 + src/obj/symbols.rs | 2 ++ src/util/asm.rs | 51 ++++++++++++++++++++++++++++++++++++++++++++++ src/util/config.rs | 4 ++++ 5 files changed, 59 insertions(+) diff --git a/Cargo.lock b/Cargo.lock index a24f72b..a182304 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -363,6 +363,7 @@ dependencies = [ "cwextab", "dyn-clone", "enable-ansi-support", + "encoding_rs", "filetime", "fixedbitset 0.5.7", "flagset", diff --git a/Cargo.toml b/Cargo.toml index 848e1d7..3906823 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -25,6 +25,7 @@ strip = "debuginfo" codegen-units = 1 [dependencies] +encoding_rs = "0.8" aes = "0.8" anyhow = { version = "1.0", features = ["backtrace"] } ar = { git = "https://github.com/bjorn3/rust-ar.git", branch = "write_symbol_table" } diff --git a/src/obj/symbols.rs b/src/obj/symbols.rs index 46b2a4e..d995ae5 100644 --- a/src/obj/symbols.rs +++ b/src/obj/symbols.rs @@ -175,8 +175,10 @@ pub enum ObjDataKind { Float, Double, String, + Shiftjis, String16, StringTable, + ShiftjisTable, String16Table, Int, Short, diff --git a/src/util/asm.rs b/src/util/asm.rs index d54951b..f6b1307 100644 --- a/src/util/asm.rs +++ b/src/util/asm.rs @@ -668,6 +668,43 @@ where W: Write + ?Sized { Ok(()) } +use encoding_rs::SHIFT_JIS; + +fn write_string_shiftjis<W>(w: &mut W, data: &[u8]) -> Result<()> +where + W: std::io::Write + ?Sized, +{ + if data.last() != Some(&0x00) { + anyhow::bail!("Non-terminated Shift-JIS string"); + } + + // Decode the Shift-JIS bytes (without the null terminator) into a UTF-8 string. 
+ let (cow, _, had_errors) = SHIFT_JIS.decode(&data[..data.len() - 1]); + if had_errors { + anyhow::bail!("Invalid Shift-JIS data"); + } + let s = cow; + + write!(w, "\t.string \"")?; + + // For each character, apply escaping for control characters and quotes as needed. + for c in s.chars() { + match c { + '\x08' => write!(w, "\\b")?, + '\x09' => write!(w, "\\t")?, + '\x0A' => write!(w, "\\n")?, + '\x0C' => write!(w, "\\f")?, + '\x0D' => write!(w, "\\r")?, + '\\' => write!(w, "\\\\")?, + '"' => write!(w, "\\\"")?, + _ => write!(w, "{}", c)?, + } + } + + writeln!(w, "\"")?; + Ok(()) +} + fn write_string16<W>(w: &mut W, data: &[u16]) -> Result<()> where W: Write + ?Sized { if matches!(data.last(), Some(&b) if b == 0) { @@ -705,6 +742,12 @@ where W: Write + ?Sized { ObjDataKind::String => { return write_string(w, data); } + ObjDataKind::Shiftjis => { + if data.is_empty() || data.last() != Some(&0x00) { + anyhow::bail!("Non-terminated Shift-JIS string"); + } + return write_string_shiftjis(w, data); + } ObjDataKind::String16 => { if data.len() % 2 != 0 { bail!("Attempted to write wstring with length {:#X}", data.len()); @@ -734,6 +777,12 @@ where W: Write + ?Sized { } return Ok(()); } + ObjDataKind::ShiftjisTable => { + for slice in data.split_inclusive(|&b| b == 0) { + write_string_shiftjis(w, slice)?; + } + return Ok(()); + } _ => {} } let chunk_size = match data_kind { @@ -742,7 +791,9 @@ where W: Write + ?Sized { ObjDataKind::Byte | ObjDataKind::Byte8 | ObjDataKind::Double => 8, ObjDataKind::String | ObjDataKind::String16 + | ObjDataKind::Shiftjis | ObjDataKind::StringTable + | ObjDataKind::ShiftjisTable | ObjDataKind::String16Table => unreachable!(), }; for chunk in remain.chunks(chunk_size) { diff --git a/src/util/config.rs b/src/util/config.rs index 2ee8d1a..ae4a611 100644 --- a/src/util/config.rs +++ b/src/util/config.rs @@ -329,8 +329,10 @@ fn symbol_data_kind_to_str(kind: ObjDataKind) -> Option<&'static str> { ObjDataKind::Float => Some("float"), 
ObjDataKind::Double => Some("double"), ObjDataKind::String => Some("string"), + ObjDataKind::Shiftjis => Some("shiftjis"), ObjDataKind::String16 => Some("wstring"), ObjDataKind::StringTable => Some("string_table"), + ObjDataKind::ShiftjisTable => Some("shiftjis_table"), ObjDataKind::String16Table => Some("wstring_table"), ObjDataKind::Int => Some("int"), ObjDataKind::Short => Some("short"), @@ -382,8 +384,10 @@ fn symbol_data_kind_from_str(s: &str) -> Option<ObjDataKind> { "float" => Some(ObjDataKind::Float), "double" => Some(ObjDataKind::Double), "string" => Some(ObjDataKind::String), + "shiftjis" => Some(ObjDataKind::Shiftjis), "wstring" => Some(ObjDataKind::String16), "string_table" => Some(ObjDataKind::StringTable), + "shiftjis_table" => Some(ObjDataKind::ShiftjisTable), "wstring_table" => Some(ObjDataKind::String16Table), "int" => Some(ObjDataKind::Int), "short" => Some(ObjDataKind::Short), From 00df09409beb9fea64253174d9c1fc2ed5251a01 Mon Sep 17 00:00:00 2001 From: Rainchus Date: Thu, 3 Apr 2025 00:59:32 -0500 Subject: [PATCH 2/6] usage of anyhow:bail! -> bail! --- src/util/asm.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/util/asm.rs b/src/util/asm.rs index f6b1307..92af8fb 100644 --- a/src/util/asm.rs +++ b/src/util/asm.rs @@ -675,13 +675,13 @@ where W: std::io::Write + ?Sized, { if data.last() != Some(&0x00) { - anyhow::bail!("Non-terminated Shift-JIS string"); + bail!("Non-terminated Shift-JIS string"); } // Decode the Shift-JIS bytes (without the null terminator) into a UTF-8 string. 
let (cow, _, had_errors) = SHIFT_JIS.decode(&data[..data.len() - 1]); if had_errors { - anyhow::bail!("Invalid Shift-JIS data"); + bail!("Invalid Shift-JIS data"); } let s = cow; From 041c745bee6e09f1c2adf195ff7e96d7b58bdc75 Mon Sep 17 00:00:00 2001 From: Rainchus Date: Thu, 17 Apr 2025 00:26:34 -0500 Subject: [PATCH 3/6] revise output of sjis strings --- src/util/asm.rs | 37 +++++++++++++++++++------------------ 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/src/util/asm.rs b/src/util/asm.rs index 92af8fb..8eaea45 100644 --- a/src/util/asm.rs +++ b/src/util/asm.rs @@ -672,36 +672,37 @@ use encoding_rs::SHIFT_JIS; fn write_string_shiftjis<W>(w: &mut W, data: &[u8]) -> Result<()> where - W: std::io::Write + ?Sized, + W: Write + ?Sized, { if data.last() != Some(&0x00) { bail!("Non-terminated Shift-JIS string"); } - // Decode the Shift-JIS bytes (without the null terminator) into a UTF-8 string. - let (cow, _, had_errors) = SHIFT_JIS.decode(&data[..data.len() - 1]); + let raw_data = &data[..data.len() - 1]; + + // Decode then write SJIS as comment above byte array + let (cow, _, had_errors) = SHIFT_JIS.decode(raw_data); if had_errors { bail!("Invalid Shift-JIS data"); } - let s = cow; - - write!(w, "\t.string \"")?; - - // For each character, apply escaping for control characters and quotes as needed. 
- for c in s.chars() { + + write!(w, "\t# ")?; + for c in cow.chars() { match c { - '\x08' => write!(w, "\\b")?, - '\x09' => write!(w, "\\t")?, - '\x0A' => write!(w, "\\n")?, - '\x0C' => write!(w, "\\f")?, - '\x0D' => write!(w, "\\r")?, - '\\' => write!(w, "\\\\")?, - '"' => write!(w, "\\\"")?, - _ => write!(w, "{}", c)?, + '#' => write!(w, "\\#")?, + _ => write!(w, "{}", c)?, } } - writeln!(w, "\"")?; + write!(w, "\n\t.byte ")?; + for (i, &b) in data.iter().enumerate() { + write!(w, "0x{:02X}", b)?; + if i + 1 != data.len() { + write!(w, ", ")?; + } + } + + writeln!(w)?; Ok(()) } From e38b32c9ce3aefa7a96bf36ce2f4c3ad5b5c7bb6 Mon Sep 17 00:00:00 2001 From: Rainchus Date: Thu, 17 Apr 2025 00:32:27 -0500 Subject: [PATCH 4/6] rename shiftjis internally, symbols now uses sjis instead of shiftjis --- src/obj/symbols.rs | 4 ++-- src/util/asm.rs | 8 ++++---- src/util/config.rs | 8 ++++---- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/src/obj/symbols.rs b/src/obj/symbols.rs index d995ae5..56659b3 100644 --- a/src/obj/symbols.rs +++ b/src/obj/symbols.rs @@ -175,10 +175,10 @@ pub enum ObjDataKind { Float, Double, String, - Shiftjis, + ShiftJIS, String16, StringTable, - ShiftjisTable, + ShiftJISTable, String16Table, Int, Short, diff --git a/src/util/asm.rs b/src/util/asm.rs index 8eaea45..ea484b0 100644 --- a/src/util/asm.rs +++ b/src/util/asm.rs @@ -743,7 +743,7 @@ where W: Write + ?Sized { ObjDataKind::String => { return write_string(w, data); } - ObjDataKind::Shiftjis => { + ObjDataKind::ShiftJIS => { if data.is_empty() || data.last() != Some(&0x00) { anyhow::bail!("Non-terminated Shift-JIS string"); } @@ -778,7 +778,7 @@ where W: Write + ?Sized { } return Ok(()); } - ObjDataKind::ShiftjisTable => { + ObjDataKind::ShiftJISTable => { for slice in data.split_inclusive(|&b| b == 0) { write_string_shiftjis(w, slice)?; } @@ -792,9 +792,9 @@ where W: Write + ?Sized { ObjDataKind::Byte | ObjDataKind::Byte8 | ObjDataKind::Double => 8, ObjDataKind::String | 
ObjDataKind::String16 - | ObjDataKind::Shiftjis + | ObjDataKind::ShiftJIS | ObjDataKind::StringTable - | ObjDataKind::ShiftjisTable + | ObjDataKind::ShiftJISTable | ObjDataKind::String16Table => unreachable!(), }; for chunk in remain.chunks(chunk_size) { diff --git a/src/util/config.rs b/src/util/config.rs index ae4a611..743a8ee 100644 --- a/src/util/config.rs +++ b/src/util/config.rs @@ -329,10 +329,10 @@ fn symbol_data_kind_to_str(kind: ObjDataKind) -> Option<&'static str> { ObjDataKind::Float => Some("float"), ObjDataKind::Double => Some("double"), ObjDataKind::String => Some("string"), - ObjDataKind::Shiftjis => Some("shiftjis"), + ObjDataKind::ShiftJIS => Some("sjis"), ObjDataKind::String16 => Some("wstring"), ObjDataKind::StringTable => Some("string_table"), - ObjDataKind::ShiftjisTable => Some("shiftjis_table"), + ObjDataKind::ShiftJISTable => Some("sjis_table"), ObjDataKind::String16Table => Some("wstring_table"), ObjDataKind::Int => Some("int"), ObjDataKind::Short => Some("short"), @@ -384,10 +384,10 @@ fn symbol_data_kind_from_str(s: &str) -> Option<ObjDataKind> { "float" => Some(ObjDataKind::Float), "double" => Some(ObjDataKind::Double), "string" => Some(ObjDataKind::String), - "shiftjis" => Some(ObjDataKind::Shiftjis), + "sjis" => Some(ObjDataKind::ShiftJIS), "wstring" => Some(ObjDataKind::String16), "string_table" => Some(ObjDataKind::StringTable), - "shiftjis_table" => Some(ObjDataKind::ShiftjisTable), + "sjis_table" => Some(ObjDataKind::ShiftJISTable), "wstring_table" => Some(ObjDataKind::String16Table), "int" => Some(ObjDataKind::Int), "short" => Some(ObjDataKind::Short), From 98e3f18a3cc6cc676c5575b52cff67567fecdf58 Mon Sep 17 00:00:00 2001 From: Rainchus Date: Thu, 17 Apr 2025 00:36:44 -0500 Subject: [PATCH 5/6] remove sjis decoding error check as the output is a comment --- src/util/asm.rs | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/util/asm.rs b/src/util/asm.rs index ea484b0..9c21679 100644 --- a/src/util/asm.rs +++ 
b/src/util/asm.rs @@ -681,11 +681,7 @@ where let raw_data = &data[..data.len() - 1]; // Decode then write SJIS as comment above byte array - let (cow, _, had_errors) = SHIFT_JIS.decode(raw_data); - if had_errors { - bail!("Invalid Shift-JIS data"); - } - + let (cow, _, _) = SHIFT_JIS.decode(raw_data); write!(w, "\t# ")?; for c in cow.chars() { match c { From 0da2e3b07395b9df80e2e1acf9e4836de4181cb7 Mon Sep 17 00:00:00 2001 From: Rainchus Date: Thu, 17 Apr 2025 00:55:09 -0500 Subject: [PATCH 6/6] run cargo fmt --- src/util/asm.rs | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/util/asm.rs b/src/util/asm.rs index 9c21679..3fe73c5 100644 --- a/src/util/asm.rs +++ b/src/util/asm.rs @@ -671,9 +671,7 @@ where W: Write + ?Sized { use encoding_rs::SHIFT_JIS; fn write_string_shiftjis<W>(w: &mut W, data: &[u8]) -> Result<()> -where - W: Write + ?Sized, -{ +where W: Write + ?Sized { if data.last() != Some(&0x00) { bail!("Non-terminated Shift-JIS string"); } @@ -686,7 +684,7 @@ where for c in cow.chars() { match c { '#' => write!(w, "\\#")?, - _ => write!(w, "{}", c)?, + _ => write!(w, "{}", c)?, } }