From ca6c7d88bc135d269d0d0a6673ac3678e56ffb10 Mon Sep 17 00:00:00 2001 From: Leonard Hecker Date: Tue, 10 Feb 2026 18:33:28 +0100 Subject: [PATCH] Remove allocator_api --- Cargo.toml | 2 +- crates/edit/benches/lib.rs | 25 +- crates/edit/src/base64.rs | 16 +- crates/edit/src/bin/edit/draw_filepicker.rs | 8 +- crates/edit/src/bin/edit/draw_statusbar.rs | 5 +- crates/edit/src/bin/edit/main.rs | 59 +- crates/edit/src/buffer/mod.rs | 74 ++- crates/edit/src/document.rs | 11 +- crates/edit/src/framebuffer.rs | 48 +- crates/edit/src/fuzzy.rs | 27 +- crates/edit/src/icu.rs | 33 +- crates/edit/src/json.rs | 26 +- crates/edit/src/lib.rs | 1 - crates/edit/src/oklab.rs | 4 - crates/edit/src/simd/mod.rs | 2 - crates/edit/src/sys/unix.rs | 60 +- crates/edit/src/sys/windows.rs | 51 +- crates/edit/src/tui.rs | 213 ++++--- crates/stdext/src/alloc.rs | 55 ++ crates/stdext/src/arena/debug.rs | 49 +- crates/stdext/src/arena/fs.rs | 17 +- crates/stdext/src/arena/mod.rs | 20 +- crates/stdext/src/arena/release.rs | 117 +--- crates/stdext/src/arena/scratch.rs | 6 +- crates/stdext/src/arena/string.rs | 315 ---------- crates/stdext/src/collections/mod.rs | 5 + crates/stdext/src/collections/string.rs | 359 ++++++++++++ crates/stdext/src/collections/vec.rs | 616 ++++++++++++++++++++ crates/stdext/src/helpers.rs | 5 +- crates/stdext/src/lib.rs | 7 +- crates/{edit => stdext}/src/simd/memset.rs | 30 +- crates/stdext/src/simd/mod.rs | 3 + 32 files changed, 1465 insertions(+), 804 deletions(-) create mode 100644 crates/stdext/src/alloc.rs delete mode 100644 crates/stdext/src/arena/string.rs create mode 100644 crates/stdext/src/collections/mod.rs create mode 100644 crates/stdext/src/collections/string.rs create mode 100644 crates/stdext/src/collections/vec.rs rename crates/{edit => stdext}/src/simd/memset.rs (95%) create mode 100644 crates/stdext/src/simd/mod.rs diff --git a/Cargo.toml b/Cargo.toml index fb53ab464f5f..1cab5da276be 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,7 +7,7 @@ resolver = 
"2" edition = "2024" license = "MIT" repository = "https://github.com/microsoft/edit" -rust-version = "1.88" +rust-version = "1.93" # We use `opt-level = "s"` as it significantly reduces binary size. # We could then use the `#[optimize(speed)]` attribute for spot optimizations. diff --git a/crates/edit/benches/lib.rs b/crates/edit/benches/lib.rs index 56b92c067ee2..18d817f822a2 100644 --- a/crates/edit/benches/lib.rs +++ b/crates/edit/benches/lib.rs @@ -1,28 +1,26 @@ // Copyright (c) Microsoft Corporation. // Licensed under the MIT License. -#![feature(allocator_api)] - use std::hint::black_box; use std::io::Cursor; use std::{mem, vec}; use criterion::{BenchmarkId, Criterion, Throughput, criterion_group, criterion_main}; use edit::helpers::*; -use edit::simd::MemsetSafe; use edit::{buffer, glob, hash, json, oklab, simd, unicode}; -use stdext::arena::{self, Arena, scratch_arena}; +use stdext::arena::{self, scratch_arena}; +use stdext::collections::BVec; struct EditingTracePatch<'a>(usize, usize, &'a str); struct EditingTraceTransaction<'a> { - patches: Vec, &'a Arena>, + patches: BVec<'a, EditingTracePatch<'a>>, } struct EditingTraceData<'a> { start_content: &'a str, end_content: &'a str, - txns: Vec, &'a Arena>, + txns: BVec<'a, EditingTraceTransaction<'a>>, } fn bench_buffer(c: &mut Criterion) { @@ -39,24 +37,25 @@ fn bench_buffer(c: &mut Criterion) { let mut res = EditingTraceData { start_content: root.get_str("startContent").unwrap(), end_content: root.get_str("endContent").unwrap(), - txns: Vec::with_capacity_in(txns.len(), &scratch), + txns: BVec::empty(), }; + res.txns.reserve(&*scratch, txns.len()); for txn in txns { let txn = txn.as_object().unwrap(); let patches = txn.get_array("patches").unwrap(); - let mut txn = - EditingTraceTransaction { patches: Vec::with_capacity_in(patches.len(), &scratch) }; + let mut txn = EditingTraceTransaction { patches: BVec::empty() }; + txn.patches.reserve(&*scratch, patches.len()); for patch in patches { let patch = 
patch.as_array().unwrap(); let offset = patch[0].as_number().unwrap() as usize; let del_len = patch[1].as_number().unwrap() as usize; let ins_str = patch[2].as_str().unwrap(); - txn.patches.push(EditingTracePatch(offset, del_len, ins_str)); + txn.patches.push(&*scratch, EditingTracePatch(offset, del_len, ins_str)); } - res.txns.push(txn); + res.txns.push(&*scratch, txn); } res @@ -226,7 +225,7 @@ fn bench_simd_memchr2(c: &mut Criterion) { } } -fn bench_simd_memset(c: &mut Criterion) { +fn bench_simd_memset(c: &mut Criterion) { let mut group = c.benchmark_group("simd"); let name = format!("memset<{}>", std::any::type_name::()); let size = mem::size_of::(); @@ -241,7 +240,7 @@ fn bench_simd_memset(c: &mut Criterion) { &bytes, |b, &bytes| { let slice = unsafe { buf.get_unchecked_mut(..bytes / size) }; - b.iter(|| simd::memset(black_box(slice), Default::default())); + b.iter(|| stdext::simd::memset(black_box(slice), Default::default())); }, ); } diff --git a/crates/edit/src/base64.rs b/crates/edit/src/base64.rs index 944e2c8cc675..4df68f46e0a5 100644 --- a/crates/edit/src/base64.rs +++ b/crates/edit/src/base64.rs @@ -3,7 +3,8 @@ //! Base64 facilities. -use stdext::arena::ArenaString; +use stdext::arena::Arena; +use stdext::collections::BString; const CHARSET: [u8; 64] = *b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; @@ -15,7 +16,7 @@ pub fn encode_len(src_len: usize) -> usize { } /// Encodes the given bytes as base64 and appends them to the destination string. -pub fn encode(dst: &mut ArenaString, src: &[u8]) { +pub fn encode<'a>(arena: &'a Arena, dst: &mut BString<'a>, src: &[u8]) { unsafe { let mut inp = src.as_ptr(); let mut remaining = src.len(); @@ -23,7 +24,7 @@ pub fn encode(dst: &mut ArenaString, src: &[u8]) { let out_len = encode_len(src.len()); // ... we can then use this fact to reserve space all at once. 
- dst.reserve(out_len); + dst.reserve(arena, out_len); // SAFETY: Getting a pointer to the reserved space is only safe // *after* calling `reserve()` as it may change the pointer. @@ -79,16 +80,17 @@ pub fn encode(dst: &mut ArenaString, src: &[u8]) { #[cfg(test)] mod tests { - use stdext::arena::{Arena, ArenaString}; + use stdext::arena::scratch_arena; + use stdext::collections::BString; use super::encode; #[test] fn test_basic() { - let arena = Arena::new(4 * 1024).unwrap(); + let scratch = scratch_arena(None); let enc = |s: &[u8]| { - let mut dst = ArenaString::new_in(&arena); - encode(&mut dst, s); + let mut dst = BString::empty(); + encode(&scratch, &mut dst, s); dst }; assert_eq!(enc(b""), ""); diff --git a/crates/edit/src/bin/edit/draw_filepicker.rs b/crates/edit/src/bin/edit/draw_filepicker.rs index d6e9d3b73f30..038e34754e6a 100644 --- a/crates/edit/src/bin/edit/draw_filepicker.rs +++ b/crates/edit/src/bin/edit/draw_filepicker.rs @@ -11,6 +11,7 @@ use edit::input::{kbmod, vk}; use edit::tui::*; use edit::{icu, path}; use stdext::arena::scratch_arena; +use stdext::collections::BVec; use crate::localization::*; use crate::state::*; @@ -376,9 +377,10 @@ fn update_autocomplete_suggestions(state: &mut State) { // The problem is finding the upper bound. Here I'm using a trick: // By appending U+10FFFF (the highest possible Unicode code point) // we create a needle that naturally yields an upper bound. - let mut needle_upper_bound = Vec::with_capacity_in(needle.len() + 4, &*scratch); - needle_upper_bound.extend_from_slice(needle); - needle_upper_bound.extend_from_slice(b"\xf4\x8f\xbf\xbf"); + let mut needle_upper_bound = BVec::empty(); + needle_upper_bound.reserve(&*scratch, needle.len() + 4); + needle_upper_bound.extend_from_slice(&*scratch, needle); + needle_upper_bound.extend_from_slice(&*scratch, b"\xf4\x8f\xbf\xbf"); if let Some(dirs_files) = &state.file_picker_entries { 'outer: for entries in &dirs_files[1..] 
{ diff --git a/crates/edit/src/bin/edit/draw_statusbar.rs b/crates/edit/src/bin/edit/draw_statusbar.rs index 10e7ddbcfe17..44a688f834c5 100644 --- a/crates/edit/src/bin/edit/draw_statusbar.rs +++ b/crates/edit/src/bin/edit/draw_statusbar.rs @@ -9,6 +9,7 @@ use edit::input::vk; use edit::tui::*; use stdext::arena::scratch_arena; use stdext::arena_format; +use stdext::collections::BVec; use crate::localization::*; use crate::state::*; @@ -291,14 +292,14 @@ fn encoding_picker_update_list(state: &mut State) { let encodings = icu::get_available_encodings(); let scratch = scratch_arena(None); - let mut matches = Vec::new_in(&*scratch); + let mut matches = BVec::empty(); for enc in encodings.all { let local_scratch = scratch_arena(Some(&scratch)); let (score, _) = score_fuzzy(&local_scratch, enc.label, needle, true); if score > 0 { - matches.push((score, *enc)); + matches.push(&*scratch, (score, *enc)); } } diff --git a/crates/edit/src/bin/edit/main.rs b/crates/edit/src/bin/edit/main.rs index a05756a009cd..1561297ad6d2 100644 --- a/crates/edit/src/bin/edit/main.rs +++ b/crates/edit/src/bin/edit/main.rs @@ -1,7 +1,7 @@ // Copyright (c) Microsoft Corporation. // Licensed under the MIT License. 
-#![feature(allocator_api, linked_list_cursors, string_from_utf8_lossy_owned)] +#![feature(linked_list_cursors, string_from_utf8_lossy_owned)] mod apperr; mod documents; @@ -13,7 +13,7 @@ mod localization; mod state; use std::borrow::Cow; -use std::path::{Path, PathBuf}; +use std::path::Path; use std::time::Duration; use std::{env, process}; @@ -30,8 +30,9 @@ use edit::vt::{self, Token}; use edit::{base64, path, sys, unicode}; use localization::*; use state::*; -use stdext::arena::{self, Arena, ArenaString, scratch_arena}; +use stdext::arena::{self, Arena, scratch_arena}; use stdext::arena_format; +use stdext::collections::{BString, BVec}; #[cfg(target_pointer_width = "32")] const SCRATCH_ARENA_CAPACITY: usize = 128 * MEBI; @@ -172,15 +173,15 @@ fn run() -> apperr::Result<()> { let scratch = scratch_arena(None); let mut output = tui.render(&scratch); - write_terminal_title(&mut output, &mut state); + write_terminal_title(&scratch, &mut output, &mut state); if state.osc_clipboard_sync { - write_osc_clipboard(&mut tui, &mut state, &mut output); + write_osc_clipboard(&scratch, &mut output, &mut tui, &mut state); } #[cfg(feature = "debug-latency")] { - use std::fmt::Write as _; + use stdext::arena_write_fmt; // Print the number of passes and latency in the top right corner. let time_end = std::time::Instant::now(); @@ -188,7 +189,7 @@ fn run() -> apperr::Result<()> { let scratch_alt = scratch_arena(Some(&scratch)); let status = arena_format!( - &scratch_alt, + &*scratch_alt, "{}P {}B {:.3}μs", passes, output.len(), @@ -204,10 +205,11 @@ fn run() -> apperr::Result<()> { // If the `output` is already very large, // Rust may double the size during the write below. // Let's avoid that by reserving the needed size in advance. - output.reserve_exact(128); + output.reserve_exact(&*scratch, 128); // To avoid moving the cursor, push and pop it onto the VT cursor stack. 
- _ = write!( + arena_write_fmt!( + &*scratch, output, "\x1b7\x1b[0;41;97m\x1b[1;{0}H{1:2$}{3}\x1b8", tui.size().width - cols - padding + 1, @@ -229,7 +231,7 @@ fn run() -> apperr::Result<()> { // Returns true if the application should exit early. fn handle_args(state: &mut State) -> apperr::Result { let scratch = scratch_arena(None); - let mut paths: Vec = Vec::new_in(&*scratch); + let mut paths = BVec::empty(); let cwd = env::current_dir()?; let mut dir = None; let mut parse_args = true; @@ -261,7 +263,7 @@ fn handle_args(state: &mut State) -> apperr::Result { state.wants_file_picker = StateFilePicker::Open; dir = Some(p); } else { - paths.push(p); + paths.push(&*scratch, p); } } @@ -394,7 +396,7 @@ fn draw_handle_wants_exit(_ctx: &mut Context, state: &mut State) { } } -fn write_terminal_title(output: &mut ArenaString, state: &mut State) { +fn write_terminal_title<'a>(arena: &'a Arena, output: &mut BString<'a>, state: &mut State) { let (filename, dirty) = state .documents .active() @@ -406,15 +408,15 @@ fn write_terminal_title(output: &mut ArenaString, state: &mut State) { return; } - output.push_str("\x1b]0;"); + output.push_str(arena, "\x1b]0;"); if !filename.is_empty() { if dirty { - output.push_str("● "); + output.push_str(arena, "● "); } - output.push_str(&sanitize_control_chars(filename)); - output.push_str(" - "); + output.push_str(arena, &sanitize_control_chars(filename)); + output.push_str(arena, " - "); } - output.push_str("edit\x1b\\"); + output.push_str(arena, "edit\x1b\\"); state.osc_title_file_status.filename = filename.to_string(); state.osc_title_file_status.dirty = dirty; @@ -449,10 +451,10 @@ fn draw_handle_clipboard_change(ctx: &mut Context, state: &mut State) { let template = loc(LocId::LargeClipboardWarningLine2); let size = arena_format!(ctx.arena(), "{}", MetricFormatter(data_len)); - let mut label = - ArenaString::with_capacity_in(template.len() + size.len(), ctx.arena()); - label.push_str(template); - label.replace_once_in_place("{size}", 
&size); + let mut label = BString::empty(); + label.reserve(ctx.arena(), template.len() + size.len()); + label.push_str(ctx.arena(), template); + label.replace_once_in_place(ctx.arena(), "{size}", &size); label }; @@ -514,7 +516,12 @@ fn draw_handle_clipboard_change(ctx: &mut Context, state: &mut State) { } #[cold] -fn write_osc_clipboard(tui: &mut Tui, state: &mut State, output: &mut ArenaString) { +fn write_osc_clipboard<'a>( + arena: &'a Arena, + output: &mut BString<'a>, + tui: &mut Tui, + state: &mut State, +) { let clipboard = tui.clipboard_mut(); let data = clipboard.read(); @@ -523,10 +530,10 @@ fn write_osc_clipboard(tui: &mut Tui, state: &mut State, output: &mut ArenaStrin // If `data` is *really* large, this may then double // the size of the `output` from e.g. 100MB to 200MB. Not good. // We can avoid that by reserving the needed size in advance. - output.reserve_exact(base64::encode_len(data.len()) + 16); - output.push_str("\x1b]52;c;"); - base64::encode(output, data); - output.push_str("\x1b\\"); + output.reserve_exact(arena, base64::encode_len(data.len()) + 16); + output.push_str(arena, "\x1b]52;c;"); + base64::encode(arena, output, data); + output.push_str(arena, "\x1b\\"); } state.osc_clipboard_sync = false; diff --git a/crates/edit/src/buffer/mod.rs b/crates/edit/src/buffer/mod.rs index 251fa401d141..de146640b444 100644 --- a/crates/edit/src/buffer/mod.rs +++ b/crates/edit/src/buffer/mod.rs @@ -26,7 +26,6 @@ mod navigation; use std::borrow::Cow; use std::cell::UnsafeCell; use std::collections::LinkedList; -use std::fmt::Write as _; use std::fs::File; use std::io::{self, Read as _, Write as _}; use std::mem::{self, MaybeUninit}; @@ -35,8 +34,9 @@ use std::rc::Rc; use std::str; pub use gap_buffer::GapBuffer; -use stdext::arena::{Arena, ArenaString, scratch_arena}; -use stdext::{ReplaceRange as _, minmax, slice_as_uninit_mut, slice_copy_safe}; +use stdext::arena::{Arena, scratch_arena}; +use stdext::collections::{BString, BVec}; +use 
stdext::{ReplaceRange as _, arena_write_fmt, minmax, slice_as_uninit_mut, slice_copy_safe}; use crate::cell::SemiRefCell; use crate::clipboard::Clipboard; @@ -164,7 +164,7 @@ pub struct SearchOptions { enum RegexReplacement<'a> { Group(i32), - Text(Vec), + Text(BVec<'a, u8>), } /// Caches the start and length of the active edit line for a single edit. @@ -1303,15 +1303,15 @@ impl TextBuffer { arena: &'a Arena, search: &mut ActiveSearch, replacement: &[u8], - ) -> Vec, &'a Arena> { - let mut res = Vec::new_in(arena); + ) -> BVec<'a, RegexReplacement<'a>> { + let mut res = BVec::empty(); if !search.options.use_regex { return res; } let group_count = search.regex.group_count(); - let mut text = Vec::new_in(arena); + let mut text = BVec::empty(); let mut text_beg = 0; loop { @@ -1319,7 +1319,7 @@ impl TextBuffer { // Push the raw, unescaped text, if any. if text_beg < off { - text.extend_from_slice(&replacement[text_beg..off]); + text.extend_from_slice(arena, &replacement[text_beg..off]); } // Unescape any escaped characters. @@ -1333,12 +1333,15 @@ impl TextBuffer { let ch = replacement.get(off - 1).map_or(b'\\', |&c| c); // Unescape and append the character. - text.push(match ch { - b'n' => b'\n', - b'r' => b'\r', - b't' => b'\t', - ch => ch, - }); + text.push( + arena, + match ch { + b'n' => b'\n', + b'r' => b'\r', + b't' => b'\t', + ch => ch, + }, + ); } // Parse out a group number, if any. 
@@ -1374,18 +1377,18 @@ impl TextBuffer { if !acc_bad { group = acc; } else { - text.extend_from_slice(&replacement[beg..end]); + text.extend_from_slice(arena, &replacement[beg..end]); } off = end; } if !text.is_empty() { - res.push(RegexReplacement::Text(text)); - text = Vec::new_in(arena); + res.push(arena, RegexReplacement::Text(text)); + text = BVec::empty(); } if group >= 0 { - res.push(RegexReplacement::Group(group)); + res.push(arena, RegexReplacement::Group(group)); } text_beg = off; @@ -1753,8 +1756,8 @@ impl TextBuffer { for y in 0..height { let scratch = scratch_arena(None); - let mut line = ArenaString::new_in(&scratch); - line.reserve(width as usize * 2); + let mut line = BString::empty(); + line.reserve(&*scratch, width as usize * 2); let visual_line = origin.y + y; let mut cursor_beg = @@ -1777,14 +1780,21 @@ impl TextBuffer { // because `line_number_width` can't possibly be larger than 19. let off = 19 - line_number_width; unsafe { std::hint::assert_unchecked(off < MARGIN_TEMPLATE.len()) }; - line.push_str(&MARGIN_TEMPLATE[off..]); + line.push_str(&*scratch, &MARGIN_TEMPLATE[off..]); } else if self.word_wrap_column <= 0 || cursor_beg.logical_pos.x == 0 { // Regular line? Place "123 | " in the margin. - _ = write!(line, "{:1$} │ ", cursor_beg.logical_pos.y + 1, line_number_width); + arena_write_fmt!( + &*scratch, + line, + "{:1$} │ ", + cursor_beg.logical_pos.y + 1, + line_number_width + ); } else { // Wrapped line? Place " ... | " in the margin. 
let number_width = (cursor_beg.logical_pos.y + 1).ilog10() as usize + 1; - _ = write!( + arena_write_fmt!( + &*scratch, line, "{0:1$}{0:∙<2$} │ ", "", @@ -1875,7 +1885,7 @@ impl TextBuffer { if cursor_next.visual_pos.x > origin.x { let overlap = cursor_next.visual_pos.x - origin.x; debug_assert!((1..=7).contains(&overlap)); - line.push_str(&TAB_WHITESPACE[..overlap as usize]); + line.push_str(&*scratch, &TAB_WHITESPACE[..overlap as usize]); cursor_beg = cursor_next; } } @@ -1938,7 +1948,7 @@ impl TextBuffer { ); } - line.push_str(&whitespace[..prefix_add + tab_size as usize]); + line.push_str(&*scratch, &whitespace[..prefix_add + tab_size as usize]); } else if ch <= '\x1f' || ('\u{7f}'..='\u{9f}').contains(&ch) { // Append a Unicode representation of the C0 or C1 control character. visualizer_buf[2] = if ch <= '\x1f' { @@ -1950,7 +1960,9 @@ impl TextBuffer { }; // Our manually constructed UTF8 is never going to be invalid. Trust. - line.push_str(unsafe { str::from_utf8_unchecked(&visualizer_buf) }); + line.push_str(&*scratch, unsafe { + str::from_utf8_unchecked(&visualizer_buf) + }); // Highlight the control character yellow. cursor_line = @@ -1967,7 +1979,7 @@ impl TextBuffer { fb.blend_bg(visualizer_rect, bg); fb.blend_fg(visualizer_rect, fg); } else { - line.push(ch); + line.push(&*scratch, ch); } } @@ -2123,7 +2135,7 @@ impl TextBuffer { let mut offset = 0; let scratch = scratch_arena(None); - let mut newline_buffer = ArenaString::new_in(&scratch); + let mut newline_buffer = BString::empty(); loop { // Can't use `unicode::newlines_forward` because bracketed paste uses CR instead of LF/CRLF. @@ -2170,7 +2182,7 @@ impl TextBuffer { // First, write the newline. newline_buffer.clear(); - newline_buffer.push_str(if self.newlines_are_crlf { "\r\n" } else { "\n" }); + newline_buffer.push_str(&*scratch, if self.newlines_are_crlf { "\r\n" } else { "\n" }); if !raw { // We'll give the next line the same indentation as the previous one. 
@@ -2203,13 +2215,13 @@ impl TextBuffer { // If tabs are enabled, add as many tabs as we can. if self.indent_with_tabs { let tab_count = newline_indentation / self.tab_size; - newline_buffer.push_repeat('\t', tab_count as usize); + newline_buffer.push_repeat(&*scratch, '\t', tab_count as usize); newline_indentation -= tab_count * self.tab_size; } // If tabs are disabled, or if the indentation wasn't a multiple of the tab size, // add spaces to make up the difference. - newline_buffer.push_repeat(' ', newline_indentation as usize); + newline_buffer.push_repeat(&*scratch, ' ', newline_indentation as usize); } self.edit_write(newline_buffer.as_bytes()); diff --git a/crates/edit/src/document.rs b/crates/edit/src/document.rs index 0059a959772d..493eb013ac16 100644 --- a/crates/edit/src/document.rs +++ b/crates/edit/src/document.rs @@ -9,7 +9,6 @@ use std::ops::Range; use std::path::PathBuf; use stdext::ReplaceRange as _; -use stdext::arena::{ArenaString, scratch_arena}; /// An abstraction over reading from text containers. pub trait ReadableDocument { @@ -76,15 +75,9 @@ impl ReadableDocument for String { impl WriteableDocument for String { fn replace(&mut self, range: Range, replacement: &[u8]) { // `replacement` is not guaranteed to be valid UTF-8, so we need to sanitize it. - let scratch = scratch_arena(None); - let utf8 = ArenaString::from_utf8_lossy(&scratch, replacement); - let src = match &utf8 { - Ok(s) => s, - Err(s) => s.as_str(), - }; - + let utf8 = String::from_utf8_lossy(replacement); // SAFETY: `range` is guaranteed to be on codepoint boundaries. - unsafe { self.as_mut_vec() }.replace_range(range, src.as_bytes()); + unsafe { self.as_mut_vec() }.replace_range(range, utf8.as_bytes()); } } diff --git a/crates/edit/src/framebuffer.rs b/crates/edit/src/framebuffer.rs index f2225e15eddb..464d11941e52 100644 --- a/crates/edit/src/framebuffer.rs +++ b/crates/edit/src/framebuffer.rs @@ -4,16 +4,17 @@ //! A shoddy framebuffer for terminal applications. 
use std::cell::Cell; -use std::fmt::Write; use std::ops::{BitOr, BitXor}; use std::ptr; use std::slice::ChunksExact; -use stdext::arena::{Arena, ArenaString}; +use stdext::arena::Arena; +use stdext::arena_write_fmt; +use stdext::collections::BString; +use stdext::simd::memset; use crate::helpers::{CoordType, Point, Rect, Size}; use crate::oklab::StraightRgba; -use crate::simd::{MemsetSafe, memset}; use crate::unicode::MeasurementConfig; // Same constants as used in the PCG family of RNGs. @@ -424,7 +425,7 @@ impl Framebuffer { /// Renders the framebuffer contents accumulated since the /// last call to `flip()` and returns them serialized as VT. - pub fn render<'a>(&mut self, arena: &'a Arena) -> ArenaString<'a> { + pub fn render<'a>(&mut self, arena: &'a Arena) -> BString<'a> { let idx = self.frame_counter & 1; // Borrows the front/back buffers without letting Rust know that we have a reference to self. // SAFETY: Well this is certainly correct, but whether Rust and its strict rules likes it is another question. 
@@ -445,7 +446,7 @@ impl Framebuffer { let mut back_fgs = back.fg_bitmap.iter(); let mut back_attrs = back.attributes.iter(); - let mut result = ArenaString::new_in(arena); + let mut result = BString::empty(); let mut last_bg = u64::MAX; let mut last_fg = u64::MAX; let mut last_attr = Attributes::None; @@ -478,9 +479,9 @@ impl Framebuffer { let mut chunk_end = 0; if result.is_empty() { - result.push_str("\x1b[m"); + result.push_str(arena, "\x1b[m"); } - _ = write!(result, "\x1b[{};1H", y + 1); + arena_write_fmt!(arena, result, "\x1b[{};1H", y + 1); while { let bg = back_bg[chunk_end]; @@ -498,28 +499,28 @@ impl Framebuffer { if last_bg != bg.to_ne() as u64 { last_bg = bg.to_ne() as u64; - self.format_color(&mut result, false, bg); + self.format_color(arena, &mut result, false, bg); } if last_fg != fg.to_ne() as u64 { last_fg = fg.to_ne() as u64; - self.format_color(&mut result, true, fg); + self.format_color(arena, &mut result, true, fg); } if last_attr != attr { let diff = last_attr ^ attr; if diff.is(Attributes::Italic) { if attr.is(Attributes::Italic) { - result.push_str("\x1b[3m"); + result.push_str(arena, "\x1b[3m"); } else { - result.push_str("\x1b[23m"); + result.push_str(arena, "\x1b[23m"); } } if diff.is(Attributes::Underlined) { if attr.is(Attributes::Underlined) { - result.push_str("\x1b[4m"); + result.push_str(arena, "\x1b[4m"); } else { - result.push_str("\x1b[24m"); + result.push_str(arena, "\x1b[24m"); } } last_attr = attr; @@ -527,7 +528,7 @@ impl Framebuffer { let beg = cfg.cursor().offset; let end = cfg.goto_visual(Point { x: chunk_end as CoordType, y: 0 }).offset; - result.push_str(&back_line[beg..end]); + result.push_str(arena, &back_line[beg..end]); chunk_end < back_bg.len() } {} @@ -541,7 +542,8 @@ impl Framebuffer { // CUP to the cursor position. // DECSCUSR to set the cursor style. // DECTCEM to show the cursor. 
- _ = write!( + arena_write_fmt!( + arena, result, "\x1b[{};{}H\x1b[{} q\x1b[?25h", back.cursor.pos.y + 1, @@ -550,14 +552,20 @@ impl Framebuffer { ); } else { // DECTCEM to hide the cursor. - result.push_str("\x1b[?25l"); + result.push_str(arena, "\x1b[?25l"); } } result } - fn format_color(&self, dst: &mut ArenaString, fg: bool, mut color: StraightRgba) { + fn format_color<'a>( + &self, + arena: &'a Arena, + dst: &mut BString<'a>, + fg: bool, + mut color: StraightRgba, + ) { let typ = if fg { '3' } else { '4' }; // Some terminals support transparent backgrounds which are used @@ -574,7 +582,7 @@ impl Framebuffer { // and "color that happens to be default foreground" separate. // (This also applies to the background color by the way.) if color.to_ne() == 0 { - _ = write!(dst, "\x1b[{typ}9m"); + arena_write_fmt!(arena, dst, "\x1b[{typ}9m"); return; } @@ -587,7 +595,7 @@ impl Framebuffer { let r = color.red(); let g = color.green(); let b = color.blue(); - _ = write!(dst, "\x1b[{typ}8;2;{r};{g};{b}m"); + arena_write_fmt!(arena, dst, "\x1b[{typ}8;2;{r};{g};{b}m"); } } @@ -839,8 +847,6 @@ impl Attributes { } } -unsafe impl MemsetSafe for Attributes {} - impl BitOr for Attributes { type Output = Self; diff --git a/crates/edit/src/fuzzy.rs b/crates/edit/src/fuzzy.rs index 2dc450f9ccb5..92fb945e7f65 100644 --- a/crates/edit/src/fuzzy.rs +++ b/crates/edit/src/fuzzy.rs @@ -5,9 +5,8 @@ //! Other algorithms exist, such as Sublime Text's, or the one used in `fzf`, //! but I figured that this one is what lots of people may be familiar with. 
-use std::vec; - use stdext::arena::{Arena, scratch_arena}; +use stdext::collections::BVec; use crate::icu; @@ -18,10 +17,10 @@ pub fn score_fuzzy<'a>( haystack: &str, needle: &str, allow_non_contiguous_matches: bool, -) -> (i32, Vec) { +) -> (i32, BVec<'a, usize>) { if haystack.is_empty() || needle.is_empty() { // return early if target or query are empty - return (NO_MATCH, Vec::new_in(arena)); + return (NO_MATCH, BVec::empty()); } let scratch = scratch_arena(Some(arena)); @@ -30,7 +29,7 @@ pub fn score_fuzzy<'a>( if target.len() < query.len() { // impossible for query to be contained in target - return (NO_MATCH, Vec::new_in(arena)); + return (NO_MATCH, BVec::empty()); } let target_lower = icu::fold_case(&scratch, haystack); @@ -39,8 +38,11 @@ pub fn score_fuzzy<'a>( let query_lower = map_chars(&scratch, &query_lower); let area = query.len() * target.len(); - let mut scores = vec::from_elem_in(0, area, &*scratch); - let mut matches = vec::from_elem_in(0, area, &*scratch); + let mut scores = BVec::empty(); + let mut matches = BVec::empty(); + + scores.extend(&*scratch, std::iter::repeat_n(0, area)); + matches.extend(&*scratch, std::iter::repeat_n(0, area)); // // Build Scorer Matrix: @@ -122,7 +124,7 @@ pub fn score_fuzzy<'a>( } // Restore Positions (starting from bottom right of matrix) - let mut positions = Vec::new_in(arena); + let mut positions = BVec::empty(); if !query.is_empty() && !target.is_empty() { let mut query_index = query.len() - 1; @@ -136,7 +138,7 @@ pub fn score_fuzzy<'a>( } target_index -= 1; // go left } else { - positions.push(target_index); + positions.push(arena, target_index); // go up and left if query_index == 0 || target_index == 0 { @@ -214,9 +216,8 @@ fn score_separator_at_pos(ch: char) -> i32 { } } -fn map_chars<'a>(arena: &'a Arena, s: &str) -> Vec { - let mut chars = Vec::with_capacity_in(s.len(), arena); - chars.extend(s.chars()); - chars.shrink_to_fit(); +fn map_chars<'a>(arena: &'a Arena, s: &str) -> BVec<'a, char> { + let mut 
chars = BVec::empty(); + chars.extend_sloppy(arena, s.chars()); chars } diff --git a/crates/edit/src/icu.rs b/crates/edit/src/icu.rs index a99acac7cc63..c03e99da8498 100644 --- a/crates/edit/src/icu.rs +++ b/crates/edit/src/icu.rs @@ -10,8 +10,9 @@ use std::ops::Range; use std::ptr::{null, null_mut}; use std::{fmt, mem}; -use stdext::arena::{Arena, ArenaString, scratch_arena}; +use stdext::arena::{Arena, scratch_arena}; use stdext::arena_format; +use stdext::collections::{BString, BVec}; use crate::buffer::TextBuffer; use crate::sys; @@ -74,12 +75,12 @@ pub fn get_available_encodings() -> &'static Encodings { unsafe { if ENCODINGS.all.is_empty() { let scratch = scratch_arena(None); - let mut preferred = Vec::new_in(&*scratch); - let mut alternative = Vec::new_in(&*scratch); + let mut preferred = BVec::empty(); + let mut alternative = BVec::empty(); // These encodings are always available. - preferred.push(Encoding { label: "UTF-8", canonical: "UTF-8" }); - preferred.push(Encoding { label: "UTF-8 BOM", canonical: "UTF-8 BOM" }); + preferred.push(&*scratch, Encoding { label: "UTF-8", canonical: "UTF-8" }); + preferred.push(&*scratch, Encoding { label: "UTF-8 BOM", canonical: "UTF-8 BOM" }); if let Ok(f) = init_if_needed() { let mut n = 0; @@ -107,9 +108,9 @@ pub fn get_available_encodings() -> &'static Encodings { ); if !mime.is_null() && status.is_success() { let mime = CStr::from_ptr(mime).to_str().unwrap_unchecked(); - preferred.push(Encoding { label: mime, canonical: name }); + preferred.push(&*scratch, Encoding { label: mime, canonical: name }); } else { - alternative.push(Encoding { label: name, canonical: name }); + alternative.push(&*scratch, Encoding { label: name, canonical: name }); } } } @@ -187,7 +188,7 @@ impl<'pivot> Converter<'pivot> { Ok(Self { source, target, pivot_buffer, pivot_source, pivot_target, reset: true }) } - fn append_nul<'a>(arena: &'a Arena, input: &str) -> ArenaString<'a> { + fn append_nul<'a>(arena: &'a Arena, input: &str) -> 
BString<'a> { arena_format!(arena, "{}\0", input) } @@ -635,10 +636,10 @@ impl Regex { let f = init_if_needed()?; unsafe { let scratch = scratch_arena(None); - let mut utf16 = Vec::new_in(&*scratch); + let mut utf16 = BVec::empty(); let mut status = icu_ffi::U_ZERO_ERROR; - utf16.extend(pattern.encode_utf16()); + utf16.extend_sloppy(&*scratch, pattern.encode_utf16()); let ptr = (f.uregex_open)( utf16.as_ptr(), @@ -823,7 +824,7 @@ static mut ROOT_CASEMAP: Option<*mut icu_ffi::UCaseMap> = None; /// /// Case folding differs from lower case in that the output is primarily useful /// to machines for comparisons. It's like applying Unicode normalization. -pub fn fold_case<'a>(arena: &'a Arena, input: &str) -> ArenaString<'a> { +pub fn fold_case<'a>(arena: &'a Arena, input: &str) -> BString<'a> { // OnceCell for people that want to put it into a static. #[allow(static_mut_refs)] let casemap = unsafe { @@ -841,13 +842,13 @@ pub fn fold_case<'a>(arena: &'a Arena, input: &str) -> ArenaString<'a> { if !casemap.is_null() { let f = assume_loaded(); let mut status = icu_ffi::U_ZERO_ERROR; - let mut output = Vec::new_in(arena); + let mut output = BVec::empty(); let mut output_len; // First, guess the output length: // TODO: What's a good heuristic here? { - output.reserve_exact(input.len() + 16); + output.reserve_exact(arena, input.len() + 16); let output = output.spare_capacity_mut(); output_len = unsafe { (f.ucasemap_utf8FoldCase)( @@ -863,7 +864,7 @@ pub fn fold_case<'a>(arena: &'a Arena, input: &str) -> ArenaString<'a> { // If that failed to fit, retry with the correct length. 
if status == icu_ffi::U_BUFFER_OVERFLOW_ERROR && output_len > 0 { - output.reserve_exact(output_len as usize); + output.reserve_exact(arena, output_len as usize); let output = output.spare_capacity_mut(); output_len = unsafe { (f.ucasemap_utf8FoldCase)( @@ -881,11 +882,11 @@ pub fn fold_case<'a>(arena: &'a Arena, input: &str) -> ArenaString<'a> { unsafe { output.set_len(output_len as usize); } - return unsafe { ArenaString::from_utf8_unchecked(output) }; + return unsafe { BString::from_utf8_unchecked(output) }; } } - let mut result = ArenaString::from_str(arena, input); + let mut result = BString::from_str(arena, input); for b in unsafe { result.as_bytes_mut() } { b.make_ascii_lowercase(); } diff --git a/crates/edit/src/json.rs b/crates/edit/src/json.rs index 368de37c8c7c..299b2f357d6d 100644 --- a/crates/edit/src/json.rs +++ b/crates/edit/src/json.rs @@ -9,7 +9,8 @@ use std::fmt; use std::hint::unreachable_unchecked; -use stdext::arena::{Arena, ArenaString}; +use stdext::arena::Arena; +use stdext::collections::{BString, BVec}; use crate::unicode::MeasurementConfig; @@ -224,7 +225,7 @@ impl<'a, 'i> Parser<'a, 'i> { fn parse_string(&mut self) -> Result, ParseError> { self.expect(b'"')?; - let mut result = ArenaString::new_in(self.arena); + let mut result = BString::empty(); loop { if self.pos >= self.bytes.len() { @@ -251,16 +252,17 @@ impl<'a, 'i> Parser<'a, 'i> { self.pos += 1; } - result.push_str(&self.input[beg..self.pos]); + result.push_str(self.arena, &self.input[beg..self.pos]); } } } - Ok(Value::String(result.leak())) + let str = result.leak(); + Ok(Value::String(str)) } #[cold] - fn parse_escape(&mut self, result: &mut ArenaString) -> Result<(), ParseError> { + fn parse_escape(&mut self, result: &mut BString<'a>) -> Result<(), ParseError> { if self.pos >= self.bytes.len() { // Unterminated escape sequence return Err(self.fail(self.pos, ParseErrorKind::Syntax)); @@ -285,12 +287,12 @@ impl<'a, 'i> Parser<'a, 'i> { } }; - result.push(ch as char); + 
result.push(self.arena, ch as char); Ok(()) } #[cold] - fn parse_unicode_escape(&mut self, result: &mut ArenaString) -> Result<(), ParseError> { + fn parse_unicode_escape(&mut self, result: &mut BString<'a>) -> Result<(), ParseError> { let start = self.pos - 2; // parse_escape() already advanced past "\u" let mut code = self.parse_hex4()?; @@ -308,7 +310,7 @@ impl<'a, 'i> Parser<'a, 'i> { match char::from_u32(code) { Some(c) => { - result.push(c); + result.push(self.arena, c); Ok(()) } None => Err(self.fail(start, ParseErrorKind::Syntax)), @@ -331,7 +333,7 @@ impl<'a, 'i> Parser<'a, 'i> { } fn parse_array(&mut self, depth: usize) -> Result, ParseError> { - let mut values = Vec::new_in(self.arena); + let mut values = BVec::empty(); let mut expects_comma = false; self.expect(b'[')?; @@ -359,7 +361,7 @@ impl<'a, 'i> Parser<'a, 'i> { return Err(self.fail(self.pos, ParseErrorKind::Syntax)); } - values.push(self.parse_value(depth + 1)?); + values.push(self.arena, self.parse_value(depth + 1)?); expects_comma = true; } } @@ -370,7 +372,7 @@ impl<'a, 'i> Parser<'a, 'i> { } fn parse_object(&mut self, depth: usize) -> Result, ParseError> { - let mut entries = Vec::new_in(self.arena); + let mut entries = BVec::empty(); let mut expects_comma = false; self.expect(b'{')?; @@ -409,7 +411,7 @@ impl<'a, 'i> Parser<'a, 'i> { self.expect(b':')?; let value = self.parse_value(depth + 1)?; - entries.push((key, value)); + entries.push(self.arena, (key, value)); expects_comma = true; } } diff --git a/crates/edit/src/lib.rs b/crates/edit/src/lib.rs index 6dea3a7380bd..141cb07abade 100644 --- a/crates/edit/src/lib.rs +++ b/crates/edit/src/lib.rs @@ -2,7 +2,6 @@ // Licensed under the MIT License. 
#![feature( - allocator_api, breakpoint, cold_path, linked_list_cursors, diff --git a/crates/edit/src/oklab.rs b/crates/edit/src/oklab.rs index 8340bab0b4e6..32ef973f7637 100644 --- a/crates/edit/src/oklab.rs +++ b/crates/edit/src/oklab.rs @@ -9,8 +9,6 @@ use std::fmt::Debug; -use crate::simd::MemsetSafe; - /// A sRGB color with straight (= not premultiplied) alpha. #[derive(Default, Clone, Copy, PartialEq, Eq)] #[repr(transparent)] @@ -102,8 +100,6 @@ impl Debug for StraightRgba { } } -unsafe impl MemsetSafe for StraightRgba {} - /// An Oklab color with alpha. By convention, it uses straight alpha. #[derive(Clone, Copy)] pub struct Oklab([f32; 4]); diff --git a/crates/edit/src/simd/mod.rs b/crates/edit/src/simd/mod.rs index 7f60ed42053e..ed6e7b35e2e3 100644 --- a/crates/edit/src/simd/mod.rs +++ b/crates/edit/src/simd/mod.rs @@ -6,12 +6,10 @@ pub mod lines_bwd; pub mod lines_fwd; mod memchr2; -mod memset; pub use lines_bwd::*; pub use lines_fwd::*; pub use memchr2::*; -pub use memset::*; #[cfg(test)] mod test { diff --git a/crates/edit/src/sys/unix.rs b/crates/edit/src/sys/unix.rs index b0e275cfc558..552eb4eaed63 100644 --- a/crates/edit/src/sys/unix.rs +++ b/crates/edit/src/sys/unix.rs @@ -14,8 +14,9 @@ use std::path::Path; use std::ptr::{NonNull, null_mut}; use std::{io, thread, time}; -use stdext::arena::{Arena, ArenaString, scratch_arena}; +use stdext::arena::{Arena, scratch_arena}; use stdext::arena_format; +use stdext::collections::{BString, BVec}; use crate::helpers::*; @@ -169,24 +170,24 @@ fn get_window_size() -> (u16, u16) { /// Returns `None` if there was an error reading from stdin. /// Returns `Some("")` if the given timeout was reached. /// Otherwise, it returns the read, non-empty string. 
-pub fn read_stdin(arena: &Arena, mut timeout: time::Duration) -> Option> { +pub fn read_stdin(arena: &Arena, mut timeout: time::Duration) -> Option> { unsafe { if STATE.inject_resize { timeout = time::Duration::ZERO; } let read_poll = timeout != time::Duration::MAX; - let mut buf = Vec::new_in(arena); + let mut buf = BVec::empty(); // We don't know if the input is valid UTF8, so we first use a Vec and then // later turn it into UTF8 using `from_utf8_lossy_owned`. // It is important that we allocate the buffer with an explicit capacity, // because we later use `spare_capacity_mut` to access it. - buf.reserve(4 * KIBI); + buf.reserve(arena, 4 * KIBI); // We got some leftover broken UTF8 from a previous read? Prepend it. if STATE.utf8_len != 0 { - buf.extend_from_slice(&STATE.utf8_buf[..STATE.utf8_len]); + buf.extend_from_slice(arena, &STATE.utf8_buf[..STATE.utf8_len]); STATE.utf8_len = 0; } @@ -271,7 +272,7 @@ pub fn read_stdin(arena: &Arena, mut timeout: time::Duration) -> Option Option 0 && h > 0 { let scratch = scratch_arena(Some(arena)); - let seq = arena_format!(&scratch, "\x1b[8;{h};{w}t"); - result.replace_range(0..0, &seq); + let seq = arena_format!(&*scratch, "\x1b[8;{h};{w}t"); + result.replace_range(arena, 0..0, &seq); } } - result.shrink_to_fit(); Some(result) } } @@ -431,11 +431,11 @@ pub fn load_icu() -> io::Result { /// but I found that many (most?) Linux distributions don't do this for some reason. /// This function returns the suffix, if any. #[cfg(edit_icu_renaming_auto_detect)] -pub fn icu_detect_renaming_suffix(arena: &Arena, handle: NonNull) -> ArenaString<'_> { +pub fn icu_detect_renaming_suffix(arena: &Arena, handle: NonNull) -> BString<'_> { unsafe { type T = *const c_void; - let mut res = ArenaString::new_in(arena); + let mut res = BString::empty(); // Check if the ICU library is using unversioned symbols. // Return an empty suffix in that case. 
@@ -481,8 +481,8 @@ pub fn icu_detect_renaming_suffix(arena: &Arena, handle: NonNull) -> Are let version_end = version.find('.').unwrap_or(version.len()); let version = &version[..version_end]; - res.push('_'); - res.push_str(version); + res.push(arena, '_'); + res.push_str(arena, version); res } } @@ -506,29 +506,35 @@ where let name = unsafe { std::ffi::CStr::from_ptr(name) }; let name = unsafe { name.to_str().unwrap_unchecked() }; - let mut res = ManuallyDrop::new(ArenaString::new_in(arena)); - res.reserve(name.len() + suffix.len() + 1); - res.push_str(name); - res.push_str(suffix); - res.push('\0'); + let mut res = BString::empty(); + res.reserve(arena, name.len() + suffix.len() + 1); + res.push_str(arena, name); + res.push_str(arena, suffix); + res.push(arena, '\0'); res.as_ptr() as *const c_char } } -pub fn preferred_languages(arena: &Arena) -> Vec, &Arena> { - let mut locales = Vec::new_in(arena); +pub fn preferred_languages(arena: &Arena) -> BVec<'_, BString<'_>> { + let mut locales = BVec::empty(); for key in ["LANGUAGE", "LC_ALL", "LANG"] { if let Ok(val) = std::env::var(key) && !val.is_empty() { - locales.extend(val.split(':').filter(|s| !s.is_empty()).map(|s| { - // Replace all underscores with dashes, - // because the localization code expects pt-br, not pt_BR. - let mut res = Vec::new_in(arena); - res.extend(s.as_bytes().iter().map(|&b| if b == b'_' { b'-' } else { b })); - unsafe { ArenaString::from_utf8_unchecked(res) } - })); + locales.extend_sloppy( + arena, + val.split(':').filter(|s| !s.is_empty()).map(|s| { + // Replace all underscores with dashes, + // because the localization code expects pt-br, not pt_BR. 
+ let mut res = BVec::empty(); + res.extend( + arena, + s.as_bytes().iter().map(|&b| if b == b'_' { b'-' } else { b }), + ); + unsafe { BString::from_utf8_unchecked(res) } + }), + ); break; } } diff --git a/crates/edit/src/sys/windows.rs b/crates/edit/src/sys/windows.rs index b54754862805..f25364960633 100644 --- a/crates/edit/src/sys/windows.rs +++ b/crates/edit/src/sys/windows.rs @@ -2,7 +2,6 @@ // Licensed under the MIT License. use std::ffi::{OsString, c_char, c_void}; -use std::fmt::Write as _; use std::fs::{self, File}; use std::mem::MaybeUninit; use std::os::windows::io::{AsRawHandle as _, FromRawHandle}; @@ -10,7 +9,9 @@ use std::path::{Path, PathBuf}; use std::ptr::{self, NonNull, null, null_mut}; use std::{io, mem, time}; -use stdext::arena::{Arena, ArenaString, scratch_arena}; +use stdext::arena::{Arena, scratch_arena}; +use stdext::arena_write_fmt; +use stdext::collections::{BString, BVec}; use windows_sys::Win32::Storage::FileSystem; use windows_sys::Win32::System::{Console, IO, LibraryLoader, Threading}; use windows_sys::Win32::{Foundation, Globalization}; @@ -247,7 +248,7 @@ fn get_console_size() -> Option { /// * `None` if there was an error reading from stdin. /// * `Some("")` if the given timeout was reached. /// * Otherwise, it returns the read, non-empty string. -pub fn read_stdin(arena: &Arena, mut timeout: time::Duration) -> Option> { +pub fn read_stdin(arena: &Arena, mut timeout: time::Duration) -> Option> { let scratch = scratch_arena(Some(arena)); // On startup we're asked to inject a window size so that the UI system can layout the elements. @@ -344,15 +345,15 @@ pub fn read_stdin(arena: &Arena, mut timeout: time::Duration) -> Option Option io::Result { } /// Returns a list of preferred languages for the current user. 
-pub fn preferred_languages(arena: &Arena) -> Vec, &Arena> { +pub fn preferred_languages<'a>(arena: &'a Arena) -> BVec<'a, &'a str> { // If the GetUserPreferredUILanguages() don't fit into 512 characters, // honestly, just give up. How many languages do you realistically need? const LEN: usize = 512; let scratch = scratch_arena(Some(arena)); - let mut res = Vec::new_in(arena); // Get the list of preferred languages via `GetUserPreferredUILanguages`. let langs = unsafe { @@ -607,40 +606,12 @@ pub fn preferred_languages(arena: &Arena) -> Vec, &Arena> { }; // Convert UTF16 to UTF8. - let langs = wide_to_utf8(&scratch, langs); + let langs = BString::from_utf16_lossy(arena, langs).leak(); // Split the null-delimited string into individual chunks // and copy them into the given arena. - res.extend( - langs - .split_terminator('\0') - .filter(|s| !s.is_empty()) - .map(|s| ArenaString::from_str(arena, s)), - ); - res -} - -fn wide_to_utf8<'a>(arena: &'a Arena, wide: &[u16]) -> ArenaString<'a> { - let mut res = ArenaString::new_in(arena); - res.reserve(wide.len() * 3); - - let len = unsafe { - Globalization::WideCharToMultiByte( - Globalization::CP_UTF8, - 0, - wide.as_ptr(), - wide.len() as i32, - res.as_mut_ptr() as *mut _, - res.capacity() as i32, - null(), - null_mut(), - ) - }; - if len > 0 { - unsafe { res.as_mut_vec().set_len(len as usize) }; - } - - res.shrink_to_fit(); + let mut res = BVec::empty(); + res.extend_sloppy(arena, langs.split_terminator('\0').filter(|s| !s.is_empty())); res } diff --git a/crates/edit/src/tui.rs b/crates/edit/src/tui.rs index 247505a60419..1d2e15a5a6ee 100644 --- a/crates/edit/src/tui.rs +++ b/crates/edit/src/tui.rs @@ -146,11 +146,11 @@ use std::arch::breakpoint; #[cfg(debug_assertions)] use std::collections::HashSet; -use std::fmt::Write as _; use std::{io, iter, mem, ptr, time}; -use stdext::arena::{Arena, ArenaString, scratch_arena}; -use stdext::{arena_format, opt_ptr_eq, str_from_raw_parts}; +use stdext::arena::{Arena, 
scratch_arena}; +use stdext::collections::{BString, BVec}; +use stdext::{arena_format, arena_write_fmt, opt_ptr_eq, str_from_raw_parts}; use crate::buffer::{CursorMovement, MoveLineDirection, RcTextBuffer, TextBuffer, TextBufferCell}; use crate::cell::*; @@ -773,7 +773,7 @@ impl Tui { for root in Tree::iterate_siblings(Some(self.prev_tree.root_first)) { let mut root = root.borrow_mut(); - root.compute_intrinsic_size(); + root.compute_intrinsic_size(unsafe { mem::transmute(&self.arena_next) }); } let viewport = self.size.as_rect(); @@ -852,7 +852,7 @@ impl Tui { } /// Renders the last frame into the framebuffer and returns the VT output. - pub fn render<'a>(&mut self, arena: &'a Arena) -> ArenaString<'a> { + pub fn render<'a>(&mut self, arena: &'a Arena) -> BString<'a> { self.framebuffer.flip(self.size); for child in self.prev_tree.iterate_roots() { let mut child = child.borrow_mut(); @@ -869,15 +869,18 @@ impl Tui { return; } - let scratch = scratch_arena(None); - if node.attributes.bordered { // ┌────┐ { - let mut fill = ArenaString::new_in(&scratch); - fill.push('┌'); - fill.push_repeat('─', (outer_clipped.right - outer_clipped.left - 2) as usize); - fill.push('┐'); + let scratch = scratch_arena(None); + let mut fill = BString::empty(); + fill.push(&*scratch, '┌'); + fill.push_repeat( + &*scratch, + '─', + (outer_clipped.right - outer_clipped.left - 2) as usize, + ); + fill.push(&*scratch, '┐'); self.framebuffer.replace_text( outer_clipped.top, outer_clipped.left, @@ -888,10 +891,15 @@ impl Tui { // │ │ { - let mut fill = ArenaString::new_in(&scratch); - fill.push('│'); - fill.push_repeat(' ', (outer_clipped.right - outer_clipped.left - 2) as usize); - fill.push('│'); + let scratch = scratch_arena(None); + let mut fill = BString::empty(); + fill.push(&*scratch, '│'); + fill.push_repeat( + &*scratch, + ' ', + (outer_clipped.right - outer_clipped.left - 2) as usize, + ); + fill.push(&*scratch, '│'); for y in outer_clipped.top + 1..outer_clipped.bottom - 1 { 
self.framebuffer.replace_text( @@ -905,10 +913,15 @@ impl Tui { // └────┘ { - let mut fill = ArenaString::new_in(&scratch); - fill.push('└'); - fill.push_repeat('─', (outer_clipped.right - outer_clipped.left - 2) as usize); - fill.push('┘'); + let scratch = scratch_arena(None); + let mut fill = BString::empty(); + fill.push(&*scratch, '└'); + fill.push_repeat( + &*scratch, + '─', + (outer_clipped.right - outer_clipped.left - 2) as usize, + ); + fill.push(&*scratch, '┘'); self.framebuffer.replace_text( outer_clipped.bottom - 1, outer_clipped.left, @@ -920,8 +933,13 @@ impl Tui { if node.attributes.float.is_some() { if !node.attributes.bordered { - let mut fill = ArenaString::new_in(&scratch); - fill.push_repeat(' ', (outer_clipped.right - outer_clipped.left) as usize); + let scratch = scratch_arena(None); + let mut fill = BString::empty(); + fill.push_repeat( + &*scratch, + ' ', + (outer_clipped.right - outer_clipped.left) as usize, + ); for y in outer_clipped.top..outer_clipped.bottom { self.framebuffer.replace_text( @@ -1079,11 +1097,11 @@ impl Tui { let scratch = scratch_arena(None); - let mut modified = ArenaString::new_in(&scratch); - modified.reserve(text.len() + 3); - modified.push_str(&text[..skipped.start]); - modified.push('…'); - modified.push_str(&text[skipped.end..]); + let mut modified = BString::empty(); + modified.reserve(&*scratch, text.len() + 3); + modified.push_str(&*scratch, &text[..skipped.start]); + modified.push(&*scratch, '…'); + modified.push_str(&*scratch, &text[skipped.end..]); self.framebuffer.replace_text(target.top, target.left, target.right, &modified); } @@ -1129,89 +1147,104 @@ impl Tui { } /// Outputs a debug string of the layout and focus tree. 
- pub fn debug_layout<'a>(&mut self, arena: &'a Arena) -> ArenaString<'a> { - let mut result = ArenaString::new_in(arena); - result.push_str("general:\r\n- focus_path:\r\n"); + pub fn debug_layout<'a>(&mut self, arena: &'a Arena) -> BString<'a> { + let mut result = BString::empty(); + result.push_str(arena, "general:\r\n- focus_path:\r\n"); for &id in &self.focused_node_path { - _ = write!(result, " - {id:016x}\r\n"); + arena_write_fmt!(arena, result, " - {id:016x}\r\n"); } - result.push_str("\r\ntree:\r\n"); + result.push_str(arena, "\r\ntree:\r\n"); for root in self.prev_tree.iterate_roots() { Tree::visit_all(root, root, true, |node| { let node = node.borrow(); let depth = node.depth; - result.push_repeat(' ', depth * 2); - _ = write!(result, "- id: {:016x}\r\n", node.id); + result.push_repeat(arena, ' ', depth * 2); + arena_write_fmt!(arena, result, "- id: {:016x}\r\n", node.id); - result.push_repeat(' ', depth * 2); - _ = write!(result, " classname: {}\r\n", node.classname); + result.push_repeat(arena, ' ', depth * 2); + arena_write_fmt!(arena, result, " classname: {}\r\n", node.classname); if depth == 0 && let Some(parent) = node.parent { let parent = parent.borrow(); - result.push_repeat(' ', depth * 2); - _ = write!(result, " parent: {:016x}\r\n", parent.id); + result.push_repeat(arena, ' ', depth * 2); + arena_write_fmt!(arena, result, " parent: {:016x}\r\n", parent.id); } - result.push_repeat(' ', depth * 2); - _ = write!( + result.push_repeat(arena, ' ', depth * 2); + arena_write_fmt!( + arena, result, " intrinsic: {{{}, {}}}\r\n", - node.intrinsic_size.width, node.intrinsic_size.height + node.intrinsic_size.width, + node.intrinsic_size.height ); - result.push_repeat(' ', depth * 2); - _ = write!( + result.push_repeat(arena, ' ', depth * 2); + arena_write_fmt!( + arena, result, " outer: {{{}, {}, {}, {}}}\r\n", - node.outer.left, node.outer.top, node.outer.right, node.outer.bottom + node.outer.left, + node.outer.top, + node.outer.right, + 
node.outer.bottom ); - result.push_repeat(' ', depth * 2); - _ = write!( + result.push_repeat(arena, ' ', depth * 2); + arena_write_fmt!( + arena, result, " inner: {{{}, {}, {}, {}}}\r\n", - node.inner.left, node.inner.top, node.inner.right, node.inner.bottom + node.inner.left, + node.inner.top, + node.inner.right, + node.inner.bottom ); if node.attributes.bordered { - result.push_repeat(' ', depth * 2); - result.push_str(" bordered: true\r\n"); + result.push_repeat(arena, ' ', depth * 2); + result.push_str(arena, " bordered: true\r\n"); } if node.attributes.bg.to_ne() != 0 { - result.push_repeat(' ', depth * 2); - _ = write!(result, " bg: {:?}\r\n", node.attributes.bg); + result.push_repeat(arena, ' ', depth * 2); + arena_write_fmt!(arena, result, " bg: {:?}\r\n", node.attributes.bg); } if node.attributes.fg.to_ne() != 0 { - result.push_repeat(' ', depth * 2); - _ = write!(result, " fg: {:?}\r\n", node.attributes.fg); + result.push_repeat(arena, ' ', depth * 2); + arena_write_fmt!(arena, result, " fg: {:?}\r\n", node.attributes.fg); } if self.is_node_focused(node.id) { - result.push_repeat(' ', depth * 2); - result.push_str(" focused: true\r\n"); + result.push_repeat(arena, ' ', depth * 2); + result.push_str(arena, " focused: true\r\n"); } match &node.content { NodeContent::Text(content) => { - result.push_repeat(' ', depth * 2); - _ = write!(result, " text: \"{}\"\r\n", &content.text); + result.push_repeat(arena, ' ', depth * 2); + arena_write_fmt!( + arena, + result, + " text: \"{}\"\r\n", + &content.text + ); } NodeContent::Textarea(content) => { let tb = content.buffer.borrow(); let tb = &*tb; - result.push_repeat(' ', depth * 2); - _ = write!(result, " textarea: {tb:p}\r\n"); + result.push_repeat(arena, ' ', depth * 2); + arena_write_fmt!(arena, result, " textarea: {tb:p}\r\n"); } NodeContent::Scrollarea(..) 
=> { - result.push_repeat(' ', depth * 2); - result.push_str(" scrollable: true\r\n"); + result.push_repeat(arena, ' ', depth * 2); + result.push_str(arena, " scrollable: true\r\n"); } _ => {} } @@ -1746,7 +1779,7 @@ impl<'a> Context<'a, '_> { let mut last_node = self.tree.last_node.borrow_mut(); let title = if title.is_empty() { - ArenaString::new_in(self.arena()) + BString::empty() } else { arena_format!(self.arena(), " {} ", title) }; @@ -1778,7 +1811,7 @@ impl<'a> Context<'a, '_> { let mut last_node = self.tree.last_node.borrow_mut(); last_node.content = NodeContent::Table(TableContent { - columns: Vec::new_in(self.arena()), + columns: BVec::empty(), cell_gap: Default::default(), }); } @@ -1789,7 +1822,7 @@ impl<'a> Context<'a, '_> { let mut last_node = self.tree.last_node.borrow_mut(); if let NodeContent::Table(spec) = &mut last_node.content { spec.columns.clear(); - spec.columns.extend_from_slice(columns); + spec.columns.extend_from_slice(self.arena(), columns); } else { debug_assert!(false); } @@ -1936,8 +1969,8 @@ impl<'a> Context<'a, '_> { pub fn styled_label_begin(&mut self, classname: &'static str) { self.block_begin(classname); self.tree.last_node.borrow_mut().content = NodeContent::Text(TextContent { - text: ArenaString::new_in(self.arena()), - chunks: Vec::with_capacity_in(4, self.arena()), + text: BString::empty(), + chunks: BVec::empty(), overflow: Overflow::Clip, }); } @@ -1951,11 +1984,10 @@ impl<'a> Context<'a, '_> { let last = content.chunks.last().unwrap_or(&INVALID_STYLED_TEXT_CHUNK); if last.offset != content.text.len() && last.fg != fg { - content.chunks.push(StyledTextChunk { - offset: content.text.len(), - fg, - attr: last.attr, - }); + content.chunks.push( + self.arena(), + StyledTextChunk { offset: content.text.len(), fg, attr: last.attr }, + ); } } @@ -1968,7 +2000,10 @@ impl<'a> Context<'a, '_> { let last = content.chunks.last().unwrap_or(&INVALID_STYLED_TEXT_CHUNK); if last.offset != content.text.len() && last.attr != attr { - 
content.chunks.push(StyledTextChunk { offset: content.text.len(), fg: last.fg, attr }); + content.chunks.push( + self.arena(), + StyledTextChunk { offset: content.text.len(), fg: last.fg, attr }, + ); } } @@ -1979,7 +2014,7 @@ impl<'a> Context<'a, '_> { unreachable!(); }; - content.text.push_str(text); + content.text.push_str(self.arena(), text); } /// Ends the current label block. @@ -3316,20 +3351,20 @@ impl<'a> Context<'a, '_> { fn menubar_shortcut(&mut self, shortcut: InputKey) { let shortcut_letter = shortcut.value() as u8 as char; if shortcut_letter.is_ascii_uppercase() { - let mut shortcut_text = ArenaString::new_in(self.arena()); + let mut shortcut_text = BString::empty(); if shortcut.modifiers_contains(kbmod::CTRL) { - shortcut_text.push_str(self.tui.modifier_translations.ctrl); - shortcut_text.push('+'); + shortcut_text.push_str(self.arena(), self.tui.modifier_translations.ctrl); + shortcut_text.push(self.arena(), '+'); } if shortcut.modifiers_contains(kbmod::ALT) { - shortcut_text.push_str(self.tui.modifier_translations.alt); - shortcut_text.push('+'); + shortcut_text.push_str(self.arena(), self.tui.modifier_translations.alt); + shortcut_text.push(self.arena(), '+'); } if shortcut.modifiers_contains(kbmod::SHIFT) { - shortcut_text.push_str(self.tui.modifier_translations.shift); - shortcut_text.push('+'); + shortcut_text.push_str(self.arena(), self.tui.modifier_translations.shift); + shortcut_text.push(self.arena(), '+'); } - shortcut_text.push(shortcut_letter); + shortcut_text.push(self.arena(), shortcut_letter); self.label("shortcut", &shortcut_text); } else { @@ -3650,7 +3685,7 @@ struct ListContent<'a> { /// NOTE: Must not contain items that require drop(). struct TableContent<'a> { - columns: Vec, + columns: BVec<'a, CoordType>, cell_gap: Size, } @@ -3666,8 +3701,8 @@ const INVALID_STYLED_TEXT_CHUNK: StyledTextChunk = /// NOTE: Must not contain items that require drop(). 
struct TextContent<'a> { - text: ArenaString<'a>, - chunks: Vec, + text: BString<'a>, + chunks: BVec<'a, StyledTextChunk>, overflow: Overflow, } @@ -3700,7 +3735,7 @@ enum NodeContent<'a> { #[default] None, List(ListContent<'a>), - Modal(ArenaString<'a>), // title + Modal(BString<'a>), // title Table(TableContent<'a>), Text(TextContent<'a>), Textarea(TextareaContent<'a>), @@ -3777,7 +3812,7 @@ struct Node<'a> { inner_clipped: Rect, // in screen-space, calculated during layout, restricted to the viewport } -impl Node<'_> { +impl<'a> Node<'a> { /// Given an outer rectangle (including padding and borders) of this node, /// this returns the inner rectangle (excluding padding and borders). fn outer_to_inner(&self, mut outer: Rect) -> Rect { @@ -3814,7 +3849,7 @@ impl Node<'_> { } /// Computes the intrinsic size of this node and its children. - fn compute_intrinsic_size(&mut self) { + fn compute_intrinsic_size(&mut self, arena: &'a Arena) { match &mut self.content { NodeContent::Table(spec) => { // Calculate each row's height and the maximum width of each of its columns. @@ -3824,7 +3859,7 @@ impl Node<'_> { for (column, cell) in Tree::iterate_siblings(row.children.first).enumerate() { let mut cell = cell.borrow_mut(); - cell.compute_intrinsic_size(); + cell.compute_intrinsic_size(arena); let size = cell.intrinsic_to_outer(); @@ -3838,7 +3873,7 @@ impl Node<'_> { // last column (flexible 1/1) must be 3 times as wide as the 2nd one (1/3rd). // It's not a big deal yet, because such functionality isn't needed just yet. 
if column >= spec.columns.len() { - spec.columns.push(0); + spec.columns.push(arena, 0); } spec.columns[column] = spec.columns[column].max(size.width); @@ -3884,7 +3919,7 @@ impl Node<'_> { for child in Tree::iterate_siblings(self.children.first) { let mut child = child.borrow_mut(); - child.compute_intrinsic_size(); + child.compute_intrinsic_size(arena); let size = child.intrinsic_to_outer(); max_width = max_width.max(size.width); diff --git a/crates/stdext/src/alloc.rs b/crates/stdext/src/alloc.rs new file mode 100644 index 000000000000..2fef89c6164b --- /dev/null +++ b/crates/stdext/src/alloc.rs @@ -0,0 +1,55 @@ +use std::alloc::{Layout, alloc, dealloc, handle_alloc_error, realloc}; +use std::ptr::NonNull; + +pub trait Allocator { + /// # Safety + /// + /// It's an allocator trait. It's unsafe. + /// Note that `old_ptr` may be invalid if `old_size` is 0. + unsafe fn realloc( + &self, + old_ptr: NonNull, + old_size: usize, + new_size: usize, + align: usize, + ) -> NonNull<[u8]>; + + /// # Safety + /// + /// Naturally, `ptr` must be valid. 
+ unsafe fn dealloc(&self, ptr: NonNull, size: usize, align: usize); +} + +pub struct GlobalAllocator; + +impl Allocator for GlobalAllocator { + unsafe fn realloc( + &self, + old_ptr: NonNull, + old_size: usize, + new_size: usize, + align: usize, + ) -> NonNull<[u8]> { + unsafe { + let new_ptr = if old_size == 0 { + let layout = Layout::from_size_align_unchecked(new_size, align); + alloc(layout) + } else { + let layout = Layout::from_size_align_unchecked(old_size, align); + realloc(old_ptr.as_ptr(), layout, new_size) + }; + let Some(new_ptr) = NonNull::new(new_ptr) else { + let layout = Layout::from_size_align_unchecked(new_size, align); + handle_alloc_error(layout); + }; + NonNull::slice_from_raw_parts(new_ptr, new_size) + } + } + + unsafe fn dealloc(&self, ptr: NonNull, size: usize, align: usize) { + unsafe { + let layout = Layout::from_size_align_unchecked(size, align); + dealloc(ptr.as_ptr(), layout); + } + } +} diff --git a/crates/stdext/src/arena/debug.rs b/crates/stdext/src/arena/debug.rs index 851474e546f6..da1786437b5d 100644 --- a/crates/stdext/src/arena/debug.rs +++ b/crates/stdext/src/arena/debug.rs @@ -3,12 +3,12 @@ #![allow(clippy::missing_safety_doc, clippy::mut_from_ref)] -use std::alloc::{AllocError, Allocator, Layout}; use std::io; use std::mem::MaybeUninit; use std::ptr::NonNull; use super::release; +use crate::alloc::Allocator; /// A debug wrapper for [`release::Arena`]. /// @@ -112,45 +112,16 @@ impl Arena { } } -unsafe impl Allocator for Arena { - fn allocate(&self, layout: Layout) -> Result, AllocError> { - Ok(self.delegate_target().alloc_raw(layout.size(), layout.align())) - } - - fn allocate_zeroed(&self, layout: Layout) -> Result, AllocError> { - self.delegate_target().allocate_zeroed(layout) - } - - // While it is possible to shrink the tail end of the arena, it is - // not very useful given the existence of scoped scratch arenas. 
- unsafe fn deallocate(&self, ptr: NonNull, layout: Layout) { - unsafe { self.delegate_target().deallocate(ptr, layout) } - } - - unsafe fn grow( +impl Allocator for Arena { + unsafe fn realloc( &self, - ptr: NonNull, - old_layout: Layout, - new_layout: Layout, - ) -> Result, AllocError> { - unsafe { self.delegate_target().grow(ptr, old_layout, new_layout) } + old_ptr: NonNull, + old_size: usize, + new_size: usize, + align: usize, + ) -> NonNull<[u8]> { + unsafe { self.delegate_target().realloc(old_ptr, old_size, new_size, align) } } - unsafe fn grow_zeroed( - &self, - ptr: NonNull, - old_layout: Layout, - new_layout: Layout, - ) -> Result, AllocError> { - unsafe { self.delegate_target().grow_zeroed(ptr, old_layout, new_layout) } - } - - unsafe fn shrink( - &self, - ptr: NonNull, - old_layout: Layout, - new_layout: Layout, - ) -> Result, AllocError> { - unsafe { self.delegate_target().shrink(ptr, old_layout, new_layout) } - } + unsafe fn dealloc(&self, _ptr: NonNull, _size: usize, _align: usize) {} } diff --git a/crates/stdext/src/arena/fs.rs b/crates/stdext/src/arena/fs.rs index 58396233c804..9bdecc5e8966 100644 --- a/crates/stdext/src/arena/fs.rs +++ b/crates/stdext/src/arena/fs.rs @@ -4,19 +4,20 @@ use std::mem::MaybeUninit; use std::path::Path; use std::slice::from_raw_parts_mut; -use super::{Arena, ArenaString}; +use crate::arena::Arena; +use crate::collections::{BString, BVec}; -pub fn read_to_vec>(arena: &Arena, path: P) -> io::Result> { - fn inner<'a>(arena: &'a Arena, path: &Path) -> io::Result> { +pub fn read_to_vec>(arena: &'_ Arena, path: P) -> io::Result> { + fn inner<'a>(arena: &'a Arena, path: &Path) -> io::Result> { let mut file = File::open(path)?; - let mut vec = Vec::new_in(arena); + let mut vec = BVec::empty(); const MIN_SIZE: usize = 1024; const MAX_SIZE: usize = 128 * 1024; let mut buf_size = MIN_SIZE; loop { - vec.reserve(buf_size); + vec.reserve(arena, buf_size); let spare = vec.spare_capacity_mut(); let to_read = spare.len().min(buf_size); 
@@ -36,10 +37,10 @@ pub fn read_to_vec>(arena: &Arena, path: P) -> io::Result>(arena: &Arena, path: P) -> io::Result> { - fn inner<'a>(arena: &'a Arena, path: &Path) -> io::Result> { +pub fn read_to_string>(arena: &Arena, path: P) -> io::Result> { + fn inner<'a>(arena: &'a Arena, path: &Path) -> io::Result> { let vec = read_to_vec(arena, path)?; - ArenaString::from_utf8(vec).map_err(|_| { + BString::from_utf8(vec).map_err(|_| { io::Error::new(io::ErrorKind::InvalidData, "stream did not contain valid UTF-8") }) } diff --git a/crates/stdext/src/arena/mod.rs b/crates/stdext/src/arena/mod.rs index 2a76e210b4e4..e946a6e68f17 100644 --- a/crates/stdext/src/arena/mod.rs +++ b/crates/stdext/src/arena/mod.rs @@ -8,7 +8,6 @@ mod debug; mod fs; mod release; mod scratch; -mod string; #[cfg(all(not(doc), debug_assertions))] pub use self::debug::*; @@ -16,4 +15,21 @@ pub use self::fs::*; #[cfg(any(doc, not(debug_assertions)))] pub use self::release::*; pub use self::scratch::*; -pub use self::string::*; + +#[macro_export] +macro_rules! arena_format { + ($arena:expr, $($arg:tt)*) => {{ + use std::fmt::Write as _; + let mut output = ::stdext::collections::BString::empty(); + let _ = output.formatter($arena).write_fmt(format_args!($($arg)*)); + output + }} +} + +#[macro_export] +macro_rules! 
arena_write_fmt { + ($arena:expr, $output:expr, $($arg:tt)*) => {{ + use std::fmt::Write as _; + let _ = $output.formatter($arena).write_fmt(format_args!($($arg)*)); + }} +} diff --git a/crates/stdext/src/arena/release.rs b/crates/stdext/src/arena/release.rs index 53fea9505adb..0446d5b69d5d 100644 --- a/crates/stdext/src/arena/release.rs +++ b/crates/stdext/src/arena/release.rs @@ -3,13 +3,13 @@ #![allow(clippy::mut_from_ref)] -use std::alloc::{AllocError, Allocator, Layout}; use std::cell::Cell; use std::mem::MaybeUninit; use std::ptr::{self, NonNull}; use std::{io, mem, slice}; -use crate::{cold_path, sys}; +use crate::alloc::Allocator; +use crate::sys; #[cfg(target_pointer_width = "32")] const ALLOC_CHUNK_SIZE: usize = 32 * 1024; @@ -189,98 +189,35 @@ impl Default for Arena { } } -unsafe impl Allocator for Arena { - fn allocate(&self, layout: Layout) -> Result, AllocError> { - Ok(self.alloc_raw(layout.size(), layout.align())) - } - - fn allocate_zeroed(&self, layout: Layout) -> Result, AllocError> { - let p = self.alloc_raw(layout.size(), layout.align()); - unsafe { p.cast::().as_ptr().write_bytes(0, p.len()) } - Ok(p) - } - - // While it is possible to shrink the tail end of the arena, it is - // not very useful given the existence of scoped scratch arenas. - unsafe fn deallocate(&self, _: NonNull, _: Layout) {} - - unsafe fn grow( +impl Allocator for Arena { + unsafe fn realloc( &self, - ptr: NonNull, - old_layout: Layout, - new_layout: Layout, - ) -> Result, AllocError> { - debug_assert!(new_layout.size() >= old_layout.size()); - debug_assert!(new_layout.align() <= old_layout.align()); - - let new_ptr; - - // Growing the given area is possible if it is at the end of the arena. 
- if unsafe { ptr.add(old_layout.size()) == self.base.add(self.offset.get()) } { - new_ptr = ptr; - let delta = new_layout.size() - old_layout.size(); - // Assuming that the given ptr/length area is at the end of the arena, - // we can just push more memory to the end of the arena to grow it. - self.alloc_raw(delta, 1); - } else { - cold_path(); - - new_ptr = self.allocate(new_layout)?.cast(); - - // SAFETY: It's weird to me that this doesn't assert new_layout.size() >= old_layout.size(), - // but neither does the stdlib code at the time of writing. - // So, assuming that is not needed, this code is safe since it just copies the old data over. + old_ptr: NonNull, + old_size: usize, + new_size: usize, + align: usize, + ) -> NonNull<[u8]> { + if unsafe { old_ptr.add(old_size) == self.base.add(self.offset.get()) } { + // Check if it's the last allocation we made. + // If so, we can grow/shrink it in place without copying. + if new_size > old_size { + self.alloc_raw(new_size - old_size, align); + } else { + self.offset.set(self.offset.get() - old_size + new_size); + } + NonNull::slice_from_raw_parts(old_ptr, new_size) + } else if new_size > old_size { + // Otherwise, we have to allocate a new area and copy it over. unsafe { - ptr::copy_nonoverlapping(ptr.as_ptr(), new_ptr.as_ptr(), old_layout.size()); - self.deallocate(ptr, old_layout); + let new_ptr = self.alloc_raw(new_size, align); + ptr::copy_nonoverlapping(old_ptr.as_ptr(), new_ptr.as_ptr() as *mut _, old_size); + new_ptr } - } - - Ok(NonNull::slice_from_raw_parts(new_ptr, new_layout.size())) - } - - unsafe fn grow_zeroed( - &self, - ptr: NonNull, - old_layout: Layout, - new_layout: Layout, - ) -> Result, AllocError> { - unsafe { - // SAFETY: Same as grow(). - let ptr = self.grow(ptr, old_layout, new_layout)?; - - // SAFETY: At this point, `ptr` must be valid for `new_layout.size()` bytes, - // allowing us to safely zero out the delta since `old_layout.size()`. 
- ptr.cast::() - .add(old_layout.size()) - .write_bytes(0, new_layout.size() - old_layout.size()); - - Ok(ptr) - } - } - - unsafe fn shrink( - &self, - ptr: NonNull, - old_layout: Layout, - new_layout: Layout, - ) -> Result, AllocError> { - debug_assert!(new_layout.size() <= old_layout.size()); - debug_assert!(new_layout.align() <= old_layout.align()); - - let mut len = old_layout.size(); - - // Shrinking the given area is possible if it is at the end of the arena. - if unsafe { ptr.add(len) == self.base.add(self.offset.get()) } { - self.offset.set(self.offset.get() - len + new_layout.size()); - len = new_layout.size(); } else { - debug_assert!( - false, - "Did you call shrink_to_fit()? Only the last allocation can be shrunk!" - ); + debug_assert!(false, "only the last allocation can be shrunk"); + NonNull::slice_from_raw_parts(old_ptr, old_size) } - - Ok(NonNull::slice_from_raw_parts(ptr, len)) } + + unsafe fn dealloc(&self, _ptr: NonNull, _size: usize, _align: usize) {} } diff --git a/crates/stdext/src/arena/scratch.rs b/crates/stdext/src/arena/scratch.rs index 036a8ce2b34f..18fd129b5139 100644 --- a/crates/stdext/src/arena/scratch.rs +++ b/crates/stdext/src/arena/scratch.rs @@ -2,6 +2,8 @@ // Licensed under the MIT License. 
use std::io; +#[cfg(debug_assertions)] +use std::marker::PhantomData; use std::ops::Deref; #[cfg(debug_assertions)] @@ -16,7 +18,7 @@ use crate::helpers::*; pub struct ScratchArena<'a> { arena: debug::Arena, offset: usize, - _phantom: std::marker::PhantomData<&'a ()>, + _phantom: PhantomData<&'a ()>, } #[cfg(not(debug_assertions))] @@ -29,7 +31,7 @@ pub struct ScratchArena<'a> { impl<'a> ScratchArena<'a> { fn new(arena: &'a release::Arena) -> Self { let offset = arena.offset(); - ScratchArena { arena: Arena::delegated(arena), _phantom: std::marker::PhantomData, offset } + ScratchArena { arena: Arena::delegated(arena), _phantom: PhantomData, offset } } } diff --git a/crates/stdext/src/arena/string.rs b/crates/stdext/src/arena/string.rs deleted file mode 100644 index 1322fb154175..000000000000 --- a/crates/stdext/src/arena/string.rs +++ /dev/null @@ -1,315 +0,0 @@ -// Copyright (c) Microsoft Corporation. -// Licensed under the MIT License. - -use std::fmt; -use std::ops::{Bound, Deref, DerefMut, RangeBounds}; -use std::str::Utf8Error; - -use super::Arena; -use crate::helpers::*; - -/// A custom string type, because `std` lacks allocator support for [`String`]. -/// -/// To keep things simple, this one is hardcoded to [`Arena`]. -#[derive(Clone)] -pub struct ArenaString<'a> { - vec: Vec, -} - -impl<'a> ArenaString<'a> { - /// Creates a new [`ArenaString`] in the given arena. - #[must_use] - pub const fn new_in(arena: &'a Arena) -> Self { - Self { vec: Vec::new_in(arena) } - } - - #[must_use] - pub fn with_capacity_in(capacity: usize, arena: &'a Arena) -> Self { - Self { vec: Vec::with_capacity_in(capacity, arena) } - } - - /// Turns a [`str`] into an [`ArenaString`]. 
- #[must_use] - pub fn from_str(arena: &'a Arena, s: &str) -> Self { - let mut res = Self::new_in(arena); - res.push_str(s); - res - } - - pub fn from_utf8(vec: Vec) -> Result { - str::from_utf8(&vec)?; - Ok(Self { vec }) - } - - /// It says right here that you checked if `bytes` is valid UTF-8 - /// and you are sure it is. Presto! Here's an `ArenaString`! - /// - /// # Safety - /// - /// You fool! It says "unchecked" right there. Now the house is burning. - #[inline] - #[must_use] - pub unsafe fn from_utf8_unchecked(bytes: Vec) -> Self { - Self { vec: bytes } - } - - /// Checks whether `text` contains only valid UTF-8. - /// If the entire string is valid, it returns `Ok(text)`. - /// Otherwise, it returns `Err(ArenaString)` with all invalid sequences replaced with U+FFFD. - pub fn from_utf8_lossy<'s>(arena: &'a Arena, text: &'s [u8]) -> Result<&'s str, Self> { - let mut iter = text.utf8_chunks(); - let Some(mut chunk) = iter.next() else { - return Ok(""); - }; - - let valid = chunk.valid(); - if chunk.invalid().is_empty() { - debug_assert_eq!(valid.len(), text.len()); - return Ok(unsafe { str::from_utf8_unchecked(text) }); - } - - const REPLACEMENT: &str = "\u{FFFD}"; - - let mut res = Self::new_in(arena); - res.reserve(text.len()); - - loop { - res.push_str(chunk.valid()); - if !chunk.invalid().is_empty() { - res.push_str(REPLACEMENT); - } - chunk = match iter.next() { - Some(chunk) => chunk, - None => break, - }; - } - - Err(res) - } - - /// Turns a [`Vec`] into an [`ArenaString`], replacing invalid UTF-8 sequences with U+FFFD. - #[must_use] - pub fn from_utf8_lossy_owned(v: Vec) -> Self { - match Self::from_utf8_lossy(v.allocator(), &v) { - Ok(..) => unsafe { Self::from_utf8_unchecked(v) }, - Err(s) => s, - } - } - - #[must_use] - pub fn from_iter>(arena: &'a Arena, iter: T) -> Self { - let mut s = Self::new_in(arena); - s.extend(iter); - s - } - - /// It's empty. - pub fn is_empty(&self) -> bool { - self.vec.is_empty() - } - - /// It's lengthy. 
- pub fn len(&self) -> usize { - self.vec.len() - } - - /// It's capacatity. - pub fn capacity(&self) -> usize { - self.vec.capacity() - } - - /// It's a [`String`], now it's a [`str`]. Wow! - pub fn as_str(&self) -> &str { - unsafe { str::from_utf8_unchecked(self.vec.as_slice()) } - } - - /// It's a [`String`], now it's a [`str`]. And it's mutable! WOW! - pub fn as_mut_str(&mut self) -> &mut str { - unsafe { str::from_utf8_unchecked_mut(self.vec.as_mut_slice()) } - } - - /// Now it's bytes! - pub fn as_bytes(&self) -> &[u8] { - self.vec.as_slice() - } - - pub fn leak(self) -> &'a str { - unsafe { str::from_utf8_unchecked(self.vec.leak()) } - } - - /// Returns a mutable reference to the contents of this `String`. - /// - /// # Safety - /// - /// The underlying `&mut Vec` allows writing bytes which are not valid UTF-8. - pub unsafe fn as_mut_vec(&mut self) -> &mut Vec { - &mut self.vec - } - - /// Reserves *additional* memory. For you old folks out there (totally not me), - /// this is different from C++'s `reserve` which reserves a total size. - pub fn reserve(&mut self, additional: usize) { - self.vec.reserve(additional) - } - - /// Just like [`ArenaString::reserve`], but it doesn't overallocate. - pub fn reserve_exact(&mut self, additional: usize) { - self.vec.reserve_exact(additional) - } - - /// Now it's small! Alarming! - /// - /// *Do not* call this unless this string is the last thing on the arena. - /// Arenas are stacks, they can't deallocate what's in the middle. - pub fn shrink_to_fit(&mut self) { - self.vec.shrink_to_fit() - } - - /// To no surprise, this clears the string. - pub fn clear(&mut self) { - self.vec.clear() - } - - /// Append some text. - pub fn push_str(&mut self, string: &str) { - self.vec.extend_from_slice(string.as_bytes()) - } - - /// Append a single character. 
- #[inline] - pub fn push(&mut self, ch: char) { - match ch.len_utf8() { - 1 => self.vec.push(ch as u8), - _ => self.vec.extend_from_slice(ch.encode_utf8(&mut [0; 4]).as_bytes()), - } - } - - /// Same as `push(char)` but with a specified number of character copies. - /// Shockingly absent from the standard library. - pub fn push_repeat(&mut self, ch: char, total_copies: usize) { - if total_copies == 0 { - return; - } - - let buf = unsafe { self.as_mut_vec() }; - - if ch.is_ascii() { - // Compiles down to `memset()`. - buf.extend(std::iter::repeat_n(ch as u8, total_copies)); - } else { - // Implements efficient string padding using quadratic duplication. - let mut utf8_buf = [0; 4]; - let utf8 = ch.encode_utf8(&mut utf8_buf).as_bytes(); - let initial_len = buf.len(); - let added_len = utf8.len() * total_copies; - let final_len = initial_len + added_len; - - buf.reserve(added_len); - buf.extend_from_slice(utf8); - - while buf.len() != final_len { - let end = (final_len - buf.len() + initial_len).min(buf.len()); - buf.extend_from_within(initial_len..end); - } - } - } - - /// Replaces a range of characters with a new string. - pub fn replace_range>(&mut self, range: R, replace_with: &str) { - match range.start_bound() { - Bound::Included(&n) => assert!(self.is_char_boundary(n)), - Bound::Excluded(&n) => assert!(self.is_char_boundary(n + 1)), - Bound::Unbounded => {} - }; - match range.end_bound() { - Bound::Included(&n) => assert!(self.is_char_boundary(n + 1)), - Bound::Excluded(&n) => assert!(self.is_char_boundary(n)), - Bound::Unbounded => {} - }; - unsafe { self.as_mut_vec() }.replace_range(range, replace_with.as_bytes()); - } - - /// Finds `old` in the string and replaces it with `new`. - /// Only performs one replacement. 
- pub fn replace_once_in_place(&mut self, old: &str, new: &str) { - if let Some(beg) = self.find(old) { - unsafe { self.as_mut_vec() }.replace_range(beg..beg + old.len(), new.as_bytes()); - } - } -} - -impl fmt::Debug for ArenaString<'_> { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - fmt::Debug::fmt(&**self, f) - } -} - -impl PartialEq> for ArenaString<'_> { - fn eq(&self, other: &ArenaString) -> bool { - self.as_str() == other.as_str() - } -} - -impl PartialEq<&str> for ArenaString<'_> { - fn eq(&self, other: &&str) -> bool { - self.as_str() == *other - } -} - -impl Eq for ArenaString<'_> {} - -impl Deref for ArenaString<'_> { - type Target = str; - - fn deref(&self) -> &Self::Target { - self.as_str() - } -} - -impl DerefMut for ArenaString<'_> { - fn deref_mut(&mut self) -> &mut Self::Target { - self.as_mut_str() - } -} - -impl fmt::Display for ArenaString<'_> { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - fmt::Display::fmt(&**self, f) - } -} - -impl fmt::Write for ArenaString<'_> { - #[inline] - fn write_str(&mut self, s: &str) -> fmt::Result { - self.push_str(s); - Ok(()) - } - - #[inline] - fn write_char(&mut self, c: char) -> fmt::Result { - self.push(c); - Ok(()) - } -} - -impl Extend for ArenaString<'_> { - fn extend>(&mut self, iter: I) { - let iterator = iter.into_iter(); - let (lower_bound, _) = iterator.size_hint(); - self.reserve(lower_bound); - iterator.for_each(move |c| self.push(c)); - } - - // TODO: This is where I'd put `extend_one` and `extend_reserve` impls, *but as always*, - // essential stdlib functions are unstable and that means we can't have them. -} - -#[macro_export] -macro_rules! 
arena_format { - ($arena:expr, $($arg:tt)*) => {{ - use std::fmt::Write as _; - let mut output = stdext::arena::ArenaString::new_in($arena); - output.write_fmt(format_args!($($arg)*)).unwrap(); - output - }} -} diff --git a/crates/stdext/src/collections/mod.rs b/crates/stdext/src/collections/mod.rs new file mode 100644 index 000000000000..203e6ce4d7af --- /dev/null +++ b/crates/stdext/src/collections/mod.rs @@ -0,0 +1,5 @@ +mod string; +mod vec; + +pub use string::{BString, BStringFormatter}; +pub use vec::BVec; diff --git a/crates/stdext/src/collections/string.rs b/crates/stdext/src/collections/string.rs new file mode 100644 index 000000000000..7ba8023d8f9e --- /dev/null +++ b/crates/stdext/src/collections/string.rs @@ -0,0 +1,359 @@ +use std::fmt::{self}; +use std::ops::{Bound, Deref, DerefMut, RangeBounds}; +use std::slice; +use std::str::Utf8Error; + +use crate::alloc::Allocator; +use crate::cold_path; +use crate::collections::BVec; + +/// Like a `String` but on borrowed memory. Built on top of [`BVec`]. +pub struct BString<'a> { + vec: BVec<'a, u8>, +} + +impl<'a> BString<'a> { + /// The label on the tin says "empty". You open it. It's empty. + #[inline] + pub const fn empty() -> Self { + Self { vec: BVec::empty() } + } + + /// See [`BVec::from_std_vec()`]. + pub fn from_std_string(str: String) -> Self { + Self { vec: BVec::from_std_vec(str.into_bytes()) } + } + + /// See [`BVec::into_std_vec()`]. + pub fn into_std_string(self) -> String { + unsafe { String::from_utf8_unchecked(self.vec.into_std_vec()) } + } + + /// Validates and wraps a byte vec as UTF-8. + pub fn from_utf8(vec: BVec<'a, u8>) -> Result { + str::from_utf8(&vec)?; + Ok(Self { vec }) + } + + /// Validates UTF-8, replacing invalid sequences with U+FFFD. 
+ pub fn from_utf8_lossy(alloc: &'a dyn Allocator, vec: BVec<'a, u8>) -> Self { + let mut iter = vec.utf8_chunks(); + + if let Some(mut chunk) = iter.next() + && !chunk.invalid().is_empty() + { + // We only need to create a copy if the input is non-empty + // and contains at least some invalid UTF-8. + cold_path(); + + let mut res = Self::empty(); + res.reserve(alloc, vec.len()); + + loop { + res.push_str(alloc, chunk.valid()); + if !chunk.invalid().is_empty() { + res.push_str(alloc, "\u{FFFD}"); + } + chunk = match iter.next() { + Some(chunk) => chunk, + None => break, + }; + } + + res + } else { + // Otherwise, we can just return the `vec` as-is. + Self { vec } + } + } + + /// Wraps a byte vec as UTF-8 without validating it. + /// + /// # Safety + /// + /// The bytes in `vec` must be valid UTF-8. + #[inline] + pub unsafe fn from_utf8_unchecked(vec: BVec<'a, u8>) -> Self { + Self { vec } + } + + /// Copies `&str` into the allocator. + pub fn from_str(alloc: &'a dyn Allocator, s: &str) -> Self { + let mut res = Self::empty(); + res.push_str(alloc, s); + res + } + + /// Decodes UTF-16, replacing unpaired surrogates with U+FFFD. + pub fn from_utf16_lossy(alloc: &'a dyn Allocator, string: &[u16]) -> Self { + let mut res = Self::empty(); + res.push_utf16_lossy(alloc, string); + res + } + + /// Length in bytes, not characters. + #[inline] + pub fn len(&self) -> usize { + self.vec.len() + } + + /// Total byte capacity of the backing buffer. + #[inline] + pub fn capacity(&self) -> usize { + self.vec.capacity() + } + + /// True if the string is empty. + #[inline] + pub fn is_empty(&self) -> bool { + self.vec.is_empty() + } + + /// True if if the buffer is full. + #[inline] + pub fn is_full(&self) -> bool { + self.vec.is_full() + } + + /// The raw UTF-8 bytes. + #[inline] + pub fn as_bytes(&self) -> &[u8] { + self.vec.as_slice() + } + + /// View as a `&str`. 
+ #[inline] + pub fn as_str(&self) -> &str { + unsafe { str::from_utf8_unchecked(self.vec.as_slice()) } + } + + /// View as a `&mut str`. + #[inline] + pub fn as_mut_str(&mut self) -> &mut str { + unsafe { str::from_utf8_unchecked_mut(self.vec.as_mut_slice()) } + } + + /// # Safety + /// + /// The underlying `&mut Vec` allows writing bytes which are not valid UTF-8. + #[inline] + pub unsafe fn as_mut_vec(&mut self) -> &mut BVec<'a, u8> { + &mut self.vec + } + + /// Consume the string, returning a `&mut str` that lives as long as the borrowed memory. + #[inline] + pub fn leak(self) -> &'a mut str { + unsafe { str::from_utf8_unchecked_mut(self.vec.leak()) } + } + + /// Ensures space for at least `additional` more bytes, with amortized growth. + #[inline] + pub fn reserve(&mut self, alloc: &'a dyn Allocator, additional: usize) { + self.vec.reserve(alloc, additional); + } + + /// Ensures space for at least `additional` more bytes, without over-allocating. + #[inline] + pub fn reserve_exact(&mut self, arena: &'a dyn Allocator, additional: usize) { + self.vec.reserve_exact(arena, additional); + } + + /// Appends a single `char`, encoding it as UTF-8. + pub fn push(&mut self, alloc: &'a dyn Allocator, ch: char) { + self.reserve(alloc, 4); + unsafe { + let len = self.vec.len(); + let dst = self.vec.as_mut_ptr().add(len); + let add = ch.encode_utf8(slice::from_raw_parts_mut(dst, 4)).len(); + self.vec.set_len(len + add); + } + } + + /// Empties the string. The allocation is kept. + pub fn clear(&mut self) { + self.vec.clear(); + } + + /// Returns a [`BorrowedStringFormatter`] pairing this string with an allocator, + /// enabling use with `write!` and `fmt::Write`. + pub fn formatter(&mut self, alloc: &'a A) -> BStringFormatter<'_, 'a, A> + where + A: Allocator, + { + BStringFormatter { string: self, alloc } + } + + /// Appends a `&str`. 
+ pub fn push_str(&mut self, alloc: &'a dyn Allocator, string: &str) { + self.vec.extend_from_slice(alloc, string.as_bytes()); + } + + /// Appends a UTF-16 slice, replacing unpaired surrogates with U+FFFD. + pub fn push_utf16_lossy(&mut self, alloc: &'a dyn Allocator, string: &[u16]) { + self.extend( + alloc, + char::decode_utf16(string.iter().cloned()) + .map(|r| r.unwrap_or(char::REPLACEMENT_CHARACTER)), + ); + } + + /// Same as `push(char)` but with a specified number of character copies. + /// Shockingly absent from the standard library. + pub fn push_repeat(&mut self, alloc: &'a dyn Allocator, ch: char, total_copies: usize) { + if total_copies == 0 { + return; + } + + let buf = unsafe { self.as_mut_vec() }; + + if ch.is_ascii() { + // Compiles down to `memset()`. + buf.push_repeat(alloc, ch as u8, total_copies); + } else { + // Implements efficient string padding using quadratic duplication. + let mut utf8_buf = [0; 4]; + let utf8 = ch.encode_utf8(&mut utf8_buf).as_bytes(); + let initial_len = buf.len(); + let added_len = utf8.len() * total_copies; + let final_len = initial_len + added_len; + + buf.reserve(alloc, added_len); + buf.extend_from_slice(alloc, utf8); + + while buf.len() != final_len { + let end = (final_len - buf.len() + initial_len).min(buf.len()); + buf.extend_from_within(alloc, initial_len..end); + } + } + } + + /// Appends each `char` from the iterator. + pub fn extend(&mut self, alloc: &'a dyn Allocator, iter: I) + where + I: IntoIterator, + { + let iterator = iter.into_iter(); + let (lower_bound, _) = iterator.size_hint(); + self.reserve(alloc, lower_bound); + iterator.for_each(move |c| self.push(alloc, c)); + } + + /// Replaces a range of characters with a new string. 
+ pub fn replace_range>( + &mut self, + alloc: &'a dyn Allocator, + range: R, + replace_with: &str, + ) { + match range.start_bound() { + Bound::Included(&n) => assert!(self.is_char_boundary(n)), + Bound::Excluded(&n) => assert!(self.is_char_boundary(n + 1)), + Bound::Unbounded => {} + }; + match range.end_bound() { + Bound::Included(&n) => assert!(self.is_char_boundary(n + 1)), + Bound::Excluded(&n) => assert!(self.is_char_boundary(n)), + Bound::Unbounded => {} + }; + unsafe { self.as_mut_vec() }.replace_range(alloc, range, replace_with.as_bytes()); + } + + /// Finds `old` in the string and replaces it with `new`. + /// Only performs one replacement. + pub fn replace_once_in_place(&mut self, alloc: &'a dyn Allocator, old: &str, new: &str) { + if let Some(beg) = self.find(old) { + unsafe { self.as_mut_vec().replace_range(alloc, beg..beg + old.len(), new.as_bytes()) }; + } + } +} + +impl Default for BString<'_> { + fn default() -> Self { + Self::empty() + } +} + +impl Deref for BString<'_> { + type Target = str; + + #[inline] + fn deref(&self) -> &str { + self.as_str() + } +} + +impl DerefMut for BString<'_> { + #[inline] + fn deref_mut(&mut self) -> &mut str { + self.as_mut_str() + } +} + +impl PartialEq> for BString<'_> { + #[inline] + fn eq(&self, other: &BString) -> bool { + self.as_str() == other.as_str() + } +} + +impl Eq for BString<'_> {} + +impl PartialEq<&str> for BString<'_> { + #[inline] + fn eq(&self, other: &&str) -> bool { + self.as_str() == *other + } +} + +impl PartialOrd for BString<'_> { + #[inline] + fn partial_cmp(&self, other: &BString) -> Option { + Some(self.cmp(other)) + } +} + +impl Ord for BString<'_> { + #[inline] + fn cmp(&self, other: &BString) -> std::cmp::Ordering { + self.as_str().cmp(other.as_str()) + } +} + +impl fmt::Debug for BString<'_> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt::Debug::fmt(self.as_str(), f) + } +} + +impl fmt::Display for BString<'_> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> 
fmt::Result { + fmt::Display::fmt(self.as_str(), f) + } +} + +/// Pairs a [`BString`] with an allocator so you can use `write!` on it. +// NOTE: This struct uses a generic allocator, because I found that it shrinks the binary by 3KB somehow. +// I never investigated why that is, or what the impact of that is, but it can't be good. +// It does kind of make sense though, since this struct is generally temporary only. +pub struct BStringFormatter<'s, 'a, A> { + string: &'s mut BString<'a>, + alloc: &'a A, +} + +impl fmt::Write for BStringFormatter<'_, '_, A> +where + A: Allocator, +{ + #[inline] + fn write_str(&mut self, s: &str) -> fmt::Result { + self.string.push_str(self.alloc, s); + Ok(()) + } + + #[inline] + fn write_char(&mut self, c: char) -> fmt::Result { + self.string.push(self.alloc, c); + Ok(()) + } +} diff --git a/crates/stdext/src/collections/vec.rs b/crates/stdext/src/collections/vec.rs new file mode 100644 index 000000000000..09e35153b237 --- /dev/null +++ b/crates/stdext/src/collections/vec.rs @@ -0,0 +1,616 @@ +use std::iter::FusedIterator; +use std::marker::PhantomData; +use std::mem::MaybeUninit; +use std::ops::{Bound, Deref, DerefMut, Range, RangeBounds}; +use std::ptr::{self, NonNull}; +use std::{fmt, slice}; + +use crate::alloc::Allocator; +#[cfg(debug_assertions)] +use crate::alloc::GlobalAllocator; +use crate::simd::memset; + +/// Similar to slices in Go, this slice has an additional capacity field. +/// It allows you to push more elements into the slice beyond its length, +/// up to the capacity. Like a `Vec` but on borrowed memory. +/// +/// # Safety +/// +/// The struct does not drop the elements, nor does it deallocate any memory. +pub struct BVec<'a, T> { + // NOTE: Only the first `len` elemennts are `T`, the rest are essentially `MaybeUninit`. + // This is an important distinction, due to Rust's highly nebulous rules around uninitialized memory. 
+ // You should avoid `self.ptr.as_ptr().add(self.len)` and use `self.spare_mut_ptr()` instead. + ptr: NonNull, + len: usize, + cap: usize, + _marker: PhantomData<&'a T>, + #[cfg(debug_assertions)] + alloc: Option<&'a dyn Allocator>, +} + +impl<'a, T> BVec<'a, T> { + /// The label on the tin says "empty". You open it. It's empty. + #[inline] + pub const fn empty() -> Self { + Self { + ptr: NonNull::dangling(), + len: 0, + cap: 0, + _marker: PhantomData, + #[cfg(debug_assertions)] + alloc: None, + } + } + + pub fn from_slice(slice: &'a mut [T]) -> Self { + let slice = NonNull::from_mut(slice); + Self { + ptr: slice.cast(), + len: slice.len(), + cap: slice.len(), + _marker: PhantomData, + #[cfg(debug_assertions)] + alloc: None, + } + } + + /// Leaks a `Vec` and turns it into a "borrowed" `BVec`. + pub fn from_std_vec(vec: Vec) -> Self { + let (ptr, len, cap) = vec.into_raw_parts(); + // A `Vec` always has a non-null pointer (it's dangling). + let ptr = unsafe { NonNull::new_unchecked(ptr) }; + Self { + ptr, + len, + cap, + _marker: PhantomData, + #[cfg(debug_assertions)] + alloc: Some(&GlobalAllocator), + } + } + + /// Under the assumption that your `BVec` uses `GlobalAlloc`, + /// this turns it back into a standard `Vec`. + /// + /// It's not marked as `unsafe`, because people count the "unsafe" keyword as a measure of safety + /// the way managers count lines of code to measure productivity. So, by not marking it as unsafe, + /// I've effectively improved the security of this project. The "Real Men of Genius" ad plays in my head. + /// + /// In all seriousness though, there are debug runtime checks. That's sufficient for my purpose. 
+ pub fn into_std_vec(self) -> Vec { + #[cfg(debug_assertions)] + debug_assert!( + self.alloc.is_none_or(|a| std::ptr::eq(a, &GlobalAllocator)), + "BVec can only be converted into Vec if it was allocated with GlobalAlloc" + ); + + unsafe { Vec::from_raw_parts(self.ptr.as_ptr(), self.len, self.cap) } + } + + /// Number of initialized elements. + #[inline] + pub fn len(&self) -> usize { + self.len + } + + /// Total number of elements the buffer can hold. + #[inline] + pub fn capacity(&self) -> usize { + self.cap + } + + /// True if there are zero elements. + #[inline] + pub fn is_empty(&self) -> bool { + self.len == 0 + } + + /// True if if the buffer is full. + #[inline] + pub fn is_full(&self) -> bool { + self.len == self.cap + } + + /// Forcibly sets the length. + /// + /// # Safety + /// + /// The first `new_len` items must be initialized. + /// Items beyond `new_len` are not dropped when you call `set_len()`. + #[inline] + pub unsafe fn set_len(&mut self, new_len: usize) { + debug_assert!(new_len <= self.cap); + self.len = new_len; + } + + /// Shortens the vector. + pub fn truncate(&mut self, len: usize) { + unsafe { + // NOTE: It's intentional that this doesn't avoid drops when `len == self.len`, + // because that would introduce a branch for the common case of `truncate(0)`. + if let Some(r) = self.len.checked_sub(len) { + let s = ptr::slice_from_raw_parts_mut(self.as_mut_ptr().add(len), r); + self.len = len; + ptr::drop_in_place(s); + } + } + } + + /// Raw pointer to the backing buffer. + #[inline] + pub fn as_ptr(&self) -> *const T { + self.ptr.as_ptr() + } + + /// Mutable raw pointer to the backing buffer. + #[inline] + pub fn as_mut_ptr(&mut self) -> *mut T { + self.ptr.as_ptr() + } + + #[inline] + fn spare_mut_ptr(&mut self) -> *mut MaybeUninit { + unsafe { (self.ptr.as_ptr() as *mut MaybeUninit).add(self.len) } + } + + /// View as a shared slice. + #[inline] + pub fn as_slice(&self) -> &[T] { + self + } + + /// View as a mutable slice. 
+ #[inline] + pub fn as_mut_slice(&mut self) -> &mut [T] { + self + } + + /// Consume the string, returning a `&mut [T]` that lives as long as the borrowed memory. + #[inline] + pub fn leak(self) -> &'a mut [T] { + unsafe { slice::from_raw_parts_mut(self.ptr.as_ptr(), self.len) } + } + + /// Drops all elements and resets length to zero. The allocation is kept. + #[inline] + pub fn clear(&mut self) { + let elems = self.as_mut_slice() as *mut _; + self.len = 0; + unsafe { ptr::drop_in_place(elems) }; + } + + /// Ensures space for at least `additional` more elements, with amortized growth. + #[inline] + pub fn reserve(&mut self, alloc: &'a dyn Allocator, additional: usize) { + if additional > self.cap - self.len { + self.grow(alloc, self.cap, additional); + } + } + + /// Ensures space for at least `additional` more elements, without over-allocating. + #[inline] + pub fn reserve_exact(&mut self, alloc: &'a dyn Allocator, additional: usize) { + if additional > self.cap - self.len { + self.grow(alloc, 0, additional); + } + } + + #[inline] + fn reserve_one(&mut self, alloc: &'a dyn Allocator) { + if self.is_full() { + self.grow(alloc, self.cap, 1); + } + } + + // NOTE: I'm using dyn(amic dispatch) to avoid monomorphization bloat and more + // importantly because I counter-intuitively found it to boost performance by +20%. 
+ #[cold] + fn grow(&mut self, alloc: &'a dyn Allocator, cap: usize, add: usize) { + debug_assert!(add > 0, "growing by zero makes no sense"); + + #[cfg(debug_assertions)] + debug_assert!( + self.alloc.is_none_or(|a| std::ptr::eq(a, alloc)), + "switching between allocators on a single BVec heavily suggests you're about to leak memory" + ); + + let new_cap = (cap * 2).max(self.len + add).max(8); + let new_ptr = unsafe { + alloc.realloc( + self.ptr.cast(), + self.cap * size_of::(), + new_cap * size_of::(), + align_of::(), + ) + }; + self.ptr = new_ptr.cast(); + self.cap = new_ptr.len() / size_of::(); + } + + /// Returns the uninitialized tail of the buffer. Fill it, then `set_len()`. + pub fn spare_capacity_mut(&mut self) -> &mut [MaybeUninit] { + unsafe { slice::from_raw_parts_mut(self.spare_mut_ptr(), self.cap - self.len) } + } + + /// Appends one element, returning a mutable reference to it. + pub fn push(&mut self, alloc: &'a dyn Allocator, value: T) -> &mut T { + self.reserve_one(alloc); + + unsafe { + let dst = self.spare_mut_ptr(); + self.len += 1; + (*dst).write(value) + } + } + + /// Append the items from the iterator `iter`. + /// + /// By assuming that your "exact size iterator" returns an *exact* size, + /// it can preallocate the memory in one go and efficiently push items. + pub fn extend(&mut self, alloc: &'a dyn Allocator, iter: I) + where + I: IntoIterator + ExactSizeIterator, + { + let len = iter.len(); + self.reserve(alloc, len); + + unsafe { + let mut dst = self.spare_mut_ptr(); + self.len += len; + for value in iter { + (*dst).write(value); + dst = dst.add(1); + } + } + } + + /// This is the bad path of `extend()`. It has a distinct name, because it makes + /// it easy to find. If you use this method, you're not writing ideal code. 
+ pub fn extend_sloppy(&mut self, alloc: &'a dyn Allocator, iter: I) + where + I: IntoIterator, + { + let iterator = iter.into_iter(); + let (lower_bound, _) = iterator.size_hint(); + self.reserve(alloc, lower_bound); + iterator.for_each(move |c| _ = self.push(alloc, c)); + } +} + +impl<'a, T: Copy> BVec<'a, T> { + /// Pushes `total_copies` copies of `value`. It's basically `memset`. + pub fn push_repeat(&mut self, alloc: &'a dyn Allocator, value: T, total_copies: usize) { + if total_copies == 0 { + return; + } + + self.reserve(alloc, total_copies); + + unsafe { + let dst = slice::from_raw_parts_mut(self.spare_mut_ptr(), total_copies); + self.len += total_copies; // Increment first, to turn memset() into a tail call + memset(dst, MaybeUninit::new(value)); + } + } + + /// Appends all elements from a slice. It's basically a `memcpy`-append. + #[allow(clippy::mut_from_ref)] + pub fn extend_from_slice(&mut self, alloc: &'a dyn Allocator, other: &[T]) { + let add = other.len(); + self.reserve(alloc, add); + + unsafe { + let dst = self.spare_mut_ptr(); + self.len += add; + ptr::copy_nonoverlapping(other.as_ptr() as *const _, dst, add); + } + } + + /// [`Self::extend_from_slice`] but for a subslice of the buffer itself. 
+ #[inline] + pub fn extend_from_within(&mut self, alloc: &'a dyn Allocator, src: R) + where + R: RangeBounds, + { + let start = match src.start_bound() { + Bound::Included(&start) => start, + Bound::Excluded(start) => start + 1, + Bound::Unbounded => 0, + }; + let end = match src.end_bound() { + Bound::Included(end) => end + 1, + Bound::Excluded(&end) => end, + Bound::Unbounded => usize::MAX, + }; + self.extend_from_within_impl(alloc, start..end); + } + + fn extend_from_within_impl(&mut self, alloc: &'a dyn Allocator, src: Range) { + let end = src.end.min(self.len); + let beg = src.start.min(end); + let add = end - beg; + + self.reserve(alloc, add); + + unsafe { + let dst = self.spare_mut_ptr(); + let src = self.ptr.as_ptr().add(beg); + self.len += add; + ptr::copy_nonoverlapping(src as *const _, dst, add); + } + } + + /// Replaces the given range with elements from `src`. Efficient `splice` for `Copy` types. + #[inline] + pub fn replace_range(&mut self, alloc: &'a dyn Allocator, range: R, src: &[T]) + where + R: RangeBounds, + { + let start = match range.start_bound() { + Bound::Included(&start) => start, + Bound::Excluded(start) => start + 1, + Bound::Unbounded => 0, + }; + let end = match range.end_bound() { + Bound::Included(end) => end + 1, + Bound::Excluded(&end) => end, + Bound::Unbounded => usize::MAX, + }; + self.replace_range_impl(alloc, start..end, src); + } + + // At the time of writing, this implementation of what's + // essentially `Vec::splice` is vastly more efficient. 
+ fn replace_range_impl(&mut self, alloc: &'a dyn Allocator, range: Range, src: &[T]) { + unsafe { + let dst_len = self.len(); + let src_len = src.len(); + let off = range.start.min(dst_len); + let del_len = range.end.saturating_sub(off).min(dst_len - off); + + if del_len == 0 && src_len == 0 { + return; // nothing to do + } + + let tail_len = dst_len - off - del_len; + let new_len = dst_len - del_len + src_len; + + if src_len > del_len { + self.reserve(alloc, src_len - del_len); + } + + // NOTE: drop_in_place() is not needed here, because T is constrained to Copy. + + // SAFETY: as_mut_ptr() must called after reserve() to ensure that the pointer is valid. + let ptr = self.as_mut_ptr().add(off); + + // Shift the tail. + if tail_len > 0 && src_len != del_len { + ptr::copy(ptr.add(del_len), ptr.add(src_len), tail_len); + } + + // Copy in the replacement. + ptr::copy_nonoverlapping(src.as_ptr(), ptr, src_len); + self.set_len(new_len); + } + } +} + +unsafe extern "system" { + fn MultiByteToWideChar( + CodePage: u32, + dwFlags: u32, + lpMultiByteStr: *const u8, + cbMultiByte: i32, + lpWideCharStr: *mut u16, + cchWideChar: i32, + ) -> i32; +} + +impl<'a> BVec<'a, u16> { + pub fn push_encode_utf16(&mut self, alloc: &'a dyn Allocator, utf8: &[u8]) { + unsafe { + self.reserve(alloc, utf8.len()); // worst case ASCII: 1 byte per char + let dst = self.spare_mut_ptr() as *mut u16; + let len = MultiByteToWideChar( + 65001, + 0, + utf8.as_ptr(), + utf8.len() as i32, + dst, + utf8.len() as i32, + ); + self.len += len.max(0) as usize; + } + } +} + +impl Default for BVec<'_, T> { + fn default() -> Self { + Self::empty() + } +} + +impl Deref for BVec<'_, T> { + type Target = [T]; + + #[inline] + fn deref(&self) -> &[T] { + unsafe { slice::from_raw_parts(self.ptr.as_ptr(), self.len) } + } +} + +impl DerefMut for BVec<'_, T> { + #[inline] + fn deref_mut(&mut self) -> &mut [T] { + unsafe { slice::from_raw_parts_mut(self.ptr.as_ptr(), self.len) } + } +} + +impl PartialEq> for BVec<'_, T> 
+where + T: PartialEq, +{ + #[inline] + fn eq(&self, other: &BVec) -> bool { + self.deref() == other.deref() + } +} + +impl Eq for BVec<'_, T> where T: PartialEq {} + +impl PartialEq<[T]> for BVec<'_, T> +where + T: PartialEq, +{ + #[inline] + fn eq(&self, other: &[T]) -> bool { + self.deref() == other + } +} + +impl PartialOrd for BVec<'_, T> +where + T: PartialOrd, +{ + #[inline] + fn partial_cmp(&self, other: &BVec) -> Option { + self.deref().partial_cmp(other.deref()) + } +} + +impl Ord for BVec<'_, T> +where + T: Ord, +{ + #[inline] + fn cmp(&self, other: &BVec) -> std::cmp::Ordering { + self.deref().cmp(other.deref()) + } +} + +impl fmt::Debug for BVec<'_, T> +where + T: fmt::Debug, +{ + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt::Debug::fmt(self.deref(), f) + } +} + +impl<'a, T> IntoIterator for BVec<'a, T> { + type Item = T; + type IntoIter = IntoIter<'a, T>; + + #[inline] + fn into_iter(self) -> Self::IntoIter { + unsafe { + let ptr = self.ptr; + let end = ptr.add(self.len); + IntoIter { ptr, end, phantom: PhantomData } + } + } +} + +impl<'a, T> IntoIterator for &'a BVec<'a, T> { + type Item = &'a T; + type IntoIter = slice::Iter<'a, T>; + + #[inline] + fn into_iter(self) -> Self::IntoIter { + self.iter() + } +} + +impl<'a, T> IntoIterator for &'a mut BVec<'a, T> { + type Item = &'a mut T; + type IntoIter = slice::IterMut<'a, T>; + + #[inline] + fn into_iter(self) -> Self::IntoIter { + self.iter_mut() + } +} + +/// Owning iterator over the elements of a [`BVec`]. 
///
/// Walks the half-open pointer range `ptr..end` and moves each element out
/// with a raw read.
//
// NOTE(review): no `Drop` impl is visible in this excerpt, so elements that are
// skipped (`nth`, `count`) or never consumed appear not to be dropped —
// presumably fine for arena-backed storage; confirm.
pub struct IntoIter<'a, T> {
    /// Next element to be yielded from the front.
    ptr: NonNull<T>,
    /// One past the last element that has not yet been yielded from the back.
    end: NonNull<T>,
    phantom: PhantomData<&'a T>,
}

impl<'a, T> Iterator for IntoIter<'a, T> {
    type Item = T;

    #[inline]
    fn next(&mut self) -> Option<T> {
        if self.ptr == self.end {
            return None;
        }
        let ptr = self.ptr;
        // SAFETY: `ptr != end`, so `ptr` points at a remaining element and
        // `ptr + 1` is at most `end`.
        self.ptr = unsafe { ptr.add(1) };
        Some(unsafe { ptr.read() })
    }

    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        let len = self.len();
        (len, Some(len))
    }

    #[inline]
    fn count(self) -> usize {
        self.len()
    }

    #[inline]
    fn last(mut self) -> Option<T> {
        self.next_back()
    }

    /// Skips `n` elements and returns the one after them.
    ///
    /// If fewer than `n + 1` elements remain, the iterator is exhausted and
    /// `None` is returned.
    #[inline]
    fn nth(&mut self, n: usize) -> Option<T> {
        if n >= self.len() {
            self.ptr = self.end;
            return None;
        }
        // SAFETY: `n < len`, so `ptr + n` points at a remaining element and
        // `ptr + n + 1` is at most `end`.
        unsafe {
            // Read the element at offset `n`, not the one at the front.
            let ptr = self.ptr.add(n);
            self.ptr = ptr.add(1);
            Some(ptr.read())
        }
    }

    fn fold<B, F>(mut self, mut accum: B, mut f: F) -> B
    where
        F: FnMut(B, Self::Item) -> B,
    {
        while self.ptr != self.end {
            let ptr = self.ptr;
            // Advance before calling `f`, so the cursor is already past the
            // element that has been moved out.
            // SAFETY: `ptr != end`, see `next()`.
            self.ptr = unsafe { ptr.add(1) };
            // Read the saved `ptr`, not the advanced `self.ptr`, which would skip
            // the first element and read one past the end.
            accum = f(accum, unsafe { ptr.read() });
        }
        accum
    }
}

impl<'a, T> DoubleEndedIterator for IntoIter<'a, T> {
    #[inline]
    fn next_back(&mut self) -> Option<T> {
        if self.ptr == self.end {
            return None;
        }
        // SAFETY: `ptr != end`, so `end - 1` points at a remaining element.
        unsafe {
            self.end = self.end.sub(1);
            Some(self.end.read())
        }
    }
}

impl<'a, T> ExactSizeIterator for IntoIter<'a, T> {
    #[inline]
    fn len(&self) -> usize {
        // SAFETY: `ptr` and `end` point into the same allocation and every
        // method keeps `ptr <= end`.
        unsafe { self.end.offset_from_unsigned(self.ptr) }
    }
}

impl<'a, T> FusedIterator for IntoIter<'a, T> {}

// Remainder of the original patch line (the next file's diff header), kept verbatim:
// diff --git a/crates/stdext/src/helpers.rs b/crates/stdext/src/helpers.rs
// index 556206ae6ed7..e8b7154f09ce 100644
// --- a/crates/stdext/src/helpers.rs
// +++ b/crates/stdext/src/helpers.rs
// @@ -3,7 +3,6 @@
// //! Random assortment of helpers I didn't know where to put.
-use std::alloc::Allocator; use std::mem::{self, MaybeUninit}; use std::ops::{Bound, Range, RangeBounds}; use std::{fmt, ptr, slice, str}; @@ -89,7 +88,7 @@ pub trait ReplaceRange { fn replace_range>(&mut self, range: R, src: &[T]); } -impl ReplaceRange for Vec { +impl ReplaceRange for Vec { fn replace_range>(&mut self, range: R, src: &[T]) { let start = match range.start_bound() { Bound::Included(&start) => start, @@ -105,7 +104,7 @@ impl ReplaceRange for Vec { } } -fn vec_replace_impl(dst: &mut Vec, range: Range, src: &[T]) { +fn vec_replace_impl(dst: &mut Vec, range: Range, src: &[T]) { unsafe { let dst_len = dst.len(); let src_len = src.len(); diff --git a/crates/stdext/src/lib.rs b/crates/stdext/src/lib.rs index d7226d7e19c2..5a94e39612ba 100644 --- a/crates/stdext/src/lib.rs +++ b/crates/stdext/src/lib.rs @@ -3,10 +3,11 @@ //! Arena allocators. Small and fast. -#![feature(allocator_api)] - +pub mod alloc; pub mod arena; +pub mod collections; +mod helpers; +pub mod simd; pub mod sys; -mod helpers; pub use helpers::*; diff --git a/crates/edit/src/simd/memset.rs b/crates/stdext/src/simd/memset.rs similarity index 95% rename from crates/edit/src/simd/memset.rs rename to crates/stdext/src/simd/memset.rs index b05e3a0a4980..ab2adf564aed 100644 --- a/crates/edit/src/simd/memset.rs +++ b/crates/stdext/src/simd/memset.rs @@ -15,29 +15,9 @@ use std::mem; -/// A marker trait for types that are safe to `memset`. -/// -/// # Safety -/// -/// Just like with C's `memset`, bad things happen -/// if you use this with non-trivial types. -pub unsafe trait MemsetSafe: Copy {} - -unsafe impl MemsetSafe for u8 {} -unsafe impl MemsetSafe for u16 {} -unsafe impl MemsetSafe for u32 {} -unsafe impl MemsetSafe for u64 {} -unsafe impl MemsetSafe for usize {} - -unsafe impl MemsetSafe for i8 {} -unsafe impl MemsetSafe for i16 {} -unsafe impl MemsetSafe for i32 {} -unsafe impl MemsetSafe for i64 {} -unsafe impl MemsetSafe for isize {} - /// Fills a slice with the given value. 
-#[inline] -pub fn memset(dst: &mut [T], val: T) { +#[inline(always)] +pub fn memset(dst: &mut [T], val: T) { unsafe { match mem::size_of::() { 1 => { @@ -65,12 +45,12 @@ pub fn memset(dst: &mut [T], val: T) { let val = mem::transmute_copy::<_, u64>(&val); memset_raw(beg as *mut u8, end as *mut u8, val); } - _ => unreachable!(), + _ => dst.fill(val), } } } -#[inline] +#[inline(always)] fn memset_raw(beg: *mut u8, end: *mut u8, val: u64) { #[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "loongarch64"))] return unsafe { MEMSET_DISPATCH(beg, end, val) }; @@ -412,7 +392,7 @@ mod tests { fn check_memset(val: T, len: usize) where - T: MemsetSafe + Not + PartialEq + fmt::Debug, + T: Copy + Not + PartialEq + fmt::Debug, { let mut buf = vec![!val; len]; memset(&mut buf, val); diff --git a/crates/stdext/src/simd/mod.rs b/crates/stdext/src/simd/mod.rs new file mode 100644 index 000000000000..86b2d165468d --- /dev/null +++ b/crates/stdext/src/simd/mod.rs @@ -0,0 +1,3 @@ +mod memset; + +pub use memset::*;