Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 12 additions & 12 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 3 additions & 0 deletions Justfile
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,9 @@ build-bin profile="dev": (build-lib profile)
build-lib profile="dev":
cargo build --package rsonpath-lib --profile {{profile}}

# Build the rsonpath library with the nightly toolchain (used for the AVX-512 build).
# The requested cargo profile is forwarded, same as in `build-lib`.
build-avx512 profile="dev":
    rustup run nightly cargo build --package rsonpath-lib --profile {{profile}}

# Build all rsonpath parts, the binary and library.
build-all profile="dev": (build-lib profile) (build-bin profile) (gen-tests)

Expand Down
1 change: 1 addition & 0 deletions crates/rsonpath-lib/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ default = ["simd"]
arbitrary = ["dep:arbitrary"]
simd = []


[[example]]
name = "approx_spans_usage"
path = "examples/approx_spans_usage.rs"
Expand Down
5 changes: 5 additions & 0 deletions crates/rsonpath-lib/src/classification/depth.rs
Original file line number Diff line number Diff line change
Expand Up @@ -75,10 +75,15 @@ pub(crate) mod shared;
pub(crate) mod avx2_32;
#[cfg(target_arch = "x86_64")]
pub(crate) mod avx2_64;
#[cfg(target_arch = "x86_64")]
pub(crate) mod avx512_64;
#[cfg(target_arch = "x86")]
pub(crate) mod sse2_32;
#[cfg(target_arch = "x86_64")]
pub(crate) mod sse2_64;
#[cfg(target_arch = "aarch64")]
pub(crate) mod neon_64;


pub(crate) trait DepthImpl {
type Classifier<'i, I, Q>: DepthIterator<'i, I, Q, MaskType, BLOCK_SIZE>
Expand Down
56 changes: 56 additions & 0 deletions crates/rsonpath-lib/src/classification/depth/avx512_64.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
use super::{
shared::{mask_64::DepthVector64, vector_512::DelimiterClassifierImpl512},
*,
};
use crate::{
classification::{QuoteClassifiedBlock, ResumeClassifierBlockState},
debug,
input::InputBlock,
};
use std::marker::PhantomData;

/// Number of input bytes covered by a single block handled in this module;
/// it matches the bit width of the `u64` masks used below.
const SIZE: usize = 64;

// Instantiates the shared depth-classifier boilerplate for the AVX-512 types.
// NOTE(review): the macro body is not visible here; it presumably calls the
// `new_vector` / `new_vector_from` functions defined below — confirm against
// `shared::depth_classifier!`.
shared::depth_classifier!(Avx512VectorIterator64, DelimiterClassifierImpl512, DepthVector64, 64, u64);

/// Builds the depth vector for an entire quote-classified block.
///
/// Equivalent to [`new_vector_from`] with a starting index of zero, i.e. no
/// leading positions of the block are ignored.
#[inline(always)]
fn new_vector<'a, B>(
    bytes: QuoteClassifiedBlock<B, u64, SIZE>,
    classifier: &DelimiterClassifierImpl512,
) -> DepthVector64<'a, B>
where
    B: InputBlock<'a, SIZE>,
{
    const BLOCK_START: usize = 0;

    new_vector_from(bytes, classifier, BLOCK_START)
}

/// Builds the depth vector for a quote-classified block, ignoring every
/// position that comes before `idx`.
///
/// This is the safe entry point wrapping the AVX-512 implementation.
#[inline(always)]
fn new_vector_from<'a, B>(
    bytes: QuoteClassifiedBlock<B, u64, SIZE>,
    classifier: &DelimiterClassifierImpl512,
    idx: usize,
) -> DepthVector64<'a, B>
where
    B: InputBlock<'a, SIZE>,
{
    // SAFETY: target_feature invariant
    // NOTE(review): the invariant itself (that this code is only reached on
    // CPUs with the required features) is established outside of this file.
    unsafe { new_avx512(bytes, classifier, idx) }
}

/// Classifies the opening and closing delimiters of a 64-byte block in one
/// AVX-512 pass and wraps the result in a [`DepthVector64`].
///
/// Delimiters located before `start_idx` or inside quotes are dropped from
/// both masks. The returned vector starts with `depth` and `idx` equal to zero.
///
/// `start_idx` is used as a shift amount on a `u64`, so it has to be lower
/// than 64.
///
/// # Safety
///
/// Calls `DelimiterClassifierImpl512::get_opening_and_closing_masks`, which is
/// compiled with the `avx512f` and `avx512bw` target features; the caller must
/// make sure the running CPU supports them.
#[inline(always)]
unsafe fn new_avx512<'a, B>(
    bytes: QuoteClassifiedBlock<B, u64, SIZE>,
    classifier: &DelimiterClassifierImpl512,
    start_idx: usize,
) -> DepthVector64<'a, B>
where
    B: InputBlock<'a, SIZE>,
{
    // Bits that survive filtering: at or after `start_idx`, and not within quotes.
    let relevant = (u64::MAX << start_idx) & !bytes.within_quotes_mask;

    let (raw_opening, raw_closing) = classifier.get_opening_and_closing_masks(&bytes.block);
    let opening_mask = raw_opening & relevant;
    let closing_mask = raw_closing & relevant;
    let opening_count = opening_mask.count_ones();

    DepthVector64 {
        quote_classified: bytes,
        opening_mask,
        closing_mask,
        opening_count,
        depth: 0,
        idx: 0,
        phantom: PhantomData,
    }
}
62 changes: 62 additions & 0 deletions crates/rsonpath-lib/src/classification/depth/neon_64.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
use super::{
shared::{mask_neon::DepthVectorNeon, vector_neon::DelimiterClassifierImplNeon},
*,
};
use crate::{
classification::{mask::m64, QuoteClassifiedBlock, ResumeClassifierBlockState},
debug,
input::InputBlock,
};
use std::marker::PhantomData;

/// Number of input bytes covered by a single block handled in this module;
/// it matches the bit width of the `u64` masks used below.
const SIZE: usize = 64;

// Instantiates the shared depth-classifier boilerplate for the NEON types.
// NOTE(review): the macro body is not visible here; it presumably calls the
// `new_vector` / `new_vector_from` functions defined below — confirm against
// `shared::depth_classifier!`.
shared::depth_classifier!(NeonVectorIterator, DelimiterClassifierImplNeon, DepthVectorNeon, 64, u64);

/// Builds the depth vector for an entire quote-classified block.
///
/// Equivalent to [`new_vector_from`] with a starting index of zero, i.e. no
/// leading positions of the block are ignored.
#[inline(always)]
fn new_vector<'a, B>(
    bytes: QuoteClassifiedBlock<B, u64, SIZE>,
    classifier: &DelimiterClassifierImplNeon,
) -> DepthVectorNeon<'a, B>
where
    B: InputBlock<'a, SIZE>,
{
    const BLOCK_START: usize = 0;

    new_vector_from(bytes, classifier, BLOCK_START)
}

/// Builds the depth vector for a quote-classified block, ignoring every
/// position that comes before `idx`.
///
/// This is the safe entry point wrapping the NEON implementation.
#[inline(always)]
fn new_vector_from<'a, B>(
    bytes: QuoteClassifiedBlock<B, u64, SIZE>,
    classifier: &DelimiterClassifierImplNeon,
    idx: usize,
) -> DepthVectorNeon<'a, B>
where
    B: InputBlock<'a, SIZE>,
{
    // SAFETY: target_feature invariant
    // NOTE(review): the invariant itself (that this code is only reached on
    // CPUs with the required features) is established outside of this file.
    unsafe { new_neon(bytes, classifier, idx) }
}

/// Classifies the opening and closing delimiters of a 64-byte block, one
/// quarter at a time, and wraps the result in a [`DepthVectorNeon`].
///
/// The four per-quarter masks are merged with `m64::combine_16` into single
/// 64-bit masks. Delimiters located before `start_idx` or inside quotes are
/// then dropped from both masks. The returned vector starts with `depth` and
/// `idx` equal to zero.
///
/// `start_idx` is used as a shift amount on a `u64`, so it has to be lower
/// than 64.
///
/// # Safety
///
/// Calls `DelimiterClassifierImplNeon::get_opening_and_closing_masks` without
/// further checks; the caller must uphold that function's requirements.
/// NOTE(review): presumably that means NEON support — confirm in `vector_neon`.
#[inline(always)]
unsafe fn new_neon<'a, B>(
    bytes: QuoteClassifiedBlock<B, u64, SIZE>,
    classifier: &DelimiterClassifierImplNeon,
    start_idx: usize,
) -> DepthVectorNeon<'a, B>
where
    B: InputBlock<'a, SIZE>,
{
    // Bits that survive filtering: at or after `start_idx`, and not within quotes.
    let relevant = (u64::MAX << start_idx) & !bytes.within_quotes_mask;

    let (quarter1, quarter2, quarter3, quarter4) = bytes.block.quarters();
    let (open1, close1) = classifier.get_opening_and_closing_masks(quarter1);
    let (open2, close2) = classifier.get_opening_and_closing_masks(quarter2);
    let (open3, close3) = classifier.get_opening_and_closing_masks(quarter3);
    let (open4, close4) = classifier.get_opening_and_closing_masks(quarter4);

    let opening_mask = m64::combine_16(open1, open2, open3, open4) & relevant;
    let closing_mask = m64::combine_16(close1, close2, close3, close4) & relevant;
    let opening_count = opening_mask.count_ones();

    DepthVectorNeon {
        quote_classified: bytes,
        opening_mask,
        closing_mask,
        opening_count,
        depth: 0,
        idx: 0,
        phantom: PhantomData,
    }
}
6 changes: 6 additions & 0 deletions crates/rsonpath-lib/src/classification/depth/shared.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,16 @@
pub(super) mod mask_32;
#[cfg(target_arch = "x86_64")]
pub(super) mod mask_64;
#[cfg(target_arch = "aarch64")]
pub(super) mod mask_neon;
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
pub(super) mod vector_128;
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
pub(super) mod vector_256;
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
pub(super) mod vector_512;
#[cfg(target_arch = "aarch64")]
pub(super) mod vector_neon;

#[allow(unused_macros)]
macro_rules! depth_classifier {
Expand Down
84 changes: 84 additions & 0 deletions crates/rsonpath-lib/src/classification/depth/shared/mask_neon.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
use crate::{
bin_u64,
classification::{depth::DepthBlock, quotes::QuoteClassifiedBlock},
debug,
input::InputBlock,
};
use std::marker::PhantomData;

/// Number of input bytes covered by one block; also the number of bits
/// in the `u64` masks stored in [`DepthVectorNeon`].
const SIZE: usize = 64;

/// Works on a 64-byte slice, but uses a heuristic to quickly
/// respond to queries and not count the depth exactly unless
/// needed.
///
/// The heuristic checks if it is possible to achieve the queried
/// depth within the block by counting the number of opening
/// and closing structural characters. This can be done much
/// more quickly than precise depth calculation.
pub(crate) struct DepthVectorNeon<'a, B: InputBlock<'a, SIZE>> {
/// The quote-classified input block the masks were computed from.
pub(crate) quote_classified: QuoteClassifiedBlock<B, u64, SIZE>,
/// Bitmask of the opening characters that were not consumed yet;
/// shifted right whenever the vector is advanced.
pub(crate) opening_mask: u64,
/// Cached number of set bits in `opening_mask`.
pub(crate) opening_count: u32,
/// Bitmask of the closing characters that were not consumed yet;
/// shifted right whenever the vector is advanced.
pub(crate) closing_mask: u64,
/// Number of positions consumed so far by advancing the vector.
pub(crate) idx: usize,
/// Depth accumulated so far, updated when advancing and by `add_depth`.
pub(crate) depth: i32,
/// Marker tying the struct to the `'a` lifetime of the input block.
pub(crate) phantom: PhantomData<&'a ()>,
}

// TODO FIXME: consider rewriting trailing_zeros, count_ones etc. functions.

impl<'a, B: InputBlock<'a, SIZE>> DepthBlock<'a> for DepthVectorNeon<'a, B> {
    /// Consumes the block up to and including the next closing character.
    ///
    /// Returns `false`, leaving the state untouched, when no closing character
    /// remains. Otherwise the depth is increased by the number of opening
    /// characters that were skipped and decreased by one for the closing
    /// character, and `true` is returned.
    #[inline(always)]
    fn advance_to_next_depth_decrease(&mut self) -> bool {
        let next_closing = self.closing_mask.trailing_zeros() as usize;

        // An empty mask has as many trailing zeros as there are bits.
        if next_closing == SIZE {
            return false;
        }

        bin_u64!("opening_mask", self.opening_mask);
        bin_u64!("closing_mask", self.closing_mask);

        // Drop everything up to and including the closing character. The shift
        // is split in two because `next_closing + 1` may be equal to 64, which
        // is not a valid shift amount for a `u64`.
        self.opening_mask = (self.opening_mask >> next_closing) >> 1;
        self.closing_mask = (self.closing_mask >> next_closing) >> 1;

        bin_u64!("new opening_mask", self.opening_mask);
        bin_u64!("new closing_mask", self.closing_mask);

        let old_opening_count = self.opening_count as i32;
        let new_opening_count = self.opening_mask.count_ones() as i32;
        // Opening characters that were skipped, minus the one closing character.
        let delta = old_opening_count - new_opening_count - 1;
        self.opening_count = new_opening_count as u32;

        debug!("next_closing: {next_closing}");
        debug!("new_opening_count: {new_opening_count}");
        debug!("delta: {delta}");

        self.depth += delta;
        self.idx += next_closing + 1;

        true
    }

    /// Returns the depth at the current position.
    #[inline(always)]
    fn get_depth(&self) -> isize {
        let current = self.depth;
        current as isize
    }

    /// Returns the depth after all remaining opening and closing characters
    /// of the block are taken into account.
    #[inline(always)]
    fn depth_at_end(&self) -> isize {
        let remaining_opening = self.opening_count as i32;
        let remaining_closing = self.closing_mask.count_ones() as i32;

        (remaining_opening - remaining_closing + self.depth) as isize
    }

    /// Offsets the current depth by `depth`.
    #[inline(always)]
    fn add_depth(&mut self, depth: isize) {
        let offset = depth as i32;
        self.depth += offset;
    }

    /// Returns a lower bound on the depth reachable in the rest of the block,
    /// computed as if every remaining closing character came first.
    #[inline(always)]
    fn estimate_lowest_possible_depth(&self) -> isize {
        let remaining_closing = self.closing_mask.count_ones() as i32;

        (self.depth - remaining_closing) as isize
    }
}
46 changes: 46 additions & 0 deletions crates/rsonpath-lib/src/classification/depth/shared/vector_512.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
use crate::classification::structural::BracketType;

#[cfg(target_arch = "x86")]
use core::arch::x86::*;
#[cfg(target_arch = "x86_64")]
use core::arch::x86_64::*;

/// Finds opening and closing bracket bytes of a single bracket type
/// in 64-byte inputs, using 512-bit vector comparisons.
pub(crate) struct DelimiterClassifierImpl512 {
/// The opening bracket byte (`[` or `{`), stored as `i8` because that is
/// the type taken by `_mm512_set1_epi8`.
opening: i8,
}

impl DelimiterClassifierImpl512 {
    /// Creates a classifier recognising the given bracket type.
    pub(crate) fn new(opening: BracketType) -> Self {
        let opening_byte: u8 = match opening {
            BracketType::Square => b'[',
            BracketType::Curly => b'{',
        };

        Self {
            opening: opening_byte as i8,
        }
    }

    /// Returns a vector with every byte lane set to the opening bracket.
    ///
    /// # Safety
    ///
    /// Uses an AVX-512 intrinsic; the CPU must support it.
    #[inline(always)]
    unsafe fn opening_mask(&self) -> __m512i {
        let opening = self.opening;
        _mm512_set1_epi8(opening)
    }

    /// Returns a vector with every byte lane set to the closing bracket.
    ///
    /// # Safety
    ///
    /// Uses an AVX-512 intrinsic; the CPU must support it.
    #[inline(always)]
    unsafe fn closing_mask(&self) -> __m512i {
        // In ASCII both `]` and `}` are located two code points after
        // their opening counterparts.
        let closing = self.opening + 2;
        _mm512_set1_epi8(closing)
    }

    /// Computes two bitmasks for a 64-byte input: one marking the opening
    /// brackets and one marking the closing brackets. Bit `i` of a mask
    /// corresponds to byte `i` of `bytes`.
    ///
    /// # Panics
    ///
    /// Panics if `bytes` is not exactly 64 bytes long.
    ///
    /// # Safety
    ///
    /// The CPU must support the `avx512f` and `avx512bw` features.
    #[target_feature(enable = "avx512f")]
    #[target_feature(enable = "avx512bw")]
    #[inline]
    pub(crate) unsafe fn get_opening_and_closing_masks(&self, bytes: &[u8]) -> (u64, u64) {
        assert_eq!(64, bytes.len());
        let data = bytes.as_ptr().cast::<i32>();

        // SAFETY: target_feature invariant
        // The assertion above guarantees that a full 64 bytes can be read,
        // and the load used here does not require any alignment.
        unsafe {
            let input = _mm512_loadu_si512(data);

            (
                _mm512_cmpeq_epi8_mask(input, self.opening_mask()),
                _mm512_cmpeq_epi8_mask(input, self.closing_mask()),
            )
        }
    }
}
Loading