From 959fe506c90dbab0fcc9504ee2c95d0fe23401a3 Mon Sep 17 00:00:00 2001 From: Alexandre van Beurden Date: Fri, 1 Nov 2024 15:29:58 +0100 Subject: [PATCH] Add condition get_hot_bits --- Cargo.toml | 2 +- .../condition/fast_bit_vec/mod.rs | 13 ++++++- src/fast_automaton/condition/mod.rs | 24 +++++++++++- src/fast_automaton/convert/to_regex/mod.rs | 37 +++++++++++++++---- .../convert/to_regex/transform.rs | 1 + 5 files changed, 66 insertions(+), 11 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 495b6f3..1bbe48e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "regexsolver" -version = "0.2.1" +version = "0.2.2" edition = "2021" authors = ["Alexandre van Beurden"] repository = "https://github.com/RegexSolver/regexsolver" diff --git a/src/fast_automaton/condition/fast_bit_vec/mod.rs b/src/fast_automaton/condition/fast_bit_vec/mod.rs index 5769b80..dd23995 100644 --- a/src/fast_automaton/condition/fast_bit_vec/mod.rs +++ b/src/fast_automaton/condition/fast_bit_vec/mod.rs @@ -6,8 +6,9 @@ pub struct FastBitVec { impl std::fmt::Display for FastBitVec { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { - for a in &self.bits { - write!(f, "{:b}", a)?; + for i in 0..self.n { + let bit = if self.get(i).unwrap() { 1 } else { 0 }; + write!(f, "{}", bit)?; } Ok(()) } @@ -121,4 +122,12 @@ impl FastBitVec { fn mask_for_bits(bits: usize) -> u64 { (!0) >> ((64 - bits % 64) % 64) } + + pub fn get_hot_bits(&self) -> Vec<bool> { + let mut hot_bits = Vec::with_capacity(self.n); + for i in 0..self.n { + hot_bits.push(self.get(i).unwrap()); + } + hot_bits + } } diff --git a/src/fast_automaton/condition/mod.rs b/src/fast_automaton/condition/mod.rs index ab97355..000f53e 100644 --- a/src/fast_automaton/condition/mod.rs +++ b/src/fast_automaton/condition/mod.rs @@ -154,6 +154,10 @@ impl Condition { pub fn get_cardinality(&self, spanning_set: &SpanningSet) -> Result { Ok(self.to_range(spanning_set)?.get_cardinality()) } + + pub fn get_hot_bits(&self) -> 
Vec<bool> { + self.0.get_hot_bits() + } } #[cfg(test)] @@ -190,10 +194,19 @@ mod tests { fn test_empty_total() -> Result<(), String> { let spanning_set = get_spanning_set(); let empty = Condition::empty(&spanning_set); + //println!("{empty}"); assert!(empty.is_empty()); + assert_eq!( + vec![false, false, false, false], + empty.get_hot_bits() + ); let total = Condition::total(&spanning_set); - println!("{total}"); + //println!("{total}"); assert!(total.is_total()); + assert_eq!( + vec![true, true, true, true], + total.get_hot_bits() + ); assert_eq!(Range::empty(), empty.to_range(&spanning_set).unwrap()); assert_eq!(Range::total(), total.to_range(&spanning_set).unwrap()); @@ -221,10 +234,19 @@ mod tests { empty, Condition::from_range(&Range::empty(), &spanning_set).unwrap() ); + assert_eq!( + vec![false], + empty.get_hot_bits() + ); + assert_eq!( total, Condition::from_range(&Range::total(), &spanning_set).unwrap() ); + assert_eq!( + vec![true], + total.get_hot_bits() + ); assert_eq!(empty, total.complement()); assert_eq!(total, empty.complement()); diff --git a/src/fast_automaton/convert/to_regex/mod.rs b/src/fast_automaton/convert/to_regex/mod.rs index 75520af..d515a95 100644 --- a/src/fast_automaton/convert/to_regex/mod.rs +++ b/src/fast_automaton/convert/to_regex/mod.rs @@ -4,7 +4,6 @@ use std::{ }; use ahash::{HashMapExt, HashSetExt}; -use log::error; use nohash_hasher::IntMap; use crate::{error::EngineError, execution_profile::ThreadLocalParams, regex::RegularExpression}; @@ -53,6 +52,7 @@ impl Display for StateEliminationAutomaton { impl StateEliminationAutomaton { //#[cfg(test)] + #[allow(dead_code)] #[inline] pub fn to_dot(&self) { println!("{}", self); @@ -258,28 +258,28 @@ impl FastAutomaton { Ok(automaton) => match self.is_equivalent_of(&automaton) { Ok(result) => { if !result { - println!( + /*println!( "The automaton is not equivalent to the generated regex; automaton={} regex={}", serde_json::to_string(self).unwrap(), regex - ); + );*/ None } else { 
Some(regex) } } - Err(err) => { - println!("{err}"); + Err(_) => { + //println!("{err}"); None } }, Err(err) => { if let crate::error::EngineError::RegexSyntaxError(_) = err { - error!( + /*error!( "The generated regex can not be converted to automaton to be checked for equivalence (Syntax Error); automaton={} regex={}", serde_json::to_string(self).unwrap(), regex - ); + );*/ } None } @@ -421,4 +421,27 @@ mod tests { Ok(()) } + + /*#[test] + fn test_convert_after_operation_4() -> Result<(), String> { + let automaton1 = RegularExpression::new(".*abc.*") + .unwrap() + .to_automaton() + .unwrap(); + let automaton2 = RegularExpression::new(".*def.*") + .unwrap() + .to_automaton() + .unwrap() + .determinize() + .unwrap(); + + let result = automaton1.subtraction(&automaton2).unwrap(); + result.to_dot(); + + let result = result.to_regex().unwrap(); + + assert_eq!("(x{3})*x{1,2}", result.to_string()); + + Ok(()) + }*/ } diff --git a/src/fast_automaton/convert/to_regex/transform.rs b/src/fast_automaton/convert/to_regex/transform.rs index 4fe9654..9fb2fd6 100644 --- a/src/fast_automaton/convert/to_regex/transform.rs +++ b/src/fast_automaton/convert/to_regex/transform.rs @@ -77,6 +77,7 @@ impl StateEliminationAutomaton { if self.get_number_of_states() < 2 { return Ok(None); } + //self.to_dot(); let mut dot_value = if let Some(dot_value) = self.get_transition(self.start_state, self.start_state) { if let Some(dot_value) = dot_value.get_weight() {