From ac2dc137803523bca64faa41bed5c1b326a12ac7 Mon Sep 17 00:00:00 2001 From: Alexandre van Beurden Date: Sat, 28 Dec 2024 15:48:32 +0100 Subject: [PATCH 1/2] Improve dependencies --- .github/workflows/rust.yml | 6 +-- Cargo.toml | 40 +++++++++++------- src/cardinality/mod.rs | 8 ++-- src/fast_automaton/convert/to_regex/mod.rs | 48 +++++++++++++++------- src/fast_automaton/mod.rs | 2 +- src/fast_automaton/serializer.rs | 3 +- src/fast_automaton/spanning_set/mod.rs | 4 +- src/lib.rs | 15 ++++--- src/regex/mod.rs | 1 + 9 files changed, 83 insertions(+), 44 deletions(-) diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index a7b762f..7730802 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -17,8 +17,8 @@ jobs: steps: - uses: actions/checkout@v4 - name: Build - run: cargo build --verbose + run: cargo build --all-features --verbose - name: Test & Lint run: | - cargo test - cargo clippy + cargo test --all-features + cargo clippy --all-features diff --git a/Cargo.toml b/Cargo.toml index 79e2280..cd03087 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "regexsolver" -version = "0.3.0" +version = "0.3.1" edition = "2021" authors = ["Alexandre van Beurden"] repository = "https://github.com/RegexSolver/regexsolver" @@ -10,28 +10,40 @@ description = "Manipulate regex and automaton as if they were sets." readme = "README.md" [dependencies] -env_logger = "0.11.3" -serde = "1.0.197" -serde_derive = "1.0.197" -serde_json = "1.0.114" -ciborium = "0.2.2" -z85 = "3.0.5" -aes-gcm-siv = "0.11.1" -sha2 = "0.10.8" +serde = { version = "1.0", features = ["derive"], optional = true } +ciborium = { version = "0.2.2", optional = true } +z85 = { version = "3.0.5", optional = true } +aes-gcm-siv = { version = "0.11.1", optional = true } +sha2 = { version = "0.10.8", optional = true } +flate2 = { version = "1.0.30", features = [ + "zlib-ng", +], default-features = false, optional = true } nohash-hasher = "0.2" ahash = "0.8.11" -regex-syntax = "0.8.5" log = "0.4.21" rand = "0.8.5" lazy_static = "1.4.0" -flate2 = { version = "1.0.30", features = [ - "zlib-ng", -], default-features = false } regex = "1.10.3" -regex-charclass = { version = "1.0.3", features = ["serde"] } +regex-syntax = "0.8.5" +regex-charclass = { version = "1.0.3" } [dev-dependencies] criterion = { version = "0.5", features = ["html_reports"] } +env_logger = "0.11.3" +serde_json = "1.0.114" + + +[features] +default = ["serde"] +serde = [ + "regex-charclass/serde", + "dep:serde", + "dep:ciborium", + "dep:z85", + "dep:aes-gcm-siv", + "dep:sha2", + "dep:flate2", +] [[bench]] name = "my_benchmark" diff --git a/src/cardinality/mod.rs b/src/cardinality/mod.rs index 30253b1..08131e0 100644 --- a/src/cardinality/mod.rs +++ b/src/cardinality/mod.rs @@ -1,8 +1,10 @@ -use serde_derive::{Deserialize, Serialize}; +#[cfg(feature = "serde")] +use serde::{Deserialize, Serialize}; /// Represent a number. -#[derive(PartialEq, Eq, Debug, Clone, Serialize, Deserialize)] -#[serde(tag = "type", content = "value")] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[derive(PartialEq, Eq, Debug, Clone)] +#[cfg_attr(feature = "serde", serde(tag = "type", content = "value"))] pub enum Cardinality { /// An infinite number. Infinite, diff --git a/src/fast_automaton/convert/to_regex/mod.rs b/src/fast_automaton/convert/to_regex/mod.rs index d515a95..4074839 100644 --- a/src/fast_automaton/convert/to_regex/mod.rs +++ b/src/fast_automaton/convert/to_regex/mod.rs @@ -4,6 +4,7 @@ use std::{ }; use ahash::{HashMapExt, HashSetExt}; +use log::warn; use nohash_hasher::IntMap; use crate::{error::EngineError, execution_profile::ThreadLocalParams, regex::RegularExpression}; @@ -258,28 +259,20 @@ impl FastAutomaton { Ok(automaton) => match self.is_equivalent_of(&automaton) { Ok(result) => { if !result { - /*println!( - "The automaton is not equivalent to the generated regex; automaton={} regex={}", - serde_json::to_string(self).unwrap(), - regex - );*/ + warn!("The automaton is not equivalent to the generated regex; automaton={}, regex={}", self, regex); None } else { Some(regex) } } - Err(_) => { - //println!("{err}"); + Err(err) => { + warn!("Engine error while checking for equivalence ({}); automaton={}, regex={}", err, self, regex); None } }, Err(err) => { - if let crate::error::EngineError::RegexSyntaxError(_) = err { - /*error!( - "The generated regex can not be converted to automaton to be checked for equivalence (Syntax Error); automaton={} regex={}", - serde_json::to_string(self).unwrap(), - regex - );*/ + if let crate::error::EngineError::RegexSyntaxError(err) = err { + warn!("The generated regex cannot be converted to automaton to be checked for equivalence ({}); automaton={}, regex={}", err, self, regex); } None } @@ -422,8 +415,33 @@ mod tests { Ok(()) } - /*#[test] + #[test] fn test_convert_after_operation_4() -> Result<(), String> { + let automaton1 = RegularExpression::new(".*abc.*") + .unwrap() + .to_automaton() + .unwrap(); + let automaton2 = RegularExpression::new(".*def.*") + .unwrap() + .to_automaton() + .unwrap(); + + let result = automaton1.intersection(&automaton2).unwrap(); + + let result = result.to_regex().unwrap(); + + assert_eq!(".*(abc.*def|def.*abc).*", result.to_string()); + + Ok(()) + } + + #[test] + fn test_convert_after_operation_5() -> Result<(), String> { + if std::env::var_os("RUST_LOG").is_none() { + std::env::set_var("RUST_LOG", "regexsolver=debug"); + } + env_logger::init(); + let automaton1 = RegularExpression::new(".*abc.*") .unwrap() .to_automaton() @@ -443,5 +461,5 @@ mod tests { assert_eq!("(x{3})*x{1,2}", result.to_string()); Ok(()) - }*/ + } } diff --git a/src/fast_automaton/mod.rs b/src/fast_automaton/mod.rs index 7dc8dcf..6d6fcbc 100644 --- a/src/fast_automaton/mod.rs +++ b/src/fast_automaton/mod.rs @@ -7,7 +7,6 @@ use std::collections::hash_map::Entry; use std::collections::VecDeque; use std::fmt::Display; -use crate::tokenizer::Tokenizer; use crate::{IntMap, IntSet}; pub(crate) type State = usize; @@ -19,6 +18,7 @@ pub mod condition; mod convert; mod generate; mod operation; +#[cfg(feature = "serde")] mod serializer; pub mod spanning_set; diff --git a/src/fast_automaton/serializer.rs b/src/fast_automaton/serializer.rs index a2975ac..017341b 100644 --- a/src/fast_automaton/serializer.rs +++ b/src/fast_automaton/serializer.rs @@ -2,9 +2,10 @@ use super::*; use lazy_static::lazy_static; use rand::Rng; use serde::{de, ser, Deserializer, Serializer}; -use serde_derive::{Deserialize, Serialize}; +use serde::{Deserialize, Serialize}; use std::env; use z85::{decode, encode}; +use crate::tokenizer::Tokenizer; use sha2::{Digest, Sha256}; diff --git a/src/fast_automaton/spanning_set/mod.rs b/src/fast_automaton/spanning_set/mod.rs index f3e0dff..2aa2780 100644 --- a/src/fast_automaton/spanning_set/mod.rs +++ b/src/fast_automaton/spanning_set/mod.rs @@ -2,10 +2,12 @@ use std::slice::Iter; use ahash::AHashSet; use regex_charclass::{char::Char, irange::RangeSet}; +#[cfg(feature = "serde")] use serde::{Deserialize, Serialize}; /// Contains a set of [`RangeSet`] that span all the transition of a [`crate::FastAutomaton`]. -#[derive(Serialize, Deserialize, Clone, Debug, PartialEq, Eq)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[derive(Clone, Debug, PartialEq, Eq)] pub struct SpanningSet(Vec>, RangeSet); impl SpanningSet { diff --git a/src/lib.rs b/src/lib.rs index 9f21c03..91493c7 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -11,6 +11,7 @@ use fast_automaton::FastAutomaton; use nohash_hasher::NoHashHasher; use regex::RegularExpression; use regex_charclass::{char::Char, irange::RangeSet}; +#[cfg(feature = "serde")] use serde::{Deserialize, Serialize}; pub mod cardinality; @@ -27,12 +28,13 @@ type Range = RangeSet; /// Represents a term that can be either a regular expression or a finite automaton. This term can be manipulated with a wide range of operations. /// /// To put constraint and limitation on the execution of operations please refer to [`execution_profile::ExecutionProfile`]. -#[derive(Clone, PartialEq, Eq, Debug, Serialize, Deserialize)] -#[serde(tag = "type", content = "value")] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[derive(Clone, PartialEq, Eq, Debug)] +#[cfg_attr(feature = "serde", serde(tag = "type", content = "value"))] pub enum Term { - #[serde(rename = "regex")] + #[cfg_attr(feature = "serde", serde(rename = "regex"))] RegularExpression(RegularExpression), - #[serde(rename = "fair")] + #[cfg_attr(feature = "serde", serde(rename = "fair"))] Automaton(FastAutomaton), } @@ -319,8 +321,9 @@ impl Term { } /// Represents details about a [Term]. -#[derive(Clone, PartialEq, Eq, Debug, Serialize, Deserialize)] -#[serde(tag = "type", rename = "details")] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[derive(Clone, PartialEq, Eq, Debug)] +#[cfg_attr(feature = "serde", serde(tag = "type", rename = "details"))] pub struct Details { cardinality: Option>, length: (Option, Option), diff --git a/src/regex/mod.rs b/src/regex/mod.rs index 7f8c59e..176612f 100644 --- a/src/regex/mod.rs +++ b/src/regex/mod.rs @@ -12,6 +12,7 @@ use super::*; mod analyze; mod builder; mod operation; +#[cfg(feature = "serde")] mod serializer; /// Represent a regular expression. From a66f4add02133699912a567c10e7b66ab91d9437 Mon Sep 17 00:00:00 2001 From: Alexandre van Beurden Date: Sat, 28 Dec 2024 16:19:19 +0100 Subject: [PATCH 2/2] remove temp test --- src/fast_automaton/convert/to_regex/mod.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/fast_automaton/convert/to_regex/mod.rs b/src/fast_automaton/convert/to_regex/mod.rs index 4074839..d9a1dd0 100644 --- a/src/fast_automaton/convert/to_regex/mod.rs +++ b/src/fast_automaton/convert/to_regex/mod.rs @@ -435,7 +435,7 @@ mod tests { Ok(()) } - #[test] + /*#[test] fn test_convert_after_operation_5() -> Result<(), String> { if std::env::var_os("RUST_LOG").is_none() { std::env::set_var("RUST_LOG", "regexsolver=debug"); @@ -461,5 +461,5 @@ mod tests { assert_eq!("(x{3})*x{1,2}", result.to_string()); Ok(()) - } + }*/ }