From dc7362dfcdb2be418fedbf5418706152a68fb574 Mon Sep 17 00:00:00 2001 From: Artur Gontijo Date: Sun, 25 Jan 2026 18:28:27 -0300 Subject: [PATCH 1/6] Implement InMemoryDB. --- src/atomdb/BUILD | 1 + src/atomdb/inmemorydb/BUILD | 35 + src/atomdb/inmemorydb/InMemoryDB.cc | 735 ++++++++++++++++++++ src/atomdb/inmemorydb/InMemoryDB.h | 112 +++ src/atomdb/inmemorydb/InmemoryDBAPITypes.cc | 112 +++ src/atomdb/inmemorydb/InmemoryDBAPITypes.h | 69 ++ src/tests/cpp/BUILD | 16 + src/tests/cpp/inmemorydb_test.cc | 271 ++++++++ 8 files changed, 1351 insertions(+) create mode 100644 src/atomdb/inmemorydb/BUILD create mode 100644 src/atomdb/inmemorydb/InMemoryDB.cc create mode 100644 src/atomdb/inmemorydb/InMemoryDB.h create mode 100644 src/atomdb/inmemorydb/InmemoryDBAPITypes.cc create mode 100644 src/atomdb/inmemorydb/InmemoryDBAPITypes.h create mode 100644 src/tests/cpp/inmemorydb_test.cc diff --git a/src/atomdb/BUILD b/src/atomdb/BUILD index 93cb0fa5b..ede7cfff9 100644 --- a/src/atomdb/BUILD +++ b/src/atomdb/BUILD @@ -11,6 +11,7 @@ cc_library( ":atomdb_cache", ":atomdb_cache_singleton", ":atomdb_singleton", + "//atomdb/inmemorydb:inmemorydb_lib", "//atomdb/morkdb:morkdb_lib", "//atomdb/redis_mongodb:redis_mongodb_lib", ], diff --git a/src/atomdb/inmemorydb/BUILD b/src/atomdb/inmemorydb/BUILD new file mode 100644 index 000000000..c0b1415e2 --- /dev/null +++ b/src/atomdb/inmemorydb/BUILD @@ -0,0 +1,35 @@ +load("@rules_cc//cc:cc_library.bzl", "cc_library") + +package(default_visibility = ["//visibility:public"]) + +cc_library( + name = "inmemorydb_lib", + includes = ["."], + deps = [ + ":inmemorydb", + ":inmemorydb_api_types", + ], +) + +cc_library( + name = "inmemorydb_api_types", + srcs = ["InMemoryDBAPITypes.cc"], + hdrs = ["InMemoryDBAPITypes.h"], + includes = ["."], + deps = [ + "//atomdb:atomdb_api_types", + ], +) + +cc_library( + name = "inmemorydb", + srcs = ["InMemoryDB.cc"], + hdrs = ["InMemoryDB.h"], + includes = ["."], + deps = [ + ":inmemorydb_api_types", + 
"//atomdb", + "//commons:commons_lib", + "//commons/atoms:atoms_lib", + ], +) diff --git a/src/atomdb/inmemorydb/InMemoryDB.cc b/src/atomdb/inmemorydb/InMemoryDB.cc new file mode 100644 index 000000000..0861abac8 --- /dev/null +++ b/src/atomdb/inmemorydb/InMemoryDB.cc @@ -0,0 +1,735 @@ +#include "InMemoryDB.h" + +#include +#include + +#include "Hasher.h" +#include "InmemoryDBAPITypes.h" +#include "Link.h" +#include "LinkSchema.h" +#include "Node.h" +#include "Utils.h" + +#define LOG_LEVEL INFO_LEVEL +#include "Logger.h" + +using namespace atomdb; +using namespace atomdb_api_types; +using namespace atoms; +using namespace commons; + +// Helper class to wrap Atom in HandleTrie +class AtomTrieValue : public HandleTrie::TrieValue { + public: + AtomTrieValue(Atom* atom) : atom_(atom) {} + ~AtomTrieValue() override { delete atom_; } + void merge(HandleTrie::TrieValue* other) override { + // For now, just replace (could be enhanced later) + delete atom_; + atom_ = dynamic_cast(other)->atom_; + dynamic_cast(other)->atom_ = nullptr; // Prevent double delete + } + Atom* get_atom() { return atom_; } + + private: + Atom* atom_; +}; + +// Helper functions and data structures for traverse callbacks +namespace { +struct QueryPatternData { + InMemoryDB* db; + LinkSchema* link_schema; + HandleSetInMemory* handle_set; +}; + +struct ReIndexData { + InMemoryDB* db; +}; + +struct ClearValueData { + string handle; + bool found; +}; + +bool re_index_visitor(HandleTrie::TrieNode* node, void* data) { + ReIndexData* index_data = static_cast(data); + if (node->value != nullptr) { + auto atom_trie_value = dynamic_cast(node->value); + if (atom_trie_value != nullptr) { + Atom* atom = atom_trie_value->get_atom(); + if (Atom::is_link(*atom)) { + Link* link = dynamic_cast(atom); + string link_handle = link->handle(); + // Index patterns + auto pattern_handles = index_data->db->match_pattern_index_schema(link); + for (const auto& pattern_handle : pattern_handles) { + 
index_data->db->add_pattern(pattern_handle, link_handle); + } + } + } + } + return false; // Continue traversal +} + +bool clear_value_visitor(HandleTrie::TrieNode* node, void* data) { + ClearValueData* clear_data = static_cast(data); + if (node->value != nullptr) { + auto atom_trie_value = dynamic_cast(node->value); + if (atom_trie_value != nullptr) { + Atom* atom = atom_trie_value->get_atom(); + // Compare handles to find the right node + if (atom->handle() == clear_data->handle) { + // Found it! Delete the value and set to nullptr + delete node->value; + node->value = nullptr; + clear_data->found = true; + return true; // Stop traversal + } + } + } + return false; // Continue traversal +} +} // namespace + +InMemoryDB::InMemoryDB(const string& context) + : context_(context), atoms_trie_(new HandleTrie(HANDLE_HASH_SIZE - 1)) {} + +InMemoryDB::~InMemoryDB() { + // Traverse and delete all atoms + this->atoms_trie_->traverse( + false, + [](HandleTrie::TrieNode* node, void* data) -> bool { + if (node->value != nullptr) { + delete node->value; + node->value = nullptr; + } + return false; // Continue traversal + }, + nullptr); + delete this->atoms_trie_; +} + +bool InMemoryDB::allow_nested_indexing() { return false; } + +shared_ptr InMemoryDB::get_atom(const string& handle) { + lock_guard lock(trie_mutex_); + auto trie_value = this->atoms_trie_->lookup(handle); + if (trie_value == nullptr) { + return nullptr; + } + auto atom_trie_value = dynamic_cast(trie_value); + if (atom_trie_value == nullptr) { + return nullptr; + } + // Clone the atom to return a shared_ptr (caller doesn't own the original) + Atom* atom = atom_trie_value->get_atom(); + if (atom->arity() == 0) { + auto node = dynamic_cast(atom); + return make_shared(*node); + } else { + auto link = dynamic_cast(atom); + return make_shared(*link); + } +} + +shared_ptr InMemoryDB::query_for_pattern(const LinkSchema& link_schema) { + lock_guard lock(index_mutex_); + // Create a non-const copy to call match() which is 
non-const + LinkSchema local_schema(link_schema); + auto pattern_handle = local_schema.handle(); + auto handle_set = make_shared(); + + // Check if we have this pattern indexed + auto it = pattern_index_.find(pattern_handle); + if (it != pattern_index_.end()) { + for (const auto& handle : it->second) { + // Verify the atom still exists and matches the schema + lock_guard trie_lock(trie_mutex_); + auto trie_value = atoms_trie_->lookup(handle); + if (trie_value != nullptr) { + auto atom_trie_value = dynamic_cast(trie_value); + if (atom_trie_value != nullptr) { + Atom* atom = atom_trie_value->get_atom(); + if (Atom::is_link(*atom)) { + Assignment assignment; + Link* link = dynamic_cast(atom); + if (local_schema.match(*link, assignment, *this)) { + handle_set->add_handle(handle); + } + } + } + } + } + } + + return handle_set; +} + +shared_ptr InMemoryDB::query_for_targets(const string& handle) { + lock_guard lock(trie_mutex_); + auto trie_value = atoms_trie_->lookup(handle); + if (trie_value == nullptr) { + return nullptr; + } + auto atom_trie_value = dynamic_cast(trie_value); + if (atom_trie_value == nullptr) { + return nullptr; + } + Atom* atom = atom_trie_value->get_atom(); + if (!Atom::is_link(*atom)) { + return nullptr; // Not a link, so no targets + } + Link* link = dynamic_cast(atom); + return make_shared(link->targets); +} + +shared_ptr InMemoryDB::query_for_incoming_set(const string& handle) { + lock_guard lock(index_mutex_); + auto handle_set = make_shared(); + auto it = incoming_sets_.find(handle); + if (it != incoming_sets_.end()) { + for (const auto& link_handle : it->second) { + handle_set->add_handle(link_handle); + } + } + return handle_set; +} + +// Stub implementations for AtomDocument methods (to be implemented later) +shared_ptr InMemoryDB::get_atom_document(const string& handle) { + // TODO: Implement in second phase + return nullptr; +} + +shared_ptr InMemoryDB::get_node_document(const string& handle) { + // TODO: Implement in second phase + 
return nullptr; +} + +shared_ptr InMemoryDB::get_link_document(const string& handle) { + // TODO: Implement in second phase + return nullptr; +} + +vector> InMemoryDB::get_atom_documents(const vector& handles, + const vector& fields) { + // TODO: Implement in second phase + return {}; +} + +vector> InMemoryDB::get_node_documents(const vector& handles, + const vector& fields) { + // TODO: Implement in second phase + return {}; +} + +vector> InMemoryDB::get_link_documents(const vector& handles, + const vector& fields) { + // TODO: Implement in second phase + return {}; +} + +vector> InMemoryDB::get_matching_atoms(bool is_toplevel, Atom& key) { + // TODO: Implement in second phase + return {}; +} + +bool InMemoryDB::atom_exists(const string& handle) { + lock_guard lock(trie_mutex_); + return atoms_trie_->lookup(handle) != nullptr; +} + +bool InMemoryDB::node_exists(const string& handle) { + lock_guard lock(trie_mutex_); + auto trie_value = atoms_trie_->lookup(handle); + if (trie_value == nullptr) { + return false; + } + auto atom_trie_value = dynamic_cast(trie_value); + if (atom_trie_value == nullptr) { + return false; + } + Atom* atom = atom_trie_value->get_atom(); + return Atom::is_node(*atom); +} + +bool InMemoryDB::link_exists(const string& handle) { + lock_guard lock(trie_mutex_); + auto trie_value = atoms_trie_->lookup(handle); + if (trie_value == nullptr) { + return false; + } + auto atom_trie_value = dynamic_cast(trie_value); + if (atom_trie_value == nullptr) { + return false; + } + Atom* atom = atom_trie_value->get_atom(); + return Atom::is_link(*atom); +} + +set InMemoryDB::atoms_exist(const vector& handles) { + set existing; + lock_guard lock(trie_mutex_); + for (const auto& handle : handles) { + if (atoms_trie_->lookup(handle) != nullptr) { + existing.insert(handle); + } + } + return existing; +} + +set InMemoryDB::nodes_exist(const vector& handles) { + set existing; + lock_guard lock(trie_mutex_); + for (const auto& handle : handles) { + auto trie_value = 
atoms_trie_->lookup(handle); + if (trie_value != nullptr) { + auto atom_trie_value = dynamic_cast(trie_value); + if (atom_trie_value != nullptr) { + Atom* atom = atom_trie_value->get_atom(); + if (Atom::is_node(*atom)) { + existing.insert(handle); + } + } + } + } + return existing; +} + +set InMemoryDB::links_exist(const vector& handles) { + set existing; + lock_guard lock(trie_mutex_); + for (const auto& handle : handles) { + auto trie_value = atoms_trie_->lookup(handle); + if (trie_value != nullptr) { + auto atom_trie_value = dynamic_cast(trie_value); + if (atom_trie_value != nullptr) { + Atom* atom = atom_trie_value->get_atom(); + if (Atom::is_link(*atom)) { + existing.insert(handle); + } + } + } + } + return existing; +} + +string InMemoryDB::add_atom(const atoms::Atom* atom, bool throw_if_exists) { + if (atom->arity() == 0) { + return add_node(dynamic_cast(atom), throw_if_exists); + } else { + return add_link(dynamic_cast(atom), throw_if_exists); + } +} + +string InMemoryDB::add_node(const atoms::Node* node, bool throw_if_exists) { + string handle = node->handle(); + + if (throw_if_exists && node_exists(handle)) { + Utils::error("Node already exists: " + handle); + return ""; + } + + lock_guard lock(trie_mutex_); + + // Check if already exists + auto existing = atoms_trie_->lookup(handle); + if (existing != nullptr && !throw_if_exists) { + return handle; // Already exists, return handle + } + + // Clone the node to store in trie + Node* cloned_node = new Node(*node); + auto atom_trie_value = new AtomTrieValue(cloned_node); + atoms_trie_->insert(handle, atom_trie_value); + + return handle; +} + +string InMemoryDB::add_link(const atoms::Link* link, bool throw_if_exists) { + vector links = {const_cast(link)}; + auto handles = add_links(links, throw_if_exists, false); + return handles.empty() ? 
"" : handles[0]; +} + +vector InMemoryDB::add_atoms(const vector& atoms, + bool throw_if_exists, + bool is_transactional) { + if (atoms.empty()) { + return {}; + } + + vector nodes; + vector links; + for (const auto& atom : atoms) { + LOG_DEBUG("Adding atom: " + atom->to_string()); + if (atom->arity() == 0) { + nodes.push_back(dynamic_cast(atom)); + } else { + links.push_back(dynamic_cast(atom)); + } + } + auto node_handles = add_nodes(nodes, throw_if_exists, is_transactional); + auto link_handles = add_links(links, throw_if_exists, is_transactional); + + node_handles.insert(node_handles.end(), link_handles.begin(), link_handles.end()); + return node_handles; +} + +vector InMemoryDB::add_nodes(const vector& nodes, + bool throw_if_exists, + bool is_transactional) { + if (nodes.empty()) { + return {}; + } + + vector handles; + for (const auto& node : nodes) { + handles.push_back(node->handle()); + } + + if (throw_if_exists) { + auto existing_handles = this->nodes_exist(handles); + if (!existing_handles.empty()) { + vector existing_handles_vector(existing_handles.begin(), existing_handles.end()); + Utils::error("Failed to insert nodes, some nodes already exist: " + + Utils::join(existing_handles_vector, ',')); + return {}; + } + } + + lock_guard lock(trie_mutex_); + for (const auto& node : nodes) { + string handle = node->handle(); + auto existing = atoms_trie_->lookup(handle); + if (existing == nullptr || !throw_if_exists) { + if (existing == nullptr) { + Node* cloned_node = new Node(*node); + auto atom_trie_value = new AtomTrieValue(cloned_node); + atoms_trie_->insert(handle, atom_trie_value); + } + } + } + + return handles; +} + +vector InMemoryDB::add_links(const vector& links, + bool throw_if_exists, + bool is_transactional) { + if (links.empty()) { + return {}; + } + + if (throw_if_exists) { + vector handles; + for (const auto& link : links) { + handles.push_back(link->handle()); + } + auto existing_handles = this->links_exist(handles); + if 
(!existing_handles.empty()) { + vector existing_handles_vector(existing_handles.begin(), existing_handles.end()); + Utils::error("Failed to insert links, some links already exist: " + + Utils::join(existing_handles_vector, ',')); + return {}; + } + } + + vector handles; + lock_guard trie_lock(trie_mutex_); + lock_guard index_lock(index_mutex_); + + for (const auto& link : links) { + string link_handle = link->handle(); + handles.push_back(link_handle); + + // Check if already exists + auto existing = atoms_trie_->lookup(link_handle); + if (existing == nullptr || !throw_if_exists) { + if (existing == nullptr) { + // Clone the link to store in trie + Link* cloned_link = new Link(*link); + auto atom_trie_value = new AtomTrieValue(cloned_link); + atoms_trie_->insert(link_handle, atom_trie_value); + } + + // Update incoming sets for each target + for (const auto& target_handle : link->targets) { + add_incoming_set(target_handle, link_handle); + } + + // Index pattern + auto pattern_handles = match_pattern_index_schema(link); + for (const auto& pattern_handle : pattern_handles) { + add_pattern(pattern_handle, link_handle); + } + } + } + + return handles; +} + +bool InMemoryDB::delete_atom(const string& handle, bool delete_link_targets) { + if (delete_node(handle, delete_link_targets)) { + return true; + } + return delete_link(handle, delete_link_targets); +} + +bool InMemoryDB::delete_node(const string& handle, bool delete_link_targets) { + lock_guard trie_lock(trie_mutex_); + auto trie_value = atoms_trie_->lookup(handle); + if (trie_value == nullptr) { + return false; + } + auto atom_trie_value = dynamic_cast(trie_value); + if (atom_trie_value == nullptr) { + return false; + } + Atom* atom = atom_trie_value->get_atom(); + if (!Atom::is_node(*atom)) { + return false; + } + + // Check incoming set - if this node is referenced by links, handle accordingly + lock_guard index_lock(index_mutex_); + auto incoming_it = incoming_sets_.find(handle); + if (incoming_it != 
incoming_sets_.end() && !incoming_it->second.empty()) { + if (delete_link_targets) { + // Delete all links that reference this node + auto link_handles = incoming_it->second; + for (const auto& link_handle : link_handles) { + delete_link(link_handle, delete_link_targets); + } + } else { + // Cannot delete node that is referenced by links + return false; + } + } + + // Clear the value in the trie (set to nullptr) + this->clear_atom_value(handle); + incoming_sets_.erase(handle); + + return true; +} + +bool InMemoryDB::delete_link(const string& handle, bool delete_link_targets) { + lock_guard trie_lock(trie_mutex_); + auto trie_value = atoms_trie_->lookup(handle); + if (trie_value == nullptr) { + return false; + } + auto atom_trie_value = dynamic_cast(trie_value); + if (atom_trie_value == nullptr) { + return false; + } + Atom* atom = atom_trie_value->get_atom(); + if (!Atom::is_link(*atom)) { + return false; + } + + Link* link = dynamic_cast(atom); + vector targets = link->targets; + + lock_guard index_lock(index_mutex_); + + // Update incoming sets for each target + for (const auto& target_handle : targets) { + this->delete_incoming_set(target_handle, handle); + + if (delete_link_targets) { + // Check if target has other incoming links + auto incoming_it = incoming_sets_.find(target_handle); + if (incoming_it == incoming_sets_.end() || incoming_it->second.empty()) { + // No other references, delete the target + this->delete_atom(target_handle, delete_link_targets); + } + } + } + + // Remove from pattern index + auto pattern_handles = this->match_pattern_index_schema(link); + for (const auto& pattern_handle : pattern_handles) { + this->delete_pattern(pattern_handle, handle); + } + + // Clear the value in the trie (set to nullptr) + this->clear_atom_value(handle); + + return true; +} + +uint InMemoryDB::delete_atoms(const vector& handles, bool delete_link_targets) { + uint deleted_count = 0; + for (const auto& handle : handles) { + if (this->delete_atom(handle, 
delete_link_targets)) { + deleted_count++; + } + } + return deleted_count; +} + +uint InMemoryDB::delete_nodes(const vector& handles, bool delete_link_targets) { + uint deleted_count = 0; + for (const auto& handle : handles) { + if (this->delete_node(handle, delete_link_targets)) { + deleted_count++; + } + } + return deleted_count; +} + +uint InMemoryDB::delete_links(const vector& handles, bool delete_link_targets) { + uint deleted_count = 0; + for (const auto& handle : handles) { + if (this->delete_link(handle, delete_link_targets)) { + deleted_count++; + } + } + return deleted_count; +} + +void InMemoryDB::re_index_patterns(bool flush_patterns) { + lock_guard trie_lock(this->trie_mutex_); + lock_guard index_lock(this->index_mutex_); + + if (flush_patterns) { + this->pattern_index_.clear(); + } + + // Re-index all links + ReIndexData index_data; + index_data.db = this; + this->atoms_trie_->traverse(false, re_index_visitor, &index_data); +} + +// Helper methods +void InMemoryDB::add_pattern(const string& pattern_handle, const string& atom_handle) { + this->pattern_index_[pattern_handle].insert(atom_handle); +} + +void InMemoryDB::delete_pattern(const string& pattern_handle, const string& atom_handle) { + auto it = this->pattern_index_.find(pattern_handle); + if (it != this->pattern_index_.end()) { + it->second.erase(atom_handle); + if (it->second.empty()) { + this->pattern_index_.erase(it); + } + } +} + +void InMemoryDB::add_incoming_set(const string& target_handle, const string& link_handle) { + this->incoming_sets_[target_handle].insert(link_handle); +} + +void InMemoryDB::delete_incoming_set(const string& target_handle, const string& link_handle) { + auto it = this->incoming_sets_.find(target_handle); + if (it != this->incoming_sets_.end()) { + it->second.erase(link_handle); + if (it->second.empty()) { + this->incoming_sets_.erase(it); + } + } +} + +void InMemoryDB::update_incoming_set(const string& target_handle, const string& link_handle) { + 
this->delete_incoming_set(target_handle, link_handle); +} + +void InMemoryDB::clear_atom_value(const string& handle) { + // Use traverse to find the node with this handle and clear its value + // NOTE: This method assumes trie_mutex_ is already locked by the caller + ClearValueData clear_data; + clear_data.handle = handle; + clear_data.found = false; + + this->atoms_trie_->traverse(false, clear_value_visitor, &clear_data); +} + +void InMemoryDB::add_pattern_index_schema(const string& tokens, + const vector>& index_entries) { + auto tokens_vector = Utils::split(tokens, ' '); + LinkSchema link_schema(tokens_vector); + + this->pattern_index_schema_map[this->pattern_index_schema_next_priority] = + make_tuple(move(tokens_vector), index_entries); + this->pattern_index_schema_next_priority++; +} + +vector InMemoryDB::match_pattern_index_schema(const Link* link) { + vector pattern_handles; + auto local_map = this->pattern_index_schema_map; + + if (local_map.size() == 0) { + vector tokens = {"LINK_TEMPLATE", "Expression", to_string(link->arity())}; + for (unsigned int i = 0; i < link->arity(); i++) { + tokens.push_back("VARIABLE"); + tokens.push_back("v" + to_string(i + 1)); + } + + auto link_schema = LinkSchema(tokens); + auto index_entries = this->index_entries_combinations(link->arity()); + + local_map[1] = make_tuple(move(tokens), move(index_entries)); + } + + vector sorted_keys; + for (const auto& pair : local_map) { + sorted_keys.push_back(pair.first); + } + std::sort(sorted_keys.begin(), sorted_keys.end(), std::greater()); + + for (const auto& priority : sorted_keys) { + auto value = local_map[priority]; + auto link_schema = LinkSchema(get<0>(value)); + auto index_entries = get<1>(value); + Assignment assignment; + bool match = link_schema.match(*(Link*) link, assignment, *this); + if (match) { + for (const auto& index_entry : index_entries) { + size_t index = 0; + vector hash_entries; + for (const auto& token : index_entry) { + if (token == "_") { + 
hash_entries.push_back(link->targets[index]); + } else if (token == "*") { + hash_entries.push_back(Atom::WILDCARD_STRING); + } else { + string assignment_value = assignment.get(token); + if (assignment_value == "") { + Utils::error("LinkSchema assignments don't have variable: " + token); + } + hash_entries.push_back(assignment_value); + } + index++; + } + string hash = Hasher::link_handle(link->type, hash_entries); + pattern_handles.push_back(hash); + } + // We only need to find the first match + break; + } + } + return pattern_handles; +} + +// Combination of "vX" and "*" for a given arity +vector> InMemoryDB::index_entries_combinations(unsigned int arity) { + vector> index_entries; + unsigned int total = 1 << arity; // 2^arity + + for (unsigned int mask = 0; mask < total; ++mask) { + vector index_entry; + for (unsigned int i = 0; i < arity; ++i) { + if (mask & (1 << i)) + index_entry.push_back("*"); + else + index_entry.push_back("v" + to_string(i + 1)); + } + index_entries.push_back(index_entry); + } + + return index_entries; +} diff --git a/src/atomdb/inmemorydb/InMemoryDB.h b/src/atomdb/inmemorydb/InMemoryDB.h new file mode 100644 index 000000000..b96949044 --- /dev/null +++ b/src/atomdb/inmemorydb/InMemoryDB.h @@ -0,0 +1,112 @@ +#pragma once + +#include +#include +#include +#include +#include + +#include "AtomDB.h" +#include "HandleTrie.h" +#include "InmemoryDBAPITypes.h" +#include "LinkSchema.h" + +using namespace std; +using namespace commons; +using namespace atoms; + +namespace atomdb { + +#define INMEMORYDB_MAX_TRIE_SIZE 1000000000 + +class InMemoryDB : public AtomDB { + public: + InMemoryDB(const string& context = ""); + ~InMemoryDB(); + + bool allow_nested_indexing() override; + + shared_ptr get_atom(const string& handle) override; + + shared_ptr query_for_pattern(const LinkSchema& link_schema) override; + + shared_ptr query_for_targets(const string& handle) override; + + shared_ptr query_for_incoming_set(const string& handle) override; + + shared_ptr 
get_atom_document(const string& handle) override; + shared_ptr get_node_document(const string& handle) override; + shared_ptr get_link_document(const string& handle) override; + + vector> get_atom_documents( + const vector& handles, const vector& fields) override; + vector> get_node_documents( + const vector& handles, const vector& fields) override; + vector> get_link_documents( + const vector& handles, const vector& fields) override; + + vector> get_matching_atoms(bool is_toplevel, + Atom& key) override; + + bool atom_exists(const string& handle) override; + bool node_exists(const string& handle) override; + bool link_exists(const string& handle) override; + + set atoms_exist(const vector& handles) override; + set nodes_exist(const vector& handles) override; + set links_exist(const vector& handles) override; + + string add_atom(const atoms::Atom* atom, bool throw_if_exists = false) override; + string add_node(const atoms::Node* node, bool throw_if_exists = false) override; + string add_link(const atoms::Link* link, bool throw_if_exists = false) override; + + vector add_atoms(const vector& atoms, + bool throw_if_exists = false, + bool is_transactional = false) override; + vector add_nodes(const vector& nodes, + bool throw_if_exists = false, + bool is_transactional = false) override; + vector add_links(const vector& links, + bool throw_if_exists = false, + bool is_transactional = false) override; + + bool delete_atom(const string& handle, bool delete_link_targets = false) override; + bool delete_node(const string& handle, bool delete_link_targets = false) override; + bool delete_link(const string& handle, bool delete_link_targets = false) override; + + uint delete_atoms(const vector& handles, bool delete_link_targets = false) override; + uint delete_nodes(const vector& handles, bool delete_link_targets = false) override; + uint delete_links(const vector& handles, bool delete_link_targets = false) override; + + void re_index_patterns(bool flush_patterns = true) 
override; + + private: + string context_; + HandleTrie* atoms_trie_; // Stores handle -> Atom* + map> pattern_index_; // pattern_handle -> set of atom handles + map> incoming_sets_; // target_handle -> set of link handles that reference it + mutex trie_mutex_; + mutex index_mutex_; + + map, vector>>> pattern_index_schema_map; + int pattern_index_schema_next_priority{1}; + + // Helper methods + public: + void add_pattern(const string& pattern_handle, const string& atom_handle); + vector match_pattern_index_schema(const Link* link); + + private: + void delete_pattern(const string& pattern_handle, const string& atom_handle); + void add_incoming_set(const string& target_handle, const string& link_handle); + void delete_incoming_set(const string& target_handle, const string& link_handle); + void update_incoming_set(const string& target_handle, const string& link_handle); + + void add_pattern_index_schema(const string& tokens, const vector>& index_entries); + vector> index_entries_combinations(unsigned int arity); + + // Helper to clear a value in the trie (set to nullptr for deletion) + void clear_atom_value(const string& handle); +}; + +} // namespace atomdb diff --git a/src/atomdb/inmemorydb/InmemoryDBAPITypes.cc b/src/atomdb/inmemorydb/InmemoryDBAPITypes.cc new file mode 100644 index 000000000..3225b5ae5 --- /dev/null +++ b/src/atomdb/inmemorydb/InmemoryDBAPITypes.cc @@ -0,0 +1,112 @@ +#include "InmemoryDBAPITypes.h" + +#include + +#include "Utils.h" + +using namespace atomdb; +using namespace atomdb_api_types; +using namespace commons; +using namespace std; + +// HandleSetInMemory +HandleSetInMemory::HandleSetInMemory() : HandleSet() {} + +HandleSetInMemory::~HandleSetInMemory() {} + +unsigned int HandleSetInMemory::size() { return handles.size(); } + +void HandleSetInMemory::append(shared_ptr other) { + auto handle_set_inmemory = dynamic_pointer_cast(other); + if (handle_set_inmemory) { + for (const auto& handle : handle_set_inmemory->handles) { + 
handles.insert(handle); + } + // Merge metta expressions and assignments + for (const auto& [handle, exprs] : handle_set_inmemory->metta_expressions_by_handle) { + metta_expressions_by_handle[handle] = exprs; + } + for (const auto& [handle, assignment] : handle_set_inmemory->assignments_by_handle) { + assignments_by_handle[handle] = assignment; + } + } +} + +shared_ptr HandleSetInMemory::get_iterator() { + shared_ptr it(new HandleSetInMemoryIterator(this)); + return it; +} + +map HandleSetInMemory::get_metta_expressions_by_handle(const string& handle) { + auto it = metta_expressions_by_handle.find(handle); + if (it != metta_expressions_by_handle.end()) { + return it->second; + } + return {}; +} + +Assignment HandleSetInMemory::get_assignments_by_handle(const string& handle) { + auto it = assignments_by_handle.find(handle); + if (it != assignments_by_handle.end()) { + return it->second; + } + return Assignment(); +} + +void HandleSetInMemory::add_handle(const string& handle) { handles.insert(handle); } + +// HandleSetInMemoryIterator +HandleSetInMemoryIterator::HandleSetInMemoryIterator(HandleSetInMemory* handle_set) + : handle_set(handle_set), it(handle_set->handles.begin()) {} + +HandleSetInMemoryIterator::~HandleSetInMemoryIterator() { + for (auto ptr : allocated_strings) { + delete[] ptr; + } +} + +char* HandleSetInMemoryIterator::next() { + if (it == handle_set->handles.end()) { + return nullptr; + } + string handle = *it; + ++it; + char* handle_cstr = new char[handle.size() + 1]; + strcpy(handle_cstr, handle.c_str()); + allocated_strings.push_back(handle_cstr); + return handle_cstr; +} + +// HandleListInMemory +HandleListInMemory::HandleListInMemory() : HandleList() {} + +HandleListInMemory::HandleListInMemory(const vector& handles) : HandleList(), handles(handles) { + for (const auto& handle : handles) { + char* handle_cstr = new char[handle.size() + 1]; + strcpy(handle_cstr, handle.c_str()); + allocated_strings.push_back(handle_cstr); + } +} + 
+HandleListInMemory::~HandleListInMemory() { + for (auto ptr : allocated_strings) { + delete[] ptr; + } +} + +const char* HandleListInMemory::get_handle(unsigned int index) { + if (index >= handles.size()) { + Utils::error("Handle index out of bounds: " + to_string(index) + + " Answer handles size: " + to_string(handles.size())); + } + return allocated_strings[index]; +} + +unsigned int HandleListInMemory::size() { return handles.size(); } + +void HandleListInMemory::add_handle(const string& handle) { + handles.push_back(handle); + char* handle_cstr = new char[handle.size() + 1]; + strcpy(handle_cstr, handle.c_str()); + allocated_strings.push_back(handle_cstr); +} diff --git a/src/atomdb/inmemorydb/InmemoryDBAPITypes.h b/src/atomdb/inmemorydb/InmemoryDBAPITypes.h new file mode 100644 index 000000000..8899a64e3 --- /dev/null +++ b/src/atomdb/inmemorydb/InmemoryDBAPITypes.h @@ -0,0 +1,69 @@ +#pragma once + +#include +#include +#include +#include + +#include "Assignment.h" +#include "AtomDBAPITypes.h" + +using namespace std; +using namespace commons; + +namespace atomdb { +namespace atomdb_api_types { + +class HandleSetInMemory : public HandleSet { + friend class HandleSetInMemoryIterator; + + public: + HandleSetInMemory(); + ~HandleSetInMemory(); + + unsigned int size() override; + void append(shared_ptr other) override; + shared_ptr get_iterator() override; + + map get_metta_expressions_by_handle(const string& handle) override; + Assignment get_assignments_by_handle(const string& handle) override; + + void add_handle(const string& handle); + + private: + set handles; + map> metta_expressions_by_handle; + map assignments_by_handle; +}; + +class HandleSetInMemoryIterator : public HandleSetIterator { + public: + HandleSetInMemoryIterator(HandleSetInMemory* handle_set); + ~HandleSetInMemoryIterator(); + + char* next() override; + + private: + HandleSetInMemory* handle_set; + set::iterator it; + vector allocated_strings; +}; + +class HandleListInMemory : public 
HandleList { + public: + HandleListInMemory(); + HandleListInMemory(const vector& handles); + ~HandleListInMemory(); + + const char* get_handle(unsigned int index) override; + unsigned int size() override; + + void add_handle(const string& handle); + + private: + vector handles; + vector allocated_strings; +}; + +} // namespace atomdb_api_types +} // namespace atomdb diff --git a/src/tests/cpp/BUILD b/src/tests/cpp/BUILD index e59e3e951..8ad0c44b0 100644 --- a/src/tests/cpp/BUILD +++ b/src/tests/cpp/BUILD @@ -729,6 +729,22 @@ cc_test( ], ) +cc_test( + name = "inmemorydb_test", + size = "small", + srcs = ["inmemorydb_test.cc"], + copts = [ + "-Iexternal/gtest/googletest/include", + "-Iexternal/gtest/googletest", + ], + linkstatic = 1, + deps = [ + "//atomdb/inmemorydb:inmemorydb_lib", + "//commons/atoms:atoms_lib", + "@com_github_google_googletest//:gtest_main", + ], +) + cc_test( name = "profiling_test", size = "small", diff --git a/src/tests/cpp/inmemorydb_test.cc b/src/tests/cpp/inmemorydb_test.cc new file mode 100644 index 000000000..069769781 --- /dev/null +++ b/src/tests/cpp/inmemorydb_test.cc @@ -0,0 +1,271 @@ +#include "InMemoryDB.h" + +#include + +#include +#include + +#include "Link.h" +#include "LinkSchema.h" +#include "Node.h" + +using namespace atomdb; +using namespace atoms; +using namespace std; + +class InMemoryDBTest : public ::testing::Test { + protected: + void SetUp() override { db = make_shared("inmemorydb_test_"); } + + void TearDown() override {} + + shared_ptr db; +}; + +TEST_F(InMemoryDBTest, AddNodesAndLinks) { + auto human = new Node("Symbol", "\"human\""); + auto monkey = new Node("Symbol", "\"monkey\""); + auto chimp = new Node("Symbol", "\"chimp\""); + auto mammal = new Node("Symbol", "\"mammal\""); + auto similarity = new Node("Symbol", "Similarity"); + auto inheritance = new Node("Symbol", "Inheritance"); + + string human_handle = db->add_node(human, false); + string monkey_handle = db->add_node(monkey, false); + string chimp_handle = 
db->add_node(chimp, false); + string mammal_handle = db->add_node(mammal, false); + string similarity_handle = db->add_node(similarity, false); + string inheritance_handle = db->add_node(inheritance, false); + + // Verify nodes were added + EXPECT_TRUE(db->node_exists(human_handle)); + EXPECT_TRUE(db->node_exists(monkey_handle)); + EXPECT_TRUE(db->node_exists(chimp_handle)); + EXPECT_TRUE(db->node_exists(mammal_handle)); + EXPECT_TRUE(db->node_exists(similarity_handle)); + EXPECT_TRUE(db->node_exists(inheritance_handle)); + + auto link1 = new Link("Expression", {similarity_handle, human_handle, monkey_handle}); + auto link2 = new Link("Expression", {similarity_handle, human_handle, chimp_handle}); + auto link3 = new Link("Expression", {inheritance_handle, human_handle, mammal_handle}); + auto link4 = new Link("Expression", {inheritance_handle, monkey_handle, mammal_handle}); + auto link5 = new Link("Expression", {inheritance_handle, chimp_handle, mammal_handle}); + + string link1_handle = db->add_link(link1, false); + string link2_handle = db->add_link(link2, false); + string link3_handle = db->add_link(link3, false); + string link4_handle = db->add_link(link4, false); + string link5_handle = db->add_link(link5, false); + + // Verify links were added + EXPECT_TRUE(db->link_exists(link1_handle)); + EXPECT_TRUE(db->link_exists(link2_handle)); + EXPECT_TRUE(db->link_exists(link3_handle)); + EXPECT_TRUE(db->link_exists(link4_handle)); + EXPECT_TRUE(db->link_exists(link5_handle)); + + // Verify we can retrieve atoms + auto retrieved_human = db->get_atom(human_handle); + EXPECT_EQ(retrieved_human->handle(), human_handle); + + auto retrieved_link1 = db->get_atom(link1_handle); + EXPECT_EQ(retrieved_link1->handle(), link1_handle); +} + +TEST_F(InMemoryDBTest, QueryForPattern) { + auto human = new Node("Symbol", "\"human\""); + auto monkey = new Node("Symbol", "\"monkey\""); + auto chimp = new Node("Symbol", "\"chimp\""); + auto mammal = new Node("Symbol", "\"mammal\""); + 
auto inheritance = new Node("Symbol", "Inheritance"); + + string human_handle = db->add_node(human, false); + string monkey_handle = db->add_node(monkey, false); + string chimp_handle = db->add_node(chimp, false); + string mammal_handle = db->add_node(mammal, false); + string inheritance_handle = db->add_node(inheritance, false); + + auto link1 = new Link("Expression", {inheritance_handle, human_handle, mammal_handle}); + auto link2 = new Link("Expression", {inheritance_handle, monkey_handle, mammal_handle}); + auto link3 = new Link("Expression", {inheritance_handle, chimp_handle, mammal_handle}); + + string link1_handle = db->add_link(link1, false); + string link2_handle = db->add_link(link2, false); + string link3_handle = db->add_link(link3, false); + + // Re-index patterns to ensure re_index works + db->re_index_patterns(true); + + LinkSchema link_schema({"LINK_TEMPLATE", + "Expression", + "3", + "NODE", + "Symbol", + "Inheritance", + "VARIABLE", + "x", + "NODE", + "Symbol", + "\"mammal\""}); + + auto result = db->query_for_pattern(link_schema); + EXPECT_EQ(result->size(), 3); + + // Verify we got the expected handles + auto it = result->get_iterator(); + char* handle; + vector handles; + while ((handle = it->next()) != nullptr) { + handles.push_back(handle); + } + + // Check that all three links are in the result + EXPECT_TRUE(find(handles.begin(), handles.end(), link1_handle) != handles.end()); + EXPECT_TRUE(find(handles.begin(), handles.end(), link2_handle) != handles.end()); + EXPECT_TRUE(find(handles.begin(), handles.end(), link3_handle) != handles.end()); +} + +TEST_F(InMemoryDBTest, QueryForPatternWithSpecificMatch) { + auto human = new Node("Symbol", "\"human\""); + auto monkey = new Node("Symbol", "\"monkey\""); + auto similarity = new Node("Symbol", "Similarity"); + + string human_handle = db->add_node(human, false); + string monkey_handle = db->add_node(monkey, false); + string similarity_handle = db->add_node(similarity, false); + + auto link1 = new 
Link("Expression", {similarity_handle, human_handle, monkey_handle}); + + string link1_handle = db->add_link(link1, false); + + LinkSchema link_schema({"LINK_TEMPLATE", + "Expression", + "3", + "NODE", + "Symbol", + "Similarity", + "NODE", + "Symbol", + "\"human\"", + "VARIABLE", + "x"}); + + auto result = db->query_for_pattern(link_schema); + + EXPECT_EQ(result->size(), 1); + + auto it = result->get_iterator(); + char* handle = it->next(); + EXPECT_EQ(string(handle), link1_handle); +} + +TEST_F(InMemoryDBTest, QueryForPatternNoMatches) { + LinkSchema link_schema({"LINK_TEMPLATE", + "Expression", + "3", + "NODE", + "Symbol", + "NonExistent", + "VARIABLE", + "x", + "VARIABLE", + "y"}); + + auto result = db->query_for_pattern(link_schema); + EXPECT_EQ(result->size(), 0); +} + +TEST_F(InMemoryDBTest, QueryForTargets) { + auto node1 = new Node("Symbol", "Node1"); + auto node2 = new Node("Symbol", "Node2"); + auto node3 = new Node("Symbol", "Node3"); + auto similarity = new Node("Symbol", "Similarity"); + + string node1_handle = db->add_node(node1, false); + string node2_handle = db->add_node(node2, false); + string node3_handle = db->add_node(node3, false); + string similarity_handle = db->add_node(similarity, false); + + auto node_targets = db->query_for_targets(node1_handle); + EXPECT_EQ(node_targets, nullptr); + + auto link1 = new Link("Expression", {similarity_handle, node1_handle, node2_handle, node3_handle}); + string link1_handle = db->add_link(link1, false); + + auto link1_targets = db->query_for_targets(link1_handle); + EXPECT_EQ(link1_targets->size(), 4); + EXPECT_EQ(string(link1_targets->get_handle(0)), similarity_handle); + EXPECT_EQ(string(link1_targets->get_handle(1)), node1_handle); + EXPECT_EQ(string(link1_targets->get_handle(2)), node2_handle); + EXPECT_EQ(string(link1_targets->get_handle(3)), node3_handle); +} + +TEST_F(InMemoryDBTest, QueryForTargetsNonExistent) { + string non_existent_handle = "00000000000000000000000000000000"; + auto targets = 
db->query_for_targets(non_existent_handle); + EXPECT_EQ(targets, nullptr); +} + +TEST_F(InMemoryDBTest, QueryForTargetsMultipleLinks) { + auto human = new Node("Symbol", "\"human\""); + auto monkey = new Node("Symbol", "\"monkey\""); + auto chimp = new Node("Symbol", "\"chimp\""); + auto similarity = new Node("Symbol", "Similarity"); + + string human_handle = db->add_node(human, false); + string monkey_handle = db->add_node(monkey, false); + string chimp_handle = db->add_node(chimp, false); + string similarity_handle = db->add_node(similarity, false); + + auto link1 = new Link("Expression", {similarity_handle, human_handle, monkey_handle}); + auto link2 = new Link("Expression", {similarity_handle, human_handle, chimp_handle}); + auto link3 = new Link("Expression", {similarity_handle, monkey_handle, chimp_handle}); + + string link1_handle = db->add_link(link1, false); + string link2_handle = db->add_link(link2, false); + string link3_handle = db->add_link(link3, false); + + auto link1_targets = db->query_for_targets(link1_handle); + EXPECT_EQ(link1_targets->size(), 3); + EXPECT_EQ(string(link1_targets->get_handle(0)), similarity_handle); + EXPECT_EQ(string(link1_targets->get_handle(1)), human_handle); + EXPECT_EQ(string(link1_targets->get_handle(2)), monkey_handle); + + auto link2_targets = db->query_for_targets(link2_handle); + EXPECT_EQ(link2_targets->size(), 3); + EXPECT_EQ(string(link2_targets->get_handle(0)), similarity_handle); + EXPECT_EQ(string(link2_targets->get_handle(1)), human_handle); + EXPECT_EQ(string(link2_targets->get_handle(2)), chimp_handle); + + auto link3_targets = db->query_for_targets(link3_handle); + EXPECT_EQ(link3_targets->size(), 3); + EXPECT_EQ(string(link3_targets->get_handle(0)), similarity_handle); + EXPECT_EQ(string(link3_targets->get_handle(1)), monkey_handle); + EXPECT_EQ(string(link3_targets->get_handle(2)), chimp_handle); +} + +TEST_F(InMemoryDBTest, QueryForTargetsAfterDeletion) { + auto node1 = new Node("Symbol", "Node1"); + 
auto node2 = new Node("Symbol", "Node2"); + auto similarity = new Node("Symbol", "Similarity"); + + string node1_handle = db->add_node(node1, false); + string node2_handle = db->add_node(node2, false); + string similarity_handle = db->add_node(similarity, false); + + auto link1 = new Link("Expression", {similarity_handle, node1_handle, node2_handle}); + string link1_handle = db->add_link(link1, false); + + auto targets = db->query_for_targets(link1_handle); + EXPECT_EQ(targets->size(), 3); + + db->delete_link(link1_handle, false); + + targets = db->query_for_targets(link1_handle); + EXPECT_EQ(targets, nullptr); +} + +int main(int argc, char** argv) { + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} From 40b06cf67aad33437ac65713c6426f832fb9131d Mon Sep 17 00:00:00 2001 From: Artur Gontijo Date: Mon, 26 Jan 2026 18:21:42 -0300 Subject: [PATCH 2/6] Implement HandleTrie::remove() --- src/atomdb/inmemorydb/InMemoryDB.cc | 38 ++--------------------------- src/atomdb/inmemorydb/InMemoryDB.h | 3 --- src/commons/HandleTrie.cc | 27 ++++++++++++++------ src/commons/HandleTrie.h | 19 +++++++++++++++ 4 files changed, 41 insertions(+), 46 deletions(-) diff --git a/src/atomdb/inmemorydb/InMemoryDB.cc b/src/atomdb/inmemorydb/InMemoryDB.cc index 0861abac8..2f81f1835 100644 --- a/src/atomdb/inmemorydb/InMemoryDB.cc +++ b/src/atomdb/inmemorydb/InMemoryDB.cc @@ -47,11 +47,6 @@ struct ReIndexData { InMemoryDB* db; }; -struct ClearValueData { - string handle; - bool found; -}; - bool re_index_visitor(HandleTrie::TrieNode* node, void* data) { ReIndexData* index_data = static_cast(data); if (node->value != nullptr) { @@ -71,25 +66,6 @@ bool re_index_visitor(HandleTrie::TrieNode* node, void* data) { } return false; // Continue traversal } - -bool clear_value_visitor(HandleTrie::TrieNode* node, void* data) { - ClearValueData* clear_data = static_cast(data); - if (node->value != nullptr) { - auto atom_trie_value = dynamic_cast(node->value); - if (atom_trie_value != 
nullptr) { - Atom* atom = atom_trie_value->get_atom(); - // Compare handles to find the right node - if (atom->handle() == clear_data->handle) { - // Found it! Delete the value and set to nullptr - delete node->value; - node->value = nullptr; - clear_data->found = true; - return true; // Stop traversal - } - } - } - return false; // Continue traversal -} } // namespace InMemoryDB::InMemoryDB(const string& context) @@ -509,7 +485,7 @@ bool InMemoryDB::delete_node(const string& handle, bool delete_link_targets) { } // Clear the value in the trie (set to nullptr) - this->clear_atom_value(handle); + this->atoms_trie_->remove(handle); incoming_sets_.erase(handle); return true; @@ -556,7 +532,7 @@ bool InMemoryDB::delete_link(const string& handle, bool delete_link_targets) { } // Clear the value in the trie (set to nullptr) - this->clear_atom_value(handle); + this->atoms_trie_->remove(handle); return true; } @@ -638,16 +614,6 @@ void InMemoryDB::update_incoming_set(const string& target_handle, const string& this->delete_incoming_set(target_handle, link_handle); } -void InMemoryDB::clear_atom_value(const string& handle) { - // Use traverse to find the node with this handle and clear its value - // NOTE: This method assumes trie_mutex_ is already locked by the caller - ClearValueData clear_data; - clear_data.handle = handle; - clear_data.found = false; - - this->atoms_trie_->traverse(false, clear_value_visitor, &clear_data); -} - void InMemoryDB::add_pattern_index_schema(const string& tokens, const vector>& index_entries) { auto tokens_vector = Utils::split(tokens, ' '); diff --git a/src/atomdb/inmemorydb/InMemoryDB.h b/src/atomdb/inmemorydb/InMemoryDB.h index b96949044..1d8b2c71e 100644 --- a/src/atomdb/inmemorydb/InMemoryDB.h +++ b/src/atomdb/inmemorydb/InMemoryDB.h @@ -104,9 +104,6 @@ class InMemoryDB : public AtomDB { void add_pattern_index_schema(const string& tokens, const vector>& index_entries); vector> index_entries_combinations(unsigned int arity); - - // Helper 
to clear a value in the trie (set to nullptr for deletion) - void clear_atom_value(const string& handle); }; } // namespace atomdb diff --git a/src/commons/HandleTrie.cc b/src/commons/HandleTrie.cc index 6a497015e..ef40925b2 100644 --- a/src/commons/HandleTrie.cc +++ b/src/commons/HandleTrie.cc @@ -161,12 +161,16 @@ HandleTrie::TrieValue* HandleTrie::insert(const string& key, TrieValue* value) { } HandleTrie::TrieValue* HandleTrie::lookup(const string& key) { + TrieNode* node = lookup_node(key); + return node != NULL ? node->value : NULL; +} + +HandleTrie::TrieNode* HandleTrie::lookup_node(const string& key) { if (key.size() != key_size) { Utils::error("Invalid key size: " + to_string(key.size()) + " != " + to_string(key_size)); } TrieNode* tree_cursor = root; - TrieValue* value; unsigned char key_cursor = 0; tree_cursor->trie_node_mutex.lock(); while (tree_cursor != NULL) { @@ -179,13 +183,9 @@ HandleTrie::TrieValue* HandleTrie::lookup(const string& key) { break; } } - if (match) { - value = tree_cursor->value; - } else { - value = NULL; - } + TrieNode* node = match ? 
tree_cursor : NULL; tree_cursor->trie_node_mutex.unlock(); - return value; + return node; } else { unsigned char c = TLB[(unsigned char) key[key_cursor]]; TrieNode* child = tree_cursor->children[c]; @@ -200,6 +200,19 @@ HandleTrie::TrieValue* HandleTrie::lookup(const string& key) { return NULL; } +bool HandleTrie::remove(const string& key) { + TrieNode* node = lookup_node(key); + if (node == NULL || node->value == NULL) { + return false; + } + node->trie_node_mutex.lock(); + delete node->value; + node->value = NULL; + this->size--; + node->trie_node_mutex.unlock(); + return true; +} + void HandleTrie::traverse(bool keep_root_locked, bool (*visit_function)(TrieNode* node, void* data), void* data) { diff --git a/src/commons/HandleTrie.h b/src/commons/HandleTrie.h index b0f797b64..bb66acbb0 100644 --- a/src/commons/HandleTrie.h +++ b/src/commons/HandleTrie.h @@ -74,6 +74,15 @@ class HandleTrie { */ TrieValue* lookup(const string& key); + /** + * Remove a key from this HandleTrie and its associated value. + * + * @param key Handle being removed. + * + * @return true if the key was found and removed, false otherwise. + */ + bool remove(const string& key); + /** * Traverse all keys (in-order) calling the passed visit_function once per stored value. * @@ -88,6 +97,16 @@ class HandleTrie { unsigned int size; private: + /** + * Lookup for a node containing a given handle. + * Similar to lookup() but returns the node pointer instead of the value. + * + * @param key Handle being searched. + * + * @return The HandleTrie::TrieNode containing the key or NULL if none. + */ + TrieNode* lookup_node(const string& key); + static unsigned char TLB[256]; static bool TLB_INITIALIZED; static void TLB_INIT() { From 0b77a020ac1109c366fa73e1e6f3004d9df91b61 Mon Sep 17 00:00:00 2001 From: Artur Gontijo Date: Tue, 27 Jan 2026 20:15:43 -0300 Subject: [PATCH 3/6] Use HandleTrie for patterns and incoming_set too. 
--- src/atomdb/inmemorydb/InMemoryDB.cc | 287 ++++++++++++++------ src/atomdb/inmemorydb/InMemoryDB.h | 8 +- src/atomdb/inmemorydb/InmemoryDBAPITypes.cc | 2 +- src/tests/cpp/inmemorydb_test.cc | 248 +++++++++++++++++ 4 files changed, 458 insertions(+), 87 deletions(-) diff --git a/src/atomdb/inmemorydb/InMemoryDB.cc b/src/atomdb/inmemorydb/InMemoryDB.cc index 2f81f1835..e0b84e1a9 100644 --- a/src/atomdb/inmemorydb/InMemoryDB.cc +++ b/src/atomdb/inmemorydb/InMemoryDB.cc @@ -4,7 +4,7 @@ #include #include "Hasher.h" -#include "InmemoryDBAPITypes.h" +#include "InMemoryDBAPITypes.h" #include "Link.h" #include "LinkSchema.h" #include "Node.h" @@ -35,6 +35,27 @@ class AtomTrieValue : public HandleTrie::TrieValue { Atom* atom_; }; +// Helper class to store sets of atom handles in HandleTrie for pattern indexing +class PatternTrieValue : public HandleTrie::TrieValue { + public: + PatternTrieValue() {} + ~PatternTrieValue() override {} + void merge(HandleTrie::TrieValue* other) override { + // Merge sets when the same pattern handle is inserted multiple times + PatternTrieValue* other_value = dynamic_cast(other); + if (other_value != nullptr) { + atom_handles_.insert(other_value->atom_handles_.begin(), other_value->atom_handles_.end()); + } + } + void add_handle(const string& handle) { atom_handles_.insert(handle); } + void remove_handle(const string& handle) { atom_handles_.erase(handle); } + const set& get_handles() const { return atom_handles_; } + bool empty() const { return atom_handles_.empty(); } + + private: + set atom_handles_; +}; + // Helper functions and data structures for traverse callbacks namespace { struct QueryPatternData { @@ -69,7 +90,10 @@ bool re_index_visitor(HandleTrie::TrieNode* node, void* data) { } // namespace InMemoryDB::InMemoryDB(const string& context) - : context_(context), atoms_trie_(new HandleTrie(HANDLE_HASH_SIZE - 1)) {} + : context_(context), + atoms_trie_(new HandleTrie(HANDLE_HASH_SIZE - 1)), + pattern_index_trie_(new 
HandleTrie(HANDLE_HASH_SIZE - 1)), + incoming_sets_trie_(new HandleTrie(HANDLE_HASH_SIZE - 1)) {} InMemoryDB::~InMemoryDB() { // Traverse and delete all atoms @@ -84,6 +108,32 @@ InMemoryDB::~InMemoryDB() { }, nullptr); delete this->atoms_trie_; + + // Traverse and delete all pattern index entries + this->pattern_index_trie_->traverse( + false, + [](HandleTrie::TrieNode* node, void* data) -> bool { + if (node->value != nullptr) { + delete node->value; + node->value = nullptr; + } + return false; // Continue traversal + }, + nullptr); + delete this->pattern_index_trie_; + + // Traverse and delete all incoming set entries + this->incoming_sets_trie_->traverse( + false, + [](HandleTrie::TrieNode* node, void* data) -> bool { + if (node->value != nullptr) { + delete node->value; + node->value = nullptr; + } + return false; // Continue traversal + }, + nullptr); + delete this->incoming_sets_trie_; } bool InMemoryDB::allow_nested_indexing() { return false; } @@ -116,10 +166,11 @@ shared_ptr InMemoryDB::query_for_pattern(const LinkSchema& link_schem auto pattern_handle = local_schema.handle(); auto handle_set = make_shared(); - // Check if we have this pattern indexed - auto it = pattern_index_.find(pattern_handle); - if (it != pattern_index_.end()) { - for (const auto& handle : it->second) { + // Check if we have this pattern indexed in the HandleTrie + auto pattern_trie_value = + dynamic_cast(pattern_index_trie_->lookup(pattern_handle)); + if (pattern_trie_value != nullptr) { + for (const auto& handle : pattern_trie_value->get_handles()) { // Verify the atom still exists and matches the schema lock_guard trie_lock(trie_mutex_); auto trie_value = atoms_trie_->lookup(handle); @@ -163,9 +214,10 @@ shared_ptr InMemoryDB::query_for_targets(const string& handle) { shared_ptr InMemoryDB::query_for_incoming_set(const string& handle) { lock_guard lock(index_mutex_); auto handle_set = make_shared(); - auto it = incoming_sets_.find(handle); - if (it != incoming_sets_.end()) { - for 
(const auto& link_handle : it->second) { + auto incoming_set_trie_value = + dynamic_cast(this->incoming_sets_trie_->lookup(handle)); + if (incoming_set_trie_value != nullptr) { + for (const auto& link_handle : incoming_set_trie_value->get_handles()) { handle_set->add_handle(link_handle); } } @@ -454,85 +506,120 @@ bool InMemoryDB::delete_atom(const string& handle, bool delete_link_targets) { } bool InMemoryDB::delete_node(const string& handle, bool delete_link_targets) { - lock_guard trie_lock(trie_mutex_); - auto trie_value = atoms_trie_->lookup(handle); - if (trie_value == nullptr) { - return false; - } - auto atom_trie_value = dynamic_cast(trie_value); - if (atom_trie_value == nullptr) { - return false; - } - Atom* atom = atom_trie_value->get_atom(); - if (!Atom::is_node(*atom)) { - return false; - } + vector link_handles_to_delete; - // Check incoming set - if this node is referenced by links, handle accordingly - lock_guard index_lock(index_mutex_); - auto incoming_it = incoming_sets_.find(handle); - if (incoming_it != incoming_sets_.end() && !incoming_it->second.empty()) { - if (delete_link_targets) { - // Delete all links that reference this node - auto link_handles = incoming_it->second; - for (const auto& link_handle : link_handles) { - delete_link(link_handle, delete_link_targets); - } - } else { - // Cannot delete node that is referenced by links + { + lock_guard trie_lock(trie_mutex_); + auto trie_value = atoms_trie_->lookup(handle); + if (trie_value == nullptr) { + return false; + } + auto atom_trie_value = dynamic_cast(trie_value); + if (atom_trie_value == nullptr) { return false; } + Atom* atom = atom_trie_value->get_atom(); + if (!Atom::is_node(*atom)) { + return false; + } + + // Check incoming set - if this node is referenced by links, handle accordingly + lock_guard index_lock(index_mutex_); + auto incoming_set_trie_value = + dynamic_cast(this->incoming_sets_trie_->lookup(handle)); + if (incoming_set_trie_value != nullptr && 
!incoming_set_trie_value->empty()) { + if (delete_link_targets) { + // Collect all links that reference this node (copy the handles while holding the lock) + link_handles_to_delete = vector(incoming_set_trie_value->get_handles().begin(), + incoming_set_trie_value->get_handles().end()); + } else { + // Cannot delete node that is referenced by links + return false; + } + } + } + + // Release locks before calling delete_link to avoid deadlock + // Delete all links that reference this node + for (const auto& link_handle : link_handles_to_delete) { + delete_link(link_handle, delete_link_targets); } - // Clear the value in the trie (set to nullptr) - this->atoms_trie_->remove(handle); - incoming_sets_.erase(handle); + // Now delete the node itself + { + lock_guard trie_lock(trie_mutex_); + lock_guard index_lock(index_mutex_); + + // Verify the node still exists (it might have been deleted by delete_link if it was a target) + auto trie_value = atoms_trie_->lookup(handle); + if (trie_value == nullptr) { + return true; // Already deleted + } + + // Clear the value in the trie (set to nullptr) + this->atoms_trie_->remove(handle); + this->incoming_sets_trie_->remove(handle); + } return true; } bool InMemoryDB::delete_link(const string& handle, bool delete_link_targets) { - lock_guard trie_lock(trie_mutex_); - auto trie_value = atoms_trie_->lookup(handle); - if (trie_value == nullptr) { - return false; - } - auto atom_trie_value = dynamic_cast(trie_value); - if (atom_trie_value == nullptr) { - return false; - } - Atom* atom = atom_trie_value->get_atom(); - if (!Atom::is_link(*atom)) { - return false; - } + Link* link = nullptr; + vector targets; + vector pattern_handles; + vector targets_to_delete; - Link* link = dynamic_cast(atom); - vector targets = link->targets; + { + lock_guard trie_lock(trie_mutex_); + auto trie_value = atoms_trie_->lookup(handle); + if (trie_value == nullptr) { + return false; + } + auto atom_trie_value = dynamic_cast(trie_value); + if (atom_trie_value 
== nullptr) { + return false; + } + Atom* atom = atom_trie_value->get_atom(); + if (!Atom::is_link(*atom)) { + return false; + } - lock_guard index_lock(index_mutex_); + link = dynamic_cast(atom); + targets = link->targets; - // Update incoming sets for each target - for (const auto& target_handle : targets) { - this->delete_incoming_set(target_handle, handle); + lock_guard index_lock(index_mutex_); - if (delete_link_targets) { - // Check if target has other incoming links - auto incoming_it = incoming_sets_.find(target_handle); - if (incoming_it == incoming_sets_.end() || incoming_it->second.empty()) { - // No other references, delete the target - this->delete_atom(target_handle, delete_link_targets); + // Update incoming sets for each target + for (const auto& target_handle : targets) { + this->delete_incoming_set(target_handle, handle); + + if (delete_link_targets) { + // Check if target has other incoming links + auto incoming_set_trie_value = + dynamic_cast(this->incoming_sets_trie_->lookup(target_handle)); + if (incoming_set_trie_value == nullptr || incoming_set_trie_value->empty()) { + // No other references, mark for deletion + targets_to_delete.push_back(target_handle); + } } } - } - // Remove from pattern index - auto pattern_handles = this->match_pattern_index_schema(link); - for (const auto& pattern_handle : pattern_handles) { - this->delete_pattern(pattern_handle, handle); + // Remove from pattern index + pattern_handles = this->match_pattern_index_schema(link); + for (const auto& pattern_handle : pattern_handles) { + this->delete_pattern(pattern_handle, handle); + } + + // Clear the value in the trie (set to nullptr) + this->atoms_trie_->remove(handle); } - // Clear the value in the trie (set to nullptr) - this->atoms_trie_->remove(handle); + // Release locks before calling delete_atom to avoid deadlock + // Delete targets that have no other incoming links + for (const auto& target_handle : targets_to_delete) { + this->delete_atom(target_handle, 
delete_link_targets); + } return true; } @@ -572,7 +659,19 @@ void InMemoryDB::re_index_patterns(bool flush_patterns) { lock_guard index_lock(this->index_mutex_); if (flush_patterns) { - this->pattern_index_.clear(); + // Clear all pattern index entries by deleting and recreating the trie + this->pattern_index_trie_->traverse( + false, + [](HandleTrie::TrieNode* node, void* data) -> bool { + if (node->value != nullptr) { + delete node->value; + node->value = nullptr; + } + return false; // Continue traversal + }, + nullptr); + delete this->pattern_index_trie_; + this->pattern_index_trie_ = new HandleTrie(HANDLE_HASH_SIZE - 1); } // Re-index all links @@ -583,29 +682,53 @@ void InMemoryDB::re_index_patterns(bool flush_patterns) { // Helper methods void InMemoryDB::add_pattern(const string& pattern_handle, const string& atom_handle) { - this->pattern_index_[pattern_handle].insert(atom_handle); + auto pattern_trie_value = + dynamic_cast(this->pattern_index_trie_->lookup(pattern_handle)); + if (pattern_trie_value == nullptr) { + // Create new PatternTrieValue + pattern_trie_value = new PatternTrieValue(); + pattern_trie_value->add_handle(atom_handle); + this->pattern_index_trie_->insert(pattern_handle, pattern_trie_value); + } else { + // Add to existing set + pattern_trie_value->add_handle(atom_handle); + } } void InMemoryDB::delete_pattern(const string& pattern_handle, const string& atom_handle) { - auto it = this->pattern_index_.find(pattern_handle); - if (it != this->pattern_index_.end()) { - it->second.erase(atom_handle); - if (it->second.empty()) { - this->pattern_index_.erase(it); + auto pattern_trie_value = + dynamic_cast(this->pattern_index_trie_->lookup(pattern_handle)); + if (pattern_trie_value != nullptr) { + pattern_trie_value->remove_handle(atom_handle); + if (pattern_trie_value->empty()) { + // Remove the pattern entry from the trie + this->pattern_index_trie_->remove(pattern_handle); } } } void InMemoryDB::add_incoming_set(const string& target_handle, 
const string& link_handle) { - this->incoming_sets_[target_handle].insert(link_handle); + auto incoming_set_trie_value = + dynamic_cast(this->incoming_sets_trie_->lookup(target_handle)); + if (incoming_set_trie_value == nullptr) { + // Create new PatternTrieValue + incoming_set_trie_value = new PatternTrieValue(); + incoming_set_trie_value->add_handle(link_handle); + this->incoming_sets_trie_->insert(target_handle, incoming_set_trie_value); + } else { + // Add to existing set + incoming_set_trie_value->add_handle(link_handle); + } } void InMemoryDB::delete_incoming_set(const string& target_handle, const string& link_handle) { - auto it = this->incoming_sets_.find(target_handle); - if (it != this->incoming_sets_.end()) { - it->second.erase(link_handle); - if (it->second.empty()) { - this->incoming_sets_.erase(it); + auto incoming_set_trie_value = + dynamic_cast(this->incoming_sets_trie_->lookup(target_handle)); + if (incoming_set_trie_value != nullptr) { + incoming_set_trie_value->remove_handle(link_handle); + if (incoming_set_trie_value->empty()) { + // Remove the incoming set entry from the trie + this->incoming_sets_trie_->remove(target_handle); } } } diff --git a/src/atomdb/inmemorydb/InMemoryDB.h b/src/atomdb/inmemorydb/InMemoryDB.h index 1d8b2c71e..51af7f11d 100644 --- a/src/atomdb/inmemorydb/InMemoryDB.h +++ b/src/atomdb/inmemorydb/InMemoryDB.h @@ -8,7 +8,7 @@ #include "AtomDB.h" #include "HandleTrie.h" -#include "InmemoryDBAPITypes.h" +#include "InMemoryDBAPITypes.h" #include "LinkSchema.h" using namespace std; @@ -82,9 +82,9 @@ class InMemoryDB : public AtomDB { private: string context_; - HandleTrie* atoms_trie_; // Stores handle -> Atom* - map> pattern_index_; // pattern_handle -> set of atom handles - map> incoming_sets_; // target_handle -> set of link handles that reference it + HandleTrie* atoms_trie_; // Stores handle -> Atom* + HandleTrie* pattern_index_trie_; // Stores pattern_handle -> set of atom handles + HandleTrie* incoming_sets_trie_; // 
Stores target_handle -> set of link handles that reference it mutex trie_mutex_; mutex index_mutex_; diff --git a/src/atomdb/inmemorydb/InmemoryDBAPITypes.cc b/src/atomdb/inmemorydb/InmemoryDBAPITypes.cc index 3225b5ae5..681bbcbe3 100644 --- a/src/atomdb/inmemorydb/InmemoryDBAPITypes.cc +++ b/src/atomdb/inmemorydb/InmemoryDBAPITypes.cc @@ -1,4 +1,4 @@ -#include "InmemoryDBAPITypes.h" +#include "InMemoryDBAPITypes.h" #include diff --git a/src/tests/cpp/inmemorydb_test.cc b/src/tests/cpp/inmemorydb_test.cc index 069769781..f71851a28 100644 --- a/src/tests/cpp/inmemorydb_test.cc +++ b/src/tests/cpp/inmemorydb_test.cc @@ -265,6 +265,254 @@ TEST_F(InMemoryDBTest, QueryForTargetsAfterDeletion) { EXPECT_EQ(targets, nullptr); } +TEST_F(InMemoryDBTest, QueryForIncomingSet) { + auto human = new Node("Symbol", "\"human\""); + auto monkey = new Node("Symbol", "\"monkey\""); + auto chimp = new Node("Symbol", "\"chimp\""); + auto mammal = new Node("Symbol", "\"mammal\""); + auto similarity = new Node("Symbol", "Similarity"); + auto inheritance = new Node("Symbol", "Inheritance"); + + string human_handle = db->add_node(human, false); + string monkey_handle = db->add_node(monkey, false); + string chimp_handle = db->add_node(chimp, false); + string mammal_handle = db->add_node(mammal, false); + string similarity_handle = db->add_node(similarity, false); + string inheritance_handle = db->add_node(inheritance, false); + + // Create links that reference human + auto link1 = new Link("Expression", {similarity_handle, human_handle, monkey_handle}); + auto link2 = new Link("Expression", {similarity_handle, human_handle, chimp_handle}); + auto link3 = new Link("Expression", {inheritance_handle, human_handle, mammal_handle}); + + string link1_handle = db->add_link(link1, false); + string link2_handle = db->add_link(link2, false); + string link3_handle = db->add_link(link3, false); + + // Query incoming set for human + auto incoming_set = db->query_for_incoming_set(human_handle); + 
EXPECT_EQ(incoming_set->size(), 3); + + // Verify we got the expected link handles + auto it = incoming_set->get_iterator(); + char* handle; + vector handles; + while ((handle = it->next()) != nullptr) { + handles.push_back(handle); + } + + EXPECT_TRUE(find(handles.begin(), handles.end(), link1_handle) != handles.end()); + EXPECT_TRUE(find(handles.begin(), handles.end(), link2_handle) != handles.end()); + EXPECT_TRUE(find(handles.begin(), handles.end(), link3_handle) != handles.end()); + + // Query incoming set for monkey (should have 1 link) + auto monkey_incoming = db->query_for_incoming_set(monkey_handle); + EXPECT_EQ(monkey_incoming->size(), 1); + + // Query incoming set for mammal (should have 1 link) + auto mammal_incoming = db->query_for_incoming_set(mammal_handle); + EXPECT_EQ(mammal_incoming->size(), 1); + + // Query incoming set for non-existent node (should be empty) + string non_existent_handle = "00000000000000000000000000000000"; + auto non_existent_incoming = db->query_for_incoming_set(non_existent_handle); + EXPECT_EQ(non_existent_incoming->size(), 0); +} + +TEST_F(InMemoryDBTest, QueryForIncomingSetAfterDeletion) { + auto human = new Node("Symbol", "\"human\""); + auto monkey = new Node("Symbol", "\"monkey\""); + auto similarity = new Node("Symbol", "Similarity"); + + string human_handle = db->add_node(human, false); + string monkey_handle = db->add_node(monkey, false); + string similarity_handle = db->add_node(similarity, false); + + auto link1 = new Link("Expression", {similarity_handle, human_handle, monkey_handle}); + string link1_handle = db->add_link(link1, false); + + // Verify incoming set before deletion + auto incoming_set = db->query_for_incoming_set(human_handle); + EXPECT_EQ(incoming_set->size(), 1); + + // Delete the link + db->delete_link(link1_handle, false); + + // Verify incoming set is now empty + incoming_set = db->query_for_incoming_set(human_handle); + EXPECT_EQ(incoming_set->size(), 0); +} + +TEST_F(InMemoryDBTest, 
DeleteAtom) { + auto human = new Node("Symbol", "\"human\""); + auto monkey = new Node("Symbol", "\"monkey\""); + auto similarity = new Node("Symbol", "Similarity"); + + string human_handle = db->add_node(human, false); + string monkey_handle = db->add_node(monkey, false); + string similarity_handle = db->add_node(similarity, false); + + // Create a link that references human + auto link1 = new Link("Expression", {similarity_handle, human_handle, monkey_handle}); + string link1_handle = db->add_link(link1, false); + + // Try to delete human atom with delete_link_targets=false (should fail) + bool deleted = db->delete_atom(human_handle, false); + EXPECT_FALSE(deleted); + + // Verify human still exists + EXPECT_TRUE(db->node_exists(human_handle)); + EXPECT_TRUE(db->link_exists(link1_handle)); + + // Create a link that references human + auto link2 = new Link("Expression", {similarity_handle, human_handle, monkey_handle}); + string link2_handle = db->add_link(link2, false); + + // Delete human atom with delete_link_targets=true (should succeed and delete the link) + deleted = db->delete_atom(human_handle, true); + EXPECT_TRUE(deleted); + + // Verify human is deleted + EXPECT_FALSE(db->node_exists(human_handle)); + + // Verify the link is also deleted + EXPECT_FALSE(db->link_exists(link2_handle)); +} + +TEST_F(InMemoryDBTest, DeleteNode) { + auto human = new Node("Symbol", "\"human\""); + auto monkey = new Node("Symbol", "\"monkey\""); + auto similarity = new Node("Symbol", "Similarity"); + + string human_handle = db->add_node(human, false); + string monkey_handle = db->add_node(monkey, false); + string similarity_handle = db->add_node(similarity, false); + + // Create a link that references human + auto link1 = new Link("Expression", {similarity_handle, human_handle, monkey_handle}); + string link1_handle = db->add_link(link1, false); + + // Try to delete human with delete_link_targets=false (should fail) + bool deleted = db->delete_node(human_handle, false); + 
EXPECT_FALSE(deleted); + + // Verify human still exists + EXPECT_TRUE(db->node_exists(human_handle)); + EXPECT_TRUE(db->link_exists(link1_handle)); + + // Verify incoming set still has the link + auto incoming_set = db->query_for_incoming_set(human_handle); + EXPECT_EQ(incoming_set->size(), 1); + + // Create a link that references human + auto link2 = new Link("Expression", {similarity_handle, human_handle, monkey_handle}); + string link2_handle = db->add_link(link2, false); + + // Delete human with delete_link_targets=true (should succeed and delete the link) + deleted = db->delete_node(human_handle, true); + EXPECT_TRUE(deleted); + + // Verify human is deleted + EXPECT_FALSE(db->node_exists(human_handle)); + + // Verify the link is also deleted + EXPECT_FALSE(db->link_exists(link2_handle)); + + // Verify incoming set is empty + incoming_set = db->query_for_incoming_set(human_handle); + EXPECT_EQ(incoming_set->size(), 0); +} + +TEST_F(InMemoryDBTest, DeleteLink) { + auto human = new Node("Symbol", "\"human\""); + auto monkey = new Node("Symbol", "\"monkey\""); + auto similarity = new Node("Symbol", "Similarity"); + + string human_handle = db->add_node(human, false); + string monkey_handle = db->add_node(monkey, false); + string similarity_handle = db->add_node(similarity, false); + + // Create a link that references human and monkey + auto link1 = new Link("Expression", {similarity_handle, human_handle, monkey_handle}); + string link1_handle = db->add_link(link1, false); + + // Delete link with delete_link_targets=false (should succeed, targets remain) + bool deleted = db->delete_link(link1_handle, false); + EXPECT_TRUE(deleted); + + // Verify link is deleted + EXPECT_FALSE(db->link_exists(link1_handle)); + + // Verify targets still exist + EXPECT_TRUE(db->node_exists(human_handle)); + EXPECT_TRUE(db->node_exists(monkey_handle)); + EXPECT_TRUE(db->node_exists(similarity_handle)); + + // Verify incoming sets are empty + auto human_incoming = 
db->query_for_incoming_set(human_handle); + EXPECT_EQ(human_incoming->size(), 0); + auto monkey_incoming = db->query_for_incoming_set(monkey_handle); + EXPECT_EQ(monkey_incoming->size(), 0); + + // Create a link that references human and monkey + auto link2 = new Link("Expression", {similarity_handle, human_handle, monkey_handle}); + string link2_handle = db->add_link(link2, false); + + // Delete link with delete_link_targets=true (should delete targets if no other references) + deleted = db->delete_link(link2_handle, true); + EXPECT_TRUE(deleted); + + // Verify link is deleted + EXPECT_FALSE(db->link_exists(link2_handle)); + + // Verify targets are deleted (they had no other incoming links) + EXPECT_FALSE(db->node_exists(similarity_handle)); + EXPECT_FALSE(db->node_exists(human_handle)); + EXPECT_FALSE(db->node_exists(monkey_handle)); +} + +TEST_F(InMemoryDBTest, DeleteLinkMultipleReferences) { + auto human = new Node("Symbol", "\"human\""); + auto monkey = new Node("Symbol", "\"monkey\""); + auto chimp = new Node("Symbol", "\"chimp\""); + auto similarity = new Node("Symbol", "Similarity"); + + string human_handle = db->add_node(human, false); + string monkey_handle = db->add_node(monkey, false); + string chimp_handle = db->add_node(chimp, false); + string similarity_handle = db->add_node(similarity, false); + + // Create two links that both reference human + auto link1 = new Link("Expression", {similarity_handle, human_handle, monkey_handle}); + auto link2 = new Link("Expression", {similarity_handle, human_handle, chimp_handle}); + string link1_handle = db->add_link(link1, false); + string link2_handle = db->add_link(link2, false); + + // Verify human has 2 incoming links + auto human_incoming = db->query_for_incoming_set(human_handle); + EXPECT_EQ(human_incoming->size(), 2); + + // Delete link1 with delete_link_targets=true + bool deleted = db->delete_link(link1_handle, true); + EXPECT_TRUE(deleted); + + // Verify link1 is deleted + 
EXPECT_FALSE(db->link_exists(link1_handle)); + + // Verify human still exists (has another incoming link) + EXPECT_TRUE(db->node_exists(human_handle)); + + // Verify monkey is deleted (no other references) + EXPECT_FALSE(db->node_exists(monkey_handle)); + + // Verify human now has only 1 incoming link + human_incoming = db->query_for_incoming_set(human_handle); + EXPECT_EQ(human_incoming->size(), 1); + + // Verify link2 still exists + EXPECT_TRUE(db->link_exists(link2_handle)); +} + int main(int argc, char** argv) { ::testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); From 51ca6c2bb46ce3c12241f8430f34ac91a9ba10d4 Mon Sep 17 00:00:00 2001 From: Artur Gontijo Date: Tue, 27 Jan 2026 20:31:09 -0300 Subject: [PATCH 4/6] Force git update file name. --- src/atomdb/inmemorydb/InmemoryDBAPITypes.cc | 112 -------------------- src/atomdb/inmemorydb/InmemoryDBAPITypes.h | 69 ------------ 2 files changed, 181 deletions(-) delete mode 100644 src/atomdb/inmemorydb/InmemoryDBAPITypes.cc delete mode 100644 src/atomdb/inmemorydb/InmemoryDBAPITypes.h diff --git a/src/atomdb/inmemorydb/InmemoryDBAPITypes.cc b/src/atomdb/inmemorydb/InmemoryDBAPITypes.cc deleted file mode 100644 index 681bbcbe3..000000000 --- a/src/atomdb/inmemorydb/InmemoryDBAPITypes.cc +++ /dev/null @@ -1,112 +0,0 @@ -#include "InMemoryDBAPITypes.h" - -#include - -#include "Utils.h" - -using namespace atomdb; -using namespace atomdb_api_types; -using namespace commons; -using namespace std; - -// HandleSetInMemory -HandleSetInMemory::HandleSetInMemory() : HandleSet() {} - -HandleSetInMemory::~HandleSetInMemory() {} - -unsigned int HandleSetInMemory::size() { return handles.size(); } - -void HandleSetInMemory::append(shared_ptr other) { - auto handle_set_inmemory = dynamic_pointer_cast(other); - if (handle_set_inmemory) { - for (const auto& handle : handle_set_inmemory->handles) { - handles.insert(handle); - } - // Merge metta expressions and assignments - for (const auto& [handle, exprs] : 
handle_set_inmemory->metta_expressions_by_handle) { - metta_expressions_by_handle[handle] = exprs; - } - for (const auto& [handle, assignment] : handle_set_inmemory->assignments_by_handle) { - assignments_by_handle[handle] = assignment; - } - } -} - -shared_ptr HandleSetInMemory::get_iterator() { - shared_ptr it(new HandleSetInMemoryIterator(this)); - return it; -} - -map HandleSetInMemory::get_metta_expressions_by_handle(const string& handle) { - auto it = metta_expressions_by_handle.find(handle); - if (it != metta_expressions_by_handle.end()) { - return it->second; - } - return {}; -} - -Assignment HandleSetInMemory::get_assignments_by_handle(const string& handle) { - auto it = assignments_by_handle.find(handle); - if (it != assignments_by_handle.end()) { - return it->second; - } - return Assignment(); -} - -void HandleSetInMemory::add_handle(const string& handle) { handles.insert(handle); } - -// HandleSetInMemoryIterator -HandleSetInMemoryIterator::HandleSetInMemoryIterator(HandleSetInMemory* handle_set) - : handle_set(handle_set), it(handle_set->handles.begin()) {} - -HandleSetInMemoryIterator::~HandleSetInMemoryIterator() { - for (auto ptr : allocated_strings) { - delete[] ptr; - } -} - -char* HandleSetInMemoryIterator::next() { - if (it == handle_set->handles.end()) { - return nullptr; - } - string handle = *it; - ++it; - char* handle_cstr = new char[handle.size() + 1]; - strcpy(handle_cstr, handle.c_str()); - allocated_strings.push_back(handle_cstr); - return handle_cstr; -} - -// HandleListInMemory -HandleListInMemory::HandleListInMemory() : HandleList() {} - -HandleListInMemory::HandleListInMemory(const vector& handles) : HandleList(), handles(handles) { - for (const auto& handle : handles) { - char* handle_cstr = new char[handle.size() + 1]; - strcpy(handle_cstr, handle.c_str()); - allocated_strings.push_back(handle_cstr); - } -} - -HandleListInMemory::~HandleListInMemory() { - for (auto ptr : allocated_strings) { - delete[] ptr; - } -} - -const char* 
HandleListInMemory::get_handle(unsigned int index) { - if (index >= handles.size()) { - Utils::error("Handle index out of bounds: " + to_string(index) + - " Answer handles size: " + to_string(handles.size())); - } - return allocated_strings[index]; -} - -unsigned int HandleListInMemory::size() { return handles.size(); } - -void HandleListInMemory::add_handle(const string& handle) { - handles.push_back(handle); - char* handle_cstr = new char[handle.size() + 1]; - strcpy(handle_cstr, handle.c_str()); - allocated_strings.push_back(handle_cstr); -} diff --git a/src/atomdb/inmemorydb/InmemoryDBAPITypes.h b/src/atomdb/inmemorydb/InmemoryDBAPITypes.h deleted file mode 100644 index 8899a64e3..000000000 --- a/src/atomdb/inmemorydb/InmemoryDBAPITypes.h +++ /dev/null @@ -1,69 +0,0 @@ -#pragma once - -#include -#include -#include -#include - -#include "Assignment.h" -#include "AtomDBAPITypes.h" - -using namespace std; -using namespace commons; - -namespace atomdb { -namespace atomdb_api_types { - -class HandleSetInMemory : public HandleSet { - friend class HandleSetInMemoryIterator; - - public: - HandleSetInMemory(); - ~HandleSetInMemory(); - - unsigned int size() override; - void append(shared_ptr other) override; - shared_ptr get_iterator() override; - - map get_metta_expressions_by_handle(const string& handle) override; - Assignment get_assignments_by_handle(const string& handle) override; - - void add_handle(const string& handle); - - private: - set handles; - map> metta_expressions_by_handle; - map assignments_by_handle; -}; - -class HandleSetInMemoryIterator : public HandleSetIterator { - public: - HandleSetInMemoryIterator(HandleSetInMemory* handle_set); - ~HandleSetInMemoryIterator(); - - char* next() override; - - private: - HandleSetInMemory* handle_set; - set::iterator it; - vector allocated_strings; -}; - -class HandleListInMemory : public HandleList { - public: - HandleListInMemory(); - HandleListInMemory(const vector& handles); - ~HandleListInMemory(); - - const 
char* get_handle(unsigned int index) override; - unsigned int size() override; - - void add_handle(const string& handle); - - private: - vector handles; - vector allocated_strings; -}; - -} // namespace atomdb_api_types -} // namespace atomdb From b9944b36835b285bf9e44f560cd6f49973480af3 Mon Sep 17 00:00:00 2001 From: Artur Gontijo Date: Tue, 27 Jan 2026 20:31:52 -0300 Subject: [PATCH 5/6] Force git update file name. --- src/atomdb/inmemorydb/InMemoryDBAPITypes.cc | 112 ++++++++++++++++++++ src/atomdb/inmemorydb/InMemoryDBAPITypes.h | 69 ++++++++++++ 2 files changed, 181 insertions(+) create mode 100644 src/atomdb/inmemorydb/InMemoryDBAPITypes.cc create mode 100644 src/atomdb/inmemorydb/InMemoryDBAPITypes.h diff --git a/src/atomdb/inmemorydb/InMemoryDBAPITypes.cc b/src/atomdb/inmemorydb/InMemoryDBAPITypes.cc new file mode 100644 index 000000000..681bbcbe3 --- /dev/null +++ b/src/atomdb/inmemorydb/InMemoryDBAPITypes.cc @@ -0,0 +1,112 @@ +#include "InMemoryDBAPITypes.h" + +#include + +#include "Utils.h" + +using namespace atomdb; +using namespace atomdb_api_types; +using namespace commons; +using namespace std; + +// HandleSetInMemory +HandleSetInMemory::HandleSetInMemory() : HandleSet() {} + +HandleSetInMemory::~HandleSetInMemory() {} + +unsigned int HandleSetInMemory::size() { return handles.size(); } + +void HandleSetInMemory::append(shared_ptr other) { + auto handle_set_inmemory = dynamic_pointer_cast(other); + if (handle_set_inmemory) { + for (const auto& handle : handle_set_inmemory->handles) { + handles.insert(handle); + } + // Merge metta expressions and assignments + for (const auto& [handle, exprs] : handle_set_inmemory->metta_expressions_by_handle) { + metta_expressions_by_handle[handle] = exprs; + } + for (const auto& [handle, assignment] : handle_set_inmemory->assignments_by_handle) { + assignments_by_handle[handle] = assignment; + } + } +} + +shared_ptr HandleSetInMemory::get_iterator() { + shared_ptr it(new HandleSetInMemoryIterator(this)); + return 
it; +} + +map HandleSetInMemory::get_metta_expressions_by_handle(const string& handle) { + auto it = metta_expressions_by_handle.find(handle); + if (it != metta_expressions_by_handle.end()) { + return it->second; + } + return {}; +} + +Assignment HandleSetInMemory::get_assignments_by_handle(const string& handle) { + auto it = assignments_by_handle.find(handle); + if (it != assignments_by_handle.end()) { + return it->second; + } + return Assignment(); +} + +void HandleSetInMemory::add_handle(const string& handle) { handles.insert(handle); } + +// HandleSetInMemoryIterator +HandleSetInMemoryIterator::HandleSetInMemoryIterator(HandleSetInMemory* handle_set) + : handle_set(handle_set), it(handle_set->handles.begin()) {} + +HandleSetInMemoryIterator::~HandleSetInMemoryIterator() { + for (auto ptr : allocated_strings) { + delete[] ptr; + } +} + +char* HandleSetInMemoryIterator::next() { + if (it == handle_set->handles.end()) { + return nullptr; + } + string handle = *it; + ++it; + char* handle_cstr = new char[handle.size() + 1]; + strcpy(handle_cstr, handle.c_str()); + allocated_strings.push_back(handle_cstr); + return handle_cstr; +} + +// HandleListInMemory +HandleListInMemory::HandleListInMemory() : HandleList() {} + +HandleListInMemory::HandleListInMemory(const vector& handles) : HandleList(), handles(handles) { + for (const auto& handle : handles) { + char* handle_cstr = new char[handle.size() + 1]; + strcpy(handle_cstr, handle.c_str()); + allocated_strings.push_back(handle_cstr); + } +} + +HandleListInMemory::~HandleListInMemory() { + for (auto ptr : allocated_strings) { + delete[] ptr; + } +} + +const char* HandleListInMemory::get_handle(unsigned int index) { + if (index >= handles.size()) { + Utils::error("Handle index out of bounds: " + to_string(index) + + " Answer handles size: " + to_string(handles.size())); + } + return allocated_strings[index]; +} + +unsigned int HandleListInMemory::size() { return handles.size(); } + +void 
HandleListInMemory::add_handle(const string& handle) { + handles.push_back(handle); + char* handle_cstr = new char[handle.size() + 1]; + strcpy(handle_cstr, handle.c_str()); + allocated_strings.push_back(handle_cstr); +} diff --git a/src/atomdb/inmemorydb/InMemoryDBAPITypes.h b/src/atomdb/inmemorydb/InMemoryDBAPITypes.h new file mode 100644 index 000000000..8899a64e3 --- /dev/null +++ b/src/atomdb/inmemorydb/InMemoryDBAPITypes.h @@ -0,0 +1,69 @@ +#pragma once + +#include +#include +#include +#include + +#include "Assignment.h" +#include "AtomDBAPITypes.h" + +using namespace std; +using namespace commons; + +namespace atomdb { +namespace atomdb_api_types { + +class HandleSetInMemory : public HandleSet { + friend class HandleSetInMemoryIterator; + + public: + HandleSetInMemory(); + ~HandleSetInMemory(); + + unsigned int size() override; + void append(shared_ptr other) override; + shared_ptr get_iterator() override; + + map get_metta_expressions_by_handle(const string& handle) override; + Assignment get_assignments_by_handle(const string& handle) override; + + void add_handle(const string& handle); + + private: + set handles; + map> metta_expressions_by_handle; + map assignments_by_handle; +}; + +class HandleSetInMemoryIterator : public HandleSetIterator { + public: + HandleSetInMemoryIterator(HandleSetInMemory* handle_set); + ~HandleSetInMemoryIterator(); + + char* next() override; + + private: + HandleSetInMemory* handle_set; + set::iterator it; + vector allocated_strings; +}; + +class HandleListInMemory : public HandleList { + public: + HandleListInMemory(); + HandleListInMemory(const vector& handles); + ~HandleListInMemory(); + + const char* get_handle(unsigned int index) override; + unsigned int size() override; + + void add_handle(const string& handle); + + private: + vector handles; + vector allocated_strings; +}; + +} // namespace atomdb_api_types +} // namespace atomdb From 63f4e40ddd869873ab022a311386672961d91bc1 Mon Sep 17 00:00:00 2001 From: Artur Gontijo 
Date: Sat, 31 Jan 2026 09:50:35 -0300 Subject: [PATCH 6/6] Code review. --- src/atomdb/inmemorydb/InMemoryDB.cc | 398 +++++++++++----------------- src/atomdb/inmemorydb/InMemoryDB.h | 5 - 2 files changed, 158 insertions(+), 245 deletions(-) diff --git a/src/atomdb/inmemorydb/InMemoryDB.cc b/src/atomdb/inmemorydb/InMemoryDB.cc index e0b84e1a9..45c43cbcc 100644 --- a/src/atomdb/inmemorydb/InMemoryDB.cc +++ b/src/atomdb/inmemorydb/InMemoryDB.cc @@ -27,7 +27,7 @@ class AtomTrieValue : public HandleTrie::TrieValue { // For now, just replace (could be enhanced later) delete atom_; atom_ = dynamic_cast(other)->atom_; - dynamic_cast(other)->atom_ = nullptr; // Prevent double delete + dynamic_cast(other)->atom_ = NULL; // Prevent double delete } Atom* get_atom() { return atom_; } @@ -35,44 +35,38 @@ class AtomTrieValue : public HandleTrie::TrieValue { Atom* atom_; }; -// Helper class to store sets of atom handles in HandleTrie for pattern indexing -class PatternTrieValue : public HandleTrie::TrieValue { +// Helper class to store sets of atom handles in HandleTrie for pattern/incoming set indexing +class HandleSetTrieValue : public HandleTrie::TrieValue { public: - PatternTrieValue() {} - ~PatternTrieValue() override {} + HandleSetTrieValue() {} + ~HandleSetTrieValue() override {} void merge(HandleTrie::TrieValue* other) override { - // Merge sets when the same pattern handle is inserted multiple times - PatternTrieValue* other_value = dynamic_cast(other); - if (other_value != nullptr) { - atom_handles_.insert(other_value->atom_handles_.begin(), other_value->atom_handles_.end()); + // Merge sets when the same handle is inserted multiple times + HandleSetTrieValue* other_value = dynamic_cast(other); + if (other_value != NULL) { + handle_set_.insert(other_value->handle_set_.begin(), other_value->handle_set_.end()); } } - void add_handle(const string& handle) { atom_handles_.insert(handle); } - void remove_handle(const string& handle) { atom_handles_.erase(handle); } - const 
set& get_handles() const { return atom_handles_; } - bool empty() const { return atom_handles_.empty(); } + void add_handle(const string& handle) { handle_set_.insert(handle); } + void remove_handle(const string& handle) { handle_set_.erase(handle); } + const set& get_handles() const { return handle_set_; } + bool empty() const { return handle_set_.empty(); } private: - set atom_handles_; + set handle_set_; }; // Helper functions and data structures for traverse callbacks namespace { -struct QueryPatternData { - InMemoryDB* db; - LinkSchema* link_schema; - HandleSetInMemory* handle_set; -}; - struct ReIndexData { InMemoryDB* db; }; bool re_index_visitor(HandleTrie::TrieNode* node, void* data) { ReIndexData* index_data = static_cast(data); - if (node->value != nullptr) { + if (node->value != NULL) { auto atom_trie_value = dynamic_cast(node->value); - if (atom_trie_value != nullptr) { + if (atom_trie_value != NULL) { Atom* atom = atom_trie_value->get_atom(); if (Atom::is_link(*atom)) { Link* link = dynamic_cast(atom); @@ -100,52 +94,51 @@ InMemoryDB::~InMemoryDB() { this->atoms_trie_->traverse( false, [](HandleTrie::TrieNode* node, void* data) -> bool { - if (node->value != nullptr) { + if (node->value != NULL) { delete node->value; - node->value = nullptr; + node->value = NULL; } return false; // Continue traversal }, - nullptr); + NULL); delete this->atoms_trie_; // Traverse and delete all pattern index entries this->pattern_index_trie_->traverse( false, [](HandleTrie::TrieNode* node, void* data) -> bool { - if (node->value != nullptr) { + if (node->value != NULL) { delete node->value; - node->value = nullptr; + node->value = NULL; } return false; // Continue traversal }, - nullptr); + NULL); delete this->pattern_index_trie_; // Traverse and delete all incoming set entries this->incoming_sets_trie_->traverse( false, [](HandleTrie::TrieNode* node, void* data) -> bool { - if (node->value != nullptr) { + if (node->value != NULL) { delete node->value; - node->value = 
nullptr; + node->value = NULL; } return false; // Continue traversal }, - nullptr); + NULL); delete this->incoming_sets_trie_; } bool InMemoryDB::allow_nested_indexing() { return false; } shared_ptr InMemoryDB::get_atom(const string& handle) { - lock_guard lock(trie_mutex_); auto trie_value = this->atoms_trie_->lookup(handle); - if (trie_value == nullptr) { + if (trie_value == NULL) { return nullptr; } auto atom_trie_value = dynamic_cast(trie_value); - if (atom_trie_value == nullptr) { + if (atom_trie_value == NULL) { return nullptr; } // Clone the atom to return a shared_ptr (caller doesn't own the original) @@ -160,33 +153,14 @@ shared_ptr InMemoryDB::get_atom(const string& handle) { } shared_ptr InMemoryDB::query_for_pattern(const LinkSchema& link_schema) { - lock_guard lock(index_mutex_); - // Create a non-const copy to call match() which is non-const - LinkSchema local_schema(link_schema); - auto pattern_handle = local_schema.handle(); auto handle_set = make_shared(); // Check if we have this pattern indexed in the HandleTrie auto pattern_trie_value = - dynamic_cast(pattern_index_trie_->lookup(pattern_handle)); - if (pattern_trie_value != nullptr) { + dynamic_cast(pattern_index_trie_->lookup(link_schema.handle())); + if (pattern_trie_value != NULL) { for (const auto& handle : pattern_trie_value->get_handles()) { - // Verify the atom still exists and matches the schema - lock_guard trie_lock(trie_mutex_); - auto trie_value = atoms_trie_->lookup(handle); - if (trie_value != nullptr) { - auto atom_trie_value = dynamic_cast(trie_value); - if (atom_trie_value != nullptr) { - Atom* atom = atom_trie_value->get_atom(); - if (Atom::is_link(*atom)) { - Assignment assignment; - Link* link = dynamic_cast(atom); - if (local_schema.match(*link, assignment, *this)) { - handle_set->add_handle(handle); - } - } - } - } + handle_set->add_handle(handle); } } @@ -194,13 +168,12 @@ shared_ptr InMemoryDB::query_for_pattern(const LinkSchema& link_schem } shared_ptr 
InMemoryDB::query_for_targets(const string& handle) { - lock_guard lock(trie_mutex_); auto trie_value = atoms_trie_->lookup(handle); - if (trie_value == nullptr) { + if (trie_value == NULL) { return nullptr; } auto atom_trie_value = dynamic_cast(trie_value); - if (atom_trie_value == nullptr) { + if (atom_trie_value == NULL) { return nullptr; } Atom* atom = atom_trie_value->get_atom(); @@ -212,11 +185,10 @@ shared_ptr InMemoryDB::query_for_targets(const string& handle) { } shared_ptr InMemoryDB::query_for_incoming_set(const string& handle) { - lock_guard lock(index_mutex_); auto handle_set = make_shared(); auto incoming_set_trie_value = - dynamic_cast(this->incoming_sets_trie_->lookup(handle)); - if (incoming_set_trie_value != nullptr) { + dynamic_cast(this->incoming_sets_trie_->lookup(handle)); + if (incoming_set_trie_value != NULL) { for (const auto& link_handle : incoming_set_trie_value->get_handles()) { handle_set->add_handle(link_handle); } @@ -226,56 +198,59 @@ shared_ptr InMemoryDB::query_for_incoming_set(const string& handle) { // Stub implementations for AtomDocument methods (to be implemented later) shared_ptr InMemoryDB::get_atom_document(const string& handle) { - // TODO: Implement in second phase + // TODO: Must be removed from AtomDB.h + Utils::error("get_atom_document is not implemented for InMemoryDB"); return nullptr; } shared_ptr InMemoryDB::get_node_document(const string& handle) { - // TODO: Implement in second phase + // TODO: Must be removed from AtomDB.h + Utils::error("get_node_document is not implemented for InMemoryDB"); return nullptr; } shared_ptr InMemoryDB::get_link_document(const string& handle) { - // TODO: Implement in second phase + // TODO: Must be removed from AtomDB.h + Utils::error("get_link_document is not implemented for InMemoryDB"); return nullptr; } vector> InMemoryDB::get_atom_documents(const vector& handles, const vector& fields) { - // TODO: Implement in second phase + // TODO: Must be removed from AtomDB.h + 
Utils::error("get_atom_documents is not implemented for InMemoryDB"); return {}; } vector> InMemoryDB::get_node_documents(const vector& handles, const vector& fields) { - // TODO: Implement in second phase + // TODO: Must be removed from AtomDB.h + Utils::error("get_node_documents is not implemented for InMemoryDB"); return {}; } vector> InMemoryDB::get_link_documents(const vector& handles, const vector& fields) { - // TODO: Implement in second phase + // TODO: Must be removed from AtomDB.h + Utils::error("get_link_documents is not implemented for InMemoryDB"); return {}; } vector> InMemoryDB::get_matching_atoms(bool is_toplevel, Atom& key) { - // TODO: Implement in second phase + // TODO: Must be removed from AtomDB.h + Utils::error("get_matching_atoms is not implemented for InMemoryDB"); return {}; } -bool InMemoryDB::atom_exists(const string& handle) { - lock_guard lock(trie_mutex_); - return atoms_trie_->lookup(handle) != nullptr; -} +bool InMemoryDB::atom_exists(const string& handle) { return atoms_trie_->lookup(handle) != NULL; } bool InMemoryDB::node_exists(const string& handle) { - lock_guard lock(trie_mutex_); auto trie_value = atoms_trie_->lookup(handle); - if (trie_value == nullptr) { + if (trie_value == NULL) { return false; } auto atom_trie_value = dynamic_cast(trie_value); - if (atom_trie_value == nullptr) { + if (atom_trie_value == NULL) { return false; } Atom* atom = atom_trie_value->get_atom(); @@ -283,13 +258,12 @@ bool InMemoryDB::node_exists(const string& handle) { } bool InMemoryDB::link_exists(const string& handle) { - lock_guard lock(trie_mutex_); auto trie_value = atoms_trie_->lookup(handle); - if (trie_value == nullptr) { + if (trie_value == NULL) { return false; } auto atom_trie_value = dynamic_cast(trie_value); - if (atom_trie_value == nullptr) { + if (atom_trie_value == NULL) { return false; } Atom* atom = atom_trie_value->get_atom(); @@ -298,9 +272,8 @@ bool InMemoryDB::link_exists(const string& handle) { set 
InMemoryDB::atoms_exist(const vector& handles) { set existing; - lock_guard lock(trie_mutex_); for (const auto& handle : handles) { - if (atoms_trie_->lookup(handle) != nullptr) { + if (atoms_trie_->lookup(handle) != NULL) { existing.insert(handle); } } @@ -309,17 +282,9 @@ set InMemoryDB::atoms_exist(const vector& handles) { set InMemoryDB::nodes_exist(const vector& handles) { set existing; - lock_guard lock(trie_mutex_); for (const auto& handle : handles) { - auto trie_value = atoms_trie_->lookup(handle); - if (trie_value != nullptr) { - auto atom_trie_value = dynamic_cast(trie_value); - if (atom_trie_value != nullptr) { - Atom* atom = atom_trie_value->get_atom(); - if (Atom::is_node(*atom)) { - existing.insert(handle); - } - } + if (this->node_exists(handle)) { + existing.insert(handle); } } return existing; @@ -327,17 +292,9 @@ set InMemoryDB::nodes_exist(const vector& handles) { set InMemoryDB::links_exist(const vector& handles) { set existing; - lock_guard lock(trie_mutex_); for (const auto& handle : handles) { - auto trie_value = atoms_trie_->lookup(handle); - if (trie_value != nullptr) { - auto atom_trie_value = dynamic_cast(trie_value); - if (atom_trie_value != nullptr) { - Atom* atom = atom_trie_value->get_atom(); - if (Atom::is_link(*atom)) { - existing.insert(handle); - } - } + if (this->link_exists(handle)) { + existing.insert(handle); } } return existing; @@ -354,16 +311,14 @@ string InMemoryDB::add_atom(const atoms::Atom* atom, bool throw_if_exists) { string InMemoryDB::add_node(const atoms::Node* node, bool throw_if_exists) { string handle = node->handle(); - if (throw_if_exists && node_exists(handle)) { + if (throw_if_exists && this->node_exists(handle)) { Utils::error("Node already exists: " + handle); return ""; } - lock_guard lock(trie_mutex_); - // Check if already exists auto existing = atoms_trie_->lookup(handle); - if (existing != nullptr && !throw_if_exists) { + if (existing != NULL && !throw_if_exists) { return handle; // Already exists, 
return handle } @@ -377,7 +332,7 @@ string InMemoryDB::add_node(const atoms::Node* node, bool throw_if_exists) { string InMemoryDB::add_link(const atoms::Link* link, bool throw_if_exists) { vector links = {const_cast(link)}; - auto handles = add_links(links, throw_if_exists, false); + auto handles = this->add_links(links, throw_if_exists, false); return handles.empty() ? "" : handles[0]; } @@ -398,8 +353,8 @@ vector InMemoryDB::add_atoms(const vector& atoms, links.push_back(dynamic_cast(atom)); } } - auto node_handles = add_nodes(nodes, throw_if_exists, is_transactional); - auto link_handles = add_links(links, throw_if_exists, is_transactional); + auto node_handles = this->add_nodes(nodes, throw_if_exists, is_transactional); + auto link_handles = this->add_links(links, throw_if_exists, is_transactional); node_handles.insert(node_handles.end(), link_handles.begin(), link_handles.end()); return node_handles; @@ -427,17 +382,8 @@ vector InMemoryDB::add_nodes(const vector& nodes, } } - lock_guard lock(trie_mutex_); for (const auto& node : nodes) { - string handle = node->handle(); - auto existing = atoms_trie_->lookup(handle); - if (existing == nullptr || !throw_if_exists) { - if (existing == nullptr) { - Node* cloned_node = new Node(*node); - auto atom_trie_value = new AtomTrieValue(cloned_node); - atoms_trie_->insert(handle, atom_trie_value); - } - } + handles.push_back(this->add_node(node, throw_if_exists)); } return handles; @@ -465,17 +411,14 @@ vector InMemoryDB::add_links(const vector& links, } vector handles; - lock_guard trie_lock(trie_mutex_); - lock_guard index_lock(index_mutex_); - for (const auto& link : links) { string link_handle = link->handle(); handles.push_back(link_handle); // Check if already exists auto existing = atoms_trie_->lookup(link_handle); - if (existing == nullptr || !throw_if_exists) { - if (existing == nullptr) { + if (existing == NULL || !throw_if_exists) { + if (existing == NULL) { // Clone the link to store in trie Link* cloned_link 
= new Link(*link); auto atom_trie_value = new AtomTrieValue(cloned_link); @@ -484,13 +427,13 @@ vector InMemoryDB::add_links(const vector& links, // Update incoming sets for each target for (const auto& target_handle : link->targets) { - add_incoming_set(target_handle, link_handle); + this->add_incoming_set(target_handle, link_handle); } // Index pattern - auto pattern_handles = match_pattern_index_schema(link); + auto pattern_handles = this->match_pattern_index_schema(link); for (const auto& pattern_handle : pattern_handles) { - add_pattern(pattern_handle, link_handle); + this->add_pattern(pattern_handle, link_handle); } } } @@ -499,122 +442,97 @@ vector InMemoryDB::add_links(const vector& links, } bool InMemoryDB::delete_atom(const string& handle, bool delete_link_targets) { - if (delete_node(handle, delete_link_targets)) { + if (this->delete_node(handle, delete_link_targets)) { return true; } - return delete_link(handle, delete_link_targets); + return this->delete_link(handle, delete_link_targets); } bool InMemoryDB::delete_node(const string& handle, bool delete_link_targets) { + auto trie_value = this->atoms_trie_->lookup(handle); + if (trie_value == NULL) { + return false; + } + auto atom_trie_value = dynamic_cast(trie_value); + if (atom_trie_value == NULL) { + return false; + } + Atom* atom = atom_trie_value->get_atom(); + if (!Atom::is_node(*atom)) { + return false; + } + vector link_handles_to_delete; - { - lock_guard trie_lock(trie_mutex_); - auto trie_value = atoms_trie_->lookup(handle); - if (trie_value == nullptr) { - return false; - } - auto atom_trie_value = dynamic_cast(trie_value); - if (atom_trie_value == nullptr) { - return false; - } - Atom* atom = atom_trie_value->get_atom(); - if (!Atom::is_node(*atom)) { + // Check incoming set - if this node is referenced by links, handle accordingly + auto incoming_set_trie_value = + dynamic_cast(this->incoming_sets_trie_->lookup(handle)); + if (incoming_set_trie_value != NULL && 
!incoming_set_trie_value->empty()) { + if (delete_link_targets) { + // Collect all links that reference this node (copy the handles before deleting the links) + link_handles_to_delete = vector(incoming_set_trie_value->get_handles().begin(), + incoming_set_trie_value->get_handles().end()); + } else { + // Cannot delete node that is referenced by links return false; } - - // Check incoming set - if this node is referenced by links, handle accordingly - lock_guard index_lock(index_mutex_); - auto incoming_set_trie_value = - dynamic_cast(this->incoming_sets_trie_->lookup(handle)); - if (incoming_set_trie_value != nullptr && !incoming_set_trie_value->empty()) { - if (delete_link_targets) { - // Collect all links that reference this node (copy the handles while holding the lock) - link_handles_to_delete = vector(incoming_set_trie_value->get_handles().begin(), - incoming_set_trie_value->get_handles().end()); - } else { - // Cannot delete node that is referenced by links - return false; - } - } } - // Release locks before calling delete_link to avoid deadlock // Delete all links that reference this node for (const auto& link_handle : link_handles_to_delete) { - delete_link(link_handle, delete_link_targets); + this->delete_link(link_handle, delete_link_targets); } - // Now delete the node itself - { - lock_guard trie_lock(trie_mutex_); - lock_guard index_lock(index_mutex_); - - // Verify the node still exists (it might have been deleted by delete_link if it was a target) - auto trie_value = atoms_trie_->lookup(handle); - if (trie_value == nullptr) { - return true; // Already deleted - } - - // Clear the value in the trie (set to nullptr) - this->atoms_trie_->remove(handle); - this->incoming_sets_trie_->remove(handle); - } + // Clear the value in the trie (set to NULL) + this->atoms_trie_->remove(handle); + this->incoming_sets_trie_->remove(handle); return true; } bool InMemoryDB::delete_link(const string& handle, bool delete_link_targets) { - Link* link = nullptr; - vector
targets; - vector pattern_handles; - vector targets_to_delete; - - { - lock_guard trie_lock(trie_mutex_); - auto trie_value = atoms_trie_->lookup(handle); - if (trie_value == nullptr) { - return false; - } - auto atom_trie_value = dynamic_cast(trie_value); - if (atom_trie_value == nullptr) { - return false; - } - Atom* atom = atom_trie_value->get_atom(); - if (!Atom::is_link(*atom)) { - return false; - } - - link = dynamic_cast(atom); - targets = link->targets; + auto trie_value = atoms_trie_->lookup(handle); + if (trie_value == NULL) { + return false; + } + auto atom_trie_value = dynamic_cast(trie_value); + if (atom_trie_value == NULL) { + return false; + } + Atom* atom = atom_trie_value->get_atom(); + if (!Atom::is_link(*atom)) { + return false; + } - lock_guard index_lock(index_mutex_); + auto link = dynamic_cast(atom); + auto targets = link->targets; - // Update incoming sets for each target - for (const auto& target_handle : targets) { - this->delete_incoming_set(target_handle, handle); + vector targets_to_delete; - if (delete_link_targets) { - // Check if target has other incoming links - auto incoming_set_trie_value = - dynamic_cast(this->incoming_sets_trie_->lookup(target_handle)); - if (incoming_set_trie_value == nullptr || incoming_set_trie_value->empty()) { - // No other references, mark for deletion - targets_to_delete.push_back(target_handle); - } + // Update incoming sets for each target + for (const auto& target_handle : targets) { + this->delete_incoming_set(target_handle, handle); + + if (delete_link_targets) { + // Check if target has other incoming links + auto incoming_set_trie_value = + dynamic_cast(this->incoming_sets_trie_->lookup(target_handle)); + if (incoming_set_trie_value == NULL || incoming_set_trie_value->empty()) { + // No other references, mark for deletion + targets_to_delete.push_back(target_handle); } } + } - // Remove from pattern index - pattern_handles = this->match_pattern_index_schema(link); - for (const auto& pattern_handle 
: pattern_handles) { - this->delete_pattern(pattern_handle, handle); - } - - // Clear the value in the trie (set to nullptr) - this->atoms_trie_->remove(handle); + // Remove from pattern index + vector pattern_handles = this->match_pattern_index_schema(link); + for (const auto& pattern_handle : pattern_handles) { + this->delete_pattern(pattern_handle, handle); } + // Clear the value in the trie (set to NULL) + this->atoms_trie_->remove(handle); + // Release locks before calling delete_atom to avoid deadlock // Delete targets that have no other incoming links for (const auto& target_handle : targets_to_delete) { @@ -655,21 +573,18 @@ uint InMemoryDB::delete_links(const vector& handles, bool delete_link_ta } void InMemoryDB::re_index_patterns(bool flush_patterns) { - lock_guard trie_lock(this->trie_mutex_); - lock_guard index_lock(this->index_mutex_); - if (flush_patterns) { // Clear all pattern index entries by deleting and recreating the trie this->pattern_index_trie_->traverse( false, [](HandleTrie::TrieNode* node, void* data) -> bool { - if (node->value != nullptr) { + if (node->value != NULL) { delete node->value; - node->value = nullptr; + node->value = NULL; } return false; // Continue traversal }, - nullptr); + NULL); delete this->pattern_index_trie_; this->pattern_index_trie_ = new HandleTrie(HANDLE_HASH_SIZE - 1); } @@ -683,10 +598,10 @@ void InMemoryDB::re_index_patterns(bool flush_patterns) { // Helper methods void InMemoryDB::add_pattern(const string& pattern_handle, const string& atom_handle) { auto pattern_trie_value = - dynamic_cast(this->pattern_index_trie_->lookup(pattern_handle)); - if (pattern_trie_value == nullptr) { - // Create new PatternTrieValue - pattern_trie_value = new PatternTrieValue(); + dynamic_cast(this->pattern_index_trie_->lookup(pattern_handle)); + if (pattern_trie_value == NULL) { + // Create new HandleSetTrieValue + pattern_trie_value = new HandleSetTrieValue(); pattern_trie_value->add_handle(atom_handle); 
this->pattern_index_trie_->insert(pattern_handle, pattern_trie_value); } else { @@ -697,8 +612,8 @@ void InMemoryDB::add_pattern(const string& pattern_handle, const string& atom_ha void InMemoryDB::delete_pattern(const string& pattern_handle, const string& atom_handle) { auto pattern_trie_value = - dynamic_cast(this->pattern_index_trie_->lookup(pattern_handle)); - if (pattern_trie_value != nullptr) { + dynamic_cast(this->pattern_index_trie_->lookup(pattern_handle)); + if (pattern_trie_value != NULL) { pattern_trie_value->remove_handle(atom_handle); if (pattern_trie_value->empty()) { // Remove the pattern entry from the trie @@ -709,10 +624,10 @@ void InMemoryDB::delete_pattern(const string& pattern_handle, const string& atom void InMemoryDB::add_incoming_set(const string& target_handle, const string& link_handle) { auto incoming_set_trie_value = - dynamic_cast(this->incoming_sets_trie_->lookup(target_handle)); - if (incoming_set_trie_value == nullptr) { - // Create new PatternTrieValue - incoming_set_trie_value = new PatternTrieValue(); + dynamic_cast(this->incoming_sets_trie_->lookup(target_handle)); + if (incoming_set_trie_value == NULL) { + // Create new HandleSetTrieValue + incoming_set_trie_value = new HandleSetTrieValue(); incoming_set_trie_value->add_handle(link_handle); this->incoming_sets_trie_->insert(target_handle, incoming_set_trie_value); } else { @@ -723,8 +638,8 @@ void InMemoryDB::add_incoming_set(const string& target_handle, const string& lin void InMemoryDB::delete_incoming_set(const string& target_handle, const string& link_handle) { auto incoming_set_trie_value = - dynamic_cast(this->incoming_sets_trie_->lookup(target_handle)); - if (incoming_set_trie_value != nullptr) { + dynamic_cast(this->incoming_sets_trie_->lookup(target_handle)); + if (incoming_set_trie_value != NULL) { incoming_set_trie_value->remove_handle(link_handle); if (incoming_set_trie_value->empty()) { // Remove the incoming set entry from the trie @@ -749,29 +664,32 @@ void 
InMemoryDB::add_pattern_index_schema(const string& tokens, vector InMemoryDB::match_pattern_index_schema(const Link* link) { vector pattern_handles; - auto local_map = this->pattern_index_schema_map; - if (local_map.size() == 0) { + const auto& map_ref = this->pattern_index_schema_map; + + const map, vector>>>* iter_map = &map_ref; + + // When map is empty, use a default map + map, vector>>> default_map; + if (map_ref.empty()) { vector tokens = {"LINK_TEMPLATE", "Expression", to_string(link->arity())}; for (unsigned int i = 0; i < link->arity(); i++) { tokens.push_back("VARIABLE"); tokens.push_back("v" + to_string(i + 1)); } - - auto link_schema = LinkSchema(tokens); auto index_entries = this->index_entries_combinations(link->arity()); - - local_map[1] = make_tuple(move(tokens), move(index_entries)); + default_map = {{1, make_tuple(move(tokens), move(index_entries))}}; + iter_map = &default_map; } vector sorted_keys; - for (const auto& pair : local_map) { + for (const auto& pair : *iter_map) { sorted_keys.push_back(pair.first); } std::sort(sorted_keys.begin(), sorted_keys.end(), std::greater()); for (const auto& priority : sorted_keys) { - auto value = local_map[priority]; + const auto& value = (*iter_map).at(priority); auto link_schema = LinkSchema(get<0>(value)); auto index_entries = get<1>(value); Assignment assignment; diff --git a/src/atomdb/inmemorydb/InMemoryDB.h b/src/atomdb/inmemorydb/InMemoryDB.h index 51af7f11d..01a0b6eb9 100644 --- a/src/atomdb/inmemorydb/InMemoryDB.h +++ b/src/atomdb/inmemorydb/InMemoryDB.h @@ -1,7 +1,6 @@ #pragma once #include -#include #include #include #include @@ -17,8 +16,6 @@ using namespace atoms; namespace atomdb { -#define INMEMORYDB_MAX_TRIE_SIZE 1000000000 - class InMemoryDB : public AtomDB { public: InMemoryDB(const string& context = ""); @@ -85,8 +82,6 @@ class InMemoryDB : public AtomDB { HandleTrie* atoms_trie_; // Stores handle -> Atom* HandleTrie* pattern_index_trie_; // Stores pattern_handle -> set of atom handles 
HandleTrie* incoming_sets_trie_; // Stores target_handle -> set of link handles that reference it - mutex trie_mutex_; - mutex index_mutex_; map, vector>>> pattern_index_schema_map; int pattern_index_schema_next_priority{1};