diff --git a/poetry.lock b/poetry.lock index 6fd1d7b42..28d389e9a 100644 --- a/poetry.lock +++ b/poetry.lock @@ -63,7 +63,7 @@ version = "2.6.1" description = "Happy Eyeballs for asyncio" optional = false python-versions = ">=3.9" -groups = ["main", "nomad", "perception", "simbench"] +groups = ["main", "nomad", "perception", "semap", "simbench"] files = [ {file = "aiohappyeyeballs-2.6.1-py3-none-any.whl", hash = "sha256:f349ba8f4b75cb25c99c5c2d84e997e485204d2902a9597802b0371f09331fb8"}, {file = "aiohappyeyeballs-2.6.1.tar.gz", hash = "sha256:c3f9d0113123803ccadfdf3f0faa505bc78e6a72d1cc4806cbd719826e943558"}, @@ -75,7 +75,7 @@ version = "3.13.2" description = "Async http client/server framework (asyncio)" optional = false python-versions = ">=3.9" -groups = ["main", "nomad", "perception", "simbench"] +groups = ["main", "nomad", "perception", "semap", "simbench"] files = [ {file = "aiohttp-3.13.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:2372b15a5f62ed37789a6b383ff7344fc5b9f243999b0cd9b629d8bc5f5b4155"}, {file = "aiohttp-3.13.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e7f8659a48995edee7229522984bd1009c1213929c769c2daa80b40fe49a180c"}, @@ -233,7 +233,7 @@ version = "1.4.0" description = "aiosignal: a list of registered asynchronous callbacks" optional = false python-versions = ">=3.9" -groups = ["main", "nomad", "perception", "simbench"] +groups = ["main", "nomad", "perception", "semap", "simbench"] files = [ {file = "aiosignal-1.4.0-py3-none-any.whl", hash = "sha256:053243f8b92b990551949e63930a839ff0cf0b0ebbe0597b0f3fb19e1a0fe82e"}, {file = "aiosignal-1.4.0.tar.gz", hash = "sha256:f47eecd9468083c2029cc99945502cb7708b082c232f9aca65da147157b251c7"}, @@ -249,7 +249,7 @@ version = "6.0.0" description = "Vega-Altair: A declarative statistical visualization library for Python." 
optional = false python-versions = ">=3.9" -groups = ["main", "perception", "simbench"] +groups = ["main", "perception", "semap", "simbench"] files = [ {file = "altair-6.0.0-py3-none-any.whl", hash = "sha256:09ae95b53d5fe5b16987dccc785a7af8588f2dca50de1e7a156efa8a461515f8"}, {file = "altair-6.0.0.tar.gz", hash = "sha256:614bf5ecbe2337347b590afb111929aa9c16c9527c4887d96c9bc7f6640756b4"}, @@ -274,7 +274,7 @@ version = "0.7.0" description = "Reusable constraint types to use with typing.Annotated" optional = false python-versions = ">=3.8" -groups = ["main", "nomad", "perception", "s2s", "simbench"] +groups = ["main", "nomad", "perception", "s2s", "semap", "simbench"] files = [ {file = "annotated_types-0.7.0-py3-none-any.whl", hash = "sha256:1f02e8b43a8fbbc3f3e0d4f0f4bfc8131bcb4eebe8849b8e5c773f3a1c582a53"}, {file = "annotated_types-0.7.0.tar.gz", hash = "sha256:aff07c09a53a08bc8cfccb9c85b05f1aa9a2a6f23728d790723543408344ce89"}, @@ -297,7 +297,7 @@ version = "4.12.0" description = "High-level concurrency and networking framework on top of asyncio or Trio" optional = false python-versions = ">=3.9" -groups = ["main", "nomad", "perception", "s2s", "simbench"] +groups = ["main", "nomad", "perception", "s2s", "semap", "simbench"] files = [ {file = "anyio-4.12.0-py3-none-any.whl", hash = "sha256:dad2376a628f98eeca4881fc56cd06affd18f659b17a747d3ff0307ced94b1bb"}, {file = "anyio-4.12.0.tar.gz", hash = "sha256:73c693b567b0c55130c104d0b43a9baf3aa6a31fc6110116509f27bf75e21ec0"}, @@ -358,7 +358,7 @@ version = "4.0.3" description = "Timeout context manager for asyncio programs" optional = false python-versions = ">=3.7" -groups = ["main", "nomad", "perception", "simbench"] +groups = ["main", "nomad", "perception", "semap", "simbench"] markers = "python_version == \"3.10\"" files = [ {file = "async-timeout-4.0.3.tar.gz", hash = "sha256:4640d96be84d82d02ed59ea2b7105a0f7b33abe8703703cd0ab0bf87c427522f"}, @@ -371,7 +371,7 @@ version = "25.4.0" description = "Classes Without Boilerplate" optional = false python-versions = ">=3.9" -groups = ["main", "nomad", "perception", "s2s", "simbench"] +groups = ["main", "nomad", "perception", "s2s", "semap", "simbench"] files = [ {file = "attrs-25.4.0-py3-none-any.whl", hash = "sha256:adcf7e2a1fb3b36ac48d97835bb6d8ade15b8dcce26aba8bf1d14847b57a3373"}, {file = "attrs-25.4.0.tar.gz", hash = "sha256:16d5969b87f0859ef33a48b35d55ac1be6e42ae49d5e853b597db70c35c57e11"}, @@ -475,7 +475,7 @@ version = "2.2.1" description = "Function decoration for backoff and retry" optional = false python-versions = ">=3.7,<4.0" -groups = ["main", "perception", "simbench"] +groups = ["main", "perception", "semap", "simbench"] files = [ {file = "backoff-2.2.1-py3-none-any.whl", hash = "sha256:63579f9a0628e06278f7e47b7d7d5b6ce20dc65c5e96a6f3ca99a6adca0396e8"}, {file = "backoff-2.2.1.tar.gz", hash = "sha256:03f829f5bb1923180821643f8753b0502c3b682293992485b0eef2807afa5cba"}, @@ -530,7 +530,7 @@ version = "1.9.0" description = "Fast, simple object-to-object and broadcast signaling" optional = false python-versions = ">=3.9" -groups = ["main", "perception", "simbench"] +groups = ["main", "perception", "semap", "simbench"] files = [ {file = "blinker-1.9.0-py3-none-any.whl", hash = "sha256:ba0efaa9080b619ff2f3459d1d500c57bddea4a6b424b60a91141db6fd2f08bc"}, {file = "blinker-1.9.0.tar.gz", hash = "sha256:b4ce2265a7abece45e7cc896e98dbebe6cead56bcf805a3d23136d145f5445bf"}, @@ -542,7 +542,7 @@ version = "1.42.8" description = "The AWS SDK for Python" optional = false python-versions = ">=3.9" -groups = 
["main", "nomad", "perception", "simbench"] +groups = ["main", "nomad", "perception", "semap", "simbench"] files = [ {file = "boto3-1.42.8-py3-none-any.whl", hash = "sha256:747acc83488fc80b0e7d1c4ff0c533039ff3ede21bdbd4e89544e25b010b070c"}, {file = "boto3-1.42.8.tar.gz", hash = "sha256:e967706af5887339407481562c389c612d5eae641eb854ddd59026d049df740e"}, @@ -562,7 +562,7 @@ version = "1.42.8" description = "Low-level, data-driven core of boto 3." optional = false python-versions = ">=3.9" -groups = ["main", "nomad", "perception", "simbench"] +groups = ["main", "nomad", "perception", "semap", "simbench"] files = [ {file = "botocore-1.42.8-py3-none-any.whl", hash = "sha256:4cb89c74dd9083d16e45868749b999265a91309b2499907c84adeffa0a8df89b"}, {file = "botocore-1.42.8.tar.gz", hash = "sha256:4921aa454f82fed0880214eab21126c98a35fe31ede952693356f9c85ce3574b"}, @@ -582,7 +582,7 @@ version = "6.2.2" description = "Extensible memoizing collections and decorators" optional = false python-versions = ">=3.9" -groups = ["main", "nomad", "perception", "simbench"] +groups = ["main", "nomad", "perception", "semap", "simbench"] files = [ {file = "cachetools-6.2.2-py3-none-any.whl", hash = "sha256:6c09c98183bf58560c97b2abfcedcbaf6a896a490f534b031b661d3723b45ace"}, {file = "cachetools-6.2.2.tar.gz", hash = "sha256:8e6d266b25e539df852251cfd6f990b4bc3a141db73b939058d809ebd2590fc6"}, @@ -594,7 +594,7 @@ version = "2025.11.12" description = "Python package for providing Mozilla's CA Bundle." optional = false python-versions = ">=3.7" -groups = ["main", "docs", "nomad", "perception", "s2s", "simbench"] +groups = ["main", "docs", "nomad", "perception", "s2s", "semap", "simbench"] files = [ {file = "certifi-2025.11.12-py3-none-any.whl", hash = "sha256:97de8790030bbd5c2d96b7ec782fc2f7820ef8dba6db909ccf95449f2d062d4b"}, {file = "certifi-2025.11.12.tar.gz", hash = "sha256:d8ab5478f2ecd78af242878415affce761ca6bc54a22a27e026d7c25357c3316"}, @@ -715,7 +715,7 @@ version = "3.4.4" description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." optional = false python-versions = ">=3.7" -groups = ["main", "docs", "nomad", "perception", "s2s", "simbench"] +groups = ["main", "docs", "nomad", "perception", "s2s", "semap", "simbench"] files = [ {file = "charset_normalizer-3.4.4-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:e824f1492727fa856dd6eda4f7cee25f8518a12f3c4a56a74e8095695089cf6d"}, {file = "charset_normalizer-3.4.4-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4bd5d4137d500351a30687c2d3971758aac9a19208fc110ccb9d7188fbe709e8"}, @@ -838,7 +838,7 @@ version = "8.2.1" description = "Composable command line interface toolkit" optional = false python-versions = ">=3.10" -groups = ["main", "docs", "nomad", "perception", "simbench"] +groups = ["main", "docs", "nomad", "perception", "semap", "simbench"] files = [ {file = "click-8.2.1-py3-none-any.whl", hash = "sha256:61a3265b914e850b85317d0b3109c7f8cd35a670f963866005d6ef1d5175a12b"}, {file = "click-8.2.1.tar.gz", hash = "sha256:27c491cc05d968d271d5a1db13e3b5a184636d9d930f148c50b038f0d0646202"}, @@ -894,12 +894,12 @@ version = "0.4.6" description = "Cross-platform colored terminal text." 
optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" -groups = ["main", "docs", "nomad", "perception", "s2s", "simbench"] +groups = ["main", "docs", "nomad", "perception", "s2s", "semap", "simbench"] files = [ {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"}, {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, ] -markers = {main = "sys_platform == \"win32\" or platform_system == \"Windows\"", nomad = "sys_platform == \"win32\" or platform_system == \"Windows\"", perception = "platform_system == \"Windows\"", s2s = "sys_platform == \"win32\" or platform_system == \"Windows\"", simbench = "platform_system == \"Windows\""} +markers = {main = "sys_platform == \"win32\" or platform_system == \"Windows\"", nomad = "sys_platform == \"win32\" or platform_system == \"Windows\"", perception = "platform_system == \"Windows\"", s2s = "sys_platform == \"win32\" or platform_system == \"Windows\"", semap = "platform_system == \"Windows\"", simbench = "platform_system == \"Windows\""} [[package]] name = "coloredlogs" @@ -907,7 +907,7 @@ version = "15.0.1" description = "Colored terminal output for Python's logging module" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" -groups = ["main", "perception", "s2s", "simbench"] +groups = ["main", "perception", "s2s", "semap", "simbench"] files = [ {file = "coloredlogs-15.0.1-py2.py3-none-any.whl", hash = "sha256:612ee75c546f53e92e70049c9dbfcc18c935a2b9a53b66085ce9ef6a6e5c0934"}, {file = "coloredlogs-15.0.1.tar.gz", hash = "sha256:7c991aa71a4577af2f82600d8f8f3a89f936baeaf9b50a9c197da014e5bf16b0"}, @@ -1397,7 +1397,7 @@ version = "0.6.7" description = "Easily serialize dataclasses to and from JSON." optional = false python-versions = "<4.0,>=3.7" -groups = ["main", "perception", "simbench"] +groups = ["main", "perception", "semap", "simbench"] files = [ {file = "dataclasses_json-0.6.7-py3-none-any.whl", hash = "sha256:0dbf33f26c8d5305befd61b39d2b3414e8a407bedc2834dea9b8d642666fb40a"}, {file = "dataclasses_json-0.6.7.tar.gz", hash = "sha256:b6b3e528266ea45b9535223bc53ca645f5208833c29229e847b3f26a1cc55fc0"}, @@ -1521,7 +1521,7 @@ version = "1.3.1" description = "Python @deprecated decorator to deprecate old python classes, functions or methods." 
optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,>=2.7" -groups = ["main", "perception", "simbench"] +groups = ["main", "perception", "semap", "simbench"] files = [ {file = "deprecated-1.3.1-py2.py3-none-any.whl", hash = "sha256:597bfef186b6f60181535a29fbe44865ce137a5079f295b479886c82729d5f3f"}, {file = "deprecated-1.3.1.tar.gz", hash = "sha256:b1b50e0ff0c1fddaa5708a2c6b0a6588bb09b892825ab2b214ac9ea9d92a5223"}, @@ -1656,7 +1656,7 @@ version = "1.9.0" description = "Distro - an OS platform information API" optional = false python-versions = ">=3.6" -groups = ["main", "perception", "simbench"] +groups = ["main", "perception", "semap", "simbench"] files = [ {file = "distro-1.9.0-py3-none-any.whl", hash = "sha256:7bffd925d65168f85027d8da9af6bddab658135b840670a223589bc0c8ef02b2"}, {file = "distro-1.9.0.tar.gz", hash = "sha256:2fa77c6fd8940f116ee1d6b94a2f90b13b5ea8d019b98bc8bafdcabcdd9bdbed"}, @@ -1765,7 +1765,7 @@ version = "1.3.1" description = "Backport of PEP 654 (exception groups)" optional = false python-versions = ">=3.7" -groups = ["main", "nomad", "perception", "s2s", "simbench"] +groups = ["main", "nomad", "perception", "s2s", "semap", "simbench"] markers = "python_version == \"3.10\"" files = [ {file = "exceptiongroup-1.3.1-py3-none-any.whl", hash = "sha256:a7a39a3bd276781e98394987d3a5701d0c4edffb633bb7a5144577f82c773598"}, @@ -1961,7 +1961,7 @@ version = "1.8.0" description = "A list-like structure which implements collections.abc.MutableSequence" optional = false python-versions = ">=3.9" -groups = ["main", "nomad", "perception", "simbench"] +groups = ["main", "nomad", "perception", "semap", "simbench"] files = [ {file = "frozenlist-1.8.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:b37f6d31b3dcea7deb5e9696e529a6aa4a898adc33db82da12e4c60a7c4d2011"}, {file = "frozenlist-1.8.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:ef2b7b394f208233e471abc541cc6991f907ffd47dc72584acee3147899d6565"}, @@ -2207,7 +2207,7 @@ version = "4.0.12" description = "Git Object Database" optional = false python-versions = ">=3.7" -groups = ["main", "nomad", "perception", "simbench"] +groups = ["main", "nomad", "perception", "semap", "simbench"] files = [ {file = "gitdb-4.0.12-py3-none-any.whl", hash = "sha256:67073e15955400952c6565cc3e707c554a4eea2e428946f7a4c162fab9bd9bcf"}, {file = "gitdb-4.0.12.tar.gz", hash = "sha256:5ef71f855d191a3326fcfbc0d5da835f26b13fbcba60c32c21091c349ffdb571"}, @@ -2222,7 +2222,7 @@ version = "3.1.45" description = "GitPython is a Python library used to interact with Git repositories" optional = false python-versions = ">=3.7" -groups = ["main", "nomad", "perception", "simbench"] +groups = ["main", "nomad", "perception", "semap", "simbench"] files = [ {file = "gitpython-3.1.45-py3-none-any.whl", hash = "sha256:8908cb2e02fb3b93b7eb0f2827125cb699869470432cc885f019b8fd0fccff77"}, {file = "gitpython-3.1.45.tar.gz", hash = "sha256:85b0ee964ceddf211c41b9f27a49086010a190fd8132a24e21f362a4b36a791c"}, @@ -2359,7 +2359,7 @@ version = "3.3.0" description = "Lightweight in-process concurrent programming" optional = false python-versions = ">=3.10" -groups = ["main", "perception", "simbench"] +groups = ["main", "perception", "semap", "simbench"] markers = "platform_machine == \"aarch64\" or platform_machine == \"ppc64le\" or platform_machine == \"x86_64\" or platform_machine == \"amd64\" or platform_machine == \"AMD64\" or platform_machine == \"win32\" or platform_machine == \"WIN32\"" files = [ {file = "greenlet-3.3.0-cp310-cp310-macosx_11_0_universal2.whl", 
hash = "sha256:6f8496d434d5cb2dce025773ba5597f71f5410ae499d5dd9533e0653258cdb3d"}, @@ -2557,7 +2557,7 @@ version = "0.16.0" description = "A pure-Python, bring-your-own-I/O implementation of HTTP/1.1" optional = false python-versions = ">=3.8" -groups = ["main", "nomad", "perception", "s2s", "simbench"] +groups = ["main", "nomad", "perception", "s2s", "semap", "simbench"] files = [ {file = "h11-0.16.0-py3-none-any.whl", hash = "sha256:63cf8bbe7522de3bf65932fda1d9c2772064ffb3dae62d55932da54b31cb6c86"}, {file = "h11-0.16.0.tar.gz", hash = "sha256:4e35b956cf45792e4caa5885e69fba00bdbc6ffafbfa020300e549b208ee5ff1"}, @@ -2658,7 +2658,7 @@ version = "1.0.9" description = "A minimal low-level HTTP client." optional = false python-versions = ">=3.8" -groups = ["main", "nomad", "perception", "s2s", "simbench"] +groups = ["main", "nomad", "perception", "s2s", "semap", "simbench"] files = [ {file = "httpcore-1.0.9-py3-none-any.whl", hash = "sha256:2d400746a40668fc9dec9810239072b40b4484b640a8c38fd654a024c7a1bf55"}, {file = "httpcore-1.0.9.tar.gz", hash = "sha256:6e34463af53fd2ab5d807f399a9b45ea31c3dfa2276f15a2c3f00afff6e176e8"}, @@ -2680,7 +2680,7 @@ version = "0.28.1" description = "The next generation HTTP client." optional = false python-versions = ">=3.8" -groups = ["main", "nomad", "perception", "s2s", "simbench"] +groups = ["main", "nomad", "perception", "s2s", "semap", "simbench"] files = [ {file = "httpx-0.28.1-py3-none-any.whl", hash = "sha256:d909fcccc110f8c7faf814ca82a9a4d816bc5a6dbfea25d6591d6985b8ba59ad"}, {file = "httpx-0.28.1.tar.gz", hash = "sha256:75e98c5f16b0f35b567856f597f06ff2270a374470a5c2392242528e3e3e42fc"}, @@ -2705,7 +2705,7 @@ version = "0.4.3" description = "Consume Server-Sent Event (SSE) messages with HTTPX." optional = false python-versions = ">=3.9" -groups = ["main", "perception", "simbench"] +groups = ["main", "perception", "semap", "simbench"] files = [ {file = "httpx_sse-0.4.3-py3-none-any.whl", hash = "sha256:0ac1c9fe3c0afad2e0ebb25a934a59f4c7823b60792691f779fad2c5568830fc"}, {file = "httpx_sse-0.4.3.tar.gz", hash = "sha256:9b1ed0127459a66014aec3c56bebd93da3c1bc8bb6618c8082039a44889a755d"}, @@ -2756,7 +2756,7 @@ version = "10.0" description = "Human friendly output for text interfaces using Python" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" -groups = ["main", "perception", "s2s", "simbench"] +groups = ["main", "perception", "s2s", "semap", "simbench"] files = [ {file = "humanfriendly-10.0-py2.py3-none-any.whl", hash = "sha256:1697e1a8a8f550fd43c2865cd84542fc175a61dcb779b6fee18cf6b6ccba1477"}, {file = "humanfriendly-10.0.tar.gz", hash = "sha256:6b0b831ce8f15f7300721aa49829fc4e83921a9a301cc7f606be6686a2288ddc"}, @@ -2803,7 +2803,7 @@ version = "3.11" description = "Internationalized Domain Names in Applications (IDNA)" optional = false python-versions = ">=3.8" -groups = ["main", "docs", "nomad", "perception", "s2s", "simbench"] +groups = ["main", "docs", "nomad", "perception", "s2s", "semap", "simbench"] files = [ {file = "idna-3.11-py3-none-any.whl", hash = "sha256:771a87f49d9defaf64091e6e6fe9c18d4833f140bd19464795bc32d966ca37ea"}, {file = "idna-3.11.tar.gz", hash = "sha256:795dafcc9c04ed0c1fb032c2aa73654d8e8c5023a7df64a53f39190ada629902"}, @@ -3199,7 +3199,7 @@ version = "3.1.6" description = "A very fast and expressive template engine." 
optional = false python-versions = ">=3.7" -groups = ["main", "docs", "nomad", "perception", "s2s", "simbench"] +groups = ["main", "docs", "nomad", "perception", "s2s", "semap", "simbench"] files = [ {file = "jinja2-3.1.6-py3-none-any.whl", hash = "sha256:85ece4451f492d0c13c5dd7c13a64681a86afae63a5f347908daf103ce6d2f67"}, {file = "jinja2-3.1.6.tar.gz", hash = "sha256:0137fb05990d35f1275a587e9aee6d56da821fc83491a0fb838183be43f66d6d"}, @@ -3217,7 +3217,7 @@ version = "0.12.0" description = "Fast iterable JSON parser." optional = false python-versions = ">=3.9" -groups = ["main", "perception", "simbench"] +groups = ["main", "perception", "semap", "simbench"] files = [ {file = "jiter-0.12.0-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:e7acbaba9703d5de82a2c98ae6a0f59ab9770ab5af5fa35e43a303aee962cf65"}, {file = "jiter-0.12.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:364f1a7294c91281260364222f535bc427f56d4de1d8ffd718162d21fbbd602e"}, @@ -3329,7 +3329,7 @@ version = "1.0.1" description = "JSON Matching Expressions" optional = false python-versions = ">=3.7" -groups = ["main", "nomad", "perception", "simbench"] +groups = ["main", "nomad", "perception", "semap", "simbench"] files = [ {file = "jmespath-1.0.1-py3-none-any.whl", hash = "sha256:02e2e4cc71b5bcab88332eebf907519190dd9e6e82107fa7f83b1003a6252980"}, {file = "jmespath-1.0.1.tar.gz", hash = "sha256:90261b206d6defd58fdd5e85f478bf633a2901798906be2ad389150c5c60edbe"}, @@ -3353,7 +3353,7 @@ version = "1.33" description = "Apply JSON-Patches (RFC 6902)" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*, !=3.6.*" -groups = ["main", "perception", "simbench"] +groups = ["main", "perception", "semap", "simbench"] files = [ {file = "jsonpatch-1.33-py2.py3-none-any.whl", hash = "sha256:0ae28c0cd062bbd8b8ecc26d7d164fbbea9652a1a3693f3b956c1eae5145dade"}, {file = "jsonpatch-1.33.tar.gz", hash = "sha256:9fcd4009c41e6d12348b4a0ff2563ba56a2923a7dfee731d004e212e1ee5030c"}, @@ -3368,7 +3368,7 @@ version = "3.0.0" description = "Identify specific nodes in a JSON document (RFC 6901)" optional = false python-versions = ">=3.7" -groups = ["main", "perception", "simbench"] +groups = ["main", "perception", "semap", "simbench"] files = [ {file = "jsonpointer-3.0.0-py2.py3-none-any.whl", hash = "sha256:13e088adc14fca8b6aa8177c044e12701e6ad4b28ff10e65f2267a90109c9942"}, {file = "jsonpointer-3.0.0.tar.gz", hash = "sha256:2b2d729f2091522d61c3b31f82e11870f60b68f43fbc705cb76bf4b832af59ef"}, @@ -3380,7 +3380,7 @@ version = "4.25.1" description = "An implementation of JSON Schema validation for Python" optional = false python-versions = ">=3.9" -groups = ["main", "nomad", "perception", "s2s", "simbench"] +groups = ["main", "nomad", "perception", "s2s", "semap", "simbench"] files = [ {file = "jsonschema-4.25.1-py3-none-any.whl", hash = "sha256:3fba0169e345c7175110351d456342c364814cfcf3b964ba4587f22915230a63"}, {file = "jsonschema-4.25.1.tar.gz", hash = "sha256:e4a9655ce0da0c0b67a085847e00a3a51449e1157f4f75e9fb5aa545e122eb85"}, @@ -3402,7 +3402,7 @@ version = "2025.9.1" description = "The JSON Schema meta-schemas and vocabularies, exposed as a Registry" optional = false python-versions = ">=3.9" -groups = ["main", "nomad", "perception", "s2s", "simbench"] +groups = ["main", "nomad", "perception", "s2s", "semap", "simbench"] files = [ {file = "jsonschema_specifications-2025.9.1-py3-none-any.whl", hash = "sha256:98802fee3a11ee76ecaca44429fda8a41bff98b00a0f2838151b113f210cc6fe"}, {file = 
"jsonschema_specifications-2025.9.1.tar.gz", hash = "sha256:b540987f239e745613c7a9176f3edb72b832a4ac465cf02712288397832b5e8d"}, @@ -3594,7 +3594,7 @@ version = "0.3.27" description = "Building applications with LLMs through composability" optional = false python-versions = "<4.0,>=3.9" -groups = ["main", "perception", "simbench"] +groups = ["main", "perception", "semap", "simbench"] files = [ {file = "langchain-0.3.27-py3-none-any.whl", hash = "sha256:7b20c4f338826acb148d885b20a73a16e410ede9ee4f19bb02011852d5f98798"}, {file = "langchain-0.3.27.tar.gz", hash = "sha256:aa6f1e6274ff055d0fd36254176770f356ed0a8994297d1df47df341953cec62"}, @@ -3635,7 +3635,7 @@ version = "0.2.35" description = "An integration package connecting AWS and LangChain" optional = false python-versions = ">=3.9" -groups = ["main", "perception", "simbench"] +groups = ["main", "perception", "semap", "simbench"] files = [ {file = "langchain_aws-0.2.35-py3-none-any.whl", hash = "sha256:8ddb10f3c29f6d52bcbaa4d7f4f56462acf01f608adc7c70f41e5a476899a6bc"}, {file = "langchain_aws-0.2.35.tar.gz", hash = "sha256:45793a34fe45d365f4292cc768db74669ca24601d2c5da1ac6f44403750d70af"}, @@ -3659,7 +3659,7 @@ version = "0.3.31" description = "Community contributed LangChain integrations." optional = false python-versions = "<4.0.0,>=3.9.0" -groups = ["main", "perception", "simbench"] +groups = ["main", "perception", "semap", "simbench"] files = [ {file = "langchain_community-0.3.31-py3-none-any.whl", hash = "sha256:1c727e3ebbacd4d891b07bd440647668001cea3e39cbe732499ad655ec5cb569"}, {file = "langchain_community-0.3.31.tar.gz", hash = "sha256:250e4c1041539130f6d6ac6f9386cb018354eafccd917b01a4cff1950b80fd81"}, @@ -3685,7 +3685,7 @@ version = "0.3.80" description = "Building applications with LLMs through composability" optional = false python-versions = "<4.0.0,>=3.9.0" -groups = ["main", "perception", "simbench"] +groups = ["main", "perception", "semap", "simbench"] files = [ {file = "langchain_core-0.3.80-py3-none-any.whl", hash = "sha256:2141e3838d100d17dce2359f561ec0df52c526bae0de6d4f469f8026c5747456"}, {file = "langchain_core-0.3.80.tar.gz", hash = "sha256:29636b82513ab49e834764d023c4d18554d3d719a185d37b019d0a8ae948c6bb"}, @@ -3706,7 +3706,7 @@ version = "0.3.10" description = "An integration package connecting Ollama and LangChain" optional = false python-versions = "<4.0.0,>=3.9.0" -groups = ["main", "perception", "simbench"] +groups = ["main", "perception", "semap", "simbench"] files = [ {file = "langchain_ollama-0.3.10-py3-none-any.whl", hash = "sha256:7550792872e8f86d362568e9ceb0f8085428bc59946c7b44e726358ba4b280f9"}, {file = "langchain_ollama-0.3.10.tar.gz", hash = "sha256:5d942d331c44351bae5c5c5965603ceb20b0ee4d70082290f4b15bc638559756"}, @@ -3722,7 +3722,7 @@ version = "0.3.35" description = "An integration package connecting OpenAI and LangChain" optional = false python-versions = "<4.0.0,>=3.9.0" -groups = ["main", "perception", "simbench"] +groups = ["main", "perception", "semap", "simbench"] files = [ {file = "langchain_openai-0.3.35-py3-none-any.whl", hash = "sha256:76d5707e6e81fd461d33964ad618bd326cb661a1975cef7c1cb0703576bdada5"}, {file = "langchain_openai-0.3.35.tar.gz", hash = "sha256:fa985fd041c3809da256a040c98e8a43e91c6d165b96dcfeb770d8bd457bf76f"}, @@ -3739,7 +3739,7 @@ version = "0.3.11" description = "LangChain text splitting utilities" optional = false python-versions = ">=3.9" -groups = ["main", "perception", "simbench"] +groups = ["main", "perception", "semap", "simbench"] files = [ {file = 
"langchain_text_splitters-0.3.11-py3-none-any.whl", hash = "sha256:cf079131166a487f1372c8ab5d0bfaa6c0a4291733d9c43a34a16ac9bcd6a393"}, {file = "langchain_text_splitters-0.3.11.tar.gz", hash = "sha256:7a50a04ada9a133bbabb80731df7f6ddac51bc9f1b9cab7fa09304d71d38a6cc"}, @@ -3754,7 +3754,7 @@ version = "2.60.10" description = "A client library for accessing langfuse" optional = false python-versions = "<4.0,>=3.9" -groups = ["main", "perception", "simbench"] +groups = ["main", "perception", "semap", "simbench"] files = [ {file = "langfuse-2.60.10-py3-none-any.whl", hash = "sha256:815c6369194aa5b2a24f88eb9952f7c3fc863272c41e90642a71f3bc76f4a11f"}, {file = "langfuse-2.60.10.tar.gz", hash = "sha256:a26d0d927a28ee01b2d12bb5b862590b643cc4e60a28de6e2b0c2cfff5dbfc6a"}, @@ -3781,7 +3781,7 @@ version = "1.0.1" description = "Building stateful, multi-actor applications with LLMs" optional = false python-versions = ">=3.10" -groups = ["main", "perception", "simbench"] +groups = ["main", "perception", "semap", "simbench"] files = [ {file = "langgraph-1.0.1-py3-none-any.whl", hash = "sha256:892f04f64f4889abc80140265cc6bd57823dd8e327a5eef4968875f2cd9013bd"}, {file = "langgraph-1.0.1.tar.gz", hash = "sha256:4985b32ceabb046a802621660836355dfcf2402c5876675dc353db684aa8f563"}, @@ -3801,7 +3801,7 @@ version = "3.0.1" description = "Library with base interfaces for LangGraph checkpoint savers." optional = false python-versions = ">=3.10" -groups = ["main", "perception", "simbench"] +groups = ["main", "perception", "semap", "simbench"] files = [ {file = "langgraph_checkpoint-3.0.1-py3-none-any.whl", hash = "sha256:9b04a8d0edc0474ce4eaf30c5d731cee38f11ddff50a6177eead95b5c4e4220b"}, {file = "langgraph_checkpoint-3.0.1.tar.gz", hash = "sha256:59222f875f85186a22c494aedc65c4e985a3df27e696e5016ba0b98a5ed2cee0"}, @@ -3817,7 +3817,7 @@ version = "1.0.1" description = "Library with high-level APIs for creating and executing LangGraph agents and tools." optional = false python-versions = ">=3.10" -groups = ["main", "perception", "simbench"] +groups = ["main", "perception", "semap", "simbench"] files = [ {file = "langgraph_prebuilt-1.0.1-py3-none-any.whl", hash = "sha256:8c02e023538f7ef6ad5ed76219ba1ab4f6de0e31b749e4d278f57a8a95eec9f7"}, {file = "langgraph_prebuilt-1.0.1.tar.gz", hash = "sha256:ecbfb9024d9d7ed9652dde24eef894650aaab96bf79228e862c503e2a060b469"}, @@ -3833,7 +3833,7 @@ version = "0.2.15" description = "SDK for interacting with LangGraph API" optional = false python-versions = ">=3.10" -groups = ["main", "perception", "simbench"] +groups = ["main", "perception", "semap", "simbench"] files = [ {file = "langgraph_sdk-0.2.15-py3-none-any.whl", hash = "sha256:746566a5d89aa47160eccc17d71682a78771c754126f6c235a68353d61ed7462"}, {file = "langgraph_sdk-0.2.15.tar.gz", hash = "sha256:8faaafe2c1193b89f782dd66c591060cd67862aa6aaf283749b7846f331d5334"}, @@ -3849,7 +3849,7 @@ version = "0.4.59" description = "Client library to connect to the LangSmith Observability and Evaluation Platform." 
optional = false python-versions = ">=3.10" -groups = ["main", "perception", "simbench"] +groups = ["main", "perception", "semap", "simbench"] files = [ {file = "langsmith-0.4.59-py3-none-any.whl", hash = "sha256:97c26399286441a7b7b06b912e2801420fbbf3a049787e609d49dc975ab10bc5"}, {file = "langsmith-0.4.59.tar.gz", hash = "sha256:6b143214c2303dafb29ab12dcd05ac50bdfc60dac01c6e0450e50cee1d2415e0"}, @@ -3891,7 +3891,7 @@ version = "1.3.1" description = "a modern parsing library" optional = false python-versions = ">=3.8" -groups = ["main", "perception", "simbench"] +groups = ["main", "perception", "semap", "simbench"] files = [ {file = "lark-1.3.1-py3-none-any.whl", hash = "sha256:c629b661023a014c37da873b4ff58a817398d12635d3bbb2c5a03be7fe5d1e12"}, {file = "lark-1.3.1.tar.gz", hash = "sha256:b426a7a6d6d53189d318f2b6236ab5d6429eaf09259f1ca33eb716eed10d2905"}, @@ -4008,7 +4008,7 @@ version = "3.0.3" description = "Safely add untrusted strings to HTML/XML markup." optional = false python-versions = ">=3.9" -groups = ["main", "docs", "nomad", "perception", "s2s", "simbench"] +groups = ["main", "docs", "nomad", "perception", "s2s", "semap", "simbench"] files = [ {file = "markupsafe-3.0.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:2f981d352f04553a7171b8e44369f2af4055f888dfb147d55e42d29e29e74559"}, {file = "markupsafe-3.0.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:e1c1493fb6e50ab01d20a22826e57520f1284df32f2d8601fdd90b6304601419"}, @@ -4107,7 +4107,7 @@ version = "3.26.1" description = "A lightweight library for converting complex datatypes to and from native Python datatypes." optional = false python-versions = ">=3.9" -groups = ["main", "perception", "simbench"] +groups = ["main", "perception", "semap", "simbench"] files = [ {file = "marshmallow-3.26.1-py3-none-any.whl", hash = "sha256:3350409f20a70a7e4e11a27661187b77cdcaeb20abca41c1454fe33636bea09c"}, {file = "marshmallow-3.26.1.tar.gz", hash = "sha256:e6d8affb6cb61d39d26402096dc0aee12d5a26d490a121f118d2e81dc0719dc6"}, @@ -4542,7 +4542,7 @@ version = "6.7.0" description = "multidict implementation" optional = false python-versions = ">=3.9" -groups = ["main", "nomad", "perception", "simbench"] +groups = ["main", "nomad", "perception", "semap", "simbench"] files = [ {file = "multidict-6.7.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:9f474ad5acda359c8758c8accc22032c6abe6dc87a8be2440d097785e27a9349"}, {file = "multidict-6.7.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:4b7a9db5a870f780220e931d0002bbfd88fb53aceb6293251e2c839415c1b20e"}, @@ -4733,7 +4733,7 @@ version = "1.1.0" description = "Type system extensions for programs checked with the mypy type checker." 
optional = false python-versions = ">=3.8" -groups = ["main", "perception", "simbench"] +groups = ["main", "perception", "semap", "simbench"] files = [ {file = "mypy_extensions-1.1.0-py3-none-any.whl", hash = "sha256:1be4cccdb0f2482337c4743e60421de3a356cd97508abadd57d47403e94f5505"}, {file = "mypy_extensions-1.1.0.tar.gz", hash = "sha256:52e68efc3284861e772bbcd66823fde5ae21fd2fdb51c62a211403730b916558"}, @@ -4745,7 +4745,7 @@ version = "2.13.0" description = "Extremely lightweight compatibility layer between dataframe libraries" optional = false python-versions = ">=3.9" -groups = ["main", "perception", "simbench"] +groups = ["main", "perception", "semap", "simbench"] files = [ {file = "narwhals-2.13.0-py3-none-any.whl", hash = "sha256:9b795523c179ca78204e3be53726da374168f906e38de2ff174c2363baaaf481"}, {file = "narwhals-2.13.0.tar.gz", hash = "sha256:ee94c97f4cf7cfeebbeca8d274784df8b3d7fd3f955ce418af998d405576fdd9"}, @@ -4959,7 +4959,7 @@ version = "1.26.4" description = "Fundamental package for array computing in Python" optional = false python-versions = ">=3.9" -groups = ["main", "nomad", "perception", "s2s", "simbench"] +groups = ["main", "nomad", "perception", "s2s", "semap", "simbench"] files = [ {file = "numpy-1.26.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:9ff0f4f29c51e2803569d7a51c2304de5554655a60c5d776e35b4a41413830d0"}, {file = "numpy-1.26.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:2e4ee3380d6de9c9ec04745830fd9e2eccb3e6cf790d39d7b98ffd19b0dd754a"}, @@ -5172,7 +5172,7 @@ version = "0.6.1" description = "The official Python client for Ollama." optional = false python-versions = ">=3.8" -groups = ["main", "perception", "simbench"] +groups = ["main", "perception", "semap", "simbench"] files = [ {file = "ollama-0.6.1-py3-none-any.whl", hash = "sha256:fc4c984b345735c5486faeee67d8a265214a31cbb828167782dc642ce0a2bf8c"}, {file = "ollama-0.6.1.tar.gz", hash = "sha256:478c67546836430034b415ed64fa890fd3d1ff91781a9d548b3325274e69d7c6"}, @@ -5276,7 +5276,7 @@ version = "2.11.0" description = "The official Python library for the openai API" optional = false python-versions = ">=3.9" -groups = ["main", "perception", "simbench"] +groups = ["main", "perception", "semap", "simbench"] files = [ {file = "openai-2.11.0-py3-none-any.whl", hash = "sha256:21189da44d2e3d027b08c7a920ba4454b8b7d6d30ae7e64d9de11dbe946d4faa"}, {file = "openai-2.11.0.tar.gz", hash = "sha256:b3da01d92eda31524930b6ec9d7167c535e843918d7ba8a76b1c38f1104f321e"}, @@ -5356,7 +5356,7 @@ version = "4.11.0.86" description = "Wrapper package for OpenCV python bindings." 
optional = false python-versions = ">=3.6" -groups = ["main", "perception", "simbench"] +groups = ["main", "perception", "semap", "simbench"] files = [ {file = "opencv-python-4.11.0.86.tar.gz", hash = "sha256:03d60ccae62304860d232272e4a4fda93c39d595780cb40b161b310244b736a4"}, {file = "opencv_python-4.11.0.86-cp37-abi3-macosx_13_0_arm64.whl", hash = "sha256:432f67c223f1dc2824f5e73cdfcd9db0efc8710647d4e813012195dc9122a52a"}, @@ -5514,7 +5514,7 @@ version = "3.11.5" description = "Fast, correct Python JSON library supporting dataclasses, datetimes, and numpy" optional = false python-versions = ">=3.9" -groups = ["main", "perception", "simbench"] +groups = ["main", "perception", "semap", "simbench"] files = [ {file = "orjson-3.11.5-cp310-cp310-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:df9eadb2a6386d5ea2bfd81309c505e125cfc9ba2b1b99a97e60985b0b3665d1"}, {file = "orjson-3.11.5-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ccc70da619744467d8f1f49a8cadae5ec7bbe054e5232d95f92ed8737f8c5870"}, @@ -5611,7 +5611,7 @@ version = "1.12.0" description = "" optional = false python-versions = ">=3.10" -groups = ["main", "perception", "simbench"] +groups = ["main", "perception", "semap", "simbench"] files = [ {file = "ormsgpack-1.12.0-cp310-cp310-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:e08904c232358b94a682ccfbb680bc47d3fd5c424bb7dccb65974dd20c95e8e1"}, {file = "ormsgpack-1.12.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b9ed7a4b0037d69c8ba7e670e03ee65ae8d5c5114a409e73c5770d7fb5e4b895"}, @@ -5669,7 +5669,7 @@ version = "24.2" description = "Core utilities for Python packages" optional = false python-versions = ">=3.8" -groups = ["main", "docs", "nomad", "perception", "s2s", "simbench"] +groups = ["main", "docs", "nomad", "perception", "s2s", "semap", "simbench"] files = [ {file = "packaging-24.2-py3-none-any.whl", hash = "sha256:09abb1bccd265c01f4a3aa3f7a7db064b36514d2cba19a2f694fe6150451a759"}, {file = "packaging-24.2.tar.gz", hash = "sha256:c228a6dc5e932d346bc5739379109d49e8853dd8223571c7c5b55260edc0b97f"}, @@ -5697,7 +5697,7 @@ version = "2.3.3" description = "Powerful data structures for data analysis, time series, and statistics" optional = false python-versions = ">=3.9" -groups = ["main", "nomad", "perception", "simbench"] +groups = ["main", "nomad", "perception", "semap", "simbench"] files = [ {file = "pandas-2.3.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:376c6446ae31770764215a6c937f72d917f214b43560603cd60da6408f183b6c"}, {file = "pandas-2.3.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:e19d192383eab2f4ceb30b412b22ea30690c9e618f78870357ae1d682912015a"}, @@ -5879,7 +5879,7 @@ version = "11.3.0" description = "Python Imaging Library (Fork)" optional = false python-versions = ">=3.9" -groups = ["main", "nomad", "perception", "simbench"] +groups = ["main", "nomad", "perception", "semap", "simbench"] files = [ {file = "pillow-11.3.0-cp310-cp310-macosx_10_10_x86_64.whl", hash = "sha256:1b9c17fd4ace828b3003dfd1e30bff24863e0eb59b535e8f80194d9cc7ecf860"}, {file = "pillow-11.3.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:65dc69160114cdd0ca0f35cb434633c75e8e7fad4cf855177a05bf38678f73ad"}, @@ -6170,7 +6170,7 @@ version = "0.4.1" description = "Accelerated property cache" optional = false python-versions = ">=3.9" -groups = ["main", "nomad", "perception", "simbench"] +groups = ["main", "nomad", "perception", "semap", "simbench"] files = [ 
{file = "propcache-0.4.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:7c2d1fa3201efaf55d730400d945b5b3ab6e672e100ba0f9a409d950ab25d7db"}, {file = "propcache-0.4.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:1eb2994229cc8ce7fe9b3db88f5465f5fd8651672840b2e426b88cdb1a30aac8"}, @@ -6320,7 +6320,7 @@ version = "6.33.2" description = "" optional = false python-versions = ">=3.9" -groups = ["main", "nomad", "perception", "s2s", "simbench"] +groups = ["main", "nomad", "perception", "s2s", "semap", "simbench"] files = [ {file = "protobuf-6.33.2-cp310-abi3-win32.whl", hash = "sha256:87eb388bd2d0f78febd8f4c8779c79247b26a5befad525008e49a6955787ff3d"}, {file = "protobuf-6.33.2-cp310-abi3-win_amd64.whl", hash = "sha256:fc2a0e8b05b180e5fc0dd1559fe8ebdae21a27e81ac77728fb6c42b12c7419b4"}, @@ -6422,7 +6422,7 @@ version = "22.0.0" description = "Python library for Apache Arrow" optional = false python-versions = ">=3.10" -groups = ["main", "nomad", "perception", "simbench"] +groups = ["main", "nomad", "perception", "semap", "simbench"] files = [ {file = "pyarrow-22.0.0-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:77718810bd3066158db1e95a63c160ad7ce08c6b0710bc656055033e39cdad88"}, {file = "pyarrow-22.0.0-cp310-cp310-macosx_12_0_x86_64.whl", hash = "sha256:44d2d26cda26d18f7af7db71453b7b783788322d756e81730acb98f24eb90ace"}, @@ -6574,7 +6574,7 @@ version = "2.12.5" description = "Data validation using Python type hints" optional = false python-versions = ">=3.9" -groups = ["main", "nomad", "perception", "s2s", "simbench"] +groups = ["main", "nomad", "perception", "s2s", "semap", "simbench"] files = [ {file = "pydantic-2.12.5-py3-none-any.whl", hash = "sha256:e561593fccf61e8a20fc46dfc2dfe075b8be7d0188df33f221ad1f0139180f9d"}, {file = "pydantic-2.12.5.tar.gz", hash = "sha256:4d351024c75c0f085a9febbb665ce8c0c6ec5d30e903bdb6394b7ede26aebb49"}, @@ -6596,7 +6596,7 @@ version = "2.41.5" description = "Core functionality for Pydantic validation and serialization" optional = false python-versions = ">=3.9" -groups = ["main", "nomad", "perception", "s2s", "simbench"] +groups = ["main", "nomad", "perception", "s2s", "semap", "simbench"] files = [ {file = "pydantic_core-2.41.5-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:77b63866ca88d804225eaa4af3e664c5faf3568cea95360d21f4725ab6e07146"}, {file = "pydantic_core-2.41.5-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:dfa8a0c812ac681395907e71e1274819dec685fec28273a28905df579ef137e2"}, @@ -6730,7 +6730,7 @@ version = "2.12.0" description = "Settings management using Pydantic" optional = false python-versions = ">=3.10" -groups = ["main", "perception", "simbench"] +groups = ["main", "perception", "semap", "simbench"] files = [ {file = "pydantic_settings-2.12.0-py3-none-any.whl", hash = "sha256:fddb9fd99a5b18da837b29710391e945b1e30c135477f484084ee513adb93809"}, {file = "pydantic_settings-2.12.0.tar.gz", hash = "sha256:005538ef951e3c2a68e1c08b292b5f2e71490def8589d4221b95dab00dafcfd0"}, @@ -6754,7 +6754,7 @@ version = "0.9.1" description = "Widget for deck.gl maps" optional = false python-versions = ">=3.8" -groups = ["main", "perception", "simbench"] +groups = ["main", "perception", "semap", "simbench"] files = [ {file = "pydeck-0.9.1-py2.py3-none-any.whl", hash = "sha256:b3f75ba0d273fc917094fa61224f3f6076ca8752b93d46faf3bcfd9f9d59b038"}, {file = "pydeck-0.9.1.tar.gz", hash = "sha256:f74475ae637951d63f2ee58326757f8d4f9cd9f2a457cf42950715003e2cb605"}, @@ -6774,7 +6774,7 @@ version = "0.25.1" description = "Manipulate audio with an simple and easy 
high level interface" optional = false python-versions = "*" -groups = ["main", "perception", "s2s", "simbench"] +groups = ["main", "perception", "s2s", "semap", "simbench"] files = [ {file = "pydub-0.25.1-py2.py3-none-any.whl", hash = "sha256:65617e33033874b59d87db603aa1ed450633288aefead953b30bded59cb599a6"}, {file = "pydub-0.25.1.tar.gz", hash = "sha256:980a33ce9949cab2a569606b65674d748ecbca4f0796887fd6f46173a7b0d30f"}, @@ -6952,7 +6952,7 @@ version = "3.5.4" description = "A python implementation of GNU readline." optional = false python-versions = ">=3.8" -groups = ["main", "perception", "s2s", "simbench"] +groups = ["main", "perception", "s2s", "semap", "simbench"] markers = "sys_platform == \"win32\"" files = [ {file = "pyreadline3-3.5.4-py3-none-any.whl", hash = "sha256:eaf8e6cc3c49bcccf145fc6067ba8643d1df34d604a1ec0eccbf7a18e6d3fae6"}, @@ -7040,7 +7040,7 @@ version = "2.9.0.post0" description = "Extensions to the standard Python datetime module" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" -groups = ["main", "docs", "nomad", "perception", "s2s", "simbench"] +groups = ["main", "docs", "nomad", "perception", "s2s", "semap", "simbench"] files = [ {file = "python-dateutil-2.9.0.post0.tar.gz", hash = "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3"}, {file = "python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427"}, @@ -7055,7 +7055,7 @@ version = "1.2.1" description = "Read key-value pairs from a .env file and set them as environment variables" optional = false python-versions = ">=3.9" -groups = ["main", "perception", "simbench"] +groups = ["main", "perception", "semap", "simbench"] files = [ {file = "python_dotenv-1.2.1-py3-none-any.whl", hash = "sha256:b81ee9561e9ca4004139c6cbba3a238c32b03e4894671e181b671e8cb8425d61"}, {file = "python_dotenv-1.2.1.tar.gz", hash = "sha256:42667e897e16ab0d66954af0e60a9caa94f0fd4ecf3aaf6d2d260eec1aa36ad6"}, @@ -7093,7 +7093,7 @@ version = "2025.2" description = "World timezone definitions, modern and historical" optional = false python-versions = "*" -groups = ["main", "nomad", "perception", "simbench"] +groups = ["main", "nomad", "perception", "semap", "simbench"] files = [ {file = "pytz-2025.2-py2.py3-none-any.whl", hash = "sha256:5ddf76296dd8c44c26eb8f4b6f35488f3ccbf6fbbd7adee0b7262d43f0ec2f00"}, {file = "pytz-2025.2.tar.gz", hash = "sha256:360b9e3dbb49a209c21ad61809c7fb453643e048b38924c765813546746e81c3"}, @@ -7136,7 +7136,7 @@ version = "6.0.3" description = "YAML parser and emitter for Python" optional = false python-versions = ">=3.8" -groups = ["main", "docs", "nomad", "perception", "s2s", "simbench"] +groups = ["main", "docs", "nomad", "perception", "s2s", "semap", "simbench"] files = [ {file = "PyYAML-6.0.3-cp38-cp38-macosx_10_13_x86_64.whl", hash = "sha256:c2514fceb77bc5e7a2f7adfaa1feb2fb311607c9cb518dbc378688ec73d8292f"}, {file = "PyYAML-6.0.3-cp38-cp38-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9c57bb8c96f6d1808c030b1687b9b5fb476abaa47f0db9c0101f5e9f394e97f4"}, @@ -7359,7 +7359,7 @@ version = "2.5.10" description = "Core functionality for RAI framework" optional = false python-versions = "^3.10, <3.13" -groups = ["main", "perception", "simbench"] +groups = ["main", "perception", "semap", "simbench"] files = [] develop = true @@ -7471,6 +7471,23 @@ whisper = ["openai-whisper (>=20231117,<20231118)"] type = "directory" url = "src/rai_s2s" +[[package]] +name = "rai-semap" 
+version = "0.1.0" +description = "Semantic mapping and spatial memory for RAI" +optional = false +python-versions = "^3.10, <3.13" +groups = ["semap"] +files = [] +develop = true + +[package.dependencies] +rai_core = {path = "../rai_core", develop = true} + +[package.source] +type = "directory" +url = "src/rai_semap" + [[package]] name = "rai-sim" version = "0.0.2" @@ -7615,7 +7632,7 @@ version = "0.37.0" description = "JSON Referencing + Python" optional = false python-versions = ">=3.10" -groups = ["main", "nomad", "perception", "s2s", "simbench"] +groups = ["main", "nomad", "perception", "s2s", "semap", "simbench"] files = [ {file = "referencing-0.37.0-py3-none-any.whl", hash = "sha256:381329a9f99628c9069361716891d34ad94af76e461dcb0335825aecc7692231"}, {file = "referencing-0.37.0.tar.gz", hash = "sha256:44aefc3142c5b842538163acb373e24cce6632bd54bdb01b21ad5863489f50d8"}, @@ -7632,7 +7649,7 @@ version = "2025.11.3" description = "Alternative regular expression module, to replace re." optional = false python-versions = ">=3.9" -groups = ["main", "nomad", "perception", "s2s", "simbench"] +groups = ["main", "nomad", "perception", "s2s", "semap", "simbench"] files = [ {file = "regex-2025.11.3-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:2b441a4ae2c8049106e8b39973bfbddfb25a179dda2bdb99b0eeb60c40a6a3af"}, {file = "regex-2025.11.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:2fa2eed3f76677777345d2f81ee89f5de2f5745910e805f7af7386a920fa7313"}, @@ -7757,7 +7774,7 @@ version = "2.32.5" description = "Python HTTP for Humans." optional = false python-versions = ">=3.9" -groups = ["main", "docs", "nomad", "perception", "s2s", "simbench"] +groups = ["main", "docs", "nomad", "perception", "s2s", "semap", "simbench"] files = [ {file = "requests-2.32.5-py3-none-any.whl", hash = "sha256:2462f94637a34fd532264295e186976db0f5d453d1cdd31473c85a6a161affb6"}, {file = "requests-2.32.5.tar.gz", hash = "sha256:dbba0bac56e100853db0ea71b82b4dfd5fe2bf6d3754a8893c3af500cec7d7cf"}, @@ -7780,7 +7797,7 @@ version = "1.0.0" description = "A utility belt for advanced users of python-requests" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" -groups = ["main", "perception", "simbench"] +groups = ["main", "perception", "semap", "simbench"] files = [ {file = "requests-toolbelt-1.0.0.tar.gz", hash = "sha256:7681a0a3d047012b5bdc0ee37d7f8f07ebe76ab08caeccfc3921ce23c88d5bc6"}, {file = "requests_toolbelt-1.0.0-py2.py3-none-any.whl", hash = "sha256:cccfdd665f0a24fcf4726e690f65639d272bb0637b9b92dfd91a5568ccf6bd06"}, @@ -7834,7 +7851,7 @@ version = "0.30.0" description = "Python bindings to Rust's persistent data structures (rpds)" optional = false python-versions = ">=3.10" -groups = ["main", "nomad", "perception", "s2s", "simbench"] +groups = ["main", "nomad", "perception", "s2s", "semap", "simbench"] files = [ {file = "rpds_py-0.30.0-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:679ae98e00c0e8d68a7fda324e16b90fd5260945b45d3b824c892cec9eea3288"}, {file = "rpds_py-0.30.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:4cc2206b76b4f576934f0ed374b10d7ca5f457858b157ca52064bdfc26b9fc00"}, @@ -7974,7 +7991,7 @@ version = "0.16.0" description = "An Amazon S3 Transfer Manager" optional = false python-versions = ">=3.9" -groups = ["main", "nomad", "perception", "simbench"] +groups = ["main", "nomad", "perception", "semap", "simbench"] files = [ {file = "s3transfer-0.16.0-py3-none-any.whl", hash = "sha256:18e25d66fed509e3868dc1572b3f427ff947dd2c56f844a5bf09481ad3f3b2fe"}, {file = 
"s3transfer-0.16.0.tar.gz", hash = "sha256:8e990f13268025792229cd52fa10cb7163744bf56e719e0b9cb925ab79abf920"}, @@ -8549,7 +8566,7 @@ version = "1.17.0" description = "Python 2 and 3 compatibility utilities" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" -groups = ["main", "docs", "nomad", "perception", "s2s", "simbench"] +groups = ["main", "docs", "nomad", "perception", "s2s", "semap", "simbench"] files = [ {file = "six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274"}, {file = "six-1.17.0.tar.gz", hash = "sha256:ff70335d468e7eb6ec65b95b99d3a2836546063f63acc5171de367e834932a81"}, @@ -8587,7 +8604,7 @@ version = "5.0.2" description = "A pure Python implementation of a sliding window memory map manager" optional = false python-versions = ">=3.7" -groups = ["main", "nomad", "perception", "simbench"] +groups = ["main", "nomad", "perception", "semap", "simbench"] files = [ {file = "smmap-5.0.2-py3-none-any.whl", hash = "sha256:b30115f0def7d7531d22a0fb6502488d879e75b260a9db4d0819cfb25403af5e"}, {file = "smmap-5.0.2.tar.gz", hash = "sha256:26ea65a03958fa0c8a1c7e8c7a58fdc77221b8910f6be2131affade476898ad5"}, @@ -8599,7 +8616,7 @@ version = "1.3.1" description = "Sniff out which async library your code is running under" optional = false python-versions = ">=3.7" -groups = ["main", "perception", "simbench"] +groups = ["main", "perception", "semap", "simbench"] files = [ {file = "sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2"}, {file = "sniffio-1.3.1.tar.gz", hash = "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc"}, @@ -8709,7 +8726,7 @@ version = "2.0.45" description = "Database Abstraction Library" optional = false python-versions = ">=3.7" -groups = ["main", "perception", "simbench"] +groups = ["main", "perception", "semap", "simbench"] files = [ {file = "sqlalchemy-2.0.45-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7ae64ebf7657395824a19bca98ab10eb9a3ecb026bf09524014f1bb81cb598d4"}, {file = "sqlalchemy-2.0.45-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0f02325709d1b1a1489f23a39b318e175a171497374149eae74d612634b234c0"}, @@ -8816,7 +8833,7 @@ version = "1.52.1" description = "A faster way to build and share data apps" optional = false python-versions = ">=3.10" -groups = ["main", "perception", "simbench"] +groups = ["main", "perception", "semap", "simbench"] files = [ {file = "streamlit-1.52.1-py3-none-any.whl", hash = "sha256:97fee2c3421d350fd65548e45a20f506ec1b651d78f95ecacbc0c2f9f838081c"}, {file = "streamlit-1.52.1.tar.gz", hash = "sha256:b036a71866b893c97fdebaa2a2ebd21ebf2af7daea4b3abe783a57b26f55b3ca"}, @@ -8931,7 +8948,7 @@ version = "9.1.2" description = "Retry code until it succeeds" optional = false python-versions = ">=3.9" -groups = ["main", "perception", "simbench"] +groups = ["main", "perception", "semap", "simbench"] files = [ {file = "tenacity-9.1.2-py3-none-any.whl", hash = "sha256:f77bf36710d8b73a50b2dd155c97b870017ad21afe6ab300326b0371b3b05138"}, {file = "tenacity-9.1.2.tar.gz", hash = "sha256:1169d376c297e7de388d18b4481760d478b0e99a777cad3a9c86e556f4b697cb"}, @@ -9075,7 +9092,7 @@ version = "0.12.0" description = "tiktoken is a fast BPE tokeniser for use with OpenAI's models" optional = false python-versions = ">=3.9" -groups = ["main", "perception", "s2s", "simbench"] +groups = ["main", "perception", 
"s2s", "semap", "simbench"] files = [ {file = "tiktoken-0.12.0-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:3de02f5a491cfd179aec916eddb70331814bd6bf764075d39e21d5862e533970"}, {file = "tiktoken-0.12.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:b6cfb6d9b7b54d20af21a912bfe63a2727d9cfa8fbda642fd8322c70340aad16"}, @@ -9201,7 +9218,7 @@ version = "0.10.2" description = "Python Library for Tom's Obvious, Minimal Language" optional = false python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*" -groups = ["main", "perception", "simbench"] +groups = ["main", "perception", "semap", "simbench"] files = [ {file = "toml-0.10.2-py2.py3-none-any.whl", hash = "sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b"}, {file = "toml-0.10.2.tar.gz", hash = "sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f"}, @@ -9213,7 +9230,7 @@ version = "2.3.0" description = "A lil' TOML parser" optional = false python-versions = ">=3.8" -groups = ["main", "perception", "simbench"] +groups = ["main", "perception", "semap", "simbench"] files = [ {file = "tomli-2.3.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:88bd15eb972f3664f5ed4b57c1634a97153b4bac4479dcb6a495f41921eb7f45"}, {file = "tomli-2.3.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:883b1c0d6398a6a9d29b508c331fa56adbcdff647f6ace4dfca0f50e90dfd0ba"}, @@ -9265,7 +9282,7 @@ version = "1.2.0" description = "A lil' TOML writer" optional = false python-versions = ">=3.9" -groups = ["main", "perception", "simbench"] +groups = ["main", "perception", "semap", "simbench"] files = [ {file = "tomli_w-1.2.0-py3-none-any.whl", hash = "sha256:188306098d013b691fcadc011abd66727d3c414c571bb01b1a174ba8c983cf90"}, {file = "tomli_w-1.2.0.tar.gz", hash = "sha256:2dd14fac5a47c27be9cd4c976af5a12d87fb1f0b4512f81d69cce3b35ae25021"}, @@ -9403,7 +9420,7 @@ version = "6.5.3" description = "Tornado is a Python web framework and asynchronous networking library, originally developed at FriendFeed." 
optional = false python-versions = ">=3.9" -groups = ["main", "nomad", "perception", "simbench"] +groups = ["main", "nomad", "perception", "semap", "simbench"] files = [ {file = "tornado-6.5.3-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:2dd7d7e8d3e4635447a8afd4987951e3d4e8d1fb9ad1908c54c4002aabab0520"}, {file = "tornado-6.5.3-cp39-abi3-macosx_10_9_x86_64.whl", hash = "sha256:5977a396f83496657779f59a48c38096ef01edfe4f42f1c0634b791dde8165d0"}, @@ -9425,7 +9442,7 @@ version = "4.67.1" description = "Fast, Extensible Progress Meter" optional = false python-versions = ">=3.7" -groups = ["main", "nomad", "perception", "s2s", "simbench"] +groups = ["main", "nomad", "perception", "s2s", "semap", "simbench"] files = [ {file = "tqdm-4.67.1-py3-none-any.whl", hash = "sha256:26445eca388f82e72884e0d580d5464cd801a3ea01e63e5601bdff9ba6a48de2"}, {file = "tqdm-4.67.1.tar.gz", hash = "sha256:f8aef9c52c08c13a65f30ea34f4e5aac3fd1a34959879d7e59e63027286627f2"}, @@ -9538,7 +9555,7 @@ version = "0.4.2" description = "Functions for 3D coordinate transformations" optional = false python-versions = ">=3.6" -groups = ["main", "perception", "simbench"] +groups = ["main", "perception", "semap", "simbench"] files = [ {file = "transforms3d-0.4.2-py3-none-any.whl", hash = "sha256:1c70399d9e9473ecc23311fd947f727f7c69ed0b063244828c383aa1aefa5941"}, {file = "transforms3d-0.4.2.tar.gz", hash = "sha256:e8b5df30eaedbee556e81c6938e55aab5365894e47d0a17615d7db7fd2393680"}, @@ -9593,7 +9610,7 @@ version = "4.15.0" description = "Backported and Experimental Type Hints for Python 3.9+" optional = false python-versions = ">=3.9" -groups = ["main", "docs", "nomad", "perception", "s2s", "simbench"] +groups = ["main", "docs", "nomad", "perception", "s2s", "semap", "simbench"] files = [ {file = "typing_extensions-4.15.0-py3-none-any.whl", hash = "sha256:f0fa19c6845758ab08074a0cfa8b7aecb71c999ca73d62883bc25cc018c4e548"}, {file = "typing_extensions-4.15.0.tar.gz", hash = "sha256:0cea48d173cc12fa28ecabc3b837ea3cf6f38c6d1136f85cbaaf598984861466"}, @@ -9606,7 +9623,7 @@ version = "0.9.0" description = "Runtime inspection utilities for typing module." 
optional = false python-versions = "*" -groups = ["main", "perception", "simbench"] +groups = ["main", "perception", "semap", "simbench"] files = [ {file = "typing_inspect-0.9.0-py3-none-any.whl", hash = "sha256:9ee6fc59062311ef8547596ab6b955e1b8aa46242d854bfc78f4f6b0eff35f9f"}, {file = "typing_inspect-0.9.0.tar.gz", hash = "sha256:b23fc42ff6f6ef6954e4852c1fb512cdd18dbea03134f91f856a95ccc9461f78"}, @@ -9622,7 +9639,7 @@ version = "0.4.2" description = "Runtime typing introspection tools" optional = false python-versions = ">=3.9" -groups = ["main", "nomad", "perception", "s2s", "simbench"] +groups = ["main", "nomad", "perception", "s2s", "semap", "simbench"] files = [ {file = "typing_inspection-0.4.2-py3-none-any.whl", hash = "sha256:4ed1cacbdc298c220f1bd249ed5287caa16f34d44ef4e9c3d0cbad5b521545e7"}, {file = "typing_inspection-0.4.2.tar.gz", hash = "sha256:ba561c48a67c5958007083d386c3295464928b01faa735ab8547c5692e87f464"}, @@ -9637,7 +9654,7 @@ version = "2025.2" description = "Provider of IANA time zone data" optional = false python-versions = ">=2" -groups = ["main", "nomad", "perception", "simbench"] +groups = ["main", "nomad", "perception", "semap", "simbench"] files = [ {file = "tzdata-2025.2-py2.py3-none-any.whl", hash = "sha256:1a403fada01ff9221ca8044d701868fa132215d84beb92242d9acd2147f667a8"}, {file = "tzdata-2025.2.tar.gz", hash = "sha256:b60a638fcc0daffadf82fe0f57e53d06bdec2f36c4df66280ae79bce6bd6f2b9"}, @@ -9661,7 +9678,7 @@ version = "2.6.2" description = "HTTP library with thread-safe connection pooling, file post, and more." optional = false python-versions = ">=3.9" -groups = ["main", "docs", "nomad", "perception", "s2s", "simbench"] +groups = ["main", "docs", "nomad", "perception", "s2s", "semap", "simbench"] files = [ {file = "urllib3-2.6.2-py3-none-any.whl", hash = "sha256:ec21cddfe7724fc7cb4ba4bea7aa8e2ef36f607a4bab81aa6ce42a13dc3f03dd"}, {file = "urllib3-2.6.2.tar.gz", hash = "sha256:016f9c98bb7e98085cb2b4b17b87d2c702975664e4f060c6532e64d1c1a5e797"}, @@ -9679,7 +9696,7 @@ version = "0.12.0" description = "Drop-in replacement for Python UUID with bindings in Rust" optional = false python-versions = ">=3.9" -groups = ["main", "perception", "simbench"] +groups = ["main", "perception", "semap", "simbench"] files = [ {file = "uuid_utils-0.12.0-cp39-abi3-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:3b9b30707659292f207b98f294b0e081f6d77e1fbc760ba5b41331a39045f514"}, {file = "uuid_utils-0.12.0-cp39-abi3-macosx_10_12_x86_64.whl", hash = "sha256:add3d820c7ec14ed37317375bea30249699c5d08ff4ae4dbee9fc9bce3bfbf65"}, @@ -9823,7 +9840,7 @@ version = "6.0.0" description = "Filesystem events monitoring" optional = false python-versions = ">=3.9" -groups = ["main", "docs", "perception", "simbench"] +groups = ["main", "docs", "perception", "semap", "simbench"] files = [ {file = "watchdog-6.0.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:d1cdb490583ebd691c012b3d6dae011000fe42edb7a82ece80965b42abd61f26"}, {file = "watchdog-6.0.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:bc64ab3bdb6a04d69d4023b29422170b74681784ffb9463ed4870cf2f3e66112"}, @@ -9856,7 +9873,7 @@ files = [ {file = "watchdog-6.0.0-py3-none-win_ia64.whl", hash = "sha256:a1914259fa9e1454315171103c6a30961236f508b9b623eae470268bbcc6a22f"}, {file = "watchdog-6.0.0.tar.gz", hash = "sha256:9ddf7c82fda3ae8e24decda1338ede66e1c99883db93711d8fb941eaa2d8c282"}, ] -markers = {main = "platform_system != \"Darwin\"", perception = "platform_system != \"Darwin\"", simbench = 
"platform_system != \"Darwin\""} +markers = {main = "platform_system != \"Darwin\"", perception = "platform_system != \"Darwin\"", semap = "platform_system != \"Darwin\"", simbench = "platform_system != \"Darwin\""} [package.extras] watchmedo = ["PyYAML (>=3.10)"] @@ -9976,7 +9993,7 @@ version = "1.17.3" description = "Module for decorators, wrappers and monkey patching." optional = false python-versions = ">=3.8" -groups = ["main", "nomad", "perception", "simbench"] +groups = ["main", "nomad", "perception", "semap", "simbench"] files = [ {file = "wrapt-1.17.3-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:88bbae4d40d5a46142e70d58bf664a89b6b4befaea7b2ecc14e03cedb8e06c04"}, {file = "wrapt-1.17.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e6b13af258d6a9ad602d57d889f83b9d5543acd471eee12eb51f5b01f8eb1bc2"}, @@ -10067,7 +10084,7 @@ version = "3.6.0" description = "Python binding for xxHash" optional = false python-versions = ">=3.7" -groups = ["main", "nomad", "perception", "simbench"] +groups = ["main", "nomad", "perception", "semap", "simbench"] files = [ {file = "xxhash-3.6.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:87ff03d7e35c61435976554477a7f4cd1704c3596a89a8300d5ce7fc83874a71"}, {file = "xxhash-3.6.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:f572dfd3d0e2eb1a57511831cf6341242f5a9f8298a45862d085f5b93394a27d"}, @@ -10249,7 +10266,7 @@ version = "1.22.0" description = "Yet another URL library" optional = false python-versions = ">=3.9" -groups = ["main", "nomad", "perception", "simbench"] +groups = ["main", "nomad", "perception", "semap", "simbench"] files = [ {file = "yarl-1.22.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:c7bd6683587567e5a49ee6e336e0612bec8329be1b7d4c8af5687dcdeb67ee1e"}, {file = "yarl-1.22.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:5cdac20da754f3a723cceea5b3448e1a2074866406adeb4ef35b469d089adb8f"}, @@ -10467,7 +10484,7 @@ version = "0.25.0" description = "Zstandard bindings for Python" optional = false python-versions = ">=3.9" -groups = ["main", "perception", "simbench"] +groups = ["main", "perception", "semap", "simbench"] files = [ {file = "zstandard-0.25.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e59fdc271772f6686e01e1b3b74537259800f57e24280be3f29c8a0deb1904dd"}, {file = "zstandard-0.25.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:4d441506e9b372386a5271c64125f72d5df6d2a8e8a2a45a0ae09b03cb781ef7"}, @@ -10576,4 +10593,4 @@ cffi = ["cffi (>=1.17,<2.0) ; platform_python_implementation != \"PyPy\" and pyt [metadata] lock-version = "2.1" python-versions = "^3.10, <3.13" -content-hash = "13b04415fd768a2d73f2bc1cd01c5ca80371112693cfd8086c4466281fcc24f2" +content-hash = "8310e435a13798915c0b82bc377d1ae261974806fdf91d972de43d6e921158b7" diff --git a/pyproject.toml b/pyproject.toml index a78a2389f..0cb6cb616 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -40,6 +40,13 @@ optional = true [tool.poetry.group.s2s.dependencies] rai_s2s = {path = "src/rai_s2s", develop = true, extras = ["all"]} +[tool.poetry.group.semap] +optional = true + +[tool.poetry.group.semap.dependencies] +rai_semap = {path = "src/rai_semap", develop = true} + + [tool.poetry.group.simbench] optional = true diff --git a/src/rai_semap/README.md b/src/rai_semap/README.md new file mode 100644 index 000000000..3fd803b6e --- /dev/null +++ b/src/rai_semap/README.md @@ -0,0 +1,139 @@ +# RAI Semantic Map Memory + +⚠️ **Experimental Module**: This module is in active development. 
Features may change and some functionality is still in progress. + +## Overview + +Imagine your robot exploring a new warehouse or office building. Using SLAM (Simultaneous Localization and Mapping), it builds a geometric map showing walls and open areas, but it doesn't remember what objects it saw—like that tool cart or equipment in the storage area. + +RAI Semantic Map Memory solves this by adding a memory layer. As the robot explores, it remembers not just where walls are, but also what objects it detected and where they were located. Later, you can ask questions like "where did I see a pallet?" or "what objects are near the loading dock?" and the robot can answer using its stored memory. + +This module provides persistent storage of semantic annotations—linking object identities (like "shelf", "cart", "pallet") to their 3D locations in the map. It enables spatial-semantic queries that combine "what" and "where" information. + +## Usage Examples + +- Store object detections with their locations as the robot explores +- Query objects by location: "what's near point (x, y)?" +- Visualize stored annotations overlaid on the SLAM map + +For detailed design and architecture, see [design.md](../design.md). + +## Quick Start + +The following examples use the ROSBot XL demo to illustrate how to use rai_semap. + +### Prerequisites + +- ROS2 environment configured +- rai_semap installed: `poetry install --with semap` +- ROSBot XL demo configured (see [ROSBot XL demo](../../docs/demos/rosbot_xl.md)) + +### Step 0: Launch the ROSBot XL demo + +Follow the instructions in the [ROSBot XL demo](../../docs/demos/rosbot_xl.md). + +### Step 1: Launch the Semantic Map Node + +Start the semantic map node to begin collecting and storing detections: + +```bash +ros2 launch src/rai_semap/rai_semap/scripts/semap.launch.py +``` + +This uses default configuration files from `rai_semap/ros2/config/`. The default configs assume depth topic `/camera/depth/image_rect_raw` and camera info topic `/camera/color/camera_info`. If your topics use different names, create custom config files or override parameters. + +To use custom configs: + +```bash +ros2 launch src/rai_semap/rai_semap/scripts/semap.launch.py \ + node_config:=/path/to/node.yaml \ + detection_publisher_config:=/path/to/detection_publisher.yaml \ + perception_utils_config:=/path/to/perception_utils.yaml +``` + +### Step 2: Collect Detections + +In a separate terminal, run the navigation script to move the robot through waypoints and collect detections: + +```bash +python -m rai_semap.scripts.navigate_collect \ + --waypoints 2.0 0.0 4.0 0.0 2.0 2.0 \ + --collect-duration 10.0 \ + --use-sim-time +``` + +The script navigates to each waypoint and waits to allow detections to be collected and stored in the semantic map. + +### Step 3: Validate Stored Data + +After navigation completes, verify what was stored: + +```bash +python -m rai_semap.scripts.validate_semap \ + --database-path semantic_map.db \ + --location-id default_location +``` + +The validation script shows the total annotation count, annotations grouped by object class, confidence scores, and spatial distribution. + +## Configuration + +Configuration parameters (database_path, location_id, topics, etc.) are set in YAML config files; a sketch of one is shown below. If config files are not provided, the default configs in `rai_semap/ros2/config/` are used.
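+As a minimal sketch (assuming the standard ROS2 parameter-file layout; the node name and exact parameter keys below are illustrative assumptions, so check the defaults in `rai_semap/ros2/config/` for the authoritative names), a custom `node.yaml` might look like this:
+
+```yaml
+# Hypothetical node.yaml; parameter names are assumptions for illustration
+semantic_map_node:
+  ros__parameters:
+    database_path: semantic_map.db # SQLite database file
+    location_id: default_location # identifier for the physical location
+    map_frame_id: map # frame annotations are stored in
+    detection_topic: /detection_array # RAIDetectionArray input topic
+    depth_topic: /camera/depth/image_rect_raw
+    camera_info_topic: /camera/color/camera_info
+```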
+ +## Visualization + +View your semantic map annotations overlaid on the SLAM map in RViz2. + +### Start the Visualizer + +```bash +python -m rai_semap.ros2.visualizer \ + --ros-args \ + -p database_path:=semantic_map.db \ + -p location_id:=default_location \ + -p update_rate:=1.0 \ + -p marker_scale:=0.3 \ + -p show_text_labels:=true +``` + +### Set Up RViz2 + +Launch RViz2 with the provided config file: + +```bash +rviz2 -d src/rai_semap/rai_semap/scripts/semantic_map.rviz +``` + +The config file includes: + +- Map display subscribed to the `/map` topic +- MarkerArray display subscribed to the `/semantic_map_markers` topic +- Fixed Frame set to `map` + +The visualizer shows color-coded markers by object class (bed=blue, chair=green, door=orange, shelf=purple, table=violet). Marker transparency scales with confidence score, and optional text labels show object class names. + +## Querying the Semantic Map + +Query stored annotations programmatically using the Python API: + +```python +from geometry_msgs.msg import Point +from rai_semap.core.backend.sqlite_backend import SQLiteBackend +from rai_semap.core.semantic_map_memory import SemanticMapMemory + +# Initialize memory +backend = SQLiteBackend("semantic_map.db") +memory = SemanticMapMemory( + backend=backend, + location_id="default_location", + map_frame_id="map", + resolution=0.05, +) + +# Query annotations near a location +center = Point(x=2.0, y=0.0, z=0.0) +annotations = memory.query_by_location(center, radius=2.0) + +for ann in annotations: + print(f"Found {ann.object_class} at ({ann.pose.position.x}, {ann.pose.position.y})") +``` diff --git a/src/rai_semap/design.md b/src/rai_semap/design.md new file mode 100644 index 000000000..2e3cf15d1 --- /dev/null +++ b/src/rai_semap/design.md @@ -0,0 +1,553 @@ +# RAI's Agent Memory System + +## Table of Contents + +- [Problem Definition](#problem-definition) + - [Q & A](#q--a) +- [Concepts](#concepts) + - [High-Level Concepts](#high-level-concepts) + - [Data Models](#data-models) + - [Relationships](#relationships) + - [Non Goals](#non-goals) +- [Design Proposal](#design-proposal) + - [BaseMemory Interface](#basememory-interface) + - [SemanticMapMemory Interface](#semanticmapmemory-interface) + - [Database Backend Abstraction](#database-backend-abstraction) + - [New Component: rai_semap](#new-component-rai_semap) + - [Usage Patterns from Other Layers](#usage-patterns-from-other-layers) + - [Implementation Phases](#implementation-phases) +- [Reusability](#reusability) +- [External Memory Systems](#external-memory-systems) +- [Appendix](#appendix) + +## Problem Definition + +[Issue#225 Design SLAM RAI features](https://github.com/RobotecAI/rai/issues/225) presents an explorative SLAM/semantic mapping integration task: + +> "Robots often need to find out about their environment first, building a map and localizing themselves on it. During this process, RAI can be used to guide exploration for mapping or to build a semantic map during the SLAM which adds knowledge and memory, which can be used to reason about the area itself and tasks that are to be given in the area. A great start is to design solutions for RAI." + +Based on RAI's current capabilities (perception, navigation, multi-modal interaction), this maps to three areas that build on each other: perception feeds the semantic map, exploration uses it to guide decisions, and memory enables task reasoning. + +1. Semantic Perception Layer: Built on `rai_perception` (open-set detection) with GroundingDINO integration, creating a semantic annotation pipeline that tags SLAM features/points with object identities during mapping + +2.
Agent-Guided Exploration Strategy: Built on `rai_nomad` (navigation), where the agent decides where to explore based on goals ("find the kitchen", "map storage areas") rather than frontier-based exploration. Frontier-based exploration navigates to boundaries between known and unknown map regions to expand coverage. + +3. Spatial Memory System: Provides persistent semantic map storage that agents can query ("where did I see tools?") and reason over ("this room is suitable for assembly tasks"). The word _spatial_ refers to 3D location/position information in map coordinates. + + - The connection between spatial memory and other RAI memory systems (artifact_database.pkl, rai_whoami vector store, StateBasedAgent state) needs exploration: spatial memory could be queried by these systems to provide spatial context, rather than serving as storage. For example, artifacts could be annotated with spatial locations, embodiment docs could reference spatial locations that spatial memory grounds, or recent spatial queries could be included in StateBasedAgent state for spatial awareness during conversations. + +### Q & A + +Q: Why is "map storage areas" not frontier-based exploration? + +"Map storage areas" is goal-based, not frontier-based, because frontier-based exploration chooses the nearest boundary between known and unknown regions, regardless of what might be there. It's geometry-driven. Goal-based exploration ("map storage areas") uses semantic reasoning to prioritize exploration. The agent might: + +- Use partial detections ("I saw a shelf, explore that direction") +- Reason about room layouts ("storage areas are often in basements or corners") +- Query the semantic map for hints about where storage might be + +Both may explore unknown regions, but the decision differs: frontier-based exploration picks the nearest unknown boundary, whereas goal-based exploration uses semantic cues to target likely locations. + +## Concepts + +### High-Level Concepts + +#### Semantic Annotation + +**A spatial-semantic record linking an object identity (class label, e.g., "red cup", "tool") to a 3D position (centroid/center) in the map frame, with metadata (timestamp, confidence, detection source).** + +Unlike pure geometric SLAM, semantic annotations enable querying "what" and "where" simultaneously, allowing agents to reason about object locations for task planning. The combination of semantic labels and 3D positions bridges perception and spatial memory. The position is stored as a `Pose` object, where `pose.position` is the 3D point (computed from bounding box center or point cloud centroid when available) and `pose.orientation` is typically identity (not meaningful). + +```python +# Example: Semantic annotation structure (simplified) +{ + "object_class": "red cup", + "pose": {"position": {"x": 2.5, "y": 1.3, "z": 0.8}, "orientation": {"x": 0.0, "y": 0.0, "z": 0.0, "w": 1.0}}, + "confidence": 0.92, + "timestamp": "2025-01-15T10:23:00", + "detection_source": "GroundingDINO", + "source_frame": "camera_frame" +} +# Note: pose.position is a 3D point (centroid/center) in map frame, computed from bounding box center +# or point cloud centroid when available. pose.orientation is typically identity (not meaningful). +``` + +#### Spatial Memory + +**A conceptual system that provides persistent storage of semantic annotations indexed by both spatial coordinates (3D: x, y, z) and semantic labels.** + +Storage is implemented via database backends (SQLite/PostGIS) accessed through the `SemanticMapMemory` interface.
Dual indexing enables efficient queries like "find objects near (x,y)" (2D projection when z is not needed) and "where did I see X?" by combining spatial indexing with semantic search. Without spatial memory, agents cannot recall where objects were seen, limiting task planning. + +```python +# Example: Spatial query +from geometry_msgs.msg import Point +center = Point(x=2.5, y=1.3, z=0.0) +results = memory.query_by_location(center, radius=2.0) +# Returns: List[SemanticAnnotation] with objects within radius + +# Example: Semantic query +results = memory.query_by_class("red cup") +# Returns: List[SemanticAnnotation] with all "red cup" annotations +``` + +#### Camera-to-map Transformation + +**Converting detections from camera frame to map frame using TF transforms.** + +The perception layer provides detections with 3D positions (GroundingDINO provides 2D bounding boxes; we compute 3D poses from bounding box centers using depth images and camera intrinsics). These positions are initially in the camera frame. The system transforms them to the map frame using TF transforms (camera → base_link → map). This is critical for building a consistent spatial-semantic map across robot movements. Without proper frame transformation, detections from different robot positions would be stored in inconsistent coordinate systems, making spatial queries unreliable. + +```python +# Example: Frame transformation flow (pseudo code) +# Detection in camera frame +camera_pose = (x=0.3, y=0.1, z=1.2) # relative to camera + +# Transform to map frame via TF +map_pose = transform_pose( + camera_pose, + source_frame="camera_frame", + target_frame="map" +) +# Result: (x=2.5, y=1.3, z=0.8) # absolute map coordinates + +# Same object detected from different angle → same map coordinates +``` + +#### Temporal Consistency + +**Handling multiple detections of the same object instance over time by merging duplicates based on spatial proximity.** + +Tracks individual instances (by spatial location), not object classes. Without temporal consistency, repeated detections of the same object would create duplicate records, making queries like "where did I see the red cup?" return multiple locations for the same object, rendering the database inconsistent. Temporal consistency merges repeated detections of the same physical object (same location within a threshold), not different objects even if they share the same class label. A key challenge is distinguishing a moved object (same instance, new location) from a new object instance (different instance, similar appearance). + +```python +# Example: Multiple detections of same object +# Detection 1 at t=0: "red cup" at (2.5, 1.3, 0.8), confidence=0.85 +# Detection 2 at t=5: "red cup" at (2.52, 1.31, 0.81), confidence=0.92 +# → Merged into single annotation with max confidence (0.92) and latest timestamp + +# Example: Different objects (same class, different locations) +# Detection 1: "box" at (1.0, 2.0, 0.5) # Box A +# Detection 2: "box" at (3.0, 4.0, 0.5) # Box B (different instance) +# → Two separate annotations stored +``` + +#### Deduplication Strategies + +**Multiple techniques work together to prevent duplicate annotations: spatial merging, point cloud-based matching, confidence filtering, and bounding box size filtering.** + +These strategies work together to ensure database consistency and query reliability. Confidence and size filtering happen first to remove low-quality detections, then spatial merging with point cloud validation occurs during storage. 
Without deduplication, the database would be polluted with duplicate entries, making spatial queries return incorrect results and wasting storage. + +**1. Spatial Merging** + +Detections of the same class within a merge threshold (distance in meters) are merged into a single annotation. The merge threshold is class-specific to handle objects of different sizes. When merging, the system keeps the maximum confidence score and updates the timestamp to the latest detection. + +```python +# Example: Class-specific merge thresholds +merge_thresholds = { + "couch": 2.5, # Large objects + "table": 1.5, + "shelf": 1.5, + "chair": 0.8, # Medium objects + "cup": 0.5 # Small objects (default) +} + +# Two "cup" detections within 0.5m → merged +# Two "couch" detections within 2.5m → merged +``` + +**2. Point Cloud-Based Matching** + +When depth images are available, the system extracts 3D point clouds from bounding box regions and uses them for more accurate deduplication. Point cloud centroid is more accurate than bounding box center for spatial matching. Size validation compares 3D point cloud sizes to avoid merging objects of very different sizes. If point cloud sizes differ by >50% and >0.5m, detections are treated as different objects even if spatially close. + +```python +# Example: Point cloud validation +detection1 = { + "centroid": (2.5, 1.3, 0.8), + "size_3d": 0.15, # meters + "point_count": 1250 +} +detection2 = { + "centroid": (2.52, 1.31, 0.81), # Close spatially + "size_3d": 0.25, # 67% larger → different object + "point_count": 2100 +} +# Result: Not merged (size difference >50% and >0.5m) +``` + +**3. Confidence Filtering** + +Only detections above a confidence threshold are stored. The threshold is class-specific to handle high false-positive classes. This prevents low-confidence false positives from polluting the database. + +```python +# Example: Class-specific confidence thresholds +confidence_thresholds = { + "person": 0.7, # High false-positive rate + "window": 0.6, + "door": 0.5, + "cup": 0.5 # Default threshold +} +``` + +**4. Bounding Box Size Filtering** + +Very small bounding boxes (below minimum area threshold, default: 100 pixels²) are filtered out as they are often false positives from partial occlusions or detection artifacts. + +```python +# Example: Size filtering +bbox_area = width * height # pixels² +if bbox_area < min_bbox_area: # default: 100 pixels² + # Filtered out (likely false positive) + return None +``` + +#### Query Patterns + +**Primary query types: spatial queries (objects near a location), semantic queries (locations of object classes), and hybrid queries (combining both).** + +These query patterns enable agents to retrieve spatial-semantic information for task planning. Spatial queries support navigation and proximity-based reasoning. Semantic queries enable object retrieval tasks. Hybrid queries combine both for complex scenarios like "find tools in the workshop." + +```python +# Spatial query: "What objects are within 2m of (x,y)?" +from geometry_msgs.msg import Point +center = Point(x=2.5, y=1.3, z=0.0) +results = memory.query_by_location(center, radius=2.0) + +# Semantic query: "Where did I see a red cup?" 
+results = memory.query_by_class("red cup") + +# Hybrid query: "Find tools in the workshop" (semantic + spatial region) +results = memory.query_by_region( + bbox=(x1, y1, x2, y2), # (min_x, min_y, max_x, max_y) + object_class="tool" +) +``` + +### Data Models + +#### SemanticAnnotation + +**A data structure representing a single semantic annotation with object identity, 3D pose, confidence, and metadata.** + +This is the core data model storing all semantic-spatial information. Each annotation links a detected object to its location in the map frame, enabling spatial queries and temporal consistency tracking. The metadata field allows extensibility for point cloud features and other attributes without changing the core schema. + +```python +class SemanticAnnotation: + id: str # Unique identifier (UUID string) + object_class: str # e.g., "red cup", "tool" + pose: Pose # 3D pose in map frame (x, y, z, orientation) + confidence: float # 0.0-1.0 + timestamp: float # Unix timestamp in seconds (timezone-naive) + detection_source: str # e.g., "GroundingDINO", "GroundedSAM" + source_frame: str # Original camera frame_id + location_id: str # Identifier for the physical location + vision_detection_id: Optional[str] # ID from Detection2D for debugging + metadata: Dict[str, Any] # Optional JSON with pointcloud info when available + # metadata.pointcloud: {centroid, size_3d, point_count} +``` + +For the `timestamp` field, we considered using Python `datetime`. While it provides type safety and easy comparison, `datetime` has a few drawbacks: + +1. Precision loss: ROS `rclpy.time.Time` has nanosecond precision (`sec` + `nanosec`), while Python `datetime` has microsecond precision. + +2. Conversion overhead: We would need to convert `rclpy.time.Time` → `datetime` at the ROS boundary. Example: + +```python +# Conversion needed +ros_time = rclpy.time.Time.from_msg(msg.header.stamp) +# Convert to datetime (loses nanosec precision) +dt = datetime.fromtimestamp(ros_time.nanoseconds / 1e9) +``` + +3. Database storage: Both SQLite and PostgreSQL still require conversion: + - SQLite: No native datetime type; stored as TEXT (ISO format) or REAL/INTEGER (Unix timestamp) + - PostgreSQL: Has TIMESTAMP, but you still need to handle timezones (naive vs. aware) + +Implementation decision: We use `float` (Unix timestamp in seconds) for timezone-naive, PostgreSQL-compatible storage. Convert from `rclpy.time.Time` using `timestamp.nanoseconds / 1e9`. Nanosecond precision is lost, but that is acceptable for timestamps. + +#### SpatialIndex + +**Database-level spatial index (R-tree) for efficient spatial queries.** + +Spatial indexing is essential for performance when querying large numbers of annotations. Without it, spatial queries would require scanning all records, which becomes prohibitively slow as the map grows. SQLite uses the SpatiaLite extension; PostGIS uses native GIST indexes. Both provide sub-linear query performance for spatial operations.
```sql +-- Example: Spatial index creation (SpatiaLite) +CREATE VIRTUAL TABLE annotations_rtree USING rtree( + id, minx, maxx, miny, maxy +); + +-- Example: Efficient spatial query using index +SELECT * FROM annotations +WHERE id IN ( + SELECT id FROM annotations_rtree + WHERE minx <= x+radius AND maxx >= x-radius + AND miny <= y+radius AND maxy >= y-radius +); +``` + +#### MapMetadata + +**Metadata about the SLAM map including frame ID, resolution, origin, and last update timestamp.** + +This metadata enables correct interpretation of spatial coordinates and consistency with the underlying SLAM map. Resolution and origin allow converting between map coordinates and pixel coordinates for visualization. The last_updated timestamp tracks map freshness and coordinate system changes. + +```python +class MapMetadata: + location_id: str # Identifier for the physical location + map_frame_id: str # Frame ID of the SLAM map + resolution: float # OccupancyGrid resolution (meters/pixel) + origin: Optional[Pose] # Optional map origin pose + last_updated: Optional[float] # Unix timestamp (seconds) of last annotation +``` + +### Relationships + +#### Perception Layer → Memory System + +**`RAIDetectionArray` messages flow from `rai_perception` services (GroundingDINO, GroundedSAM) into `rai_semap`, which projects detections to the map frame and stores them.** + +The `detection_publisher` node bridges the service-based perception layer to topic-based messaging by subscribing to camera images, calling the DINO service, and publishing `RAIDetectionArray` messages to `/detection_array`. This decoupling allows the memory system to work with any perception service that publishes `RAIDetectionArray` messages, not just GroundingDINO. The topic-based interface enables multiple consumers and easier debugging. + +```python +# Flow: Camera → detection_publisher → RAIDetectionArray → semantic_map_node +# detection_publisher subscribes to /camera/camera/color/image_raw (configurable) +# Calls /grounding_dino_classify service (configurable) +# Publishes RAIDetectionArray to /detection_array +# semantic_map_node subscribes to /detection_array and stores annotations +``` + +#### Exploration Layer → Memory System + +**Agent-guided exploration uses semantic map queries to find unexplored regions with specific semantic properties.** + +The memory system returns candidate locations for exploration goals like "find areas with storage furniture", enabling goal-based exploration rather than purely geometric frontier-based exploration. The exploration layer can query for semantic hints ("I saw a shelf, explore that direction") and use coverage tracking to prioritize unexplored regions. + +```python +# Example: Goal-based exploration query +candidates = memory.query_by_class("shelf") +unexplored_regions = exploration.find_unexplored_near(candidates) +# Agent navigates to unexplored regions near detected shelves +``` + +#### Memory System → Agent Tools + +**Agents query the semantic map via `QuerySemanticMapTool` to retrieve object locations for task planning.** + +This integration enables multi-step task planning: query object locations, navigate to them, verify presence, and manipulate objects. Without this connection, agents would have no persistent spatial memory and would need to re-detect objects every time, limiting task capabilities.
+ +```python +# Example: Agent tool usage +tool = QuerySemanticMapTool(memory=memory) +result = tool.invoke({"query": "red cup in kitchen", "room": "kitchen"}) +# Returns: String with object locations +# Agent uses NavigateToPoseTool to go to location +``` + +### Non Goals + +Future Integration Points: + +- `artifact_database.pkl`: Could store semantic annotations alongside multimodal artifacts +- `rai_whoami` vector store: Could index semantic annotations for LLM-based reasoning +- `StateBasedAgent` state: Could include recent semantic map queries in conversation context + +## Design Proposal + +### BaseMemory Interface + +**A minimal abstract interface for memory systems that allows future memory systems (conversational, vector-based, etc.) to share a common API.** + +Since no `BaseMemory` interface exists in RAI, we define this interface to enable consistent memory system integration across RAI components. It allows `SemanticMapMemory` and future memory systems to share a common API while each extends it with domain-specific methods. See `base_memory.py` for the interface definition. + +```python +# Example: BaseMemory interface structure +class BaseMemory(ABC): + @abstractmethod + def store(self, key: str, value: Any, metadata: Optional[Dict[str, Any]] = None) -> str: + """Store a value with optional metadata. Returns storage ID.""" + pass + + @abstractmethod + def retrieve(self, query: str, filters: Optional[Dict[str, Any]] = None) -> List[Any]: + """Retrieve values matching query and filters. + + Designed for vector database use cases where query is text to embed + for similarity search, and filters are metadata constraints. + Not suitable for spatial databases which require concrete query methods + (e.g., query_by_location, query_by_region). + """ + pass + + @abstractmethod + def delete(self, key: str) -> bool: + """Delete a stored value. Returns success status.""" + pass +``` + +### SemanticMapMemory Interface + +**`SemanticMapMemory` extends `BaseMemory` with spatial query capabilities for semantic annotations.** + +This interface provides the contract for spatial-semantic memory operations, enabling agent tools and exploration layers to query object locations without depending on the specific database backend implementation. The interface abstracts away backend details (SQLite vs PostGIS) while providing spatial query methods. See `semantic_map_memory.py` for the interface definition. + +```python +# Example: SemanticMapMemory interface methods +class SemanticMapMemory(BaseMemory): + def query_by_location(self, center: Point, radius: float, object_class: Optional[str] = None, location_id: Optional[str] = None) -> List[SemanticAnnotation]: + """Query annotations within radius of center point.""" + pass + + def query_by_class(self, object_class: str, confidence_threshold: float = 0.5, limit: Optional[int] = None, location_id: Optional[str] = None) -> List[SemanticAnnotation]: + """Query annotations by object class.""" + pass + + def query_by_region(self, bbox: Tuple[float, float, float, float], object_class: Optional[str] = None, location_id: Optional[str] = None) -> List[SemanticAnnotation]: + """Query annotations within bounding box region.""" + pass +``` + +### Database Backend Abstraction + +**A backend abstraction layer that supports both SQLite (Phase I) and PostGIS (future) implementations.** + +This abstraction enables switching between database backends without changing the `SemanticMapMemory` interface or agent tools. 
SQLite provides a lightweight, single-file solution for Phase I; PostGIS enables advanced features for future multi-robot deployments. See `spatial_db_backend.py` for the interface definition. + +**SQLiteBackend (Phase I):** + +Uses SpatiaLite extension for spatial indexing. Single-file database with no external dependencies. Can be deployed on-board the robot (no network or separate server required). Sufficient for single-robot deployments. + +```python +# Example: SQLiteBackend usage +backend = SQLiteBackend("semantic_map.db") +backend.init_schema() # Initialize database schema +memory = SemanticMapMemory(backend, location_id="default_location") +# Single file, no server needed +``` + +**PostGISBackend (future):** + +Full PostgreSQL + PostGIS for advanced spatial operations. Supports multi-robot coordination via shared database (cloud or local network server). Better performance for large-scale maps. + +```python +# Example: PostGISBackend usage (future) +backend = PostGISBackend(connection_string="postgresql://...") +backend.init_schema() # Initialize database schema +memory = SemanticMapMemory(backend, location_id="warehouse_a") +# Shared database for multi-robot coordination +``` + +Backend selection is configurable via `backend_type` parameter (currently supports "sqlite"; PostGIS backend not yet implemented). + +### New Component: `rai_semap` + +Architecture: + +`rai_semap` consists of a core library, a ROS2 node wrapper and tools. + +1. Core Library (`rai_semap.core`): + + - Frame Projection: Transform 3D poses from camera frame to map frame using TF transforms + - Temporal Filtering: Multi-strategy deduplication (spatial clustering, point cloud-based matching, confidence/size filtering) to merge duplicate detections + - Storage: Persist annotations to `SemanticMapMemory` backend with point cloud features in metadata + - Pure Python library with no ROS2 dependencies + +2. ROS2 Node Wrapper (`rai_semap.ros2`): + + - `detection_publisher` node: Subscribes to camera images, calls GroundingDINO service, publishes `RAIDetectionArray` messages with configurable throttling + - `node` (semantic map node): Subscribes to `RAIDetectionArray` and `/map` topics, handles TF transforms, converts ROS2 messages to core library data structures, calls core processing functions + - `visualizer` node: Publishes semantic map annotations as RViz2 markers for real-time visualization, querying the database at configurable intervals + +3. Tools/Services: + - `QuerySemanticMapTool`: LangChain tool for agent queries + - ROS2 service for programmatic access (not in current scope) + +Dependency Flow: + +``` +Camera Images → detection_publisher → RAIDetectionArray → semantic_map_node → rai_semap.core → SemanticMapMemory → Agent Tools + ↓ ↓ ↓ ↓ ↓ ↓ + /camera/image_raw DINO Service Detection2D ROS2 Wrapper Frame Transform SQLite/PostGIS + (service call) (3D pose, class) (msg conversion) (TF transform) Spatial Queries +``` + +### Usage Patterns from Other Layers + +**Perception Layer**: The `detection_publisher` node bridges service-based perception (GroundingDINO) to topic-based messaging, processing camera images and publishing `RAIDetectionArray` messages with configurable confidence filtering and detection rate throttling. + +**Exploration Layer** (preliminary): Future integration could support coverage tracking (identifying annotated map regions), goal-based queries (finding unexplored regions with specific semantic properties), and frontier detection (boundaries between mapped/unmapped regions). 
These features would enable agent-guided exploration beyond geometric frontier-based methods. + +Agent Tool Integration: + +- Natural language queries: `QuerySemanticMapTool("red cup in kitchen")` → spatial query +- Multi-step planning: Query → Navigate → Verify → Manipulate +- Temporal reasoning: "Where did I see X yesterday?" (requires timestamp filtering) + +### Implementation Phases + +Phase I (SQLite): + +- Implement `SQLiteBackend` with SpatiaLite +- Basic `SemanticMapMemory` with spatial queries +- `rai_semap` node with frame projection +- `QuerySemanticMapTool` for agent integration +- Single-robot deployment +- Validation demo using rosbot-xl: Build a semantic map during navigation, query object locations (e.g., "Where did I see the bed?"), verify detections are correctly stored and retrieved + +Future direction (PostGIS Migration): + +- Implement `PostGISBackend` with the same interface +- Configuration-based backend switching +- Multi-robot coordination support +- Advanced spatial operations (polygon queries, distance calculations) + +## Reusability + +`rai_semap` may be reusable for object retrieval scenarios where the robot finds and retrieves objects it saw earlier. For example, after initial mapping, the user asks: _"Bring me the red cup I saw in the kitchen"_ + +- Flow: + +1. Agent queries the semantic map: `QuerySemanticMapTool.invoke({"query": "red cup", "room": "kitchen"})` +2. Semantic map returns: String with object locations including pose information +3. Agent uses `NavigateToPoseTool` to go to that location +4. Agent uses `GetDetectionTool` to confirm object presence +5. Agent uses manipulation tools to grab and return the cup + +- Benefits: + - Persistent memory: remembers objects across sessions + - Spatial reasoning: knows where things are, not just what they are + - Task planning: can plan multi-step retrieval tasks + +More scenarios are yet to be explored; they are listed here for future revisits of this design: + +- Inventory tracking: "What tools are in the workshop?" +- Change detection: "Did anything move in the living room?" +- Multi-robot coordination: share semantic map between robots +- Long-term monitoring: track object locations over days/weeks + +## External Memory Systems + +### mem0 + +[mem0](https://github.com/mem0ai/mem0) is a mature implementation (43.8k stars, production-ready), but it is not a good fit for RAI: mem0 targets conversational memory, while RAI needs spatial-semantic storage with pose queries. + +### ROS semantic_mapping + +C++/ROS1, with no Python API and no SQLite/PostGIS backend ([source reference](https://github.com/fdayoub/ros-semantic-mapper)). + +### KnowRob + +Focused on knowledge reasoning, not spatial-semantic storage ([source reference](https://github.com/knowrob/knowrob)). + +### SEGO (Semantic Graph Ontology) + +A research framework with no production storage backend ([paper](https://arxiv.org/abs/2506.13149)). + +### Semantic SLAM projects + +Mostly C++ (ORB-SLAM2, etc.) rather than Python, and without database backends ([orb-slam2 source reference](https://github.com/appliedAI-Initiative/orb_slam_2_ros)). +The frame viewer from this [post](https://records.sigmm.org/?open-source-item=openvslam-a-versatile-visual-slam-framework) is fantastic for visualization. + +## Appendix + +### PostGIS + +PostGIS is an extension for PostgreSQL, not a separate database. It adds spatial data types and functions (geometry, geography, spatial indexing, spatial queries). After installing PostgreSQL, enable the PostGIS extension with `CREATE EXTENSION postgis;`.
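+As a minimal sketch of the setup (the table layout mirrors the SQLite schema used by `SQLiteBackend` and is purely illustrative, not the schema of the not-yet-implemented `PostGISBackend`):
+
+```sql
+-- Enable PostGIS once per database (the extension must be installed first)
+CREATE EXTENSION IF NOT EXISTS postgis;
+
+-- Hypothetical annotations table mirroring the SQLite schema
+CREATE TABLE annotations (
+    id TEXT PRIMARY KEY,
+    object_class TEXT NOT NULL,
+    position GEOMETRY(POINTZ) NOT NULL, -- x, y, z in map frame
+    confidence REAL NOT NULL,
+    timestamp DOUBLE PRECISION NOT NULL,
+    location_id TEXT NOT NULL
+);
+
+-- Native GIST index for sub-linear spatial queries
+CREATE INDEX idx_annotations_position ON annotations USING GIST (position);
+
+-- "What objects are within 2 m of (2.5, 1.3)?" (2D distance; ST_3DDWithin for 3D)
+SELECT id, object_class
+FROM annotations
+WHERE ST_DWithin(position, ST_MakePoint(2.5, 1.3, 0.0), 2.0);
+```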
diff --git a/src/rai_semap/pyproject.toml b/src/rai_semap/pyproject.toml new file mode 100644 index 000000000..e3963066d --- /dev/null +++ b/src/rai_semap/pyproject.toml @@ -0,0 +1,25 @@ +[build-system] +requires = ["poetry-core>=1.0.0"] +build-backend = "poetry.core.masonry.api" + +[tool.poetry] +name = "rai_semap" +version = "0.1.0" +description = "Semantic mapping and spatial memory for RAI" +authors = ["Julia Jia"] +readme = "README.md" +classifiers = [ + "Programming Language :: Python :: 3", + "Development Status :: 4 - Beta", + "License :: OSI Approved :: Apache Software License", +] +packages = [ + { include = "rai_semap", from = "." }, +] + +[tool.poetry.dependencies] +python = "^3.10, <3.13" +rai_core = {path = "../rai_core", develop = true} + +[tool.poetry.group.dev.dependencies] +pytest = "^8.2.0" diff --git a/src/rai_semap/rai_semap/__init__.py b/src/rai_semap/rai_semap/__init__.py new file mode 100644 index 000000000..c8b4abf7f --- /dev/null +++ b/src/rai_semap/rai_semap/__init__.py @@ -0,0 +1,13 @@ +# Copyright (C) 2025 Julia Jia +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/src/rai_semap/rai_semap/core/__init__.py b/src/rai_semap/rai_semap/core/__init__.py new file mode 100644 index 000000000..c8b4abf7f --- /dev/null +++ b/src/rai_semap/rai_semap/core/__init__.py @@ -0,0 +1,13 @@ +# Copyright (C) 2025 Julia Jia +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/src/rai_semap/rai_semap/core/backend/__init__.py b/src/rai_semap/rai_semap/core/backend/__init__.py new file mode 100644 index 000000000..c8b4abf7f --- /dev/null +++ b/src/rai_semap/rai_semap/core/backend/__init__.py @@ -0,0 +1,13 @@ +# Copyright (C) 2025 Julia Jia +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
diff --git a/src/rai_semap/rai_semap/core/backend/spatial_db_backend.py b/src/rai_semap/rai_semap/core/backend/spatial_db_backend.py new file mode 100644 index 000000000..c6dd54255 --- /dev/null +++ b/src/rai_semap/rai_semap/core/backend/spatial_db_backend.py @@ -0,0 +1,80 @@ +# Copyright (C) 2025 Julia Jia +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from abc import ABC, abstractmethod +from typing import TYPE_CHECKING, Any, Dict, List, Optional + +from rai.types import Point + +if TYPE_CHECKING: + from rai_semap.core.semantic_map_memory import SemanticAnnotation + + +class SpatialDBBackend(ABC): + """Abstract backend for spatial database operations.""" + + @abstractmethod + def init_schema(self) -> None: + """Initialize database schema with spatial extensions.""" + pass + + @abstractmethod + def insert_annotation(self, annotation: "SemanticAnnotation") -> str: + """Insert annotation with spatial indexing.""" + pass + + @abstractmethod + def spatial_query( + self, center: Point, radius: float, filters: Optional[Dict[str, Any]] = None + ) -> List["SemanticAnnotation"]: + """Execute spatial query with optional filters.""" + pass + + @abstractmethod + def delete_annotation(self, annotation_id: str) -> bool: + """Delete annotation by ID. Returns success status.""" + pass + + @abstractmethod + def delete_all_annotations(self, location_id: Optional[str] = None) -> int: + """Delete all annotations, optionally filtered by location_id. + + Args: + location_id: If provided, only delete annotations for this location. + If None, delete all annotations. + + Returns: + Number of annotations deleted. + """ + pass + + @abstractmethod + def update_annotation(self, annotation: "SemanticAnnotation") -> bool: + """Update existing annotation by ID. Returns success status.""" + pass + + @abstractmethod + def get_distinct_object_classes( + self, location_id: Optional[str] = None + ) -> List[str]: + """Get list of distinct object classes seen in a location. + + Args: + location_id: If provided, only return classes for this location. + If None, return classes for all locations. + + Returns: + List of unique object class names. + """ + pass diff --git a/src/rai_semap/rai_semap/core/backend/sqlite_backend.py b/src/rai_semap/rai_semap/core/backend/sqlite_backend.py new file mode 100644 index 000000000..d1801bee4 --- /dev/null +++ b/src/rai_semap/rai_semap/core/backend/sqlite_backend.py @@ -0,0 +1,362 @@ +# Copyright (C) 2025 Julia Jia +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import json +import logging +import re +import sqlite3 +from typing import Any, Dict, List, Optional + +from rai.types import Point, Pose + +from rai_semap.core.backend.spatial_db_backend import SpatialDBBackend +from rai_semap.core.semantic_map_memory import SemanticAnnotation + +logger = logging.getLogger(__name__) + + +class SQLiteBackend(SpatialDBBackend): + """SQLite backend with SpatiaLite extension for spatial indexing.""" + + def __init__(self, database_path: str): + self.database_path = database_path + self.conn: Optional[sqlite3.Connection] = None + + def _get_connection(self) -> sqlite3.Connection: + """Get or create database connection.""" + if self.conn is None: + logger.info(f"Creating SQLite connection to: {self.database_path}") + self.conn = sqlite3.connect(self.database_path, check_same_thread=False) + self.conn.row_factory = sqlite3.Row + # Enable WAL mode for better concurrency and ensure data is written + journal_mode = self.conn.execute("PRAGMA journal_mode=WAL").fetchone()[0] + logger.debug(f"SQLite journal mode: {journal_mode}") + # Use NORMAL synchronous mode (balance between safety and performance) + # FULL is safer but slower, OFF is faster but riskier + self.conn.execute("PRAGMA synchronous=NORMAL") + logger.info(f"SQLite connection established to {self.database_path}") + return self.conn + + def init_schema(self) -> None: + """Initialize database schema with spatial extensions.""" + conn = self._get_connection() + cursor = conn.cursor() + + logger.info("Initializing database schema") + cursor.execute(""" + CREATE TABLE IF NOT EXISTS annotations ( + id TEXT PRIMARY KEY, + object_class TEXT NOT NULL, + x REAL NOT NULL, + y REAL NOT NULL, + z REAL NOT NULL, + qx REAL, + qy REAL, + qz REAL, + qw REAL, + confidence REAL NOT NULL, + timestamp REAL NOT NULL, + detection_source TEXT NOT NULL, + source_frame TEXT NOT NULL, + location_id TEXT NOT NULL, + vision_detection_id TEXT, + metadata TEXT + ) + """) + + cursor.execute(""" + CREATE INDEX IF NOT EXISTS idx_object_class ON annotations(object_class) + """) + + cursor.execute(""" + CREATE INDEX IF NOT EXISTS idx_confidence ON annotations(confidence) + """) + + cursor.execute(""" + CREATE INDEX IF NOT EXISTS idx_location_id ON annotations(location_id) + """) + + conn.commit() + logger.info("Database schema initialized successfully") + + def insert_annotation(self, annotation: SemanticAnnotation) -> str: + """Insert annotation with spatial indexing.""" + conn = self._get_connection() + cursor = conn.cursor() + + x = annotation.pose.position.x + y = annotation.pose.position.y + z = annotation.pose.position.z + qx = annotation.pose.orientation.x + qy = annotation.pose.orientation.y + qz = annotation.pose.orientation.z + qw = annotation.pose.orientation.w + + metadata_json = json.dumps(annotation.metadata) if annotation.metadata else None + + logger.info( + f"Inserting annotation: id={annotation.id}, class={annotation.object_class}, " + f"pos=({x:.2f}, {y:.2f}, {z:.2f}), confidence={annotation.confidence:.3f}, " + f"location_id={annotation.location_id}" + ) + + try: + cursor.execute( + """ + INSERT INTO annotations ( + id, object_class, x, y, z, qx, qy, qz, qw, + confidence, timestamp, detection_source, source_frame, + location_id, vision_detection_id, metadata + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) 
+ """, + ( + annotation.id, + annotation.object_class, + x, + y, + z, + qx, + qy, + qz, + qw, + annotation.confidence, + annotation.timestamp, + annotation.detection_source, + annotation.source_frame, + annotation.location_id, + annotation.vision_detection_id, + metadata_json, + ), + ) + + conn.commit() + logger.debug(f"Committed annotation {annotation.id} to database") + + # Verify the annotation was actually stored + cursor.execute( + "SELECT COUNT(*) FROM annotations WHERE id = ?", (annotation.id,) + ) + count = cursor.fetchone()[0] + if count == 1: + logger.info(f"✓ Verified annotation {annotation.id} stored in database") + else: + logger.error( + f"✗ FAILED to verify annotation {annotation.id} in database " + f"(found {count} rows, expected 1)" + ) + + return annotation.id + except sqlite3.Error as e: + logger.error(f"SQLite error inserting annotation {annotation.id}: {e}") + conn.rollback() + raise + + def spatial_query( + self, center: Point, radius: float, filters: Optional[Dict[str, Any]] = None + ) -> List[SemanticAnnotation]: + """Execute spatial query with optional filters.""" + conn = self._get_connection() + cursor = conn.cursor() + + query = """ + SELECT * FROM annotations + WHERE ((x - ?) * (x - ?) + (y - ?) * (y - ?) + (z - ?) * (z - ?)) <= (? * ?) + """ + params = [ + center.x, + center.x, + center.y, + center.y, + center.z, + center.z, + radius, + radius, + ] + + if filters: + if "object_class" in filters: + query += " AND object_class = ?" + params.append(filters["object_class"]) + if "confidence_threshold" in filters: + query += " AND confidence >= ?" + params.append(filters["confidence_threshold"]) + if "location_id" in filters: + query += " AND location_id = ?" + params.append(filters["location_id"]) + + cursor.execute(query, params) + rows = cursor.fetchall() + + annotations = [] + for row in rows: + pose = Pose() + pose.position.x = row["x"] + pose.position.y = row["y"] + pose.position.z = row["z"] + pose.orientation.x = row["qx"] or 0.0 + pose.orientation.y = row["qy"] or 0.0 + pose.orientation.z = row["qz"] or 0.0 + pose.orientation.w = row["qw"] or 1.0 + + metadata = json.loads(row["metadata"]) if row["metadata"] else {} + + # Convert timestamp to float (handle ROS Time string representation) + timestamp = row["timestamp"] + if isinstance(timestamp, str): + # Extract nanoseconds from ROS Time string representation + # Format: "Time(nanoseconds=3119172..., clock_type=ROS_TIME)" + match = re.search(r"nanoseconds=(\d+)", timestamp) + if match: + nanoseconds = int(match.group(1)) + timestamp = nanoseconds / 1e9 + else: + raise ValueError(f"Unable to parse timestamp: {timestamp}") + + annotation = SemanticAnnotation( + id=row["id"], + object_class=row["object_class"], + pose=pose, + confidence=row["confidence"], + timestamp=float(timestamp), + detection_source=row["detection_source"], + source_frame=row["source_frame"], + location_id=row["location_id"], + vision_detection_id=row["vision_detection_id"], + metadata=metadata, + ) + annotations.append(annotation) + + return annotations + + def delete_annotation(self, annotation_id: str) -> bool: + """Delete annotation by ID. 
Returns success status.""" + conn = self._get_connection() + cursor = conn.cursor() + + logger.info(f"Deleting annotation: id={annotation_id}") + cursor.execute("DELETE FROM annotations WHERE id = ?", (annotation_id,)) + rows_deleted = cursor.rowcount + conn.commit() + + if rows_deleted > 0: + logger.info(f"✓ Deleted annotation {annotation_id} from database") + else: + logger.warning(f"✗ Annotation {annotation_id} not found for deletion") + + return rows_deleted > 0 + + def delete_all_annotations(self, location_id: Optional[str] = None) -> int: + """Delete all annotations, optionally filtered by location_id. + + Args: + location_id: If provided, only delete annotations for this location. + If None, delete all annotations. + + Returns: + Number of annotations deleted. + """ + conn = self._get_connection() + cursor = conn.cursor() + + if location_id: + logger.info(f"Deleting all annotations for location_id={location_id}") + cursor.execute( + "DELETE FROM annotations WHERE location_id = ?", (location_id,) + ) + else: + logger.info("Deleting all annotations from database") + cursor.execute("DELETE FROM annotations") + + rows_deleted = cursor.rowcount + conn.commit() + + logger.info(f"✓ Deleted {rows_deleted} annotation(s) from database") + return rows_deleted + + def update_annotation(self, annotation: SemanticAnnotation) -> bool: + """Update existing annotation by ID. Returns success status.""" + conn = self._get_connection() + cursor = conn.cursor() + + x = annotation.pose.position.x + y = annotation.pose.position.y + z = annotation.pose.position.z + qx = annotation.pose.orientation.x + qy = annotation.pose.orientation.y + qz = annotation.pose.orientation.z + qw = annotation.pose.orientation.w + + metadata_json = json.dumps(annotation.metadata) if annotation.metadata else None + + logger.info( + f"Updating annotation: id={annotation.id}, class={annotation.object_class}, " + f"pos=({x:.2f}, {y:.2f}, {z:.2f}), confidence={annotation.confidence:.3f}" + ) + + try: + cursor.execute( + """ + UPDATE annotations SET + object_class = ?, + x = ?, y = ?, z = ?, + qx = ?, qy = ?, qz = ?, qw = ?, + confidence = ?, + timestamp = ?, + detection_source = ?, + source_frame = ?, + location_id = ?, + vision_detection_id = ?, + metadata = ? + WHERE id = ? + """, + ( + annotation.object_class, + x, + y, + z, + qx, + qy, + qz, + qw, + annotation.confidence, + annotation.timestamp, + annotation.detection_source, + annotation.source_frame, + annotation.location_id, + annotation.vision_detection_id, + metadata_json, + annotation.id, + ), + ) + + rows_updated = cursor.rowcount + conn.commit() + + if rows_updated > 0: + logger.info(f"✓ Updated annotation {annotation.id} in database") + else: + logger.warning(f"✗ Annotation {annotation.id} not found for update") + + return rows_updated > 0 + except sqlite3.Error as e: + logger.error(f"SQLite error updating annotation {annotation.id}: {e}") + conn.rollback() + raise + + def get_distinct_object_classes( + self, location_id: Optional[str] = None + ) -> List[str]: + """Get list of distinct object classes seen in a location.""" + conn = self._get_connection() + cursor = conn.cursor() + + # Filter by location when requested; otherwise return classes across all locations + if location_id: + cursor.execute( + "SELECT DISTINCT object_class FROM annotations WHERE location_id = ?", + (location_id,), + ) + else: + cursor.execute("SELECT DISTINCT object_class FROM annotations") + + return [row["object_class"] for row in cursor.fetchall()] diff --git a/src/rai_semap/rai_semap/core/base_memory.py b/src/rai_semap/rai_semap/core/base_memory.py new file mode 100644 index 000000000..44b11815f --- /dev/null +++ b/src/rai_semap/rai_semap/core/base_memory.py @@ -0,0 +1,45 @@ +# Copyright (C) 2025 Julia Jia +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from abc import ABC, abstractmethod +from typing import Any, Dict, List, Optional + + +class BaseMemory(ABC): + """Abstract base class for agent memory systems.""" + + @abstractmethod + def store( + self, key: str, value: Any, metadata: Optional[Dict[str, Any]] = None + ) -> str: + """Store a value with optional metadata. Returns storage ID.""" + pass + + @abstractmethod + def retrieve( + self, query: str, filters: Optional[Dict[str, Any]] = None + ) -> List[Any]: + """Retrieve values matching query and filters. + + Designed for vector database use cases where query is text to embed + for similarity search, and filters are metadata constraints. + Not suitable for spatial databases which require concrete query methods + (e.g., query_by_location, query_by_region). + """ + pass + + @abstractmethod + def delete(self, key: str) -> bool: + """Delete a stored value. Returns success status.""" + pass diff --git a/src/rai_semap/rai_semap/core/semantic_map_memory.py b/src/rai_semap/rai_semap/core/semantic_map_memory.py new file mode 100644 index 000000000..e6e2936c7 --- /dev/null +++ b/src/rai_semap/rai_semap/core/semantic_map_memory.py @@ -0,0 +1,397 @@ +# Copyright (C) 2025 Julia Jia +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import uuid +from typing import Any, Dict, List, Optional, Tuple + +from pydantic import BaseModel, ConfigDict, Field, field_validator +from rai.types import Point + +from rai_semap.core.backend.spatial_db_backend import SpatialDBBackend +from rai_semap.core.base_memory import BaseMemory + +# Type alias for Pose - accepts both rai.types.Pose (Pydantic model) and geometry_msgs.msg.Pose (ROS2 message) +# With arbitrary_types_allowed=True, Pydantic accepts ROS2 messages even though type annotation is rai.types.Pose +# ROS2 messages are required because ROS2 transform functions (tf2_geometry_msgs) require and return ROS2 message types +PoseType = Any + + +class SemanticAnnotation(BaseModel): + """Spatial-semantic annotation data model.""" + + # Allow ROS2 message types (e.g., Pose) that Pydantic doesn't validate natively. + # Other fields are still validated; ROS2 types are validated by ROS2. 
+ model_config = ConfigDict(arbitrary_types_allowed=True) + + id: str + object_class: str + pose: PoseType + confidence: float = Field( + ge=0.0, le=1.0, description="Confidence score between 0 and 1" + ) + timestamp: float = Field(description="Unix timestamp in seconds (timezone-naive)") + detection_source: str + source_frame: str + location_id: str + vision_detection_id: Optional[str] = Field( + default=None, description="ID from vision pipeline, mostly for debugging" + ) + metadata: Dict[str, Any] = Field(default_factory=dict) + + @field_validator("confidence") + @classmethod + def validate_confidence(cls, v: float) -> float: + """Validate confidence is in valid range.""" + if not 0.0 <= v <= 1.0: + raise ValueError(f"confidence must be between 0 and 1, got {v}") + return v + + @field_validator("metadata", mode="before") + @classmethod + def normalize_metadata(cls, v: Any) -> Dict[str, Any]: + """Convert None metadata to empty dict.""" + if v is None: + return {} + return v + + +class MapMetadata(BaseModel): + """Metadata structure for a SLAM map (one per location, not per annotation). + + Tracks properties of the underlying SLAM map and semantic annotation activity. + """ + + # Allow ROS2 message types (e.g., Pose) that Pydantic doesn't validate natively. + # Other fields are still validated; ROS2 types are validated by ROS2. + model_config = ConfigDict(arbitrary_types_allowed=True) + + location_id: str = Field( + description="Identifier for the physical location (e.g., 'warehouse_a', 'warehouse_b')" + ) + map_frame_id: str = Field(description="Frame ID of the SLAM map") + resolution: float = Field( + default=0.05, + gt=0.0, + description="OccupancyGrid resolution (meters/pixel) from SLAM map configuration", + ) + origin: Optional[PoseType] = Field( + default=None, + description="Optional map origin pose (rai.types.Pose or geometry_msgs.msg.Pose). Only needed for coordinate transformations between map frame and world frame. Not required for semantic annotations that are already stored in map frame.", + ) + last_updated: Optional[float] = Field( + default=None, + description="Optional Unix timestamp (seconds) of last semantic annotation update to this map", + ) + + @field_validator("resolution") + @classmethod + def validate_resolution(cls, v: float) -> float: + """Validate resolution is positive.""" + if v <= 0.0: + raise ValueError(f"resolution must be positive, got {v}") + return v + + +class SemanticMapMemory(BaseMemory): + """Spatial-semantic memory for storing and querying object annotations.""" + + def __init__( + self, + backend: SpatialDBBackend, + location_id: str, + map_frame_id: str = "map", + resolution: float = 0.05, + ): + self.backend = backend + self.location_id = location_id + self.map_frame_id = map_frame_id + self.resolution = resolution + + def store( + self, key: str, value: Any, metadata: Optional[Dict[str, Any]] = None + ) -> str: + """Store a value with optional metadata. Returns storage ID. + + Uses temporal consistency: if nearby annotation of same class exists, + updates it; otherwise inserts new. The key parameter is ignored + (annotation ID is determined by temporal consistency). 
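+
+        Example (illustrative values; the keys mirror store_annotation's
+        arguments, and pose is typically already expressed in the map frame):
+            {"object_class": "chair", "pose": pose, "confidence": 0.8,
+             "timestamp": 1700000000.0, "detection_source": "camera",
+             "source_frame": "camera_link", "location_id": "warehouse_a"}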
+
+        Args:
+            key: Ignored (kept for BaseMemory interface compatibility)
+            value: Dict containing required fields including 'object_class'
+            metadata: Optional additional metadata
+        """
+        if not isinstance(value, dict):
+            raise TypeError(f"value must be a dict, got {type(value).__name__}")
+
+        required_fields = [
+            "object_class",
+            "pose",
+            "confidence",
+            "timestamp",
+            "detection_source",
+            "source_frame",
+            "location_id",
+        ]
+        missing = [field for field in required_fields if field not in value]
+        if missing:
+            raise ValueError(f"Missing required fields in value: {missing}")
+
+        return self.store_or_update_annotation(
+            object_class=value["object_class"],
+            pose=value["pose"],
+            confidence=value["confidence"],
+            timestamp=value["timestamp"],
+            detection_source=value["detection_source"],
+            source_frame=value["source_frame"],
+            location_id=value["location_id"],
+            vision_detection_id=value.get("vision_detection_id"),
+            metadata=metadata,
+        )
+
+    def retrieve(
+        self, query: str, filters: Optional[Dict[str, Any]] = None
+    ) -> List[Any]:
+        """Retrieve values matching query and filters.
+
+        Not implemented. Use concrete query methods instead:
+        - query_by_class: Query by object class
+        - query_by_location: Query by center point and radius
+        - query_by_region: Query by bounding box
+        """
+        raise NotImplementedError(
+            "Use concrete query methods: query_by_class, query_by_location, or query_by_region"
+        )
+
+    def delete(self, key: str) -> bool:
+        """Delete a stored value by annotation ID. Returns success status."""
+        return self.backend.delete_annotation(key)
+
+    def delete_all_annotations(self, location_id: Optional[str] = None) -> int:
+        """Delete all annotations for a location.
+
+        Args:
+            location_id: If provided, delete only annotations for this location.
+                If None, defaults to this instance's location_id, so only the
+                current location's annotations are deleted.
+
+        Returns:
+            Number of annotations deleted.
+        """
+        if location_id is None:
+            location_id = self.location_id
+        return self.backend.delete_all_annotations(location_id)
+
+    def store_annotation(
+        self,
+        object_class: str,
+        pose: PoseType,
+        confidence: float,
+        timestamp: float,
+        detection_source: str,
+        source_frame: str,
+        location_id: str,
+        vision_detection_id: Optional[str] = None,
+        metadata: Optional[Dict[str, Any]] = None,
+        annotation_id: Optional[str] = None,
+    ) -> str:
+        """Store a semantic annotation. Returns annotation ID."""
+        if annotation_id is None:
+            annotation_id = str(uuid.uuid4())
+        annotation = SemanticAnnotation(
+            id=annotation_id,
+            object_class=object_class,
+            pose=pose,
+            confidence=confidence,
+            timestamp=timestamp,
+            detection_source=detection_source,
+            source_frame=source_frame,
+            location_id=location_id,
+            vision_detection_id=vision_detection_id,
+            metadata=metadata,
+        )
+        return self.backend.insert_annotation(annotation)
+
+    def update_annotation(self, annotation: SemanticAnnotation) -> bool:
+        """Update an existing annotation by ID. Returns success status."""
+        return self.backend.update_annotation(annotation)
+
+    def store_or_update_annotation(
+        self,
+        object_class: str,
+        pose: PoseType,
+        confidence: float,
+        timestamp: float,
+        detection_source: str,
+        source_frame: str,
+        location_id: str,
+        vision_detection_id: Optional[str] = None,
+        metadata: Optional[Dict[str, Any]] = None,
+        merge_threshold: float = 0.5,
+    ) -> str:
+        """Store or update annotation with temporal consistency.
+
+        Queries nearby annotations of the same class and location. If one is
+        found within merge_threshold, updates the existing annotation.
+        Otherwise, inserts a new annotation.
+
+        Args:
+            location_id: Identifier for the physical location
+            merge_threshold: Distance threshold (meters) for merging duplicate detections
+            Other args: Same as store_annotation
+
+        Returns:
+            Annotation ID (existing if updated, new if inserted)
+        """
+        center = Point(x=pose.position.x, y=pose.position.y, z=pose.position.z)
+        nearby = self.query_by_location(
+            center,
+            radius=merge_threshold,
+            object_class=object_class,
+            location_id=location_id,
+        )
+
+        if nearby:
+            # Update first match (closest)
+            existing = nearby[0]
+            updated = SemanticAnnotation(
+                id=existing.id,
+                object_class=object_class,
+                pose=pose,
+                confidence=max(
+                    existing.confidence, confidence
+                ),  # Keep higher confidence
+                timestamp=timestamp,  # Update to latest timestamp
+                detection_source=detection_source,
+                source_frame=source_frame,
+                location_id=location_id,
+                vision_detection_id=vision_detection_id,
+                metadata=metadata or existing.metadata,
+            )
+            self.backend.update_annotation(updated)
+            return existing.id
+        else:
+            # Insert new
+            return self.store_annotation(
+                object_class=object_class,
+                pose=pose,
+                confidence=confidence,
+                timestamp=timestamp,
+                detection_source=detection_source,
+                source_frame=source_frame,
+                location_id=location_id,
+                vision_detection_id=vision_detection_id,
+                metadata=metadata,
+            )
+
+    def query_by_class(
+        self,
+        object_class: str,
+        confidence_threshold: float = 0.5,
+        limit: Optional[int] = None,
+        location_id: Optional[str] = None,
+    ) -> List[SemanticAnnotation]:
+        """Query annotations by object class."""
+        filters = {
+            "object_class": object_class,
+            "confidence_threshold": confidence_threshold,
+            "location_id": location_id or self.location_id,
+        }
+        # No spatial constraint is intended here, so reuse spatial_query with an
+        # effectively unbounded radius and rely on the attribute filters alone.
+        center = Point(x=0.0, y=0.0, z=0.0)
+        results = self.backend.spatial_query(center, radius=1e10, filters=filters)
+        if limit is not None:
+            results = results[:limit]
+        return results
+
+    def query_by_location(
+        self,
+        center: Point,
+        radius: float,
+        object_class: Optional[str] = None,
+        location_id: Optional[str] = None,
+    ) -> List[SemanticAnnotation]:
+        """Query annotations within radius of center point."""
+        filters = {"location_id": location_id or self.location_id}
+        if object_class:
+            filters["object_class"] = object_class
+        return self.backend.spatial_query(center, radius, filters=filters)
+
+    def query_by_region(
+        self,
+        bbox: Tuple[float, float, float, float],  # (min_x, min_y, max_x, max_y)
+        object_class: Optional[str] = None,
+        location_id: Optional[str] = None,
+    ) -> List[SemanticAnnotation]:
+        """Query annotations within bounding box region."""
+        min_x, min_y, max_x, max_y = bbox
+        center_x = (min_x + max_x) / 2.0
+        center_y = (min_y + max_y) / 2.0
+        # Use half the bbox diagonal so the circular spatial query fully covers
+        # the rectangle; max(width, height) / 2 would miss annotations near the
+        # corners before the exact bbox filter below ever sees them.
+        radius = ((max_x - min_x) ** 2 + (max_y - min_y) ** 2) ** 0.5 / 2.0
+
+        center = Point(x=center_x, y=center_y, z=0.0)
+        filters = {"location_id": location_id or self.location_id}
+        if object_class:
+            filters["object_class"] = object_class
+
+        results = self.backend.spatial_query(center, radius, filters=filters)
+
+        filtered_results = []
+        for annotation in results:
+            x = annotation.pose.position.x
+            y = annotation.pose.position.y
+            if min_x <= x <= max_x and min_y <= y <= max_y:
+                filtered_results.append(annotation)
+
+        return filtered_results
+
+    def get_map_metadata(self) -> MapMetadata:
+        """Get metadata for the current SLAM map.
+
+        Returns one MapMetadata instance per location (not per annotation).
+        Computes last_updated from the most recent annotation timestamp for this location.
+        map_frame_id and resolution come from instance configuration.
+        """
+        # Get most recent annotation timestamp for this location
+        center = Point(x=0.0, y=0.0, z=0.0)
+        filters = {"location_id": self.location_id}
+        all_annotations = self.backend.spatial_query(
+            center, radius=1e10, filters=filters
+        )
+
+        last_updated = None
+        if all_annotations:
+            timestamps = [
+                ann.timestamp for ann in all_annotations if ann.timestamp is not None
+            ]
+            if timestamps:
+                last_updated = max(timestamps)
+
+        return MapMetadata(
+            location_id=self.location_id,
+            map_frame_id=self.map_frame_id,
+            resolution=self.resolution,
+            origin=None,
+            last_updated=last_updated,
+        )
+
+    def get_seen_object_classes(self, location_id: Optional[str] = None) -> List[str]:
+        """Get list of distinct object classes seen in a location.
+
+        Args:
+            location_id: If provided, only return classes for this location.
+                If None, use the instance's location_id.
+
+        Returns:
+            List of unique object class names, sorted alphabetically.
+        """
+        if location_id is None:
+            location_id = self.location_id
+        # Delegates to the backend; the SQLite implementation returns the
+        # classes already sorted alphabetically (ORDER BY object_class).
+        return self.backend.get_distinct_object_classes(location_id)
diff --git a/src/rai_semap/rai_semap/ros2/__init__.py b/src/rai_semap/rai_semap/ros2/__init__.py
new file mode 100644
index 000000000..c8b4abf7f
--- /dev/null
+++ b/src/rai_semap/rai_semap/ros2/__init__.py
@@ -0,0 +1,13 @@
+# Copyright (C) 2025 Julia Jia
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
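For orientation before the ROS2 layer below, a minimal usage sketch of the memory API added above. The sketch is not part of the patch: it reuses the SQLiteBackend and init_schema calls that node.py makes later in this diff, and the pose, timestamp, and location values are illustrative.

from rai.types import Point, Pose

from rai_semap.core.backend.sqlite_backend import SQLiteBackend
from rai_semap.core.semantic_map_memory import SemanticMapMemory

backend = SQLiteBackend("semantic_map.db")
backend.init_schema()
memory = SemanticMapMemory(backend=backend, location_id="warehouse_a")

# Deduplicating write: a same-class detection within merge_threshold meters
# of an existing annotation updates it instead of inserting a duplicate.
annotation_id = memory.store_or_update_annotation(
    object_class="chair",
    pose=Pose(),  # illustrative; normally a pose already in the map frame
    confidence=0.8,
    timestamp=1700000000.0,
    detection_source="camera",
    source_frame="camera_link",
    location_id="warehouse_a",
    merge_threshold=0.5,
)

# Spatial read: all chairs within 2 m of the map origin.
chairs = memory.query_by_location(
    Point(x=0.0, y=0.0, z=0.0), radius=2.0, object_class="chair"
)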
diff --git a/src/rai_semap/rai_semap/ros2/config/detection_publisher.yaml b/src/rai_semap/rai_semap/ros2/config/detection_publisher.yaml new file mode 100644 index 000000000..cb5f9d62d --- /dev/null +++ b/src/rai_semap/rai_semap/ros2/config/detection_publisher.yaml @@ -0,0 +1,14 @@ +detection_publisher: + ros__parameters: + camera_topic: "/camera/camera/color/image_raw" + detection_topic: "/detection_array" + dino_service: "/grounding_dino_classify" + # Format: "class1:threshold1, class2, class3:threshold3" + # Classes without explicit thresholds use default_class_threshold + # Example: "person:0.5, cup, bottle:0.4, box" (person uses 0.5, cup uses default, bottle uses 0.4, box uses default) + detection_classes: | + person, cup, bottle, box, bag, chair, table, shelf, door, window, couch, sofa, bed, stove + default_class_threshold: 0.35 + detection_interval: 2.0 + box_threshold: 0.3 + text_threshold: 0.25 diff --git a/src/rai_semap/rai_semap/ros2/config/node.yaml b/src/rai_semap/rai_semap/ros2/config/node.yaml new file mode 100644 index 000000000..fec4fd60f --- /dev/null +++ b/src/rai_semap/rai_semap/ros2/config/node.yaml @@ -0,0 +1,34 @@ +rai_semap_node: + ros__parameters: + # Storage configuration + storage: + backend_type: "sqlite" # Database backend: 'sqlite' (default) or 'postgres' (future) + database_path: "semantic_map.db" + location_id: "default_location" + + # Detection filtering configuration + detection_filtering: + confidence_threshold: 0.5 + class_confidence_thresholds: "" + # Minimum bounding box area in pixels^2 to filter small false positives + min_bbox_area: 100.0 + + # Deduplication configuration + deduplication: + class_merge_thresholds: "" + use_pointcloud_dedup: true + + # Topic configuration + topics: + detection_topic: "/detection_array" + map_topic: "/map" + # Set these to your depth and camera info topics for point cloud extraction + # Leave empty if point cloud deduplication is not needed + depth_topic: "/camera/camera/depth/image_rect_raw" + camera_info_topic: "/camera/camera/color/camera_info" + + # Map/SLAM configuration + map: + map_frame_id: "map" + # OccupancyGrid resolution in meters/pixel (0.05 = 5 cm per pixel) + map_resolution: 0.05 diff --git a/src/rai_semap/rai_semap/ros2/config/perception_utils.yaml b/src/rai_semap/rai_semap/ros2/config/perception_utils.yaml new file mode 100644 index 000000000..648ae0526 --- /dev/null +++ b/src/rai_semap/rai_semap/ros2/config/perception_utils.yaml @@ -0,0 +1,7 @@ +perception_utils: + ros__parameters: + # Set these to your depth and camera info topics for 3D pose computation + # Leave empty if 3D poses are not needed + depth_topic: "/camera/camera/depth/image_rect_raw" + camera_info_topic: "/camera/camera/color/camera_info" + depth_fallback_region_size: 5 diff --git a/src/rai_semap/rai_semap/ros2/config/visualizer.yaml b/src/rai_semap/rai_semap/ros2/config/visualizer.yaml new file mode 100644 index 000000000..4dc01e992 --- /dev/null +++ b/src/rai_semap/rai_semap/ros2/config/visualizer.yaml @@ -0,0 +1,11 @@ +# Format: [r, g, b, a] where r, g, b are in range [0.0, 1.0] and a is alpha [0.0, 1.0] +# Default color +default_color: [0.5, 0.5, 0.5, 0.8] + +# Color mapping for specific object classes +class_colors: + bed: [0.2, 0.4, 0.8, 0.8] + chair: [0.2, 0.8, 0.4, 0.8] + door: [0.8, 0.6, 0.2, 0.8] + shelf: [0.8, 0.2, 0.6, 0.8] + table: [0.6, 0.2, 0.8, 0.8] diff --git a/src/rai_semap/rai_semap/ros2/detection_publisher.py b/src/rai_semap/rai_semap/ros2/detection_publisher.py new file mode 100644 index 000000000..ccbb75a91 --- 
/dev/null +++ b/src/rai_semap/rai_semap/ros2/detection_publisher.py @@ -0,0 +1,583 @@ +# Copyright (C) 2025 Julia Jia +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import time +from pathlib import Path +from typing import Dict, List, Optional, Tuple + +import rclpy +import yaml +from cv_bridge import CvBridge +from rai.communication.ros2 import ROS2Connector +from rcl_interfaces.msg import ParameterDescriptor, ParameterType +from rclpy.qos import qos_profile_sensor_data +from sensor_msgs.msg import CameraInfo, Image + +from rai_interfaces.msg import RAIDetectionArray +from rai_interfaces.srv import RAIGroundingDino +from rai_semap.ros2.perception_utils import enhance_detection_with_3d_pose + + +# NOTE: This module contains perception layer logic that may belong to rai_perception. +# It performs object detection and 3D pose computation, which are general perception +# tasks not specific to semantic mapping. Consider moving to rai_perception when +# that package has ROS2 node infrastructure in place. +class DetectionPublisher: + """ROS2 node that subscribes to camera images, calls DINO service, and publishes detections. + + Design Notes: + - Uses ROS2Connector for node lifecycle and infrastructure + - Uses connector.node.* directly for raw ROS2 messages and QoS control + - Uses connector.node.create_client() for async service calls (non-blocking) + """ + + def __init__(self, connector: ROS2Connector): + """Initialize DetectionPublisher. + + Args: + connector: ROS2Connector instance for ROS2 communication. 
+ """ + self.connector = connector + self._initialize_parameters() + self.bridge = CvBridge() + self._initialize_clients() + self._initialize_subscriptions() + self._initialize_publishers() + self.last_image: Optional[Image] = None + self.last_depth_image: Optional[Image] = None + self.last_camera_info: Optional[CameraInfo] = None + self.last_detection_time = 0.0 + self.last_log_time = 0.0 + self.log_interval = 5.0 # Log summary every 5 seconds + + def _initialize_parameters(self): + """Initialize ROS2 parameters from YAML files.""" + # Get directory containing this file + current_dir = Path(__file__).parent + config_dir = current_dir / "config" + + # Declare config file path parameters first + config_params = [ + ( + "detection_publisher_config", + "", + ParameterType.PARAMETER_STRING, + "Path to detection_publisher YAML config file (empty = use default in config/)", + ), + ( + "perception_utils_config", + "", + ParameterType.PARAMETER_STRING, + "Path to perception_utils YAML config file (empty = use default in config/)", + ), + ] + for name, default, param_type, description in config_params: + self.connector.node.declare_parameter( + name, + default, + descriptor=ParameterDescriptor( + type=param_type, description=description + ), + ) + + # Get config file paths + detection_pub_config_path = ( + self.connector.node.get_parameter("detection_publisher_config") + .get_parameter_value() + .string_value + ) + perception_utils_config_path = ( + self.connector.node.get_parameter("perception_utils_config") + .get_parameter_value() + .string_value + ) + + # Load detection_publisher parameters + if detection_pub_config_path: + detection_pub_yaml = Path(detection_pub_config_path) + else: + detection_pub_yaml = config_dir / "detection_publisher.yaml" + + with open(detection_pub_yaml, "r") as f: + detection_pub_config = yaml.safe_load(f) + detection_pub_params = detection_pub_config.get("detection_publisher", {}).get( + "ros__parameters", {} + ) + + # Load perception_utils parameters + if perception_utils_config_path: + perception_utils_yaml = Path(perception_utils_config_path) + else: + perception_utils_yaml = config_dir / "perception_utils.yaml" + + with open(perception_utils_yaml, "r") as f: + perception_utils_config = yaml.safe_load(f) + perception_utils_params = perception_utils_config.get( + "perception_utils", {} + ).get("ros__parameters", {}) + + # Declare detection_publisher parameters + parameters = [ + ( + "camera_topic", + detection_pub_params.get( + "camera_topic", "/camera/camera/color/image_raw" + ), + ParameterType.PARAMETER_STRING, + "Camera image topic to subscribe to", + ), + ( + "detection_topic", + detection_pub_params.get("detection_topic", "/detection_array"), + ParameterType.PARAMETER_STRING, + "Topic to publish RAIDetectionArray messages", + ), + ( + "dino_service", + detection_pub_params.get("dino_service", "/grounding_dino_classify"), + ParameterType.PARAMETER_STRING, + "GroundingDINO service name", + ), + ( + "detection_classes", + detection_pub_params.get( + "detection_classes", + "person, cup, bottle, box, bag, chair, table, shelf, door, window, couch, sofa, bed", + ), + ParameterType.PARAMETER_STRING, + "Comma-separated list of object classes to detect. 
Format: 'class1:threshold1, class2, class3:threshold3' where classes without thresholds use default_class_threshold", + ), + ( + "default_class_threshold", + detection_pub_params.get("default_class_threshold", 0.3), + ParameterType.PARAMETER_DOUBLE, + "Default box threshold for classes without explicit threshold in detection_classes", + ), + ( + "detection_interval", + detection_pub_params.get("detection_interval", 2.0), + ParameterType.PARAMETER_DOUBLE, + "Minimum time between detections (seconds)", + ), + ( + "box_threshold", + detection_pub_params.get("box_threshold", 0.3), + ParameterType.PARAMETER_DOUBLE, + "DINO box threshold (used as minimum for DINO call to get all detections)", + ), + ( + "text_threshold", + detection_pub_params.get("text_threshold", 0.25), + ParameterType.PARAMETER_DOUBLE, + "DINO text threshold", + ), + ] + + for name, default, param_type, description in parameters: + self.connector.node.declare_parameter( + name, + default, + descriptor=ParameterDescriptor( + type=param_type, + description=description, + ), + ) + + # Declare perception_utils parameters + perception_params = [ + ( + "depth_topic", + perception_utils_params.get("depth_topic", ""), + ParameterType.PARAMETER_STRING, + "Depth image topic (optional, for 3D pose computation)", + ), + ( + "camera_info_topic", + perception_utils_params.get("camera_info_topic", ""), + ParameterType.PARAMETER_STRING, + "Camera info topic (optional, for 3D pose computation)", + ), + ( + "depth_fallback_region_size", + perception_utils_params.get("depth_fallback_region_size", 5), + ParameterType.PARAMETER_INTEGER, + "Region size for depth fallback when center pixel has no depth", + ), + ] + + for name, default, param_type, description in perception_params: + self.connector.node.declare_parameter( + name, + default, + descriptor=ParameterDescriptor( + type=param_type, + description=description, + ), + ) + + def _get_string_parameter(self, name: str) -> str: + """Get string parameter value.""" + return ( + self.connector.node.get_parameter(name).get_parameter_value().string_value + ) + + def _get_double_parameter(self, name: str) -> float: + return ( + self.connector.node.get_parameter(name).get_parameter_value().double_value + ) + + def _get_integer_parameter(self, name: str) -> int: + return ( + self.connector.node.get_parameter(name).get_parameter_value().integer_value + ) + + def _parse_detection_classes( + self, detection_classes_str: str + ) -> Tuple[List[str], Dict[str, float]]: + """Parse detection_classes string to extract class names and per-class thresholds. + + Format: "class1:threshold1, class2, class3:threshold3" + Classes without explicit thresholds use default_class_threshold. 
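+
+        Example:
+            "person:0.5, cup" with default_class_threshold 0.35 parses to
+            (["person", "cup"], {"person": 0.5, "cup": 0.35}).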
+ + Returns: + Tuple of (class_names_list, class_thresholds_dict) + """ + default_threshold = self._get_double_parameter("default_class_threshold") + class_names = [] + class_thresholds = {} + + for item in detection_classes_str.split(","): + item = item.strip() + if not item: + continue + + if ":" in item: + class_name, threshold_str = item.split(":", 1) + class_name = class_name.strip() + try: + threshold = float(threshold_str.strip()) + class_names.append(class_name) + class_thresholds[class_name] = threshold + except ValueError: + self.connector.node.get_logger().warning( + f"Invalid threshold value in '{item}', using default" + ) + class_names.append(class_name) + class_thresholds[class_name] = default_threshold + else: + class_name = item.strip() + class_names.append(class_name) + class_thresholds[class_name] = default_threshold + + return class_names, class_thresholds + + def _initialize_clients(self): + """Initialize service clients. + + Note: We use self.connector.node.create_client() directly instead of ROS2Connector's + service_call() because we need async service calls (call_async) for non-blocking + detection processing. The connector's service_call() is synchronous and designed + for the connector's message wrapper system. + """ + dino_service = self._get_string_parameter("dino_service") + self.dino_client = self.connector.node.create_client( + RAIGroundingDino, dino_service + ) + self.connector.node.get_logger().info( + f"Waiting for DINO service: {dino_service}" + ) + # Use short timeout - _process_image() will check again before actual use + if not self.dino_client.wait_for_service(timeout_sec=0.1): + self.connector.node.get_logger().warning( + f"DINO service not available: {dino_service}" + ) + else: + self.connector.node.get_logger().info(f"DINO service ready: {dino_service}") + + def _initialize_subscriptions(self): + """Initialize ROS2 subscriptions.""" + camera_topic = self._get_string_parameter("camera_topic") + self.image_subscription = self.connector.node.create_subscription( + Image, camera_topic, self.image_callback, qos_profile_sensor_data + ) + self.connector.node.get_logger().info( + f"Subscribed to camera topic: {camera_topic} " + f"(QoS: {qos_profile_sensor_data.reliability.name})" + ) + + # Optional depth and camera info subscriptions for 3D pose computation + depth_topic = self._get_string_parameter("depth_topic") + camera_info_topic = self._get_string_parameter("camera_info_topic") + + if depth_topic: + self.depth_subscription = self.connector.node.create_subscription( + Image, depth_topic, self.depth_callback, qos_profile_sensor_data + ) + self.connector.node.get_logger().info( + f"Subscribed to depth topic: {depth_topic}" + ) + else: + self.depth_subscription = None + self.connector.node.get_logger().info( + "No depth topic provided, 3D poses will not be computed" + ) + + if camera_info_topic: + self.camera_info_subscription = self.connector.node.create_subscription( + CameraInfo, + camera_info_topic, + self.camera_info_callback, + qos_profile_sensor_data, + ) + self.connector.node.get_logger().info( + f"Subscribed to camera info topic: {camera_info_topic}" + ) + else: + self.camera_info_subscription = None + self.connector.node.get_logger().info( + "No camera info topic provided, 3D poses will not be computed" + ) + + def _initialize_publishers(self): + """Initialize ROS2 publishers.""" + detection_topic = self._get_string_parameter("detection_topic") + self.detection_publisher = self.connector.node.create_publisher( + RAIDetectionArray, 
detection_topic, qos_profile_sensor_data + ) + self.connector.node.get_logger().info( + f"Publishing to detection topic: {detection_topic} " + f"(QoS: reliability={qos_profile_sensor_data.reliability.name})" + ) + + def depth_callback(self, msg: Image): + """Store latest depth image.""" + self.last_depth_image = msg + + def camera_info_callback(self, msg: CameraInfo): + """Store latest camera info.""" + self.last_camera_info = msg + + def image_callback(self, msg: Image): + """Process incoming camera image.""" + self.connector.node.get_logger().debug( + f"Received camera image (stamp: {msg.header.stamp.sec}.{msg.header.stamp.nanosec:09d}, " + f"frame_id: {msg.header.frame_id})" + ) + current_time = time.time() + detection_interval = self._get_double_parameter("detection_interval") + + # Throttle detections + if current_time - self.last_detection_time < detection_interval: + time_since_last = current_time - self.last_detection_time + self.connector.node.get_logger().debug( + f"Throttling: {time_since_last:.2f}s since last detection (interval: {detection_interval}s)" + ) + return + + self.last_image = msg + self.connector.node.get_logger().debug("Processing camera image...") + self._process_image(msg) + + def _process_image(self, image_msg: Image): + """Call DINO service and publish detections.""" + if not self.dino_client.wait_for_service(timeout_sec=0.1): + self.connector.node.get_logger().warning( + "DINO service not ready, skipping detection" + ) + return + + detection_classes_str = self._get_string_parameter("detection_classes") + class_names, class_thresholds = self._parse_detection_classes( + detection_classes_str + ) + + # Use minimum threshold for DINO call to ensure we get all relevant detections + # Results will be filtered by per-class thresholds in _handle_dino_response + min_threshold = ( + min(class_thresholds.values()) + if class_thresholds + else self._get_double_parameter("default_class_threshold") + ) + box_threshold = min(self._get_double_parameter("box_threshold"), min_threshold) + text_threshold = self._get_double_parameter("text_threshold") + + # Store class_thresholds for filtering in response handler + self._current_class_thresholds = class_thresholds + + request = RAIGroundingDino.Request() + request.source_img = image_msg + request.classes = ", ".join(class_names) + request.box_threshold = box_threshold + request.text_threshold = text_threshold + + self.connector.node.get_logger().debug( + f"Calling DINO service with {len(class_names)} classes (box_threshold={box_threshold:.3f})" + ) + + future = self.dino_client.call_async(request) + future.add_done_callback( + lambda f: self._handle_dino_response(f, image_msg.header) + ) + + def _handle_dino_response(self, future, image_header): + """Handle DINO service response.""" + try: + response = future.result() + if response is None: + self.connector.node.get_logger().warning("DINO service returned None") + return + + # Get class thresholds for filtering (set in _process_image) + class_thresholds = getattr(self, "_current_class_thresholds", {}) + + # Filter detections by per-class thresholds + filtered_detections = [] + for det in response.detections.detections: + if det.results and len(det.results) > 0: + result = det.results[0] + class_id = result.hypothesis.class_id + score = result.hypothesis.score + + # Get threshold for this class (use default if not found) + threshold = class_thresholds.get( + class_id, self._get_double_parameter("default_class_threshold") + ) + + if score >= threshold: + 
filtered_detections.append(det) + else: + self.connector.node.get_logger().debug( + f"Filtered out {class_id} detection with score {score:.3f} " + f"(threshold: {threshold:.3f})" + ) + else: + # Keep detections without results (shouldn't happen, but be safe) + filtered_detections.append(det) + + # Create RAIDetectionArray message + detection_array = RAIDetectionArray() + detection_array.header = image_header + detection_array.header.frame_id = image_header.frame_id + detection_array.detections = filtered_detections + detection_array.detection_classes = response.detections.detection_classes + + # Ensure each detection has the correct frame_id and enhance with 3D poses + for det in detection_array.detections: + if not det.header.frame_id: + det.header.frame_id = image_header.frame_id + det.header.stamp = image_header.stamp + + # Enhance detection with 3D pose if pose is empty + region_size = self._get_integer_parameter("depth_fallback_region_size") + if enhance_detection_with_3d_pose( + det, + self.last_depth_image, + self.last_camera_info, + self.bridge, + region_size, + ): + if det.results and len(det.results) > 0: + result = det.results[0] + computed_pose = result.pose.pose + self.connector.node.get_logger().debug( + f"Computed 3D pose for {result.hypothesis.class_id}: " + f"({computed_pose.position.x:.3f}, {computed_pose.position.y:.3f}, " + f"{computed_pose.position.z:.3f})" + ) + elif det.results and len(det.results) > 0: + result = det.results[0] + pose = result.pose.pose + if ( + pose.position.x == 0.0 + and pose.position.y == 0.0 + and pose.position.z == 0.0 + ): + self.connector.node.get_logger().debug( + f"Could not compute 3D pose for {result.hypothesis.class_id} " + f"(depth or camera info not available)" + ) + + # Log detection details for debugging + detection_count = len(detection_array.detections) + current_time = time.time() + should_log = current_time - self.last_log_time >= self.log_interval + + if detection_count > 0: + # Log individual detections only at DEBUG level + for i, det in enumerate(detection_array.detections): + results_count = len(det.results) if det.results else 0 + if results_count > 0: + result = det.results[0] + self.connector.node.get_logger().debug( + f"Detection {i}: class={result.hypothesis.class_id}, " + f"score={result.hypothesis.score:.3f}" + ) + else: + self.connector.node.get_logger().warning( + f"Detection {i} has no results! 
frame_id={det.header.frame_id}" + ) + + # Throttled summary log + if should_log: + classes_found = [ + det.results[0].hypothesis.class_id + for det in detection_array.detections + if det.results and len(det.results) > 0 + ] + self.connector.node.get_logger().info( + f"Published {detection_count} detections: {', '.join(set(classes_found))}" + ) + self.last_log_time = current_time + else: + if should_log: + self.connector.node.get_logger().debug( + "No detections found in image" + ) + self.last_log_time = current_time + + # Publish detections + self.detection_publisher.publish(detection_array) + self.last_detection_time = time.time() + + except Exception as e: + self.connector.node.get_logger().error( + f"Error processing DINO response: {e}" + ) + + +def main(args=None): + """Main entry point for the detection publisher node.""" + rclpy.init(args=args) + connector = ROS2Connector( + node_name="detection_publisher", executor_type="multi_threaded" + ) + detection_publisher = DetectionPublisher(connector=connector) + detection_publisher.connector.node.get_logger().info("=" * 60) + detection_publisher.connector.node.get_logger().info( + "Detection Publisher Node Started" + ) + detection_publisher.connector.node.get_logger().info("=" * 60) + try: + rclpy.spin(detection_publisher.connector.node) + except KeyboardInterrupt: + detection_publisher.connector.node.get_logger().info( + "Shutting down detection publisher..." + ) + finally: + detection_publisher.connector.shutdown() + rclpy.shutdown() + + +if __name__ == "__main__": + main() diff --git a/src/rai_semap/rai_semap/ros2/node.py b/src/rai_semap/rai_semap/ros2/node.py new file mode 100644 index 000000000..15ec4ce1f --- /dev/null +++ b/src/rai_semap/rai_semap/ros2/node.py @@ -0,0 +1,917 @@ +# Copyright (C) 2025 Julia Jia +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
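+
+# Processing pipeline (summary): RAIDetectionArray in -> per-detection
+# confidence and bbox-size filtering -> TF transform of the detection pose
+# into the map frame -> optional point cloud features for deduplication ->
+# store or merge in SemanticMapMemory.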
+ +import logging +from pathlib import Path +from typing import Optional, Tuple + +# ROS2 core +import rclpy +import yaml +from cv_bridge import CvBridge + +# ROS2 geometry and transforms +from geometry_msgs.msg import Point, Pose, PoseStamped +from nav_msgs.msg import OccupancyGrid +from rai.communication.ros2 import ROS2Connector +from rcl_interfaces.msg import ParameterDescriptor, ParameterType +from rclpy.qos import qos_profile_sensor_data +from sensor_msgs.msg import CameraInfo, Image +from tf2_geometry_msgs import do_transform_pose_stamped +from vision_msgs.msg import Detection2D + +# RAI interfaces +from rai_interfaces.msg import RAIDetectionArray + +# Local imports +from rai_semap.core.backend.sqlite_backend import SQLiteBackend +from rai_semap.core.semantic_map_memory import SemanticAnnotation, SemanticMapMemory +from rai_semap.ros2.perception_utils import extract_pointcloud_from_bbox +from rai_semap.utils.ros2_log import ROS2LogHandler + +# Constants +DEFAULT_QUEUE_SIZE = 10 +TF_LOOKUP_TIMEOUT_SEC = 1.0 + + +class SemanticMapNode: + """ROS2 node for semantic map processing.""" + + def __init__( + self, + connector: ROS2Connector, + database_path: Optional[str] = None, + ): + """Initialize SemanticMapNode. + + Args: + connector: ROS2Connector instance for ROS2 communication. + database_path: Optional path to SQLite database file. + """ + self.connector = connector + + # Configure Python logging to forward to ROS2 logger + # Configure all rai_semap loggers (including submodules) + handler = ROS2LogHandler(self.connector.node) + handler.setLevel(logging.DEBUG) + + # Configure root rai_semap logger + python_logger = logging.getLogger("rai_semap") + python_logger.setLevel(logging.DEBUG) + python_logger.handlers.clear() # Remove any existing handlers + python_logger.addHandler(handler) + python_logger.propagate = False # Prevent propagation to root logger + + # Also explicitly configure SQLite backend logger + sqlite_logger = logging.getLogger("rai_semap.core.backend.sqlite_backend") + sqlite_logger.setLevel(logging.DEBUG) + sqlite_logger.handlers.clear() + sqlite_logger.addHandler(handler) + sqlite_logger.propagate = False + + self._initialize_parameters() + if database_path is not None: + self.connector.node.set_parameters( + [ + rclpy.parameter.Parameter( + "database_path", + rclpy.parameter.Parameter.Type.STRING, + database_path, + ) + ] + ) + self._parse_class_thresholds() + self._parse_class_merge_thresholds() + self.bridge = CvBridge() + self.last_depth_image: Optional[Image] = None + self.last_camera_info: Optional[CameraInfo] = None + self._initialize_memory() + self._initialize_subscriptions() + + def _initialize_parameters(self): + """Initialize ROS2 parameters from YAML config file.""" + current_dir = Path(__file__).parent + config_dir = current_dir / "config" + + # Declare config file path parameter + self.connector.node.declare_parameter( + "node_config", + "", + descriptor=ParameterDescriptor( + type=ParameterType.PARAMETER_STRING, + description="Path to node YAML config file (empty = use default in config/)", + ), + ) + + # Get config file path + node_config_path = ( + self.connector.node.get_parameter("node_config") + .get_parameter_value() + .string_value + ) + if node_config_path: + node_yaml = Path(node_config_path) + else: + node_yaml = config_dir / "node.yaml" + + # Load YAML config + with open(node_yaml, "r") as f: + node_config = yaml.safe_load(f) + node_params = node_config.get("rai_semap_node", {}).get("ros__parameters", {}) + + # Extract grouped parameters 
with defaults + storage = node_params.get("storage", {}) + detection_filtering = node_params.get("detection_filtering", {}) + deduplication = node_params.get("deduplication", {}) + topics = node_params.get("topics", {}) + map_config = node_params.get("map", {}) + + # Declare all parameters with descriptions + parameters = [ + # Storage + ( + "backend_type", + storage.get("backend_type", "sqlite"), + ParameterType.PARAMETER_STRING, + "Database backend type: 'sqlite' (default) or 'postgres' (future)", + ), + ( + "database_path", + storage.get("database_path", "semantic_map.db"), + ParameterType.PARAMETER_STRING, + "Path to database file (SQLite) or connection string (PostgreSQL)", + ), + ( + "location_id", + storage.get("location_id", "default_location"), + ParameterType.PARAMETER_STRING, + "Identifier for the physical location", + ), + # Detection filtering + ( + "confidence_threshold", + detection_filtering.get("confidence_threshold", 0.5), + ParameterType.PARAMETER_DOUBLE, + "Minimum confidence score (0.0-1.0) for storing detections", + ), + ( + "class_confidence_thresholds", + detection_filtering.get("class_confidence_thresholds", ""), + ParameterType.PARAMETER_STRING, + "Class-specific thresholds as 'class1:threshold1,class2:threshold2' (e.g., 'person:0.7,window:0.6')", + ), + ( + "min_bbox_area", + detection_filtering.get("min_bbox_area", 100.0), + ParameterType.PARAMETER_DOUBLE, + "Minimum bounding box area (pixels^2) to filter small false positives", + ), + # Deduplication + ( + "class_merge_thresholds", + deduplication.get("class_merge_thresholds", ""), + ParameterType.PARAMETER_STRING, + "Class-specific merge radii (meters) for deduplication as 'class1:radius1,class2:radius2' (e.g., 'couch:2.5,table:1.5')", + ), + ( + "use_pointcloud_dedup", + deduplication.get("use_pointcloud_dedup", True), + ParameterType.PARAMETER_BOOL, + "Use point cloud features for improved deduplication matching", + ), + # Topics + ( + "detection_topic", + topics.get("detection_topic", "/detection_array"), + ParameterType.PARAMETER_STRING, + "Topic for RAIDetectionArray messages", + ), + ( + "map_topic", + topics.get("map_topic", "/map"), + ParameterType.PARAMETER_STRING, + "Topic for OccupancyGrid map messages", + ), + ( + "depth_topic", + topics.get("depth_topic", ""), + ParameterType.PARAMETER_STRING, + "Depth image topic (optional, for point cloud extraction)", + ), + ( + "camera_info_topic", + topics.get("camera_info_topic", ""), + ParameterType.PARAMETER_STRING, + "Camera info topic (optional, for point cloud extraction)", + ), + # Map/SLAM + ( + "map_frame_id", + map_config.get("map_frame_id", "map"), + ParameterType.PARAMETER_STRING, + "Frame ID of the SLAM map", + ), + ( + "map_resolution", + map_config.get("map_resolution", 0.05), + ParameterType.PARAMETER_DOUBLE, + "OccupancyGrid resolution (meters/pixel)", + ), + ] + + for name, default, param_type, description in parameters: + self.connector.node.declare_parameter( + name, + default, + descriptor=ParameterDescriptor( + type=param_type, + description=description, + ), + ) + + def _get_string_parameter(self, name: str) -> str: + """Get string parameter value.""" + return ( + self.connector.node.get_parameter(name).get_parameter_value().string_value + ) + + def _get_double_parameter(self, name: str) -> float: + """Get double parameter value.""" + return ( + self.connector.node.get_parameter(name).get_parameter_value().double_value + ) + + def _parse_class_thresholds(self): + """Parse class-specific confidence thresholds from parameter.""" + 
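# Accepts the same "class:value" comma list documented on the parameter,
+        # e.g. "person:0.7,window:0.6" -> {"person": 0.7, "window": 0.6}.
+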
thresholds_str = self._get_string_parameter("class_confidence_thresholds") + self.class_thresholds = {} + if thresholds_str: + for item in thresholds_str.split(","): + item = item.strip() + if ":" in item: + class_name, threshold_str = item.split(":", 1) + try: + threshold = float(threshold_str.strip()) + self.class_thresholds[class_name.strip()] = threshold + self.connector.node.get_logger().info( + f"Class-specific threshold: {class_name.strip()}={threshold:.3f}" + ) + except ValueError: + self.connector.node.get_logger().warning( + f"Invalid threshold value in '{item}', skipping" + ) + + def _parse_class_merge_thresholds(self): + """Parse class-specific merge thresholds from parameter.""" + merge_thresholds_str = self._get_string_parameter("class_merge_thresholds") + self.class_merge_thresholds = {} + if merge_thresholds_str: + for item in merge_thresholds_str.split(","): + item = item.strip() + if ":" in item: + class_name, radius_str = item.split(":", 1) + try: + radius = float(radius_str.strip()) + self.class_merge_thresholds[class_name.strip()] = radius + self.connector.node.get_logger().info( + f"Class-specific merge radius: {class_name.strip()}={radius:.2f}m" + ) + except ValueError: + self.connector.node.get_logger().warning( + f"Invalid merge radius value in '{item}', skipping" + ) + + def _initialize_memory(self): + """Initialize semantic map memory backend.""" + backend_type = self._get_string_parameter("backend_type") + database_path = self._get_string_parameter("database_path") + location_id = self._get_string_parameter("location_id") + map_frame_id = self._get_string_parameter("map_frame_id") + map_resolution = self._get_double_parameter("map_resolution") + + if backend_type == "sqlite": + backend = SQLiteBackend(database_path) + elif backend_type == "postgres": + raise NotImplementedError( + "PostgreSQL backend not yet implemented. Use 'sqlite' for now." + ) + else: + raise ValueError( + f"Unknown backend_type: {backend_type}. 
Supported: 'sqlite'" + ) + + backend.init_schema() + self.memory = SemanticMapMemory( + backend=backend, + location_id=location_id, + map_frame_id=map_frame_id, + resolution=map_resolution, + ) + self.connector.node.get_logger().info( + f"Initialized semantic map memory: backend={backend_type}, " + f"location_id={location_id}, map_frame_id={map_frame_id}, " + f"database_path={database_path}" + ) + + def _initialize_subscriptions(self): + """Initialize ROS2 subscriptions.""" + detection_topic = self._get_string_parameter("detection_topic") + map_topic = self._get_string_parameter("map_topic") + + self.detection_subscription = self.connector.node.create_subscription( + RAIDetectionArray, + detection_topic, + self.detection_callback, + qos_profile_sensor_data, + ) + self.connector.node.get_logger().info( + f"Subscribed to detection topic: {detection_topic} " + f"(QoS: reliability={qos_profile_sensor_data.reliability.name})" + ) + self.map_subscription = self.connector.node.create_subscription( + OccupancyGrid, map_topic, self.map_callback, DEFAULT_QUEUE_SIZE + ) + + # Optional depth and camera info for point cloud extraction + depth_topic = self._get_string_parameter("depth_topic") + camera_info_topic = self._get_string_parameter("camera_info_topic") + use_pointcloud = ( + self.connector.node.get_parameter("use_pointcloud_dedup") + .get_parameter_value() + .bool_value + ) + + if use_pointcloud and depth_topic: + self.depth_subscription = self.connector.node.create_subscription( + Image, depth_topic, self.depth_callback, qos_profile_sensor_data + ) + self.connector.node.get_logger().info( + f"Subscribed to depth topic: {depth_topic}" + ) + else: + self.depth_subscription = None + + if use_pointcloud and camera_info_topic: + self.camera_info_subscription = self.connector.node.create_subscription( + CameraInfo, + camera_info_topic, + self.camera_info_callback, + qos_profile_sensor_data, + ) + self.connector.node.get_logger().info( + f"Subscribed to camera info topic: {camera_info_topic}" + ) + else: + self.camera_info_subscription = None + + self.connector.node.get_logger().info( + f"Subscribed to detection_topic={detection_topic}, map_topic={map_topic}" + ) + + def depth_callback(self, msg: Image): + """Store latest depth image.""" + self.last_depth_image = msg + + def camera_info_callback(self, msg: CameraInfo): + """Store latest camera info.""" + self.last_camera_info = msg + + def _extract_pointcloud_from_bbox( + self, detection, source_frame: str + ) -> Optional[Tuple[Point, float, int]]: + """Extract point cloud from bounding box region and compute features. + + Args: + detection: Detection2D message with bounding box + source_frame: Frame ID of the detection (unused, kept for compatibility) + + Returns: + Tuple of (centroid_3d, pointcloud_size, point_count) or None if extraction fails. 
+ centroid_3d: 3D centroid of point cloud in source frame + pointcloud_size: Approximate 3D size (diagonal of bounding box in meters) + point_count: Number of valid 3D points + """ + if self.last_depth_image is None or self.last_camera_info is None: + return None + + result = extract_pointcloud_from_bbox( + detection, + self.last_depth_image, + self.last_camera_info, + self.bridge, + ) + + if result is None: + self.connector.node.get_logger().warning( + "Failed to extract point cloud from bbox" + ) + + return result + + def detection_callback(self, msg: RAIDetectionArray): + """Process detection array and store annotations.""" + self.connector.node.get_logger().debug("Entering detection_callback") + confidence_threshold = self._get_double_parameter("confidence_threshold") + map_frame_id = self._get_string_parameter("map_frame_id") + + self.connector.node.get_logger().info( + f"Received detection array with {len(msg.detections)} detections: {msg.detection_classes}, " + f"header.frame_id={msg.header.frame_id}, confidence_threshold={confidence_threshold}" + ) + + # Log details of each detection + for i, det in enumerate(msg.detections): + results_count = len(det.results) if det.results else 0 + if results_count > 0: + result = det.results[0] + self.connector.node.get_logger().debug( + f" Detection {i}: class={result.hypothesis.class_id}, " + f"score={result.hypothesis.score:.3f}, " + f"frame_id={det.header.frame_id}" + ) + else: + self.connector.node.get_logger().warning( + f" Detection {i} has no results!" + ) + + timestamp_ros = rclpy.time.Time.from_msg(msg.header.stamp) + timestamp = ( + timestamp_ros.nanoseconds / 1e9 + ) # Convert to Unix timestamp (seconds) + detection_source = msg.header.frame_id or "unknown" + + stored_count = 0 + skipped_count = 0 + default_frame_id = msg.header.frame_id + self.connector.node.get_logger().debug( + f"Processing {len(msg.detections)} detections from source={detection_source}, " + f"confidence_threshold={confidence_threshold}" + ) + for detection in msg.detections: + if self._process_detection( + detection, + confidence_threshold, + map_frame_id, + timestamp, + detection_source, + default_frame_id, + ): + stored_count += 1 + else: + skipped_count += 1 + + if stored_count > 0: + self.connector.node.get_logger().info( + f"Stored {stored_count} annotations, skipped {skipped_count} (low confidence or transform failed)" + ) + elif len(msg.detections) > 0: + self.connector.node.get_logger().warning( + f"Received {len(msg.detections)} detections but none were stored " + f"(confidence threshold: {confidence_threshold})" + ) + + def _process_detection( + self, + detection: Detection2D, + confidence_threshold: float, + map_frame_id: str, + timestamp: float, + detection_source: str, + default_frame_id: str, + ) -> bool: + """Process a single detection and store annotation if valid. + + Returns: + True if annotation was stored, False otherwise. 
+ """ + self.connector.node.get_logger().debug( + f"Entering _process_detection: source={detection_source}" + ) + + # Validate detection and extract basic data + validation_result = self._validate_and_extract_detection_data( + detection, confidence_threshold, default_frame_id + ) + if not validation_result: + return False + + object_class, confidence, source_frame, pose_in_source_frame = validation_result + + # Validate and transform pose to map frame + pose_in_map_frame = self._validate_and_transform_pose( + pose_in_source_frame, source_frame, map_frame_id, object_class, detection + ) + if pose_in_map_frame is None: + return False + + # Extract point cloud features if enabled + pointcloud_features, pointcloud_centroid_map, pc_size = ( + self._extract_pointcloud_features(detection, source_frame, map_frame_id) + ) + + # Store or update annotation + vision_detection_id = detection.id if hasattr(detection, "id") else None + return self._store_or_update_annotation( + object_class=object_class, + confidence=confidence, + pose_in_map_frame=pose_in_map_frame, + pointcloud_centroid_map=pointcloud_centroid_map, + pointcloud_features=pointcloud_features, + pc_size=pc_size, + timestamp=timestamp, + detection_source=detection_source, + source_frame=source_frame, + vision_detection_id=vision_detection_id, + ) + + def _validate_and_extract_detection_data( + self, detection: Detection2D, confidence_threshold: float, default_frame_id: str + ) -> Optional[Tuple[str, float, str, Pose]]: + """Validate detection and extract basic data. + + Returns: + Tuple of (object_class, confidence, source_frame, pose_in_source_frame) if valid, + None otherwise. + """ + if not detection.results: + self.connector.node.get_logger().debug("Detection has no results, skipping") + return None + + result = detection.results[0] + confidence = result.hypothesis.score + object_class = result.hypothesis.class_id + + # Check bounding box size + min_bbox_area = self._get_double_parameter("min_bbox_area") + bbox_area = detection.bbox.size_x * detection.bbox.size_y + + if bbox_area < min_bbox_area: + self.connector.node.get_logger().debug( + f"Bounding box too small: area={bbox_area:.1f} < {min_bbox_area:.1f} pixels^2, " + f"skipping {object_class}" + ) + return None + + # Use class-specific threshold if available + effective_threshold = self.class_thresholds.get( + object_class, confidence_threshold + ) + + self.connector.node.get_logger().info( + f"Processing detection: class={object_class}, confidence={confidence:.3f}, " + f"threshold={effective_threshold:.3f}, bbox_area={bbox_area:.1f}" + ) + + if confidence < effective_threshold: + self.connector.node.get_logger().debug( + f"Confidence {confidence:.3f} below threshold {effective_threshold:.3f}, skipping" + ) + return None + + # Use detection frame_id, fallback to message header frame_id if empty + source_frame = detection.header.frame_id or default_frame_id + if not source_frame: + self.connector.node.get_logger().warning( + f"Detection has no frame_id (detection.frame_id='{detection.header.frame_id}', " + f"default_frame_id='{default_frame_id}'), skipping" + ) + return None + + pose_in_source_frame = result.pose.pose + return (object_class, confidence, source_frame, pose_in_source_frame) + + def _validate_and_transform_pose( + self, + pose_in_source_frame: Pose, + source_frame: str, + map_frame_id: str, + object_class: str, + detection: Detection2D, + ) -> Optional[Pose]: + """Validate pose and transform to map frame. 
+ + Returns: + Transformed pose in map frame if successful, None otherwise. + """ + # Check if pose is empty (all zeros) + pose_is_empty = ( + pose_in_source_frame.position.x == 0.0 + and pose_in_source_frame.position.y == 0.0 + and pose_in_source_frame.position.z == 0.0 + ) + + if pose_is_empty: + self.connector.node.get_logger().warning( + f"Detection for {object_class} has empty pose (0,0,0). " + f"GroundingDINO provides 2D bounding boxes but no 3D pose. " + f"Cannot store annotation without 3D position. " + f"Bounding box center: ({detection.bbox.center.position.x:.1f}, " + f"{detection.bbox.center.position.y:.1f})" + ) + return None + + self.connector.node.get_logger().debug( + f"Pose in source frame ({source_frame}): " + f"x={pose_in_source_frame.position.x:.3f}, " + f"y={pose_in_source_frame.position.y:.3f}, " + f"z={pose_in_source_frame.position.z:.3f}" + ) + + try: + pose_in_map_frame = self._transform_pose_to_map( + pose_in_source_frame, source_frame, map_frame_id + ) + self.connector.node.get_logger().info( + f"Transformed pose to map frame ({map_frame_id}): " + f"x={pose_in_map_frame.position.x:.3f}, " + f"y={pose_in_map_frame.position.y:.3f}, " + f"z={pose_in_map_frame.position.z:.3f}" + ) + return pose_in_map_frame + except Exception as e: + self.connector.node.get_logger().warning( + f"Failed to transform pose from {source_frame} to {map_frame_id}: {e}" + ) + return None + + def _extract_pointcloud_features( + self, detection, source_frame: str, map_frame_id: str + ) -> Tuple[Optional[dict], Optional[Point], Optional[float]]: + """Extract point cloud features if enabled. + + Returns: + Tuple of (pointcloud_features_dict, pointcloud_centroid_map, pc_size) + """ + use_pointcloud = ( + self.connector.node.get_parameter("use_pointcloud_dedup") + .get_parameter_value() + .bool_value + ) + + if not use_pointcloud: + return (None, None, None) + + pc_result = self._extract_pointcloud_from_bbox(detection, source_frame) + if not pc_result: + return (None, None, None) + + pc_centroid_source, pc_size, pc_count = pc_result + + try: + pc_centroid_map = self._transform_pose_to_map( + Pose(position=pc_centroid_source), source_frame, map_frame_id + ) + pointcloud_features = { + "centroid": { + "x": pc_centroid_map.position.x, + "y": pc_centroid_map.position.y, + "z": pc_centroid_map.position.z, + }, + "size_3d": pc_size, + "point_count": pc_count, + } + pointcloud_centroid_map = Point( + x=pc_centroid_map.position.x, + y=pc_centroid_map.position.y, + z=pc_centroid_map.position.z, + ) + self.connector.node.get_logger().debug( + f"Point cloud features: size={pc_size:.2f}m, points={pc_count}, " + f"centroid=({pc_centroid_map.position.x:.2f}, {pc_centroid_map.position.y:.2f}, " + f"{pc_centroid_map.position.z:.2f})" + ) + return (pointcloud_features, pointcloud_centroid_map, pc_size) + except Exception as e: + self.connector.node.get_logger().warning( + f"Failed to transform point cloud centroid: {e}" + ) + return (None, None, None) + + def _determine_merge_decision( + self, + nearby: list, + pointcloud_features: Optional[dict], + pc_size: Optional[float], + use_pointcloud: bool, + ) -> Tuple[bool, Optional[str]]: + """Determine if detection should merge with existing annotation. 
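+
+        Merges with the first nearby annotation by default. When both the new
+        detection and the existing annotation carry point cloud features, a
+        size check can veto the merge: sizes with ratio < 0.5 and absolute
+        difference > 0.5 m are treated as different objects.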
+ + Returns: + Tuple of (should_merge, existing_id) + """ + if not nearby: + return (False, None) + + existing = nearby[0] + + # If both have point cloud data, check size similarity + if pointcloud_features and use_pointcloud and pc_size is not None: + if existing.metadata and "pointcloud" in existing.metadata: + existing_pc = existing.metadata["pointcloud"] + existing_size = existing_pc.get("size_3d", 0) + + if existing_size > 0: + size_ratio = min(pc_size, existing_size) / max( + pc_size, existing_size + ) + size_diff = abs(existing_size - pc_size) + + # If sizes are very different, likely different objects + if size_ratio < 0.5 and size_diff > 0.5: + self.connector.node.get_logger().info( + f"Point cloud size mismatch: existing={existing_size:.2f}m, " + f"new={pc_size:.2f}m, ratio={size_ratio:.2f}. Treating as different object." + ) + return (False, None) + else: + self.connector.node.get_logger().debug( + f"Point cloud size match: existing={existing_size:.2f}m, " + f"new={pc_size:.2f}m, ratio={size_ratio:.2f}" + ) + return (True, existing.id) + + # Use spatial matching (either no point cloud or sizes match) + return (True, existing.id) + + def _store_or_update_annotation( + self, + object_class: str, + confidence: float, + pose_in_map_frame: Pose, + pointcloud_centroid_map: Optional[Point], + pointcloud_features: Optional[dict], + pc_size: Optional[float], + timestamp: float, + detection_source: str, + source_frame: str, + vision_detection_id: Optional[str], + ) -> bool: + """Store or update annotation based on matching logic. + + Returns: + True if annotation was stored/updated, False otherwise. + """ + # Use class-specific merge threshold if available + merge_threshold = self.class_merge_thresholds.get(object_class, 0.5) + + # Use point cloud centroid for matching if available, otherwise use pose + match_center = ( + pointcloud_centroid_map + if pointcloud_centroid_map + else Point( + x=pose_in_map_frame.position.x, + y=pose_in_map_frame.position.y, + z=pose_in_map_frame.position.z, + ) + ) + + try: + self.connector.node.get_logger().info( + f"Storing annotation: class={object_class}, confidence={confidence:.3f}, " + f"merge_radius={merge_threshold:.2f}m, location_id={self.memory.location_id}" + ) + + # Query nearby annotations + nearby = self.memory.query_by_location( + match_center, + radius=merge_threshold, + object_class=object_class, + location_id=self.memory.location_id, + ) + + use_pointcloud = ( + self.connector.node.get_parameter("use_pointcloud_dedup") + .get_parameter_value() + .bool_value + ) + should_merge, existing_id = self._determine_merge_decision( + nearby, pointcloud_features, pc_size, use_pointcloud + ) + + # Prepare metadata + metadata = {} + if pointcloud_features: + metadata["pointcloud"] = pointcloud_features + + if should_merge and existing_id: + # Update existing annotation + existing_ann = nearby[0] + updated = SemanticAnnotation( + id=existing_id, + object_class=object_class, + pose=pose_in_map_frame, + confidence=max(existing_ann.confidence, confidence), + timestamp=timestamp, + detection_source=detection_source, + source_frame=source_frame, + location_id=self.memory.location_id, + vision_detection_id=vision_detection_id, + metadata=metadata if metadata else existing_ann.metadata, + ) + self.memory.update_annotation(updated) + self.connector.node.get_logger().info( + f"Updated existing annotation for {object_class}" + ) + else: + # Insert new annotation + self.memory.store_annotation( + object_class=object_class, + pose=pose_in_map_frame, + 
confidence=confidence, + timestamp=timestamp, + detection_source=detection_source, + source_frame=source_frame, + location_id=self.memory.location_id, + vision_detection_id=vision_detection_id, + metadata=metadata if metadata else None, + ) + self.connector.node.get_logger().info( + f"Created new annotation for {object_class}" + ) + + return True + + except Exception as e: + self.connector.node.get_logger().error(f"Failed to store annotation: {e}") + return False + + def map_callback(self, msg: OccupancyGrid): + """Process map update and store metadata.""" + map_frame_id = self._get_string_parameter("map_frame_id") + + if msg.header.frame_id != map_frame_id: + self.connector.node.get_logger().warning( + f"Map frame_id mismatch: expected {map_frame_id}, got {msg.header.frame_id}" + ) + + self.memory.map_frame_id = msg.header.frame_id + self.memory.resolution = msg.info.resolution + + self.connector.node.get_logger().debug( + f"Updated map metadata: frame_id={msg.header.frame_id}, " + f"resolution={msg.info.resolution}" + ) + + def _transform_pose_to_map( + self, pose: Pose, source_frame: str, target_frame: str + ) -> Pose: + """Transform pose from source frame to map frame. + + If source and target frames are the same, returns the pose unchanged. + + Raises: + Exception: If transform lookup fails. + """ + # No transform needed if frames are identical + if source_frame == target_frame: + return pose + + try: + # Use ROS2Connector's get_transform method which handles waiting and errors + transform = self.connector.get_transform( + target_frame=target_frame, + source_frame=source_frame, + timeout_sec=TF_LOOKUP_TIMEOUT_SEC, + ) + pose_stamped = PoseStamped() + pose_stamped.pose = pose + pose_stamped.header.frame_id = source_frame + pose_stamped.header.stamp = transform.header.stamp + + transformed_pose_stamped = do_transform_pose_stamped( + pose_stamped, transform + ) + return transformed_pose_stamped.pose + except Exception as e: + raise Exception(f"Transform lookup failed: {e}") from e + + +def main(args=None): + """Main entry point for the semantic map node.""" + rclpy.init(args=args) + connector = ROS2Connector( + node_name="rai_semap_node", executor_type="multi_threaded" + ) + semantic_map_node = SemanticMapNode(connector=connector) + try: + rclpy.spin(semantic_map_node.connector.node) + except KeyboardInterrupt: + pass + finally: + semantic_map_node.connector.shutdown() + rclpy.shutdown() + + +if __name__ == "__main__": + main() diff --git a/src/rai_semap/rai_semap/ros2/perception_utils.py b/src/rai_semap/rai_semap/ros2/perception_utils.py new file mode 100644 index 000000000..21c28c9f7 --- /dev/null +++ b/src/rai_semap/rai_semap/ros2/perception_utils.py @@ -0,0 +1,248 @@ +# Copyright (C) 2025 Julia Jia +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Perception utilities for 3D pose computation and point cloud extraction. 
+ +This module contains perception layer logic that may belong to rai_perception: +- 3D pose computation from 2D bounding boxes using depth images +- Point cloud extraction from bounding box regions +- Detection enhancement (filling empty poses from 2D detections) +""" + +from typing import Optional, Tuple + +import numpy as np +from cv_bridge import CvBridge +from geometry_msgs.msg import Point, Pose, Quaternion +from sensor_msgs.msg import CameraInfo, Image +from vision_msgs.msg import Detection2D + + +def compute_3d_pose_from_bbox( + bbox_center_x: float, + bbox_center_y: float, + depth_image: Image, + camera_info: CameraInfo, + bridge: CvBridge, + region_size: int = 5, +) -> Optional[Pose]: + """Compute 3D pose from 2D bounding box center using depth and camera intrinsics. + + This is perception layer logic that converts 2D detections to 3D poses. + + Args: + bbox_center_x: X coordinate of bounding box center in pixels + bbox_center_y: Y coordinate of bounding box center in pixels + depth_image: Depth image message + camera_info: Camera info message with intrinsics + bridge: CvBridge instance for image conversion + region_size: Side length in pixels of the fallback region sampled when the center pixel has no valid depth + + Returns: + Pose in camera frame, or None if computation fails + """ + try: + # Convert depth image to numpy array + depth_array = bridge.imgmsg_to_cv2(depth_image, desired_encoding="passthrough") + + # Get pixel coordinates (round to nearest integer) + u = int(round(bbox_center_x)) + v = int(round(bbox_center_y)) + + # Check bounds + if u < 0 or u >= depth_array.shape[1] or v < 0 or v >= depth_array.shape[0]: + return None + + # Get raw depth value at the pixel (units depend on encoding; converted to meters below) + depth_value = float(depth_array[v, u]) + if depth_value <= 0: + # Try a small region around the center + y_start = max(0, v - region_size // 2) + y_end = min(depth_array.shape[0], v + region_size // 2 + 1) + x_start = max(0, u - region_size // 2) + x_end = min(depth_array.shape[1], u + region_size // 2 + 1) + region = depth_array[y_start:y_end, x_start:x_end] + valid_depths = region[region > 0] + if len(valid_depths) == 0: + return None + depth_value = float(np.median(valid_depths)) + + # Convert depth to meters (assuming depth image is in mm, adjust if needed) + # Common depth encodings: 16UC1 (mm), 32FC1 (m) + if depth_image.encoding in ["16UC1", "mono16"]: + depth_value = depth_value / 1000.0 # mm to meters + + # Get camera intrinsics + fx = camera_info.k[0] + fy = camera_info.k[4] + cx = camera_info.k[2] + cy = camera_info.k[5] + + # Project pixel to 3D + z = depth_value + x = (u - cx) * z / fx + y = (v - cy) * z / fy + + # Create pose + pose = Pose() + pose.position = Point(x=x, y=y, z=z) + pose.orientation = Quaternion(x=0.0, y=0.0, z=0.0, w=1.0) # No rotation + + return pose + + except Exception: + return None + + +def extract_pointcloud_from_bbox( + detection, + depth_image: Image, + camera_info: CameraInfo, + bridge: CvBridge, +) -> Optional[Tuple[Point, float, int]]: + """Extract point cloud from bounding box region and compute features. + + This is perception layer logic that extracts 3D point cloud data from images. + + Args: + detection: Detection2D message with bounding box + depth_image: Depth image message + camera_info: Camera info message with intrinsics + bridge: CvBridge instance for image conversion + + Returns: + Tuple of (centroid_3d, pointcloud_size, point_count) or None if extraction fails.
+ centroid_3d: 3D centroid of point cloud in camera frame + pointcloud_size: Approximate 3D size (diagonal of bounding box in meters) + point_count: Number of valid 3D points + """ + try: + # Convert depth image to numpy array + depth_array = bridge.imgmsg_to_cv2(depth_image, desired_encoding="passthrough") + + # Get bounding box bounds + bbox_center_x = detection.bbox.center.position.x + bbox_center_y = detection.bbox.center.position.y + bbox_size_x = detection.bbox.size_x + bbox_size_y = detection.bbox.size_y + + # Convert to pixel coordinates + x_min = int(max(0, bbox_center_x - bbox_size_x / 2)) + x_max = int(min(depth_array.shape[1], bbox_center_x + bbox_size_x / 2)) + y_min = int(max(0, bbox_center_y - bbox_size_y / 2)) + y_max = int(min(depth_array.shape[0], bbox_center_y + bbox_size_y / 2)) + + if x_max <= x_min or y_max <= y_min: + return None + + # Get camera intrinsics + fx = camera_info.k[0] + fy = camera_info.k[4] + cx = camera_info.k[2] + cy = camera_info.k[5] + + # Extract depth region + depth_region = depth_array[y_min:y_max, x_min:x_max] + + # Convert depth to meters if needed + if depth_image.encoding in ["16UC1", "mono16"]: + depth_region = depth_region.astype(np.float32) / 1000.0 + + # Extract valid points and convert to 3D + valid_mask = depth_region > 0 + if not np.any(valid_mask): + return None + + y_coords, x_coords = np.where(valid_mask) + depths = depth_region[valid_mask] + + # Convert to 3D points in camera frame + u_coords = x_coords + x_min + v_coords = y_coords + y_min + + z = depths + x = (u_coords - cx) * z / fx + y = (v_coords - cy) * z / fy + + # Compute centroid + centroid_x = np.mean(x) + centroid_y = np.mean(y) + centroid_z = np.mean(z) + + # Compute 3D bounding box size (diagonal) + if len(x) > 0: + x_range = np.max(x) - np.min(x) + y_range = np.max(y) - np.min(y) + z_range = np.max(z) - np.min(z) + size_3d = np.sqrt(x_range**2 + y_range**2 + z_range**2) + else: + size_3d = 0.0 + + point_count = len(x) + + centroid = Point(x=float(centroid_x), y=float(centroid_y), z=float(centroid_z)) + + return (centroid, float(size_3d), point_count) + + except Exception: + return None + + +def enhance_detection_with_3d_pose( + detection: Detection2D, + depth_image: Optional[Image], + camera_info: Optional[CameraInfo], + bridge: CvBridge, + region_size: int = 5, +) -> bool: + """Enhance detection with 3D pose if pose is empty and depth data is available. + + This is perception layer logic that handles incomplete detections (2D detections + without 3D poses) by computing 3D poses from depth images. 
+ + Args: + detection: Detection2D message to enhance + depth_image: Optional depth image for 3D pose computation + camera_info: Optional camera info for 3D pose computation + bridge: CvBridge instance for image conversion + + Returns: + True if pose was enhanced, False otherwise + """ + if not detection.results or len(detection.results) == 0: + return False + + result = detection.results[0] + pose = result.pose.pose + + # Check if pose is empty (0,0,0) + if not ( + pose.position.x == 0.0 and pose.position.y == 0.0 and pose.position.z == 0.0 + ): + return False # Pose already exists + + # Compute 3D pose from bounding box if depth and camera info are available + if depth_image is None or camera_info is None: + return False + + bbox_center_x = detection.bbox.center.position.x + bbox_center_y = detection.bbox.center.position.y + computed_pose = compute_3d_pose_from_bbox( + bbox_center_x, bbox_center_y, depth_image, camera_info, bridge, region_size + ) + + if computed_pose: + result.pose.pose = computed_pose + return True + + return False diff --git a/src/rai_semap/rai_semap/ros2/visualizer.py b/src/rai_semap/rai_semap/ros2/visualizer.py new file mode 100644 index 000000000..62a0570b0 --- /dev/null +++ b/src/rai_semap/rai_semap/ros2/visualizer.py @@ -0,0 +1,362 @@ +# Copyright (C) 2025 Julia Jia +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
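The two helpers above share the standard pinhole back-projection, with intrinsics read row-major from `CameraInfo.k` (`fx = k[0]`, `fy = k[4]`, `cx = k[2]`, `cy = k[5]`). A self-contained sanity check of that math, with hypothetical intrinsics and pixel values (not part of this patch):

```python
# Illustrative pinhole round-trip; intrinsics follow the CameraInfo.k layout.
k = [525.0, 0.0, 319.5, 0.0, 525.0, 239.5, 0.0, 0.0, 1.0]  # hypothetical values
fx, fy, cx, cy = k[0], k[4], k[2], k[5]

u, v, depth_m = 400, 300, 2.0  # pixel coordinates and depth in meters

# Back-projection, identical to the math in compute_3d_pose_from_bbox
x = (u - cx) * depth_m / fx
y = (v - cy) * depth_m / fy
z = depth_m

# Re-projecting the 3D point must land back on the original pixel
assert round(fx * x / z + cx) == u
assert round(fy * y / z + cy) == v
```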
+ +import logging +from pathlib import Path +from typing import Dict + +import rclpy +import yaml +from builtin_interfaces.msg import Duration +from geometry_msgs.msg import Point +from geometry_msgs.msg import Pose as ROS2Pose +from rai.types.base import ROS2BaseModel +from rai.types.ros2.convert import to_ros2_msg +from rcl_interfaces.msg import ParameterDescriptor, ParameterType +from rclpy.node import Node +from std_msgs.msg import ColorRGBA +from visualization_msgs.msg import Marker, MarkerArray + +from rai_semap.core.backend.sqlite_backend import SQLiteBackend +from rai_semap.core.semantic_map_memory import SemanticMapMemory +from rai_semap.utils.ros2_log import ROS2LogHandler + + +class SemanticMapVisualizer(Node): + """ROS2 node for visualizing semantic map annotations in RViz2.""" + + def __init__(self): + super().__init__("semantic_map_visualizer") + + handler = ROS2LogHandler(self) + handler.setLevel(logging.DEBUG) + python_logger = logging.getLogger("rai_semap") + python_logger.setLevel(logging.DEBUG) + python_logger.handlers.clear() + python_logger.addHandler(handler) + python_logger.propagate = False + + self._initialize_parameters() + self._initialize_semap_memory() + self._initialize_publisher() + self._initialize_timer() + + self.class_colors, self.default_color = self._generate_class_colors() + + def _initialize_parameters(self): + """Initialize ROS2 parameters.""" + parameters = [ + ( + "database_path", + "semantic_map.db", + ParameterType.PARAMETER_STRING, + "Path to SQLite database file", + ), + ( + "location_id", + "default_location", + ParameterType.PARAMETER_STRING, + "Location identifier to query", + ), + ( + "map_frame_id", + "map", + ParameterType.PARAMETER_STRING, + "Frame ID of the SLAM map", + ), + ( + "map_resolution", + 0.05, + ParameterType.PARAMETER_DOUBLE, + "OccupancyGrid resolution (meters/pixel)", + ), + ( + "marker_topic", + "/semantic_map_markers", + ParameterType.PARAMETER_STRING, + "Topic for publishing MarkerArray messages", + ), + ( + "update_rate", + 1.0, + ParameterType.PARAMETER_DOUBLE, + "Rate (Hz) for updating markers", + ), + ( + "marker_scale", + 0.3, + ParameterType.PARAMETER_DOUBLE, + "Scale factor for marker size", + ), + ( + "show_text_labels", + True, + ParameterType.PARAMETER_BOOL, + "Whether to show text labels with object class names", + ), + ( + "marker_lifetime", + 0.0, + ParameterType.PARAMETER_DOUBLE, + "Marker lifetime in seconds (0 = never expire)", + ), + ( + "class_colors_config", + "", + ParameterType.PARAMETER_STRING, + "Path to YAML file with class color definitions (empty = use default in config/)", + ), + ] + + for name, default, param_type, description in parameters: + self.declare_parameter( + name, + default, + descriptor=ParameterDescriptor( + type=param_type, description=description + ), + ) + + def _get_string_parameter(self, name: str) -> str: + """Get string parameter value.""" + return self.get_parameter(name).get_parameter_value().string_value + + def _get_double_parameter(self, name: str) -> float: + """Get double parameter value.""" + return self.get_parameter(name).get_parameter_value().double_value + + def _get_bool_parameter(self, name: str) -> bool: + """Get bool parameter value.""" + return self.get_parameter(name).get_parameter_value().bool_value + + def _initialize_semap_memory(self): + """Initialize semantic map memory backend.""" + database_path = self._get_string_parameter("database_path") + location_id = self._get_string_parameter("location_id") + map_frame_id = 
self._get_string_parameter("map_frame_id") + map_resolution = self._get_double_parameter("map_resolution") + + backend = SQLiteBackend(database_path) + self.memory = SemanticMapMemory( + backend=backend, + location_id=location_id, + map_frame_id=map_frame_id, + resolution=map_resolution, + ) + self.get_logger().info( + f"Initialized semantic map memory: location_id={location_id}, " + f"map_frame_id={map_frame_id}, database_path={database_path}" + ) + + def _initialize_publisher(self): + """Initialize marker publisher.""" + marker_topic = self._get_string_parameter("marker_topic") + self.marker_publisher = self.create_publisher(MarkerArray, marker_topic, 10) + self.get_logger().info(f"Publishing markers to: {marker_topic}") + + def _initialize_timer(self): + """Initialize update timer.""" + update_rate = self._get_double_parameter("update_rate") + timer_period = 1.0 / update_rate if update_rate > 0 else 1.0 + self.timer = self.create_timer(timer_period, self._update_markers) + self.get_logger().info(f"Update rate: {update_rate} Hz") + + def _generate_class_colors(self) -> tuple[Dict[str, ColorRGBA], ColorRGBA]: + """Load color map for object classes from YAML config.""" + config_path = self._get_string_parameter("class_colors_config") + + if config_path: + yaml_path = Path(config_path) + else: + current_dir = Path(__file__).parent + yaml_path = current_dir / "config" / "visualizer.yaml" + + default_color = ColorRGBA(r=0.5, g=0.5, b=0.5, a=0.8) + colors = {} + if yaml_path.exists(): + try: + with open(yaml_path, "r") as f: + config = yaml.safe_load(f) + + default_color_list = config.get("default_color", [0.5, 0.5, 0.5, 0.8]) + default_color = ColorRGBA( + r=default_color_list[0], + g=default_color_list[1], + b=default_color_list[2], + a=default_color_list[3] if len(default_color_list) > 3 else 0.8, + ) + + class_colors_config = config.get("class_colors", {}) + for class_name, color_value in class_colors_config.items(): + if isinstance(color_value, list): + colors[class_name] = ColorRGBA( + r=color_value[0], + g=color_value[1], + b=color_value[2], + a=color_value[3] if len(color_value) > 3 else 0.8, + ) + else: + colors[class_name] = ColorRGBA( + r=color_value.get("r", 0.5), + g=color_value.get("g", 0.5), + b=color_value.get("b", 0.5), + a=color_value.get("a", 0.8), + ) + self.get_logger().info( + f"Loaded {len(colors)} class colors from {yaml_path}" + ) + except Exception as e: + self.get_logger().warning( + f"Failed to load class colors from {yaml_path}: {e}" + ) + else: + self.get_logger().warning( + f"Class colors config file not found: {yaml_path}" + ) + + return colors, default_color + + def _get_class_color(self, object_class: str) -> ColorRGBA: + """Get color for object class, with fallback for unknown classes.""" + if object_class in self.class_colors: + return self.class_colors[object_class] + return self.default_color + + def _convert_pose_to_ros2(self, pose) -> ROS2Pose: + """Convert pose to ROS2 message type if needed.""" + if isinstance(pose, ROS2BaseModel): + # It's a rai.types.Pose, convert to geometry_msgs.msg.Pose + return to_ros2_msg(pose) + else: + # Already a geometry_msgs.msg.Pose + return pose + + def _create_sphere_marker(self, annotation, marker_id: int, scale: float) -> Marker: + """Create a sphere marker for an annotation.""" + marker = Marker() + marker.header.frame_id = self._get_string_parameter("map_frame_id") + marker.header.stamp = self.get_clock().now().to_msg() + marker.ns = annotation.object_class + marker.id = marker_id + marker.type = Marker.SPHERE + 
marker.action = Marker.ADD + + marker.pose = self._convert_pose_to_ros2(annotation.pose) + marker.scale.x = scale + marker.scale.y = scale + marker.scale.z = scale + + color = self._get_class_color(annotation.object_class) + color.a = 0.6 + 0.4 * annotation.confidence + marker.color = color + + marker_lifetime = self._get_double_parameter("marker_lifetime") + if marker_lifetime > 0: + lifetime_duration = Duration() + lifetime_duration.sec = int(marker_lifetime) + lifetime_duration.nanosec = int( + (marker_lifetime - int(marker_lifetime)) * 1e9 + ) + marker.lifetime = lifetime_duration + + return marker + + def _create_text_marker(self, annotation, marker_id: int, scale: float) -> Marker: + """Create a text marker for an annotation label.""" + marker = Marker() + marker.header.frame_id = self._get_string_parameter("map_frame_id") + marker.header.stamp = self.get_clock().now().to_msg() + marker.ns = f"{annotation.object_class}_text" + marker.id = marker_id + marker.type = Marker.TEXT_VIEW_FACING + marker.action = Marker.ADD + + marker.pose = self._convert_pose_to_ros2(annotation.pose) + marker.pose.position.z += scale * 0.5 + marker.scale.z = scale * 0.3 + + marker.color = ColorRGBA(r=1.0, g=1.0, b=1.0, a=1.0) + marker.text = f"{annotation.object_class}\n{annotation.confidence:.2f}" + + marker_lifetime = self._get_double_parameter("marker_lifetime") + if marker_lifetime > 0: + lifetime_duration = Duration() + lifetime_duration.sec = int(marker_lifetime) + lifetime_duration.nanosec = int( + (marker_lifetime - int(marker_lifetime)) * 1e9 + ) + marker.lifetime = lifetime_duration + + return marker + + def _update_markers(self): + """Query database and publish markers.""" + location_id = self._get_string_parameter("location_id") + center = Point(x=0.0, y=0.0, z=0.0) + + try: + annotations = self.memory.query_by_location( + center, radius=1e10, location_id=location_id + ) + except Exception as e: + self.get_logger().error(f"Failed to query annotations: {e}") + return + + if not annotations: + self.get_logger().debug("No annotations found") + marker_array = MarkerArray() + marker_array.markers = [] + self.marker_publisher.publish(marker_array) + return + + marker_array = MarkerArray() + marker_scale = self._get_double_parameter("marker_scale") + show_text = self._get_bool_parameter("show_text_labels") + + marker_id = 0 + for annotation in annotations: + sphere_marker = self._create_sphere_marker( + annotation, marker_id, marker_scale + ) + marker_array.markers.append(sphere_marker) + marker_id += 1 + + if show_text: + text_marker = self._create_text_marker( + annotation, marker_id, marker_scale + ) + marker_array.markers.append(text_marker) + marker_id += 1 + + self.marker_publisher.publish(marker_array) + self.get_logger().debug( + f"Published {len(annotations)} annotations as {len(marker_array.markers)} markers" + ) + + +def main(args=None): + """Main entry point for the semantic map visualizer.""" + rclpy.init(args=args) + node = SemanticMapVisualizer() + try: + rclpy.spin(node) + except KeyboardInterrupt: + pass + finally: + node.destroy_node() + rclpy.shutdown() + + +if __name__ == "__main__": + main() diff --git a/src/rai_semap/rai_semap/scripts/navigate_collect.py b/src/rai_semap/rai_semap/scripts/navigate_collect.py new file mode 100644 index 000000000..adfa15685 --- /dev/null +++ b/src/rai_semap/rai_semap/scripts/navigate_collect.py @@ -0,0 +1,218 @@ +# Copyright (C) 2025 Julia Jia +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in 
compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import argparse +import logging +import time +from typing import List + +import rclpy +from nav2_msgs.action import NavigateToPose +from rai.communication.ros2 import ROS2Connector +from rai.tools.ros2 import Nav2Toolkit +from rai.tools.time import WaitForSecondsTool +from rclpy.action import ActionClient + +from rai_semap.utils.ros2_log import ROS2LogHandler + + +class NavigationCollector: + """Navigate robot and collect detections for semantic map validation.""" + + def __init__(self, connector: ROS2Connector): + self.connector = connector + self.nav_toolkit = Nav2Toolkit(connector=connector) + self.wait_tool = WaitForSecondsTool() + self._nav_action_ready = False + + def wait_for_nav_action_server(self, timeout_sec: float = 60.0) -> bool: + """Wait for Nav2 action server to be available. + + Args: + timeout_sec: Maximum time to wait for server. + + Returns: + True if server is available, False otherwise. + """ + if self._nav_action_ready: + return True + + node = self.connector.node + node.get_logger().info("Waiting for Nav2 action server...") + + # Try different possible action names + action_names = ["navigate_to_pose", "/navigate_to_pose"] + + for action_name in action_names: + action_client = ActionClient(node, NavigateToPose, action_name) + start_time = time.time() + + while time.time() - start_time < timeout_sec: + if action_client.wait_for_server(timeout_sec=2.0): + node.get_logger().info( + f"Nav2 action server ready at: {action_name}" + ) + self._nav_action_ready = True + return True + elapsed = time.time() - start_time + if int(elapsed) % 10 == 0 and elapsed > 0: + node.get_logger().info(f"Waiting for Nav2... ({elapsed:.1f}s)") + + node.get_logger().error( + f"Nav2 action server not available after {timeout_sec}s. " + "Check: ros2 action list | grep navigate" + ) + return False + + def navigate_to_waypoints(self, waypoints: List[tuple]) -> None: + """Navigate to a series of waypoints. + + Args: + waypoints: List of (x, y) or (x, y, yaw) tuples representing waypoints in map frame. 
+ """ + node = self.connector.node + + # Wait for Nav2 action server to be ready + if not self.wait_for_nav_action_server(): + node.get_logger().error("Cannot navigate: Nav2 action server not available") + return + + node.get_logger().info(f"Starting navigation to {len(waypoints)} waypoints") + + for i, waypoint in enumerate(waypoints): + if len(waypoint) == 2: + x, y = waypoint + yaw = 0.0 + elif len(waypoint) == 3: + x, y, yaw = waypoint + else: + node.get_logger().warn(f"Invalid waypoint format: {waypoint}, skipping") + continue + + node.get_logger().info( + f"Waypoint {i + 1}/{len(waypoints)}: ({x:.1f}, {y:.1f})" + ) + + # Use Nav2Toolkit to navigate + nav_tools = self.nav_toolkit.get_tools() + navigate_tool = None + for tool in nav_tools: + if "navigate" in tool.name.lower(): + navigate_tool = tool + break + + if navigate_tool: + try: + navigate_tool.invoke({"x": x, "y": y, "z": 0.0, "yaw": yaw}) + except Exception as e: + node.get_logger().warn(f"Navigation failed: {e}") + else: + node.get_logger().warn("Navigate tool not found, skipping waypoint") + + # Wait at waypoint to allow detections to be collected + self.wait_tool.invoke({"seconds": 5.0}) + + node.get_logger().info("Navigation complete") + + def collect_detections(self, duration_seconds: float = 30.0) -> None: + """Stay in place and collect detections. + + Args: + duration_seconds: How long to collect detections. + """ + node = self.connector.node + node.get_logger().info(f"Collecting detections for {duration_seconds}s...") + + start_time = time.time() + while time.time() - start_time < duration_seconds: + self.wait_tool.invoke({"seconds": 2.0}) + elapsed = time.time() - start_time + if int(elapsed) % 5 == 0: + node.get_logger().info( + f"Collecting... {elapsed:.0f}/{duration_seconds}s" + ) + + node.get_logger().info("Collection complete") + + +def main(): + parser = argparse.ArgumentParser( + description="Navigate robot and collect detections for semantic map validation" + ) + parser.add_argument( + "--waypoints", + nargs="+", + type=float, + help="Waypoints as x1 y1 x2 y2 ... (in map frame)", + default=[2.0, 0.0, 4.0, 0.0, 2.0, 2.0], + ) + parser.add_argument( + "--collect-duration", + type=float, + default=10.0, + help="Duration to collect detections at final location (seconds). 
Detections are collected in real-time during navigation, so this is typically minimal (default: 10.0)", + ) + parser.add_argument( + "--use-sim-time", + action="store_true", + help="Use simulation time", + ) + + args = parser.parse_args() + + if len(args.waypoints) % 2 != 0: + parser.error("Waypoints must be pairs of (x, y) coordinates") + + waypoints = [ + (args.waypoints[i], args.waypoints[i + 1]) + for i in range(0, len(args.waypoints), 2) + ] + + rclpy.init() + + try: + connector = ROS2Connector( + executor_type="multi_threaded", + use_sim_time=args.use_sim_time, + ) + + # Configure Python logging to forward to ROS2 logger + handler = ROS2LogHandler(connector.node) + handler.setLevel(logging.DEBUG) + python_logger = logging.getLogger("rai_semap") + python_logger.setLevel(logging.DEBUG) + python_logger.handlers.clear() + python_logger.addHandler(handler) + python_logger.propagate = False + + collector = NavigationCollector(connector) + + # Navigate to waypoints + collector.navigate_to_waypoints(waypoints) + + # Brief wait to ensure final detections are processed + # (detections are collected in real-time during navigation, so this is minimal) + if args.collect_duration > 0: + collector.collect_detections(duration_seconds=args.collect_duration) + + connector.node.get_logger().info("Navigation completed") + + except KeyboardInterrupt: + pass + finally: + rclpy.shutdown() + + +if __name__ == "__main__": + main() diff --git a/src/rai_semap/rai_semap/scripts/semantic_map.rviz b/src/rai_semap/rai_semap/scripts/semantic_map.rviz new file mode 100644 index 000000000..728e80998 --- /dev/null +++ b/src/rai_semap/rai_semap/scripts/semantic_map.rviz @@ -0,0 +1,179 @@ +Panels: + - Class: rviz_common/Displays + Help Height: 78 + Name: Displays + Property Tree Widget: + Expanded: + - /Global Options1 + - /Status1 + - /Map1 + - /MarkerArray1 + Splitter Ratio: 0.5 + Tree Height: 560 + - Class: rviz_common/Selection + Name: Selection + - Class: rviz_common/Tool Properties + Expanded: + - /2D Goal Pose1 + - /Publish Point1 + Name: Tool Properties + Splitter Ratio: 0.5886790156364441 + - Class: rviz_common/Views + Expanded: + - /Current View1 + Name: Views + Splitter Ratio: 0.5 + - Class: rviz_common/Time + Experimental: false + Name: Time + SyncMode: 0 + SyncSource: "" +Visualization Manager: + Class: "" + Displays: + - Alpha: 0.5 + Cell Size: 1 + Class: rviz_default_plugins/Grid + Color: 160; 160; 164 + Enabled: true + Line Style: + Line Width: 0.029999999329447746 + Value: Lines + Name: Grid + Normal Cell Count: 0 + Offset: + X: 0 + Y: 0 + Z: 0 + Plane: XY + Plane Cell Count: 10 + Reference Frame: + Value: true + - Alpha: 0.699999988079071 + Binary representation: false + Binary threshold: 100 + Class: rviz_default_plugins/Map + Color Scheme: map + Draw Behind: false + Enabled: true + Name: Map + Topic: + Depth: 5 + Durability Policy: Volatile + Filter size: 10 + History Policy: Keep Last + Reliability Policy: Reliable + Value: /map + Update Topic: + Depth: 5 + Durability Policy: Volatile + History Policy: Keep Last + Reliability Policy: Reliable + Value: /map_updates + Use Timestamp: false + Value: true + - Class: rviz_default_plugins/MarkerArray + Enabled: true + Name: MarkerArray + Namespaces: + bed: true + bed_text: true + chair: true + chair_text: true + door: true + door_text: true + shelf: true + shelf_text: true + table: true + table_text: true + Topic: + Depth: 5 + Durability Policy: Volatile + History Policy: Keep Last + Reliability Policy: Reliable + Value: /semantic_map_markers + Value: true
+ Enabled: true + Global Options: + Background Color: 48; 48; 48 + Fixed Frame: map + Frame Rate: 30 + Name: root + Tools: + - Class: rviz_default_plugins/Interact + Hide Inactive Objects: true + - Class: rviz_default_plugins/MoveCamera + - Class: rviz_default_plugins/Select + - Class: rviz_default_plugins/FocusCamera + - Class: rviz_default_plugins/Measure + Line color: 128; 128; 0 + - Class: rviz_default_plugins/SetInitialPose + Covariance x: 0.25 + Covariance y: 0.25 + Covariance yaw: 0.06853891909122467 + Topic: + Depth: 5 + Durability Policy: Volatile + History Policy: Keep Last + Reliability Policy: Reliable + Value: /initialpose + - Class: rviz_default_plugins/SetGoal + Topic: + Depth: 5 + Durability Policy: Volatile + History Policy: Keep Last + Reliability Policy: Reliable + Value: /goal_pose + - Class: rviz_default_plugins/PublishPoint + Single click: true + Topic: + Depth: 5 + Durability Policy: Volatile + History Policy: Keep Last + Reliability Policy: Reliable + Value: /clicked_point + Transformation: + Current: + Class: rviz_default_plugins/TF + Value: true + Views: + Current: + Class: rviz_default_plugins/Orbit + Distance: 25.51291275024414 + Enable Stereo Rendering: + Stereo Eye Separation: 0.05999999865889549 + Stereo Focal Distance: 1 + Swap Stereo Eyes: false + Value: false + Focal Point: + X: 0 + Y: 0 + Z: 0 + Focal Shape Fixed Size: true + Focal Shape Size: 0.05000000074505806 + Invert Z Axis: false + Name: Current View + Near Clip Distance: 0.009999999776482582 + Pitch: 0.6303982138633728 + Target Frame: + Value: Orbit (rviz) + Yaw: 0.7904001474380493 + Saved: ~ +Window Geometry: + Displays: + collapsed: false + Height: 846 + Hide Left Dock: false + Hide Right Dock: false + QMainWindow State: 000000ff00000000fd000000040000000000000153000002b8fc0200000008fb0000001200530065006c0065006300740069006f006e00000001e10000009b0000005300fffffffb0000001e0054006f006f006c002000500072006f007000650072007400690065007302000001ed000001df00000185000000a3fb000000120056006900650077007300200054006f006f02000001df000002110000018500000122fb000000200054006f006f006c002000500072006f0070006500720074006900650073003203000002880000011d000002210000017afb000000100044006900730070006c0061007900730100000036000002b8000000b400fffffffb0000002000730065006c0065006300740069006f006e00200062007500660066006500720200000138000000aa0000023a00000294fb00000014005700690064006500530074006500720065006f02000000e6000000d2000003ee0000030bfb0000000c004b0069006e0065006300740200000186000001060000030c00000261000000010000010f000002b8fc0200000003fb0000001e0054006f006f006c002000500072006f00700065007200740069006500730100000041000000780000000000000000fb0000000a005600690065007700730100000036000002b80000009800fffffffb0000001200530065006c0065006300740069006f006e010000025a000000b200000000000000000000000200000490000000a9fc0100000001fb0000000a00560069006500770073030000004e00000080000002e10000019700000003000004b00000003efc0100000002fb0000000800540069006d00650100000000000004b00000022700fffffffb0000000800540069006d0065010000000000000450000000000000000000000244000002b800000004000000040000000800000008fc0000000100000002000000010000000a0054006f006f006c00730100000000ffffffff0000000000000000 + Selection: + collapsed: false + Time: + collapsed: false + Tool Properties: + collapsed: false + Views: + collapsed: false + Width: 1200 + X: 333 + Y: 269 diff --git a/src/rai_semap/rai_semap/scripts/semap.launch.py b/src/rai_semap/rai_semap/scripts/semap.launch.py new file mode 100644 index 000000000..ef7f0a318 --- /dev/null +++ 
b/src/rai_semap/rai_semap/scripts/semap.launch.py @@ -0,0 +1,90 @@ +# Copyright (C) 2025 Julia Jia +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from launch import LaunchDescription +from launch.actions import DeclareLaunchArgument, ExecuteProcess, OpaqueFunction + + +def generate_detection_publisher_cmd(context): + """Generate detection publisher command with conditional parameters.""" + detection_publisher_config = context.launch_configurations.get( + "detection_publisher_config", "" + ) + perception_utils_config = context.launch_configurations.get( + "perception_utils_config", "" + ) + + cmd = [ + "python", + "-m", + "rai_semap.ros2.detection_publisher", + "--ros-args", + ] + + # Add config file parameters only if provided + if detection_publisher_config: + cmd.extend(["-p", f"detection_publisher_config:={detection_publisher_config}"]) + if perception_utils_config: + cmd.extend(["-p", f"perception_utils_config:={perception_utils_config}"]) + + return [ExecuteProcess(cmd=cmd, output="screen")] + + +def generate_semap_cmd(context): + """Generate semantic map node command with conditional parameters.""" + node_config = context.launch_configurations.get("node_config", "") + + cmd = [ + "python", + "-m", + "rai_semap.ros2.node", + "--ros-args", + ] + + # Add config file parameter only if provided + if node_config: + cmd.extend(["-p", f"node_config:={node_config}"]) + + return [ExecuteProcess(cmd=cmd, output="screen")] + + +def generate_launch_description(): + # Declare launch arguments + node_config_arg = DeclareLaunchArgument( + "node_config", + default_value="", + description="Path to node YAML config file (empty = use default in config/)", + ) + + detection_publisher_config_arg = DeclareLaunchArgument( + "detection_publisher_config", + default_value="", + description="Path to detection_publisher YAML config file (empty = use default in config/)", + ) + + perception_utils_config_arg = DeclareLaunchArgument( + "perception_utils_config", + default_value="", + description="Path to perception_utils YAML config file (empty = use default in config/)", + ) + + return LaunchDescription( + [ + node_config_arg, + detection_publisher_config_arg, + perception_utils_config_arg, + OpaqueFunction(function=generate_detection_publisher_cmd), + OpaqueFunction(function=generate_semap_cmd), + ] + ) diff --git a/src/rai_semap/rai_semap/scripts/validate_semap.py b/src/rai_semap/rai_semap/scripts/validate_semap.py new file mode 100644 index 000000000..7dbedf3e8 --- /dev/null +++ b/src/rai_semap/rai_semap/scripts/validate_semap.py @@ -0,0 +1,168 @@ +# Copyright (C) 2025 Julia Jia +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import argparse +import sys +from collections import defaultdict +from pathlib import Path + +from geometry_msgs.msg import Point + +from rai_semap.core.backend.sqlite_backend import SQLiteBackend +from rai_semap.core.semantic_map_memory import SemanticMapMemory + + +def validate_database( + database_path: str, location_id: str = "default_location" +) -> bool: + """Validate stored data in semantic map database. + + Args: + database_path: Path to SQLite database file. + location_id: Location identifier to query. + + Returns: + True if validation passes, False otherwise. + """ + if not Path(database_path).exists(): + print(f"ERROR: Database file not found: {database_path}") + return False + + try: + backend = SQLiteBackend(database_path) + memory = SemanticMapMemory( + backend=backend, + location_id=location_id, + map_frame_id="map", + resolution=0.05, + ) + except Exception as e: + print(f"ERROR: Failed to initialize memory: {e}") + return False + + print(f"Validating semantic map database: {database_path}") + print(f"Location ID: {location_id}") + print("-" * 60) + + # Get map metadata + try: + metadata = memory.get_map_metadata() + print(f"Map Frame ID: {metadata.map_frame_id}") + print(f"Map Resolution: {metadata.resolution} m/pixel") + print(f"Last Updated: {metadata.last_updated}") + except Exception as e: + print(f"WARNING: Failed to get map metadata: {e}") + + print("-" * 60) + + # Query all annotations + center = Point(x=0.0, y=0.0, z=0.0) + all_annotations = memory.query_by_location( + center, radius=1e10, location_id=location_id + ) + + if not all_annotations: + print("WARNING: No annotations found in database") + return False + + print(f"Total annotations: {len(all_annotations)}") + print("-" * 60) + + # Group by object class + class_counts = defaultdict(int) + confidence_sum = defaultdict(float) + detection_sources = defaultdict(set) + + for ann in all_annotations: + class_counts[ann.object_class] += 1 + confidence_sum[ann.object_class] += ann.confidence + detection_sources[ann.object_class].add(ann.detection_source) + + print("Annotations by class:") + for obj_class in sorted(class_counts.keys()): + count = class_counts[obj_class] + avg_confidence = confidence_sum[obj_class] / count + sources = ", ".join(sorted(detection_sources[obj_class])) + print( + f" {obj_class}: {count} annotations, avg confidence: {avg_confidence:.3f}, sources: {sources}" + ) + + print("-" * 60) + + # Check for required fields + print("Validating annotation fields...") + all_valid = True + + for ann in all_annotations: + if not ann.object_class: + print(f"ERROR: Annotation {ann.id} has empty object_class") + all_valid = False + if ann.confidence < 0.0 or ann.confidence > 1.0: + print( + f"ERROR: Annotation {ann.id} has invalid confidence: {ann.confidence}" + ) + all_valid = False + if not ann.detection_source: + print(f"WARNING: Annotation {ann.id} has empty detection_source") + if not ann.source_frame: + print(f"WARNING: Annotation {ann.id} has empty source_frame") + + if all_valid: + print("All annotations have valid required fields") + + print("-" * 60) + + # Spatial distribution + if 
all_annotations: + x_coords = [ann.pose.position.x for ann in all_annotations] + y_coords = [ann.pose.position.y for ann in all_annotations] + + print("Spatial distribution:") + print(f" X range: [{min(x_coords):.2f}, {max(x_coords):.2f}]") + print(f" Y range: [{min(y_coords):.2f}, {max(y_coords):.2f}]") + print( + f" Mean position: ({sum(x_coords) / len(x_coords):.2f}, {sum(y_coords) / len(y_coords):.2f})" + ) + + print("-" * 60) + print("Validation complete") + + return True + + +def main(): + parser = argparse.ArgumentParser( + description="Validate stored data in semantic map database" + ) + parser.add_argument( + "--database-path", + type=str, + default="semantic_map.db", + help="Path to SQLite database file", + ) + parser.add_argument( + "--location-id", + type=str, + default="default_location", + help="Location identifier to query", + ) + + args = parser.parse_args() + + success = validate_database(args.database_path, args.location_id) + sys.exit(0 if success else 1) + + +if __name__ == "__main__": + main() diff --git a/src/rai_semap/rai_semap/tool/__init__.py b/src/rai_semap/rai_semap/tool/__init__.py new file mode 100644 index 000000000..c8b4abf7f --- /dev/null +++ b/src/rai_semap/rai_semap/tool/__init__.py @@ -0,0 +1,13 @@ +# Copyright (C) 2025 Julia Jia +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/src/rai_semap/rai_semap/tool/query_semantic_map_tool.py b/src/rai_semap/rai_semap/tool/query_semantic_map_tool.py new file mode 100644 index 000000000..fdb944b25 --- /dev/null +++ b/src/rai_semap/rai_semap/tool/query_semantic_map_tool.py @@ -0,0 +1,72 @@ +# Copyright (C) 2025 Julia Jia +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
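`validate_database` above is also importable, which makes it easy to gate CI on a collected database. A minimal sketch (the database path is illustrative; the module path follows the package layout in this diff):

```python
# Hypothetical CI gate reusing validate_semap.py's entry point.
from rai_semap.scripts.validate_semap import validate_database

ok = validate_database("semantic_map.db", location_id="default_location")
raise SystemExit(0 if ok else 1)
```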
+ +from typing import Optional, Type + +from langchain_core.tools import BaseTool +from pydantic import BaseModel, Field + +from rai_semap.core.semantic_map_memory import SemanticMapMemory + + +class QuerySemanticMapToolInput(BaseModel): + """Input schema for QuerySemanticMapTool.""" + + query: str = Field(description="Natural language query about object locations") + room: Optional[str] = Field( + default=None, description="Optional room or region name" + ) + + +class QuerySemanticMapTool(BaseTool): + """Tool for querying semantic map for object locations.""" + + name: str = "query_semantic_map" + description: str = "Query the semantic map for object locations" + + args_schema: Type[QuerySemanticMapToolInput] = QuerySemanticMapToolInput + + memory: SemanticMapMemory + + def _run(self, query: str, room: Optional[str] = None) -> str: + """Execute semantic map query.""" + pass + + +class GetSeenObjectsToolInput(BaseModel): + """Input schema for GetSeenObjectsTool.""" + + location_id: Optional[str] = Field( + default=None, + description="Optional location ID. If not provided, uses the memory's default location.", + ) + + +class GetSeenObjectsTool(BaseTool): + """Tool for retrieving object types previously seen in a location.""" + + name: str = "get_seen_objects" + description: str = ( + "Get a list of object types (e.g., 'bottle', 'cup', 'table') that have " + "been previously seen in a location. Useful for discovering what objects " + "exist before querying for specific instances." + ) + + args_schema: Type[GetSeenObjectsToolInput] = GetSeenObjectsToolInput + + memory: SemanticMapMemory + + def _run(self, location_id: Optional[str] = None) -> str: + """Get list of distinct object classes seen in a location.""" + pass diff --git a/src/rai_semap/rai_semap/utils/__init__.py b/src/rai_semap/rai_semap/utils/__init__.py new file mode 100644 index 000000000..23cecced4 --- /dev/null +++ b/src/rai_semap/rai_semap/utils/__init__.py @@ -0,0 +1,15 @@ +# Copyright (C) 2025 Julia Jia +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Utility scripts for rai_semap.""" diff --git a/src/rai_semap/rai_semap/utils/clear_annotations.py b/src/rai_semap/rai_semap/utils/clear_annotations.py new file mode 100644 index 000000000..22824d2a8 --- /dev/null +++ b/src/rai_semap/rai_semap/utils/clear_annotations.py @@ -0,0 +1,102 @@ +# Copyright (C) 2025 Julia Jia +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
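Both `_run` methods above are left as stubs in this revision, so the tools currently return `None` despite their `-> str` annotations. One possible shape for `GetSeenObjectsTool._run`, sketched on top of the `query_by_location` API used elsewhere in this PR (the wide-radius query mirrors `validate_semap.py`; the formatting choices are illustrative, not part of the patch):

```python
from collections import Counter

from geometry_msgs.msg import Point


def _run(self, location_id: Optional[str] = None) -> str:
    """Sketch: list the distinct object classes recorded for a location."""
    loc = location_id or self.memory.location_id
    # Wide-radius spatial query, the same trick validate_semap.py uses
    # to fetch every annotation for a location.
    annotations = self.memory.query_by_location(
        Point(x=0.0, y=0.0, z=0.0), radius=1e10, location_id=loc
    )
    if not annotations:
        return f"No objects have been seen in location '{loc}'."
    counts = Counter(ann.object_class for ann in annotations)
    listing = "\n".join(f"- {cls} (x{n})" for cls, n in sorted(counts.items()))
    return f"Object types seen in '{loc}':\n{listing}"
```

(`Optional` is already imported at the top of the file; as a method this would replace the `pass` body.)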
+ +"""Utility script to clear all annotations from semantic map database.""" + +import argparse +import sys + +from geometry_msgs.msg import Point + +from rai_semap.core.backend.sqlite_backend import SQLiteBackend +from rai_semap.core.semantic_map_memory import SemanticMapMemory + + +def main(): + parser = argparse.ArgumentParser( + description="Clear all annotations from semantic map database" + ) + parser.add_argument( + "--database-path", + type=str, + default="semantic_map.db", + help="Path to SQLite database file (default: semantic_map.db)", + ) + parser.add_argument( + "--location-id", + type=str, + default=None, + help="If provided, only delete annotations for this location. " + "If not provided, delete all annotations.", + ) + parser.add_argument( + "--yes", + action="store_true", + help="Skip confirmation prompt", + ) + + args = parser.parse_args() + + # Initialize backend and memory + backend = SQLiteBackend(args.database_path) + backend.init_schema() + + # Query to get count before deletion + if args.location_id: + center = Point(x=0.0, y=0.0, z=0.0) + filters = {"location_id": args.location_id} + existing = backend.spatial_query(center, radius=1e10, filters=filters) + count = len(existing) + location_msg = f" for location_id='{args.location_id}'" + else: + center = Point(x=0.0, y=0.0, z=0.0) + existing = backend.spatial_query(center, radius=1e10, filters={}) + count = len(existing) + location_msg = "" + + if count == 0: + print(f"No annotations found{location_msg} in database: {args.database_path}") + return 0 + + # Confirmation prompt + if not args.yes: + if args.location_id: + prompt = ( + f"Are you sure you want to delete {count} annotation(s) " + f"for location_id='{args.location_id}' from {args.database_path}? [y/N]: " + ) + else: + prompt = ( + f"Are you sure you want to delete ALL {count} annotation(s) " + f"from {args.database_path}? [y/N]: " + ) + response = input(prompt) + if response.lower() not in ["y", "yes"]: + print("Cancelled.") + return 0 + + # Delete annotations + memory = SemanticMapMemory( + backend=backend, + location_id=args.location_id or "default_location", + ) + + deleted_count = memory.delete_all_annotations(location_id=args.location_id) + + print(f"✓ Successfully deleted {deleted_count} annotation(s){location_msg}") + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/src/rai_semap/rai_semap/utils/ros2_log.py b/src/rai_semap/rai_semap/utils/ros2_log.py new file mode 100644 index 000000000..f402c25ec --- /dev/null +++ b/src/rai_semap/rai_semap/utils/ros2_log.py @@ -0,0 +1,36 @@ +# Copyright (C) 2025 Julia Jia +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import logging + +from rclpy.node import Node + + +class ROS2LogHandler(logging.Handler): + """Log handler that forwards Python logging to ROS2 logger.""" + + def __init__(self, ros2_node: Node): + super().__init__() + self.ros2_node = ros2_node + + def emit(self, record): + log_msg = self.format(record) + if record.levelno >= logging.ERROR: + self.ros2_node.get_logger().error(log_msg) + elif record.levelno >= logging.WARNING: + self.ros2_node.get_logger().warning(log_msg) + elif record.levelno >= logging.INFO: + self.ros2_node.get_logger().info(log_msg) + else: + self.ros2_node.get_logger().debug(log_msg) diff --git a/tests/rai_semap/__init__.py b/tests/rai_semap/__init__.py new file mode 100644 index 000000000..c8b4abf7f --- /dev/null +++ b/tests/rai_semap/__init__.py @@ -0,0 +1,13 @@ +# Copyright (C) 2025 Julia Jia +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/tests/rai_semap/conftest.py b/tests/rai_semap/conftest.py new file mode 100644 index 000000000..ab4919767 --- /dev/null +++ b/tests/rai_semap/conftest.py @@ -0,0 +1,90 @@ +# Copyright (C) 2025 Julia Jia +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
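`ROS2LogHandler` above is what `visualizer.py` and `navigate_collect.py` wire up so that `logging.getLogger("rai_semap")` records surface through rosout. The pattern in isolation (node name hypothetical):

```python
import logging

import rclpy
from rclpy.node import Node

from rai_semap.utils.ros2_log import ROS2LogHandler

rclpy.init()
node = Node("log_bridge_demo")  # hypothetical node name

# Same wiring used by SemanticMapVisualizer and navigate_collect.py:
# route the package logger through the node's ROS2 logger exactly once.
handler = ROS2LogHandler(node)
handler.setLevel(logging.DEBUG)
logger = logging.getLogger("rai_semap")
logger.setLevel(logging.DEBUG)
logger.handlers.clear()   # drop any handlers from a previous init
logger.addHandler(handler)
logger.propagate = False  # keep records out of the root logger

logger.warning("this line shows up as a ROS2 WARN message")
rclpy.shutdown()
```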
+ +import sys +import tempfile +from pathlib import Path +from typing import Optional + +import pytest +import rclpy +from rai.types import Pose + +# Add src/rai_semap to Python path +rai_semap_path = Path(__file__).parent.parent.parent / "src" / "rai_semap" +sys.path.insert(0, str(rai_semap_path)) + +from rai_semap.core.semantic_map_memory import SemanticAnnotation # noqa: E402 + +# Common test constants +TEST_LOCATION_ID = "test_location" +TEST_DETECTION_SOURCE = "GroundingDINO" +TEST_SOURCE_FRAME = "camera_frame" +TEST_BASE_TIMESTAMP = 1234567890 + + +@pytest.fixture(scope="module") +def ros2_context(): + """Initialize ROS2 context for testing.""" + rclpy.init() + yield + rclpy.shutdown() + + +@pytest.fixture +def temp_db_path(): + """Create a temporary database path for testing.""" + with tempfile.NamedTemporaryFile(suffix=".db", delete=False) as f: + db_path = f.name + yield db_path + Path(db_path).unlink(missing_ok=True) + + +def make_pose(x: float, y: float, z: float = 0.0) -> Pose: + """Create a Pose with specified position.""" + pose = Pose() + pose.position.x = x + pose.position.y = y + pose.position.z = z + return pose + + +def make_annotation( + annotation_id: str, + object_class: str, + x: float, + y: float, + z: float = 0.0, + confidence: float = 0.9, + timestamp: float = TEST_BASE_TIMESTAMP, + detection_source: str = TEST_DETECTION_SOURCE, + source_frame: str = TEST_SOURCE_FRAME, + location_id: str = TEST_LOCATION_ID, + vision_detection_id: Optional[str] = None, + metadata: Optional[dict] = None, +) -> SemanticAnnotation: + """Create a SemanticAnnotation with common defaults.""" + pose = make_pose(x, y, z) + return SemanticAnnotation( + id=annotation_id, + object_class=object_class, + pose=pose, + confidence=confidence, + timestamp=timestamp, + detection_source=detection_source, + source_frame=source_frame, + location_id=location_id, + vision_detection_id=vision_detection_id, + metadata=metadata, + ) diff --git a/tests/rai_semap/test_backend.py b/tests/rai_semap/test_backend.py new file mode 100644 index 000000000..90db9ba29 --- /dev/null +++ b/tests/rai_semap/test_backend.py @@ -0,0 +1,145 @@ +# Copyright (C) 2025 Julia Jia +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
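The constants, fixtures, and factories above keep the test modules terse. A representative round-trip using them (a hypothetical test; `test_backend.py` below exercises the same pattern through its `backend` fixture):

```python
from rai_semap.core.backend.sqlite_backend import SQLiteBackend

from .conftest import make_annotation


def test_round_trip(temp_db_path):
    # temp_db_path comes from conftest.py and is unlinked after the test
    backend = SQLiteBackend(temp_db_path)
    backend.init_schema()
    ann = make_annotation("ann-1", "chair", 0.5, -1.2, confidence=0.8)
    assert backend.insert_annotation(ann) == "ann-1"
```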
+ +import pytest +from rai.types import Point + +from rai_semap.core.backend.sqlite_backend import SQLiteBackend + +from .conftest import make_annotation + + +@pytest.fixture +def backend(temp_db_path): + """Create a SQLiteBackend instance for testing.""" + backend = SQLiteBackend(temp_db_path) + backend.init_schema() + return backend + + +def test_backend_init_schema(backend): + """Test that schema initialization is idempotent (the fixture already ran it once).""" + backend.init_schema() + + +def test_backend_insert_annotation(backend): + """Test inserting a single annotation.""" + annotation = make_annotation("test-id", "cup", 1.0, 2.0) + annotation_id = backend.insert_annotation(annotation) + assert annotation_id == "test-id" + assert isinstance(annotation_id, str) + + +def test_backend_insert_and_query(backend): + """Test inserting annotation and querying to verify persistence.""" + annotation = make_annotation("test-id-1", "cup", 1.0, 2.0) + backend.insert_annotation(annotation) + + center = Point(x=1.0, y=2.0, z=0.0) + results = backend.spatial_query(center, radius=0.5) + assert len(results) == 1 + assert results[0].id == "test-id-1" + assert results[0].object_class == "cup" + assert results[0].confidence == 0.9 + assert results[0].pose.position.x == 1.0 + assert results[0].pose.position.y == 2.0 + assert results[0].location_id == "test_location" + + +def test_backend_query_by_class_filter(backend): + """Test querying with object_class filter.""" + annotation1 = make_annotation("cup-1", "cup", 1.0, 1.0) + annotation2 = make_annotation( + "bottle-1", "bottle", 2.0, 2.0, confidence=0.8, timestamp=1234567891 + ) + + backend.insert_annotation(annotation1) + backend.insert_annotation(annotation2) + + center = Point(x=0.0, y=0.0, z=0.0) + results = backend.spatial_query( + center, radius=10.0, filters={"object_class": "cup"} + ) + assert len(results) == 1 + assert results[0].object_class == "cup" + assert results[0].id == "cup-1" + + +def test_backend_query_by_confidence_threshold(backend): + """Test querying with confidence threshold filter.""" + annotation1 = make_annotation("high-conf", "cup", 1.0, 1.0, confidence=0.9) + annotation2 = make_annotation( + "low-conf", "cup", 2.0, 2.0, confidence=0.3, timestamp=1234567891 + ) + + backend.insert_annotation(annotation1) + backend.insert_annotation(annotation2) + + center = Point(x=0.0, y=0.0, z=0.0) + results = backend.spatial_query( + center, radius=10.0, filters={"confidence_threshold": 0.5} + ) + assert len(results) == 1 + assert results[0].id == "high-conf" + assert results[0].confidence >= 0.5 + + +def test_backend_update_annotation(backend): + """Test updating an existing annotation.""" + annotation = make_annotation("test-id", "cup", 1.0, 2.0, confidence=0.7) + backend.insert_annotation(annotation) + + updated_annotation = make_annotation( + "test-id", + "cup", + 1.5, + 2.5, + confidence=0.95, + timestamp=1234567900, + detection_source="GroundedSAM", + ) + success = backend.update_annotation(updated_annotation) + assert success is True + + center = Point(x=1.0, y=2.0, z=0.0) + results = backend.spatial_query(center, radius=1.0) + assert len(results) == 1 + assert results[0].confidence == 0.95 + assert results[0].pose.position.x == 1.5 + assert results[0].detection_source == "GroundedSAM" + + +def test_backend_delete_annotation(backend): + """Test deleting an annotation.""" + annotation = make_annotation("test-id", "cup", 1.0, 2.0) + backend.insert_annotation(annotation) + + success = backend.delete_annotation("test-id") + assert success is True + + center = 
Point(x=1.0, y=2.0, z=0.0) + results = backend.spatial_query(center, radius=1.0) + assert len(results) == 0 + + success = backend.delete_annotation("non-existent") + assert success is False + + +def test_backend_spatial_query(backend): + """Test spatial query returns list of annotations.""" + center = Point(x=0.0, y=0.0, z=0.0) + radius = 1.0 + results = backend.spatial_query(center, radius) + assert isinstance(results, list) + assert len(results) == 0 diff --git a/tests/rai_semap/test_detection_publisher.py b/tests/rai_semap/test_detection_publisher.py new file mode 100644 index 000000000..5cd4acbcb --- /dev/null +++ b/tests/rai_semap/test_detection_publisher.py @@ -0,0 +1,122 @@ +# Copyright (C) 2025 Julia Jia +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pytest +import rclpy +from rai.communication.ros2 import ROS2Connector + +from rai_semap.ros2.detection_publisher import DetectionPublisher + + +@pytest.fixture(scope="module") +def ros2_context(): + """Initialize ROS2 context for testing.""" + rclpy.init() + yield + rclpy.shutdown() + + +def set_parameter(node, name: str, value, param_type): + """Helper to set a single parameter on a node.""" + node.connector.node.set_parameters( + [rclpy.parameter.Parameter(name, param_type, value)] + ) + + +@pytest.fixture +def detection_publisher(ros2_context): + """Create a DetectionPublisher instance for testing. + + Uses single_threaded executor to avoid executor performance warnings + in simple unit tests that don't need multi-threaded execution. 
+ """ + connector = ROS2Connector( + node_name="detection_publisher", executor_type="single_threaded" + ) + node = DetectionPublisher(connector=connector) + yield node + node.connector.shutdown() + + +def test_detection_publisher_initialization(detection_publisher): + """Test that DetectionPublisher initializes correctly.""" + assert detection_publisher is not None + assert detection_publisher.connector.node.get_name() == "detection_publisher" + assert detection_publisher.bridge is not None + assert detection_publisher.last_image is None + assert detection_publisher.last_depth_image is None + assert detection_publisher.last_camera_info is None + assert detection_publisher.last_detection_time == 0.0 + + +def test_parse_detection_classes_basic(detection_publisher): + """Test parsing detection classes with basic format.""" + set_parameter( + detection_publisher, + "default_class_threshold", + 0.3, + rclpy.parameter.Parameter.Type.DOUBLE, + ) + + classes_str = "person, cup, bottle" + class_names, class_thresholds = detection_publisher._parse_detection_classes( + classes_str + ) + + assert len(class_names) == 3 + assert set(class_names) == {"person", "cup", "bottle"} + assert all(class_thresholds[cls] == 0.3 for cls in class_names) + + +def test_parse_detection_classes_with_thresholds(detection_publisher): + """Test parsing detection classes with explicit thresholds.""" + set_parameter( + detection_publisher, + "default_class_threshold", + 0.3, + rclpy.parameter.Parameter.Type.DOUBLE, + ) + + classes_str = "person:0.7, cup, bottle:0.4" + class_names, class_thresholds = detection_publisher._parse_detection_classes( + classes_str + ) + + assert len(class_names) == 3 + assert set(class_names) == {"person", "cup", "bottle"} + assert class_thresholds["person"] == 0.7 + assert class_thresholds["cup"] == 0.3 + assert class_thresholds["bottle"] == 0.4 + + +def test_get_string_parameter(detection_publisher): + """Test getting string parameter.""" + set_parameter( + detection_publisher, + "camera_topic", + "/test/camera", + rclpy.parameter.Parameter.Type.STRING, + ) + assert detection_publisher._get_string_parameter("camera_topic") == "/test/camera" + + +def test_get_double_parameter(detection_publisher): + """Test getting double parameter.""" + set_parameter( + detection_publisher, + "default_class_threshold", + 0.5, + rclpy.parameter.Parameter.Type.DOUBLE, + ) + assert detection_publisher._get_double_parameter("default_class_threshold") == 0.5 diff --git a/tests/rai_semap/test_memory.py b/tests/rai_semap/test_memory.py new file mode 100644 index 000000000..983524b54 --- /dev/null +++ b/tests/rai_semap/test_memory.py @@ -0,0 +1,242 @@ +# Copyright (C) 2025 Julia Jia +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from unittest.mock import MagicMock + +import pytest +from geometry_msgs.msg import Point + +from rai_semap.core.backend.sqlite_backend import SQLiteBackend +from rai_semap.core.semantic_map_memory import SemanticAnnotation, SemanticMapMemory + +from .conftest import TEST_LOCATION_ID, make_annotation, make_pose + + +@pytest.fixture +def mock_backend(): + """Create a mock backend for testing.""" + backend = MagicMock(spec=SQLiteBackend) + return backend + + +@pytest.fixture +def memory(mock_backend): + """Create a SemanticMapMemory instance with mock backend.""" + return SemanticMapMemory(mock_backend, location_id=TEST_LOCATION_ID) + + +@pytest.fixture +def real_backend(temp_db_path): + """Create a real SQLiteBackend instance for integration testing.""" + backend = SQLiteBackend(temp_db_path) + backend.init_schema() + return backend + + +@pytest.fixture +def real_memory(real_backend): + """Create a SemanticMapMemory instance with real backend.""" + return SemanticMapMemory(real_backend, location_id=TEST_LOCATION_ID) + + +def test_memory_store_annotation(memory): + """Test storing an annotation returns an ID.""" + memory.backend.insert_annotation.return_value = "test-id" + annotation_id = memory.store_annotation( + object_class="cup", + pose=make_pose(1.0, 2.0), + confidence=0.9, + timestamp=1234567890, + detection_source="GroundingDINO", + source_frame="camera_frame", + location_id=TEST_LOCATION_ID, + ) + assert annotation_id == "test-id" + assert isinstance(annotation_id, str) + memory.backend.insert_annotation.assert_called_once() + call_args = memory.backend.insert_annotation.call_args[0][0] + assert isinstance(call_args, SemanticAnnotation) + assert call_args.object_class == "cup" + assert call_args.location_id == TEST_LOCATION_ID + + +def test_memory_query_by_class(memory): + """Test querying by object class returns list.""" + mock_annotation = make_annotation("test-id", "cup", 0.0, 0.0) + memory.backend.spatial_query.return_value = [mock_annotation] + results = memory.query_by_class("cup") + assert isinstance(results, list) + assert len(results) == 1 + assert results[0].object_class == "cup" + memory.backend.spatial_query.assert_called_once() + call_kwargs = memory.backend.spatial_query.call_args[1] + assert call_kwargs["filters"]["object_class"] == "cup" + assert call_kwargs["filters"]["location_id"] == TEST_LOCATION_ID + + +def test_memory_query_by_location(memory): + """Test querying by location returns list.""" + mock_annotation = make_annotation("test-id", "cup", 0.0, 0.0) + memory.backend.spatial_query.return_value = [mock_annotation] + center = Point(x=1.0, y=2.0, z=0.0) + results = memory.query_by_location(center, radius=1.0) + assert isinstance(results, list) + assert len(results) == 1 + memory.backend.spatial_query.assert_called_once() + call_args = memory.backend.spatial_query.call_args[0] + assert call_args[0] == center + assert call_args[1] == 1.0 + + +def test_memory_query_by_region(memory): + """Test querying by region returns list.""" + mock_annotation = make_annotation("test-id", "cup", 0.0, 0.0) + memory.backend.spatial_query.return_value = [mock_annotation] + bbox = (0.0, 0.0, 2.0, 2.0) + results = memory.query_by_region(bbox) + assert isinstance(results, list) + memory.backend.spatial_query.assert_called_once() + + +def test_memory_get_map_metadata(memory): + """Test getting map metadata returns MapMetadata.""" + memory.backend.spatial_query.return_value = [] + metadata = memory.get_map_metadata() + assert metadata is not None + assert metadata.location_id == 
TEST_LOCATION_ID + assert metadata.map_frame_id == "map" + + +def test_memory_store_or_update_new_annotation(real_memory): + """Test store_or_update_annotation creates new annotation when none nearby.""" + annotation_id = real_memory.store_or_update_annotation( + object_class="cup", + pose=make_pose(1.0, 2.0), + confidence=0.9, + timestamp=1234567890, + detection_source="GroundingDINO", + source_frame="camera_frame", + location_id=TEST_LOCATION_ID, + ) + assert isinstance(annotation_id, str) + assert len(annotation_id) > 0 + + center = Point(x=1.0, y=2.0, z=0.0) + results = real_memory.query_by_location(center, radius=0.5) + assert len(results) == 1 + assert results[0].object_class == "cup" + assert results[0].confidence == 0.9 + + +def test_memory_store_or_update_merges_nearby(real_memory): + """Test store_or_update_annotation merges nearby duplicate detections.""" + annotation_id1 = real_memory.store_or_update_annotation( + object_class="cup", + pose=make_pose(1.0, 2.0), + confidence=0.7, + timestamp=1234567890, + detection_source="GroundingDINO", + source_frame="camera_frame", + location_id=TEST_LOCATION_ID, + ) + + annotation_id2 = real_memory.store_or_update_annotation( + object_class="cup", + pose=make_pose(1.1, 2.1), + confidence=0.9, + timestamp=1234567900, + detection_source="GroundedSAM", + source_frame="camera_frame", + location_id=TEST_LOCATION_ID, + merge_threshold=0.5, + ) + + assert annotation_id1 == annotation_id2 + + center = Point(x=1.0, y=2.0, z=0.0) + results = real_memory.query_by_location(center, radius=0.5) + assert len(results) == 1 + assert results[0].confidence == 0.9 + assert results[0].detection_source == "GroundedSAM" + + +def test_memory_store_or_update_creates_separate_for_different_classes(real_memory): + """Test store_or_update_annotation creates separate annotations for different classes.""" + annotation_id1 = real_memory.store_or_update_annotation( + object_class="cup", + pose=make_pose(1.0, 2.0), + confidence=0.9, + timestamp=1234567890, + detection_source="GroundingDINO", + source_frame="camera_frame", + location_id=TEST_LOCATION_ID, + ) + + annotation_id2 = real_memory.store_or_update_annotation( + object_class="bottle", + pose=make_pose(1.1, 2.1), + confidence=0.8, + timestamp=1234567900, + detection_source="GroundingDINO", + source_frame="camera_frame", + location_id=TEST_LOCATION_ID, + merge_threshold=0.5, + ) + + assert annotation_id1 != annotation_id2 + + center = Point(x=1.0, y=2.0, z=0.0) + results = real_memory.query_by_location(center, radius=0.5) + assert len(results) == 2 + + +def test_memory_end_to_end_store_and_query(real_memory): + """Test end-to-end: store multiple annotations and query by class.""" + real_memory.store_annotation( + object_class="cup", + pose=make_pose(1.0, 1.0), + confidence=0.9, + timestamp=1234567890, + detection_source="GroundingDINO", + source_frame="camera_frame", + location_id=TEST_LOCATION_ID, + ) + + real_memory.store_annotation( + object_class="bottle", + pose=make_pose(2.0, 2.0), + confidence=0.8, + timestamp=1234567891, + detection_source="GroundingDINO", + source_frame="camera_frame", + location_id=TEST_LOCATION_ID, + ) + + real_memory.store_annotation( + object_class="cup", + pose=make_pose(3.0, 3.0), + confidence=0.85, + timestamp=1234567892, + detection_source="GroundedSAM", + source_frame="camera_frame", + location_id=TEST_LOCATION_ID, + ) + + cups = real_memory.query_by_class("cup") + assert len(cups) == 2 + assert all(c.object_class == "cup" for c in cups) + + bottles = 
real_memory.query_by_class("bottle") + assert len(bottles) == 1 + assert bottles[0].object_class == "bottle" diff --git a/tests/rai_semap/test_node.py b/tests/rai_semap/test_node.py new file mode 100644 index 000000000..179643141 --- /dev/null +++ b/tests/rai_semap/test_node.py @@ -0,0 +1,544 @@ +# Copyright (C) 2025 Julia Jia +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import tempfile +from pathlib import Path + +# Testing framework +import pytest + +# ROS2 core +import rclpy + +# ROS2 message types +from geometry_msgs.msg import Pose, Quaternion +from nav_msgs.msg import MapMetaData, OccupancyGrid + +# RAI connector and remaining ROS2 message types +from rai.communication.ros2 import ROS2Connector +from std_msgs.msg import Header +from vision_msgs.msg import Detection2D, ObjectHypothesis, ObjectHypothesisWithPose + +# RAI interfaces and the node under test +from rai_interfaces.msg import RAIDetectionArray +from rai_semap.ros2.node import SemanticMapNode + + +@pytest.fixture(scope="module") +def ros2_context(): + """Initialize ROS2 context for testing.""" + rclpy.init() + yield + rclpy.shutdown() + + +@pytest.fixture +def temp_db_path(): + """Create a temporary database path for testing.""" + with tempfile.NamedTemporaryFile(suffix=".db", delete=False) as f: + db_path = f.name + yield db_path + Path(db_path).unlink(missing_ok=True) + + +@pytest.fixture +def node(ros2_context, temp_db_path): + """Create a SemanticMapNode instance for testing. + + Uses single_threaded executor to avoid executor performance warnings + in simple unit tests that don't need multi-threaded execution. 
+ """ + connector = ROS2Connector( + node_name="rai_semap_node", executor_type="single_threaded" + ) + node = SemanticMapNode(connector=connector, database_path=temp_db_path) + yield node + node.connector.shutdown() + + +# Test helper functions +def create_detection_message(frame_id: str, detections: list) -> RAIDetectionArray: + """Create a RAIDetectionArray message with given detections.""" + msg = RAIDetectionArray() + msg.header = Header() + msg.header.frame_id = frame_id + msg.detections = detections + return msg + + +def create_detection( + frame_id: str, + class_id: str, + score: float, + x: float = 1.0, + y: float = 2.0, + z: float = 0.0, + bbox_size_x: float = 100.0, + bbox_size_y: float = 100.0, +) -> Detection2D: + """Create a Detection2D with ObjectHypothesisWithPose.""" + detection = Detection2D() + detection.header = Header() + detection.header.frame_id = frame_id + + hypothesis = ObjectHypothesis() + hypothesis.class_id = class_id + hypothesis.score = score + + result = ObjectHypothesisWithPose() + result.hypothesis = hypothesis + result.pose.pose = Pose() + result.pose.pose.position.x = x + result.pose.pose.position.y = y + result.pose.pose.position.z = z + result.pose.pose.orientation = Quaternion(w=1.0) + + detection.results = [result] + detection.bbox.size_x = bbox_size_x + detection.bbox.size_y = bbox_size_y + return detection + + +# Tests for refactored methods +def test_validate_and_extract_detection_data_success(node): + """Test successful validation and extraction of detection data.""" + node.connector.node.set_parameters( + [ + rclpy.parameter.Parameter( + "confidence_threshold", rclpy.parameter.Parameter.Type.DOUBLE, 0.5 + ), + rclpy.parameter.Parameter( + "min_bbox_area", rclpy.parameter.Parameter.Type.DOUBLE, 100.0 + ), + ] + ) + + detection = create_detection( + "camera_frame", "cup", score=0.9, bbox_size_x=200.0, bbox_size_y=200.0 + ) + + result = node._validate_and_extract_detection_data( + detection, confidence_threshold=0.5, default_frame_id="camera_frame" + ) + + assert result is not None + object_class, confidence, source_frame, pose = result + assert object_class == "cup" + assert confidence == 0.9 + assert source_frame == "camera_frame" + assert pose.position.x == 1.0 + assert pose.position.y == 2.0 + assert pose.position.z == 0.0 + + +def test_validate_and_extract_detection_data_low_confidence(node): + """Test that low confidence detections are rejected.""" + node.connector.node.set_parameters( + [ + rclpy.parameter.Parameter( + "confidence_threshold", rclpy.parameter.Parameter.Type.DOUBLE, 0.8 + ), + ] + ) + + detection = create_detection("camera_frame", "cup", score=0.5) + + result = node._validate_and_extract_detection_data( + detection, confidence_threshold=0.8, default_frame_id="camera_frame" + ) + + assert result is None + + +def test_validate_and_extract_detection_data_frame_id_fallback(node): + """Test that frame_id fallback works.""" + detection = create_detection("", "cup", score=0.9) + detection.header.frame_id = "" + + result = node._validate_and_extract_detection_data( + detection, confidence_threshold=0.5, default_frame_id="default_frame" + ) + + assert result is not None + _, _, source_frame, _ = result + assert source_frame == "default_frame" + + +def test_validate_and_transform_pose_empty_pose(node): + """Test that empty poses are rejected.""" + pose = Pose() + pose.position.x = 0.0 + pose.position.y = 0.0 + pose.position.z = 0.0 + + detection = create_detection("camera_frame", "cup", score=0.9, x=0.0, y=0.0, z=0.0) + + result = 
node._validate_and_transform_pose( + pose, "camera_frame", "map", "cup", detection + ) + + assert result is None + + +def test_extract_pointcloud_features_disabled(node): + """Test that point cloud extraction returns None when disabled.""" + node.connector.node.set_parameters( + [ + rclpy.parameter.Parameter( + "use_pointcloud_dedup", + rclpy.parameter.Parameter.Type.BOOL, + False, + ), + ] + ) + + detection = create_detection("camera_frame", "cup", score=0.9) + + features, centroid, size = node._extract_pointcloud_features( + detection, "camera_frame", "map" + ) + + assert features is None + assert centroid is None + assert size is None + + +def test_determine_merge_decision_no_nearby(node): + """Test merge decision when no nearby annotations exist.""" + should_merge, existing_id = node._determine_merge_decision( + nearby=[], pointcloud_features=None, pc_size=None, use_pointcloud=False + ) + + assert should_merge is False + assert existing_id is None + + +def test_determine_merge_decision_with_nearby_no_pcl(node): + """Test merge decision with nearby annotation but no point cloud.""" + from rai.types import Point, Pose + + from rai_semap.core.semantic_map_memory import SemanticAnnotation + + existing = SemanticAnnotation( + id="test-id", + object_class="cup", + pose=Pose(position=Point(x=1.0, y=2.0, z=0.0)), + confidence=0.8, + timestamp=1234567890, + detection_source="test", + source_frame="map", + location_id="default_location", + ) + + should_merge, existing_id = node._determine_merge_decision( + nearby=[existing], + pointcloud_features=None, + pc_size=None, + use_pointcloud=False, + ) + + assert should_merge is True + assert existing_id == "test-id" + + +def test_determine_merge_decision_with_pc_size_match(node): + """Test merge decision with point cloud size matching.""" + from rai.types import Point, Pose + + from rai_semap.core.semantic_map_memory import SemanticAnnotation + + existing = SemanticAnnotation( + id="test-id", + object_class="cup", + pose=Pose(position=Point(x=1.0, y=2.0, z=0.0)), + confidence=0.8, + timestamp=1234567890, + detection_source="test", + source_frame="map", + location_id="default_location", + metadata={"pointcloud": {"size_3d": 0.5}}, + ) + + pointcloud_features = {"size_3d": 0.5, "centroid": {}, "point_count": 100} + + should_merge, existing_id = node._determine_merge_decision( + nearby=[existing], + pointcloud_features=pointcloud_features, + pc_size=0.5, + use_pointcloud=True, + ) + + assert should_merge is True + assert existing_id == "test-id" + + +def test_store_or_update_annotation_new(node): + """Test storing a new annotation.""" + import rclpy.time + from rai.types import Point, Pose + + node.connector.node.set_parameters( + [ + rclpy.parameter.Parameter( + "map_frame_id", rclpy.parameter.Parameter.Type.STRING, "camera_frame" + ), + ] + ) + + pose = Pose(position=Point(x=1.0, y=2.0, z=0.0)) + timestamp_ros = rclpy.time.Time() + timestamp = timestamp_ros.nanoseconds / 1e9 # Convert to Unix timestamp (seconds) + + initial_count = len(node.memory.query_by_class("test_object")) + success = node._store_or_update_annotation( + object_class="test_object", + confidence=0.9, + pose_in_map_frame=pose, + pointcloud_centroid_map=None, + pointcloud_features=None, + pc_size=None, + timestamp=timestamp, + detection_source="test", + source_frame="camera_frame", + vision_detection_id=None, + ) + + assert success is True + final_count = len(node.memory.query_by_class("test_object")) + assert final_count == initial_count + 1 + + +def 
test_store_or_update_annotation_update_existing(node): + """Test updating an existing annotation.""" + import rclpy.time + from rai.types import Point, Pose + + node.connector.node.set_parameters( + [ + rclpy.parameter.Parameter( + "map_frame_id", rclpy.parameter.Parameter.Type.STRING, "camera_frame" + ), + ] + ) + + # First, store an annotation + pose1 = Pose(position=Point(x=1.0, y=2.0, z=0.0)) + timestamp_ros = rclpy.time.Time() + timestamp = timestamp_ros.nanoseconds / 1e9 # Convert to Unix timestamp (seconds) + + node._store_or_update_annotation( + object_class="test_object", + confidence=0.8, + pose_in_map_frame=pose1, + pointcloud_centroid_map=None, + pointcloud_features=None, + pc_size=None, + timestamp=timestamp, + detection_source="test", + source_frame="camera_frame", + vision_detection_id=None, + ) + + # Now update it with higher confidence + pose2 = Pose(position=Point(x=1.01, y=2.01, z=0.0)) + success = node._store_or_update_annotation( + object_class="test_object", + confidence=0.95, + pose_in_map_frame=pose2, + pointcloud_centroid_map=None, + pointcloud_features=None, + pc_size=None, + timestamp=timestamp, + detection_source="test", + source_frame="camera_frame", + vision_detection_id=None, + ) + + assert success is True + annotations = node.memory.query_by_class("test_object") + assert len(annotations) == 1 + assert annotations[0].confidence == 0.95 + + +def test_node_creation(node): + """Test that SemanticMapNode can be created with default parameters.""" + assert isinstance(node.connector, ROS2Connector) + assert node.connector.node.get_name() == "rai_semap_node" + + expected_params = [ + "backend_type", + "database_path", + "confidence_threshold", + "detection_topic", + "map_topic", + "map_frame_id", + "location_id", + "map_resolution", + ] + for param in expected_params: + assert node.connector.node.has_parameter(param) + + +def test_node_parameter_defaults(node, temp_db_path): + """Test that node parameters have correct default values.""" + assert ( + node.connector.node.get_parameter("backend_type") + .get_parameter_value() + .string_value + == "sqlite" + ) + assert ( + node.connector.node.get_parameter("database_path") + .get_parameter_value() + .string_value + == temp_db_path + ) + assert ( + node.connector.node.get_parameter("confidence_threshold") + .get_parameter_value() + .double_value + == 0.5 + ) + assert ( + node.connector.node.get_parameter("detection_topic") + .get_parameter_value() + .string_value + == "/detection_array" + ) + assert ( + node.connector.node.get_parameter("map_topic") + .get_parameter_value() + .string_value + == "/map" + ) + assert ( + node.connector.node.get_parameter("map_frame_id") + .get_parameter_value() + .string_value + == "map" + ) + assert ( + node.connector.node.get_parameter("location_id") + .get_parameter_value() + .string_value + == "default_location" + ) + assert ( + node.connector.node.get_parameter("map_resolution") + .get_parameter_value() + .double_value + == 0.05 + ) + + +def test_node_memory_initialization(node): + """Test that semantic map memory is properly initialized.""" + assert node.memory is not None + assert node.memory.location_id == "default_location" + assert node.memory.map_frame_id == "map" + assert node.memory.resolution == 0.05 + + +def test_node_subscriptions_created(node): + """Test that subscriptions are created.""" + assert node.detection_subscription is not None + assert node.map_subscription is not None + + +def test_detection_callback_low_confidence(node): + """Test that detections below 
confidence threshold are filtered out.""" + node.connector.node.set_parameters( + [ + rclpy.parameter.Parameter( + "confidence_threshold", rclpy.parameter.Parameter.Type.DOUBLE, 0.8 + ), + ] + ) + + detection = create_detection("camera_frame", "cup", score=0.5) + msg = create_detection_message("camera_frame", [detection]) + + initial_count = len(node.memory.query_by_class("cup")) + node.detection_callback(msg) + final_count = len(node.memory.query_by_class("cup")) + + assert final_count == initial_count + + +def test_detection_callback_high_confidence(node): + """Test that high-confidence detections are processed.""" + node.connector.node.set_parameters( + [ + rclpy.parameter.Parameter( + "confidence_threshold", rclpy.parameter.Parameter.Type.DOUBLE, 0.5 + ), + rclpy.parameter.Parameter( + "map_frame_id", rclpy.parameter.Parameter.Type.STRING, "camera_frame" + ), + ] + ) + + detection = create_detection("camera_frame", "bottle", score=0.9) + msg = create_detection_message("GroundingDINO", [detection]) + + initial_count = len(node.memory.query_by_class("bottle")) + node.detection_callback(msg) + final_count = len(node.memory.query_by_class("bottle")) + + assert final_count >= initial_count + + +def test_map_callback_updates_metadata(node): + """Test that map callback updates metadata.""" + node.connector.node.set_parameters( + [ + rclpy.parameter.Parameter( + "map_frame_id", rclpy.parameter.Parameter.Type.STRING, "map" + ), + ] + ) + + msg = OccupancyGrid() + msg.header = Header() + msg.header.frame_id = "map" + msg.info = MapMetaData() + msg.info.resolution = 0.1 + + initial_resolution = node.memory.resolution + node.map_callback(msg) + + assert node.memory.map_frame_id == "map" + assert node.memory.resolution == 0.1 + assert node.memory.resolution != initial_resolution + + +def test_detection_callback_empty_detections(node): + """Test that empty detection arrays are handled gracefully.""" + msg = create_detection_message("camera_frame", []) + node.detection_callback(msg) + + +def test_detection_callback_no_results(node): + """Test that detections without results are skipped.""" + detection = Detection2D() + detection.header = Header() + detection.results = [] + + msg = create_detection_message("camera_frame", [detection]) + + initial_count = len(node.memory.query_by_class("cup")) + node.detection_callback(msg) + final_count = len(node.memory.query_by_class("cup")) + + assert final_count == initial_count diff --git a/tests/rai_semap/test_perception_utils.py b/tests/rai_semap/test_perception_utils.py new file mode 100644 index 000000000..c3fe12fd3 --- /dev/null +++ b/tests/rai_semap/test_perception_utils.py @@ -0,0 +1,113 @@ +# Copyright (C) 2025 Julia Jia +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
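Taken together, the _determine_merge_decision tests above encode a small heuristic: with no nearby annotation there is nothing to merge; with a nearby annotation and no point cloud, merge with the first candidate; with point clouds enabled, additionally require the stored and observed 3D sizes to roughly agree. A hedged sketch of that logic — the real method lives in SemanticMapNode, and the size tolerance below is an assumption, not taken from the patch:

from typing import List, Optional, Tuple


def determine_merge_decision(
    nearby: List,                 # nearby SemanticAnnotation candidates
    pc_size: Optional[float],     # 3D extent of the new detection's point cloud
    use_pointcloud: bool,
    size_tolerance: float = 0.2,  # assumed relative tolerance (hypothetical)
) -> Tuple[bool, Optional[str]]:
    if not nearby:
        return False, None
    candidate = nearby[0]
    if not use_pointcloud or pc_size is None:
        # No 3D evidence available: merge with the nearest existing annotation.
        return True, candidate.id
    existing_size = (candidate.metadata or {}).get("pointcloud", {}).get("size_3d")
    if existing_size is None:
        return True, candidate.id
    # Merge only if the two point-cloud extents agree within tolerance.
    if abs(existing_size - pc_size) <= size_tolerance * max(existing_size, pc_size):
        return True, candidate.id
    return False, None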
+ +import numpy as np +import pytest +from cv_bridge import CvBridge +from sensor_msgs.msg import CameraInfo +from std_msgs.msg import Header +from vision_msgs.msg import BoundingBox2D, Detection2D, Point2D + +from rai_semap.ros2.perception_utils import ( + compute_3d_pose_from_bbox, + extract_pointcloud_from_bbox, +) + + +@pytest.fixture +def bridge(): + """Create a CvBridge instance.""" + return CvBridge() + + +@pytest.fixture +def camera_info(): + """Create a basic camera info message.""" + info = CameraInfo() + info.width = 640 + info.height = 480 + info.k = [500.0, 0.0, 320.0, 0.0, 500.0, 240.0, 0.0, 0.0, 1.0] + return info + + +@pytest.fixture +def depth_image(bridge): + """Create a depth image message.""" + depth_array = np.ones((480, 640), dtype=np.uint16) * 1000 + depth_msg = bridge.cv2_to_imgmsg(depth_array, encoding="16UC1") + depth_msg.header = Header() + depth_msg.header.frame_id = "camera_frame" + return depth_msg + + +@pytest.fixture +def detection2d(): + """Create a Detection2D message.""" + detection = Detection2D() + detection.bbox = BoundingBox2D() + detection.bbox.center.position = Point2D(x=320.0, y=240.0) + detection.bbox.size_x = 100.0 + detection.bbox.size_y = 80.0 + return detection + + +def test_compute_3d_pose_from_bbox(bridge, camera_info, depth_image): + """Test computing 3D pose from bounding box center.""" + bbox_center_x = 320.0 + bbox_center_y = 240.0 + + pose = compute_3d_pose_from_bbox( + bbox_center_x, bbox_center_y, depth_image, camera_info, bridge + ) + + assert pose is not None + assert pose.position.z > 0 + assert pose.orientation.w == 1.0 + + +def test_compute_3d_pose_from_bbox_out_of_bounds(bridge, camera_info, depth_image): + """Test computing 3D pose with out-of-bounds coordinates.""" + bbox_center_x = 1000.0 + bbox_center_y = 1000.0 + + pose = compute_3d_pose_from_bbox( + bbox_center_x, bbox_center_y, depth_image, camera_info, bridge + ) + + assert pose is None + + +def test_extract_pointcloud_from_bbox(bridge, camera_info, depth_image, detection2d): + """Test extracting point cloud from bounding box.""" + result = extract_pointcloud_from_bbox(detection2d, depth_image, camera_info, bridge) + + assert result is not None + centroid, size, point_count = result + assert point_count > 0 + assert centroid.x is not None + assert centroid.y is not None + assert centroid.z is not None + assert size >= 0 + + +def test_extract_pointcloud_from_bbox_empty_depth(bridge, camera_info, detection2d): + """Test extracting point cloud with empty depth image.""" + depth_array = np.zeros((480, 640), dtype=np.uint16) + depth_msg = bridge.cv2_to_imgmsg(depth_array, encoding="16UC1") + depth_msg.header = Header() + depth_msg.header.frame_id = "camera_frame" + + result = extract_pointcloud_from_bbox(detection2d, depth_msg, camera_info, bridge) + + assert result is None diff --git a/tests/rai_semap/test_visualizer.py b/tests/rai_semap/test_visualizer.py new file mode 100644 index 000000000..b6cea38b3 --- /dev/null +++ b/tests/rai_semap/test_visualizer.py @@ -0,0 +1,144 @@ +# Copyright (C) 2025 Julia Jia +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +import tempfile +from pathlib import Path + +import pytest +import rclpy +import yaml +from std_msgs.msg import ColorRGBA + +from rai_semap.ros2.visualizer import SemanticMapVisualizer + + +def set_parameter(node, name: str, value, param_type): + """Helper to set a single parameter on a node.""" + node.set_parameters([rclpy.parameter.Parameter(name, param_type, value)]) + + +@pytest.fixture +def temp_config_file(): + """Create a temporary config file for testing.""" + config_data = { + "default_color": [0.5, 0.5, 0.5, 0.8], + "class_colors": { + "chair": [0.2, 0.8, 0.4, 0.8], + "table": [0.6, 0.2, 0.8, 0.8], + }, + } + with tempfile.NamedTemporaryFile(mode="w", suffix=".yaml", delete=False) as f: + yaml.dump(config_data, f) + config_path = f.name + yield config_path + Path(config_path).unlink(missing_ok=True) + + +@pytest.fixture +def visualizer(ros2_context, temp_db_path): + """Create a SemanticMapVisualizer instance for testing.""" + node = SemanticMapVisualizer() + set_parameter( + node, "database_path", temp_db_path, rclpy.parameter.Parameter.Type.STRING + ) + yield node + node.destroy_node() + + +def test_visualizer_initialization(visualizer): + """Test that SemanticMapVisualizer initializes correctly.""" + assert visualizer is not None + assert visualizer.get_name() == "semantic_map_visualizer" + assert visualizer.class_colors is not None + assert visualizer.default_color is not None + assert visualizer.marker_publisher is not None + + +def test_generate_class_colors(visualizer): + """Test generating class colors from config.""" + colors, default_color = visualizer._generate_class_colors() + + assert isinstance(colors, dict) + assert isinstance(default_color, ColorRGBA) + assert default_color.r == 0.5 + assert default_color.g == 0.5 + assert default_color.b == 0.5 + assert default_color.a == 0.8 + + +def test_generate_class_colors_from_custom_config( + ros2_context, temp_db_path, temp_config_file +): + """Test generating class colors from custom config file.""" + node = SemanticMapVisualizer() + set_parameter( + node, "database_path", temp_db_path, rclpy.parameter.Parameter.Type.STRING + ) + set_parameter( + node, + "class_colors_config", + temp_config_file, + rclpy.parameter.Parameter.Type.STRING, + ) + + colors, default_color = node._generate_class_colors() + + assert "chair" in colors + assert "table" in colors + assert colors["chair"].r == 0.2 + assert colors["chair"].g == 0.8 + assert colors["chair"].b == 0.4 + assert default_color.r == 0.5 + + node.destroy_node() + + +def test_get_class_color(visualizer): + """Test getting color for object class.""" + visualizer.class_colors["test_class"] = ColorRGBA(r=1.0, g=0.0, b=0.0, a=1.0) + + color = visualizer._get_class_color("test_class") + assert color.r == 1.0 + assert color.g == 0.0 + assert color.b == 0.0 + + unknown_color = visualizer._get_class_color("unknown_class") + assert unknown_color == visualizer.default_color + + +def test_get_string_parameter(visualizer): + """Test getting string parameter.""" + set_parameter( + visualizer, + "database_path", + "/test/path.db", + rclpy.parameter.Parameter.Type.STRING, + ) + assert visualizer._get_string_parameter("database_path") == "/test/path.db" + + +def test_get_double_parameter(visualizer): + """Test getting double parameter.""" + set_parameter( + visualizer, "map_resolution", 0.1, rclpy.parameter.Parameter.Type.DOUBLE + ) + assert 
visualizer._get_double_parameter("map_resolution") == 0.1 + + +def test_get_bool_parameter(visualizer): + """Test getting bool parameter.""" + set_parameter( + visualizer, "show_text_labels", False, rclpy.parameter.Parameter.Type.BOOL + ) + assert visualizer._get_bool_parameter("show_text_labels") is False diff --git a/tests/smoke/import_test.py b/tests/smoke/import_test.py index 901008e89..b651d008e 100644 --- a/tests/smoke/import_test.py +++ b/tests/smoke/import_test.py @@ -51,6 +51,9 @@ def import_submodules(package: ModuleType) -> None: continue if path.is_file() and path.suffix != ".py" or path.name == "__init__.py": continue + # Skip files with dots in name (e.g., semap.launch.py) as they can't be imported as modules + if path.is_file() and "." in path.stem: + continue relative_path = str(path.relative_to(package_path)) subpage_name = relative_path.replace(os.path.sep, ".").replace(".py", "")
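For reference, the perception-utils tests earlier in this patch assume a standard pinhole back-projection: with intrinsics fx, fy, cx, cy taken from CameraInfo.k and a 16UC1 depth image in millimetres, the 3D point for pixel (u, v) is z = depth / 1000, x = (u - cx) * z / fx, y = (v - cy) * z / fy. A self-contained sketch of that math, not the actual compute_3d_pose_from_bbox implementation:

import numpy as np


def backproject_pixel(u, v, depth_mm, k):
    """Back-project pixel (u, v) with depth in mm using row-major 3x3 intrinsics k."""
    fx, cx, fy, cy = k[0], k[2], k[4], k[5]
    z = depth_mm / 1000.0     # 16UC1 depth is in millimetres
    x = (u - cx) * z / fx
    y = (v - cy) * z / fy
    return np.array([x, y, z])


# With the test fixture's intrinsics and a 1000 mm depth at the image centre,
# the point lands 1 m straight ahead on the optical axis — which is why the
# test asserts pose.position.z > 0.
k = [500.0, 0.0, 320.0, 0.0, 500.0, 240.0, 0.0, 0.0, 1.0]
point = backproject_pixel(320.0, 240.0, 1000, k)
assert np.allclose(point, [0.0, 0.0, 1.0])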