From 2ce54020c95b57f86c3399e2a5de169a133a015f Mon Sep 17 00:00:00 2001 From: Adityavardhan Agrawal Date: Sat, 6 Dec 2025 15:20:23 -0800 Subject: [PATCH] Enhance documentation for folder management and nested folder support across SDK methods. Update parameters to include folder_name and folder_depth for scoping in various document and graph operations. --- python-sdk/add_document_to_folder.mdx | 6 ++-- python-sdk/batch_get_chunks.mdx | 4 +-- python-sdk/batch_get_documents.mdx | 5 +-- python-sdk/create_folder.mdx | 27 +++++++++++--- python-sdk/create_graph.mdx | 9 +++++ python-sdk/delete_folder.mdx | 4 ++- python-sdk/folders.mdx | 42 ++++++++++++++++------ python-sdk/get_folder.mdx | 6 ++-- python-sdk/get_folders_details.mdx | 6 ++-- python-sdk/get_folders_summary.mdx | 3 ++ python-sdk/get_graph.mdx | 27 ++++++++++++-- python-sdk/get_graph_status.mdx | 15 +++++--- python-sdk/get_graph_visualization.mdx | 7 ++-- python-sdk/list_documents.mdx | 22 ++++++++++-- python-sdk/list_folders.mdx | 6 ++++ python-sdk/list_graphs.mdx | 22 ++++++++++-- python-sdk/morphik.mdx | 2 ++ python-sdk/query.mdx | 19 +++++++++- python-sdk/query_document.mdx | 4 +-- python-sdk/remove_document_from_folder.mdx | 6 ++-- python-sdk/retrieve_chunks.mdx | 17 ++++++++- python-sdk/retrieve_chunks_grouped.mdx | 11 ++++-- python-sdk/retrieve_docs.mdx | 17 ++++++++- python-sdk/search_documents.mdx | 13 ++++--- python-sdk/update_graph.mdx | 13 +++++-- self-hosting.mdx | 28 +++++++++++++++ 26 files changed, 282 insertions(+), 59 deletions(-) diff --git a/python-sdk/add_document_to_folder.mdx b/python-sdk/add_document_to_folder.mdx index eab7b60..8de7cc0 100644 --- a/python-sdk/add_document_to_folder.mdx +++ b/python-sdk/add_document_to_folder.mdx @@ -24,7 +24,7 @@ description: "Add an existing document to a folder" ## Parameters -- `folder_id_or_name` (str): Folder identifier. Accepts either the folder's UUID or its name. +- `folder_id_or_name` (str): Folder identifier. 
Accepts the folder's UUID, name, or canonical path (e.g., `/projects/alpha/specs`; leading slash optional). - `document_id` (str): Identifier of the document to move into the folder. ## Returns @@ -42,7 +42,7 @@ description: "Add an existing document to a folder" folder = db.get_folder("marketing_docs") db.add_document_to_folder(folder.id, "doc_123") - db.add_document_to_folder("marketing_docs", "doc_456") + db.add_document_to_folder("/projects/alpha/specs", "doc_456") ``` @@ -53,7 +53,7 @@ description: "Add an existing document to a folder" folder = await db.get_folder("marketing_docs") await db.add_document_to_folder(folder.id, "doc_123") - await db.add_document_to_folder("marketing_docs", "doc_456") + await db.add_document_to_folder("/projects/alpha/specs", "doc_456") ``` diff --git a/python-sdk/batch_get_chunks.mdx b/python-sdk/batch_get_chunks.mdx index 9a85d22..04073f1 100644 --- a/python-sdk/batch_get_chunks.mdx +++ b/python-sdk/batch_get_chunks.mdx @@ -29,7 +29,7 @@ description: "Retrieve specific chunks by their document ID and chunk number" ## Parameters - `sources` (List[Union[ChunkSource, Dict[str, Any]]]): List of ChunkSource objects or dictionaries with document_id and chunk_number -- `folder_name` (str | List[str], optional): Optional folder scope. Accepts a single folder name or a list of folder names. +- `folder_name` (str | List[str], optional): Optional folder scope. Accepts canonical paths or a list of paths/names. - `use_colpali` (bool, optional): Whether to request multimodal chunks when available. Defaults to True. - `output_format` (str, optional): Controls how image chunks are returned. Set to `"url"` to receive presigned URLs; omit or set to `"base64"` (default) to receive base64 content. 
@@ -100,4 +100,4 @@ Each `FinalChunkResult` object in the returned list has the following properties - `metadata` (Dict[str, Any]): Document metadata - `content_type` (str): Content type - `filename` (Optional[str]): Original filename -- `download_url` (Optional[str]): URL to download full document \ No newline at end of file +- `download_url` (Optional[str]): URL to download full document diff --git a/python-sdk/batch_get_documents.mdx b/python-sdk/batch_get_documents.mdx index 7cace3c..8bc1da3 100644 --- a/python-sdk/batch_get_documents.mdx +++ b/python-sdk/batch_get_documents.mdx @@ -25,7 +25,7 @@ description: "Retrieve multiple documents by their IDs in a single batch operati ## Parameters - `document_ids` (List[str]): List of document IDs to retrieve -- `folder_name` (str | List[str], optional): Optional folder scope. Accepts a single folder name or a list of folder names. +- `folder_name` (str | List[str], optional): Optional folder scope. Accepts a canonical path (e.g., `/projects/alpha`) or a list of paths/names. 
## Returns @@ -67,4 +67,5 @@ Each `Document` object in the returned list has the following properties: - `metadata` (Dict[str, Any]): User-defined metadata - `storage_info` (Dict[str, str]): Storage-related information - `system_metadata` (Dict[str, Any]): System-managed metadata -- `chunk_ids` (List[str]): IDs of document chunks +- `chunk_ids` (List[str]): IDs of document chunks +- `folder_path` (Optional[str]): Canonical folder path (includes nested parents when scoped) diff --git a/python-sdk/create_folder.mdx b/python-sdk/create_folder.mdx index 505ad75..350993a 100644 --- a/python-sdk/create_folder.mdx +++ b/python-sdk/create_folder.mdx @@ -9,6 +9,8 @@ description: "Create a new folder for organizing documents" def create_folder( name: str, description: Optional[str] = None, + full_path: Optional[str] = None, + parent_id: Optional[str] = None, ) -> Folder ``` @@ -17,6 +19,8 @@ description: "Create a new folder for organizing documents" async def create_folder( name: str, description: Optional[str] = None, + full_path: Optional[str] = None, + parent_id: Optional[str] = None, ) -> Folder ``` @@ -24,10 +28,10 @@ description: "Create a new folder for organizing documents" ## Parameters -- `name` (str): Folder name. Must be unique per app and cannot contain `/`. +- `name` (str): Folder name (leaf segment when using nested paths). If `full_path` is omitted, this becomes the canonical path. - `description` (str, optional): Optional description of the folder. - -> ⚠️ Nested folders are not supported. Use underscores (`_`) to mimic hierarchy (e.g., `team_reports_q1`). +- `full_path` (str, optional): Canonical folder path (e.g., `"/projects/alpha/specs"`). Leading slash is optional; parents are created automatically. +- `parent_id` (str, optional): Explicit parent folder ID. Usually not needed—`full_path` handles hierarchy creation. 
## Returns @@ -39,9 +43,17 @@ description: "Create a new folder for organizing documents" ```python from morphik import Morphik - + db = Morphik() folder = db.create_folder("marketing_docs", description="All marketing collateral") + + # Create a nested folder (parents auto-created) + nested = db.create_folder( + name="specs", + full_path="/projects/alpha/specs", + description="All project specs", + ) + print(nested.full_path) # "/projects/alpha/specs" ``` @@ -50,6 +62,13 @@ description: "Create a new folder for organizing documents" async with AsyncMorphik() as db: folder = await db.create_folder("marketing_docs", description="All marketing collateral") + + nested = await db.create_folder( + name="specs", + full_path="/projects/alpha/specs", + description="All project specs", + ) + print(nested.full_path) # "/projects/alpha/specs" ``` diff --git a/python-sdk/create_graph.mdx b/python-sdk/create_graph.mdx index 32dad8c..c701eb7 100644 --- a/python-sdk/create_graph.mdx +++ b/python-sdk/create_graph.mdx @@ -11,6 +11,8 @@ description: "Create a graph from documents" filters: Optional[Dict[str, Any]] = None, documents: Optional[List[str]] = None, prompt_overrides: Optional[Union[GraphPromptOverrides, Dict[str, Any]]] = None, + folder_name: Optional[Union[str, List[str]]] = None, + end_user_id: Optional[str] = None, ) -> Graph ``` @@ -21,6 +23,8 @@ description: "Create a graph from documents" filters: Optional[Dict[str, Any]] = None, documents: Optional[List[str]] = None, prompt_overrides: Optional[Union[GraphPromptOverrides, Dict[str, Any]]] = None, + folder_name: Optional[Union[str, List[str]]] = None, + end_user_id: Optional[str] = None, ) -> Graph ``` @@ -32,6 +36,8 @@ description: "Create a graph from documents" - `filters` (Dict[str, Any], optional): Optional metadata filters to determine which documents to include - `documents` (List[str], optional): Optional list of specific document IDs to include - `prompt_overrides` (GraphPromptOverrides | Dict[str, Any], 
optional): Optional customizations for entity extraction and resolution prompts +- `folder_name` (str | List[str], optional): Optional folder scope (canonical path or list of paths/names) +- `end_user_id` (str, optional): Optional end-user scope ## Returns @@ -57,6 +63,7 @@ graph is done, or poll `graph.is_processing` / `graph.is_completed`. graph = db.create_graph( name="research_graph", filters={"category": "research"}, + folder_name="/projects/alpha", ) # Option 1: Block until finished @@ -127,6 +134,7 @@ graph is done, or poll `graph.is_processing` / `graph.is_completed`. graph = await db.create_graph( name="research_graph", filters={"category": "research"}, + folder_name="/projects/alpha", ) # Wait for completion @@ -206,3 +214,4 @@ The returned `Graph` object has the following properties: - `created_at` (datetime): Creation timestamp - `updated_at` (datetime): Last update timestamp - `owner` (Dict[str, str]): Graph owner information +- `folder_path` (Optional[str]): Canonical folder path for the graph (if scoped) diff --git a/python-sdk/delete_folder.mdx b/python-sdk/delete_folder.mdx index 090ac24..c71aaa0 100644 --- a/python-sdk/delete_folder.mdx +++ b/python-sdk/delete_folder.mdx @@ -22,7 +22,7 @@ description: "Delete a folder and its documents" ## Parameters -- `folder_id_or_name` (str): Folder identifier. Accepts either the folder's UUID or its name. +- `folder_id_or_name` (str): Folder identifier. Accepts the folder's UUID, name, or canonical path (e.g., `/projects/alpha/specs`; leading slash optional). 
## Returns @@ -37,6 +37,7 @@ description: "Delete a folder and its documents" db = Morphik() db.delete_folder("marketing_docs") + db.delete_folder("/projects/alpha/specs") db.delete_folder("bfd74128-8539-4050-8938-542d6ee68be0") ``` @@ -46,6 +47,7 @@ description: "Delete a folder and its documents" async with AsyncMorphik() as db: await db.delete_folder("marketing_docs") + await db.delete_folder("/projects/alpha/specs") await db.delete_folder("bfd74128-8539-4050-8938-542d6ee68be0") ``` diff --git a/python-sdk/folders.mdx b/python-sdk/folders.mdx index c172a07..9f1667d 100644 --- a/python-sdk/folders.mdx +++ b/python-sdk/folders.mdx @@ -7,7 +7,7 @@ description: "Organize and isolate data into logical folder groups in Morphik" Folders in Morphik provide a way to organize documents into logical groups. This is particularly useful for multi-project environments where you want to maintain separation between different contexts. Documents within a folder are isolated from those in other folders, allowing for clean organization and data separation. -> ℹ️ All folder APIs accept **either the folder’s UUID or its name**. Use whichever identifier you already have—Morphik resolves it automatically. +> ℹ️ All folder APIs accept **folder UUIDs, names, or canonical paths** (e.g., `"/projects/alpha/specs"`). Folder objects expose `full_path`, `parent_id`, `depth`, and `child_count`; documents and graphs expose `folder_path` to mirror server responses. ## Creating and Accessing Folders @@ -20,9 +20,12 @@ Folders in Morphik provide a way to organize documents into logical groups. 
This # Create a new folder folder = db.create_folder("marketing_docs") + + # Create a nested folder (parents created automatically) + nested = db.create_folder(name="specs", full_path="/projects/alpha/specs") - # Access an existing folder by name or UUID - folder = db.get_folder("marketing_docs") + # Access an existing folder by name/path or UUID + folder = db.get_folder("/projects/alpha") folder_by_id = db.get_folder(folder.id) ``` @@ -32,10 +35,12 @@ Folders in Morphik provide a way to organize documents into logical groups. This async with AsyncMorphik() as db: # Create a new folder - folder = db.create_folder("marketing_docs") + folder = await db.create_folder("marketing_docs") + + nested = await db.create_folder(name="specs", full_path="/projects/alpha/specs") - # Access an existing folder by name or UUID - folder = await db.get_folder("marketing_docs") + # Access an existing folder by name/path or UUID + folder = await db.get_folder("/projects/alpha") folder_by_id = await db.get_folder(folder.id) ``` @@ -88,6 +93,23 @@ Once you have a folder object, all operations performed on it are scoped to that +## Nested Folders and Scope Depth + +Folders can be nested arbitrarily. Use canonical paths (leading slash optional) to address them, and include descendant folders in retrieval/listing by setting `folder_depth`: + +```python +folder = db.create_folder(name="specs", full_path="/projects/alpha/specs") + +# Query across /projects/alpha and all children +chunks = db.retrieve_chunks( + query="design notes", + folder_name="/projects/alpha", + folder_depth=-1, # -1: all descendants, 0/None: exact only, n>0: include up to n levels +) +``` + +Folder-scoped helpers inherit the path automatically, so `folder.retrieve_chunks(..., folder_depth=-1)` will include its children. 
+ ## Folder Methods All the core document operations available on the main Morphik client are also available on folder objects, but they are automatically scoped to the specific folder: @@ -108,7 +130,7 @@ All the core document operations available on the main Morphik client are also a ## Managing Existing Documents and Folders -You can move previously ingested documents into a folder, remove them, or delete the entire folder. The SDK methods accept a folder name or UUID. +You can move previously ingested documents into a folder, remove them, or delete the entire folder. The SDK methods accept a folder UUID, name, or canonical path. @@ -120,9 +142,9 @@ You can move previously ingested documents into a folder, remove them, or delete document_id = "doc_123" - # Add an existing document to the folder (name or UUID works) + # Add an existing document to the folder (name/path or UUID works) db.add_document_to_folder(folder.id, document_id) - db.add_document_to_folder("marketing_docs", document_id) + db.add_document_to_folder("/projects/alpha/specs", document_id) # Remove the document from the folder db.remove_document_from_folder("marketing_docs", document_id) @@ -140,7 +162,7 @@ You can move previously ingested documents into a folder, remove them, or delete document_id = "doc_123" await db.add_document_to_folder(folder.id, document_id) - await db.add_document_to_folder("marketing_docs", document_id) + await db.add_document_to_folder("/projects/alpha/specs", document_id) await db.remove_document_from_folder("marketing_docs", document_id) diff --git a/python-sdk/get_folder.mdx b/python-sdk/get_folder.mdx index ab838d9..fc60438 100644 --- a/python-sdk/get_folder.mdx +++ b/python-sdk/get_folder.mdx @@ -1,6 +1,6 @@ --- title: "get_folder" -description: "Retrieve a folder by name or UUID" +description: "Retrieve a folder by name, canonical path, or UUID" --- @@ -22,12 +22,14 @@ description: "Retrieve a folder by name or UUID" ## Parameters -- `folder_id_or_name` (str): 
Folder identifier. Accepts either the folder's UUID or its name. +- `folder_id_or_name` (str): Folder identifier. Accepts the folder's UUID, name, or canonical path (e.g., `/projects/alpha/specs`; leading slash optional). ## Returns - `Folder`: Folder object that can be used to scope operations (ingest, query, etc.). +Folder objects include hierarchy metadata such as `full_path`, `parent_id`, `depth`, and `child_count`, mirroring the server response. + ## Examples diff --git a/python-sdk/get_folders_details.mdx b/python-sdk/get_folders_details.mdx index f74404d..1a3c370 100644 --- a/python-sdk/get_folders_details.mdx +++ b/python-sdk/get_folders_details.mdx @@ -40,7 +40,7 @@ description: "Get detailed information about folders with optional document stat ## Parameters -- `identifiers` (List[str], optional): List of folder IDs or names. If None, returns all accessible folders. +- `identifiers` (List[str], optional): List of folder IDs, names, or canonical paths (e.g., `/projects/alpha/specs`). If None, returns all accessible folders. - `include_document_count` (bool, optional): Include total document count. Defaults to True. - `include_status_counts` (bool, optional): Include document counts grouped by processing status. Defaults to False. - `include_documents` (bool, optional): Include paginated document list. Defaults to False. @@ -79,7 +79,7 @@ Filters follow the same JSON syntax across the API. See the [Metadata Filtering # Get specific folders with status counts response = db.get_folders_details( - identifiers=["reports", "invoices"], + identifiers=["/projects/reports", "invoices"], include_status_counts=True, ) for folder_detail in response.folders: @@ -176,6 +176,8 @@ Filters follow the same JSON syntax across the API. 
See the [Metadata Filtering - `folder` (FolderInfo): Folder information - `document_info` (FolderDocumentInfo | None): Document statistics and list +`FolderInfo` includes hierarchy fields: `full_path`, `parent_id`, `depth`, and `child_count`, plus description/name metadata. + ### FolderDocumentInfo - `total_count` (int | None): Total document count (when `include_document_count=True`) diff --git a/python-sdk/get_folders_summary.mdx b/python-sdk/get_folders_summary.mdx index 0efa1cd..8615acc 100644 --- a/python-sdk/get_folders_summary.mdx +++ b/python-sdk/get_folders_summary.mdx @@ -78,6 +78,9 @@ The `FolderSummary` objects have the following properties: - `id` (str): Unique folder identifier - `name` (str): Folder name +- `full_path` (str | None): Canonical folder path (e.g., `/projects/alpha/specs`) +- `parent_id` (str | None): Parent folder ID +- `depth` (int | None): Depth in the hierarchy (root = 1) - `description` (str | None): Folder description - `doc_count` (int): Number of documents in the folder - `updated_at` (str | None): Last update timestamp diff --git a/python-sdk/get_graph.mdx b/python-sdk/get_graph.mdx index 1854139..21f806b 100644 --- a/python-sdk/get_graph.mdx +++ b/python-sdk/get_graph.mdx @@ -6,12 +6,22 @@ description: "Get a graph by name" ```python - def get_graph(name: str) -> Graph + def get_graph( + name: str, + folder_name: Optional[Union[str, List[str]]] = None, + folder_depth: Optional[int] = None, + end_user_id: Optional[str] = None, + ) -> Graph ``` ```python - async def get_graph(name: str) -> Graph + async def get_graph( + name: str, + folder_name: Optional[Union[str, List[str]]] = None, + folder_depth: Optional[int] = None, + end_user_id: Optional[str] = None, + ) -> Graph ``` @@ -19,6 +29,9 @@ description: "Get a graph by name" ## Parameters - `name` (str): Name of the graph to retrieve +- `folder_name` (str | List[str], optional): Optional folder scope. Accepts canonical paths or a list of paths/names. 
+- `folder_depth` (int, optional): Folder scope depth. `None`/`0` = exact match, `-1` = include all descendants, `n > 0` = include descendants up to `n` levels deep. +- `end_user_id` (str, optional): Optional end-user scope. ## Returns @@ -38,6 +51,9 @@ description: "Get a graph by name" # Get a graph by name graph = db.get_graph("finance_graph") + + # Or fetch by path and include nested folders + nested_graph = db.get_graph("finance_graph", folder_name="/projects/alpha", folder_depth=-1) if graph.is_processing: print("Graph still processing, waiting...") @@ -64,6 +80,12 @@ description: "Get a graph by name" async with AsyncMorphik() as db: # Get a graph by name graph = await db.get_graph("finance_graph") + + nested_graph = await db.get_graph( + "finance_graph", + folder_name="/projects/alpha", + folder_depth=-1, + ) if graph.is_processing: print("Graph still processing, waiting...") @@ -99,6 +121,7 @@ The returned `Graph` object has the following properties: - `created_at` (datetime): Creation timestamp - `updated_at` (datetime): Last update timestamp - `owner` (Dict[str, str]): Graph owner information +- `folder_path` (Optional[str]): Canonical folder path for the graph (if scoped) ### Entity Properties diff --git a/python-sdk/get_graph_status.mdx b/python-sdk/get_graph_status.mdx index f41ab5b..10ce9d0 100644 --- a/python-sdk/get_graph_status.mdx +++ b/python-sdk/get_graph_status.mdx @@ -8,7 +8,8 @@ description: "Get the current status of a graph with pipeline stage information" ```python def get_graph_status( graph_name: str, - folder_name: Optional[str] = None, + folder_name: Optional[Union[str, List[str]]] = None, + folder_depth: Optional[int] = None, end_user_id: Optional[str] = None, ) -> Dict[str, Any] ``` @@ -17,7 +18,8 @@ description: "Get the current status of a graph with pipeline stage information" ```python async def get_graph_status( graph_name: str, - folder_name: Optional[str] = None, + folder_name: Optional[Union[str, List[str]]] = None, + 
folder_depth: Optional[int] = None, end_user_id: Optional[str] = None, ) -> Dict[str, Any] ``` @@ -27,7 +29,8 @@ description: "Get the current status of a graph with pipeline stage information" ## Parameters - `graph_name` (str): Name of the graph to check -- `folder_name` (str, optional): Optional folder name for scoping +- `folder_name` (str | List[str], optional): Optional folder scope (canonical path or list of paths/names) +- `folder_depth` (int, optional): Folder scope depth. `None`/`0` = exact match, `-1` = include all descendants, `n > 0` = include descendants up to `n` levels deep. - `end_user_id` (str, optional): Optional end user ID for scoping ## Returns @@ -53,7 +56,8 @@ description: "Get the current status of a graph with pipeline stage information" # Check with folder scoping status = db.get_graph_status( graph_name="team_graph", - folder_name="engineering", + folder_name="/engineering/graphs", + folder_depth=-1, ) print(f"Graph status in folder: {status.get('status')}") ``` @@ -73,7 +77,8 @@ description: "Get the current status of a graph with pipeline stage information" # Check with folder scoping status = await db.get_graph_status( graph_name="team_graph", - folder_name="engineering", + folder_name="/engineering/graphs", + folder_depth=-1, ) print(f"Graph status in folder: {status.get('status')}") ``` diff --git a/python-sdk/get_graph_visualization.mdx b/python-sdk/get_graph_visualization.mdx index 688eb56..db68f4d 100644 --- a/python-sdk/get_graph_visualization.mdx +++ b/python-sdk/get_graph_visualization.mdx @@ -10,6 +10,7 @@ description: "Retrieve nodes and links for visualizing a knowledge graph" def get_graph_visualization( name: str, folder_name: Optional[Union[str, List[str]]] = None, + folder_depth: Optional[int] = None, end_user_id: Optional[str] = None, ) -> Dict[str, Any] ``` @@ -21,6 +22,7 @@ description: "Retrieve nodes and links for visualizing a knowledge graph" async def get_graph_visualization( name: str, folder_name: 
Optional[Union[str, List[str]]] = None, + folder_depth: Optional[int] = None, end_user_id: Optional[str] = None, ) -> Dict[str, Any] ``` @@ -31,7 +33,8 @@ description: "Retrieve nodes and links for visualizing a knowledge graph" ## Parameters - `name` (str): Graph name. -- `folder_name` (str | List[str], optional): Folder scope filter. +- `folder_name` (str | List[str], optional): Folder scope filter (canonical path or list of paths/names). +- `folder_depth` (int, optional): Folder scope depth. `None`/`0` = exact match, `-1` = include all descendants, `n > 0` = include descendants up to `n` levels deep. - `end_user_id` (str, optional): End-user scope filter. ## Returns @@ -43,4 +46,4 @@ description: "Retrieve nodes and links for visualizing a knowledge graph" ```python viz = db.get_graph_visualization("research_graph") print(len(viz["nodes"]), "nodes", len(viz["links"]), "edges") -``` \ No newline at end of file +``` diff --git a/python-sdk/list_documents.mdx b/python-sdk/list_documents.mdx index 3353326..33f347f 100644 --- a/python-sdk/list_documents.mdx +++ b/python-sdk/list_documents.mdx @@ -16,6 +16,7 @@ description: "List accessible documents in Morphik" limit: int = 100, filters: Optional[Dict[str, Any]] = None, folder_name: Optional[Union[str, List[str]]] = None, + folder_depth: Optional[int] = None, include_total_count: bool = False, include_status_counts: bool = False, include_folder_counts: bool = False, @@ -32,6 +33,7 @@ description: "List accessible documents in Morphik" limit: int = 100, filters: Optional[Dict[str, Any]] = None, folder_name: Optional[Union[str, List[str]]] = None, + folder_depth: Optional[int] = None, include_total_count: bool = False, include_status_counts: bool = False, include_folder_counts: bool = False, @@ -48,7 +50,8 @@ description: "List accessible documents in Morphik" - `skip` (int, optional): Number of documents to skip for pagination. Defaults to 0. - `limit` (int, optional): Maximum number of documents to return. 
Defaults to 100. - `filters` (Dict[str, Any], optional): Metadata filters to apply -- `folder_name` (str | List[str], optional): Optional folder scope. Accepts a single folder name or a list of folder names. +- `folder_name` (str | List[str], optional): Optional folder scope. Accepts a canonical path (e.g., `/projects/alpha`) or a list of paths/names. +- `folder_depth` (int, optional): Folder scope depth. `None`/`0` = exact match, `-1` = include all descendants, `n > 0` = include descendants up to `n` levels deep. - `include_total_count` (bool, optional): Include total count of matching documents. Defaults to False. - `include_status_counts` (bool, optional): Include counts grouped by processing status. Defaults to False. - `include_folder_counts` (bool, optional): Include counts grouped by folder. Defaults to False. @@ -165,10 +168,21 @@ response = db.list_documents(filters=filters, include_total_count=True) sort_direction="desc", filters={"department": "research"} ) - ``` + ``` +### Nested Folder Queries + +```python +# Include all documents under /projects/alpha and its children +response = db.list_documents( + folder_name="/projects/alpha", + folder_depth=-1, # descend through nested folders + include_folder_counts=True, +) +``` + ### Aggregates and Counts @@ -203,4 +217,6 @@ The `Document` objects returned by this method have the following properties: - `metadata` (Dict[str, Any]): User-defined metadata - `storage_info` (Dict[str, str]): Storage-related information - `system_metadata` (Dict[str, Any]): System-managed metadata -- `chunk_ids` (List[str]): IDs of document chunks +- `chunk_ids` (List[str]): IDs of document chunks +- `folder_name` (Optional[str]): Folder leaf name +- `folder_path` (Optional[str]): Canonical folder path (includes nested parents) diff --git a/python-sdk/list_folders.mdx b/python-sdk/list_folders.mdx index 929d2a7..785a3fd 100644 --- a/python-sdk/list_folders.mdx +++ b/python-sdk/list_folders.mdx @@ -20,6 +20,12 @@ description: "List 
all folders available to the client" - `List[Folder]`: Collection of folders the current auth context can access. +Each `Folder` now surfaces hierarchy details: +- `full_path`: Canonical path (e.g., `/projects/alpha/specs`) +- `parent_id`: Parent folder ID (if any) +- `depth`: Depth in the tree (root = 1) +- `child_count`: Number of direct children when provided + ## Examples diff --git a/python-sdk/list_graphs.mdx b/python-sdk/list_graphs.mdx index 563fac9..4951f83 100644 --- a/python-sdk/list_graphs.mdx +++ b/python-sdk/list_graphs.mdx @@ -6,19 +6,29 @@ description: "List all graphs the user has access to" ```python - def list_graphs() -> List[Graph] + def list_graphs( + folder_name: Optional[Union[str, List[str]]] = None, + folder_depth: Optional[int] = None, + end_user_id: Optional[str] = None, + ) -> List[Graph] ``` ```python - async def list_graphs() -> List[Graph] + async def list_graphs( + folder_name: Optional[Union[str, List[str]]] = None, + folder_depth: Optional[int] = None, + end_user_id: Optional[str] = None, + ) -> List[Graph] ``` ## Parameters -None +- `folder_name` (str | List[str], optional): Optional folder scope. Accepts canonical paths or a list of paths/names. +- `folder_depth` (int, optional): Folder scope depth. `None`/`0` = exact match, `-1` = include all descendants, `n > 0` = include descendants up to `n` levels deep. +- `end_user_id` (str, optional): Optional end-user scope. 
## Returns @@ -42,6 +52,9 @@ None f"Graph: {graph.name} (status={status}), " f"Entities: {len(graph.entities)}, Relationships: {len(graph.relationships)}", ) + + # Scope to a nested folder subtree + nested_graphs = db.list_graphs(folder_name="/projects/alpha", folder_depth=-1) # Find the most recent graph latest_graph = max(graphs, key=lambda g: g.updated_at) @@ -62,6 +75,8 @@ None f"Graph: {graph.name} (status={status}), " f"Entities: {len(graph.entities)}, Relationships: {len(graph.relationships)}", ) + + nested_graphs = await db.list_graphs(folder_name="/projects/alpha", folder_depth=-1) # Find the most recent graph latest_graph = max(graphs, key=lambda g: g.updated_at) @@ -84,3 +99,4 @@ Each `Graph` object in the returned list has the following properties: - `created_at` (datetime): Creation timestamp - `updated_at` (datetime): Last update timestamp - `owner` (Dict[str, str]): Graph owner information +- `folder_path` (Optional[str]): Canonical folder path for the graph (if scoped) diff --git a/python-sdk/morphik.mdx b/python-sdk/morphik.mdx index 8d5b643..2174654 100644 --- a/python-sdk/morphik.mdx +++ b/python-sdk/morphik.mdx @@ -71,6 +71,8 @@ Morphik supports organizing and isolating data by user and folder. This provides +Nested folders are supported across the SDK. Use canonical paths (e.g., `"/projects/alpha/specs"`) when creating or scoping folders, and pass `folder_depth` on retrieval/list/graph helpers to include descendant folders. + For detailed documentation and examples: - [Folder Management](/python-sdk/folders) - Organizing documents by logical groups - [User Management](/python-sdk/users) - Multi-tenant isolation and user-level data management diff --git a/python-sdk/query.mdx b/python-sdk/query.mdx index f8bd9ba..4d7a229 100644 --- a/python-sdk/query.mdx +++ b/python-sdk/query.mdx @@ -24,6 +24,7 @@ Generate completion using relevant chunks as context. 
include_paths: bool = False, prompt_overrides: Optional[Union[QueryPromptOverrides, Dict[str, Any]]] = None, folder_name: Optional[Union[str, List[str]]] = None, + folder_depth: Optional[int] = None, chat_id: Optional[str] = None, schema: Optional[Union[Type[BaseModel], Dict[str, Any]]] = None, llm_config: Optional[Dict[str, Any]] = None, @@ -47,6 +48,7 @@ Generate completion using relevant chunks as context. include_paths: bool = False, prompt_overrides: Optional[Union[QueryPromptOverrides, Dict[str, Any]]] = None, folder_name: Optional[Union[str, List[str]]] = None, + folder_depth: Optional[int] = None, chat_id: Optional[str] = None, schema: Optional[Union[Type[BaseModel], Dict[str, Any]]] = None, llm_config: Optional[Dict[str, Any]] = None, @@ -70,7 +72,8 @@ Generate completion using relevant chunks as context. - `hop_depth` (int, optional): Number of relationship hops to traverse in the graph (1-3). Defaults to 1. - `include_paths` (bool, optional): Whether to include relationship paths in the response. Defaults to False. - `prompt_overrides` (QueryPromptOverrides | Dict[str, Any], optional): Optional customizations for entity extraction, resolution, and query prompts -- `folder_name` (str | List[str], optional): Optional folder scope. Accepts a single folder name or a list of folder names. +- `folder_name` (str | List[str], optional): Optional folder scope. Accepts canonical paths (e.g., `/projects/alpha/specs`) or a list of paths/names. +- `folder_depth` (int, optional): Folder scope depth. `None`/`0` = exact match, `-1` = include all descendants, `n > 0` = include descendants up to `n` levels deep. - `chat_id` (str, optional): Optional chat session ID for persisting conversation history. - `schema` (Type[BaseModel] | Dict[str, Any], optional): Optional schema for structured output, can be a Pydantic model or a JSON schema dict - `llm_config` (Dict[str, Any], optional): Optional LiteLLM-compatible model configuration (e.g., model name, API key, base URL). 
Allows overriding the default LLM configuration on a per-query basis. Defaults to None. @@ -134,6 +137,13 @@ For more advanced filtering patterns, see the [Complex Metadata Filtering cookbo filters={"department": "research"}, temperature=0.7 ) + + nested = db.query( + "List open design questions", + folder_name="/projects/alpha", + folder_depth=-1, + k=6, + ) print(response.completion) @@ -152,6 +162,13 @@ For more advanced filtering patterns, see the [Complex Metadata Filtering cookbo filters={"department": "research"}, temperature=0.7 ) + + nested = await db.query( + "List open design questions", + folder_name="/projects/alpha", + folder_depth=-1, + k=6, + ) print(response.completion) diff --git a/python-sdk/query_document.mdx b/python-sdk/query_document.mdx index ba61427..2a6ee33 100644 --- a/python-sdk/query_document.mdx +++ b/python-sdk/query_document.mdx @@ -41,11 +41,11 @@ description: "Run a one-off Morphik On-the-Fly analysis with optional ingestion - `ingest` (bool): Queue the file for ingestion after analysis. - `metadata` (dict): Metadata supplied with the request. When `schema` yields a JSON object, those fields are merged into this metadata before ingestion. - `use_colpali` (bool): Override the embedding strategy used during ingestion. - - `folder_name` (str | list[str]): Folder scope for the queued ingestion. + - `folder_name` (str | list[str]): Folder scope for the queued ingestion (canonical path or list of paths/names; nested parents are created automatically). - `end_user_id` (str): End-user scope for the queued ingestion. Unsupported keys are ignored. - `filename` (str, optional): Filename override when uploading bytes or file-like objects. -- `folder_name` (str | list[str], optional): Folder scope applied to the inline request. Automatically set when calling from folder helpers; merged into `ingestion_options` if not already present. 
+- `folder_name` (str | list[str], optional): Folder scope applied to the inline request (canonical path or list of paths/names). Automatically set when calling from folder helpers; merged into `ingestion_options` if not already present. - `end_user_id` (str, optional): End-user scope for the inline request. Automatically set when using user scope helpers; merged into `ingestion_options` if not already present. ### Metadata Filters diff --git a/python-sdk/remove_document_from_folder.mdx b/python-sdk/remove_document_from_folder.mdx index 391f0eb..663e739 100644 --- a/python-sdk/remove_document_from_folder.mdx +++ b/python-sdk/remove_document_from_folder.mdx @@ -24,7 +24,7 @@ description: "Remove a document from a folder" ## Parameters -- `folder_id_or_name` (str): Folder identifier. Accepts either the folder's UUID or its name. +- `folder_id_or_name` (str): Folder identifier. Accepts the folder's UUID, name, or canonical path (e.g., `/projects/alpha/specs`; leading slash optional). - `document_id` (str): Identifier of the document to remove. 
## Returns @@ -42,7 +42,7 @@ description: "Remove a document from a folder" folder = db.get_folder("marketing_docs") db.remove_document_from_folder(folder.id, "doc_123") - db.remove_document_from_folder("marketing_docs", "doc_456") + db.remove_document_from_folder("/projects/alpha/specs", "doc_456") ``` @@ -53,7 +53,7 @@ description: "Remove a document from a folder" folder = await db.get_folder("marketing_docs") await db.remove_document_from_folder(folder.id, "doc_123") - await db.remove_document_from_folder("marketing_docs", "doc_456") + await db.remove_document_from_folder("/projects/alpha/specs", "doc_456") ``` diff --git a/python-sdk/retrieve_chunks.mdx b/python-sdk/retrieve_chunks.mdx index eaf010e..185ed96 100644 --- a/python-sdk/retrieve_chunks.mdx +++ b/python-sdk/retrieve_chunks.mdx @@ -13,6 +13,7 @@ description: "Retrieve relevant chunks from Morphik" min_score: float = 0.0, use_colpali: bool = True, folder_name: Optional[Union[str, List[str]]] = None, + folder_depth: Optional[int] = None, padding: int = 0, output_format: Optional[str] = None, query_image: Optional[str] = None, @@ -28,6 +29,7 @@ description: "Retrieve relevant chunks from Morphik" min_score: float = 0.0, use_colpali: bool = True, folder_name: Optional[Union[str, List[str]]] = None, + folder_depth: Optional[int] = None, padding: int = 0, output_format: Optional[str] = None, query_image: Optional[str] = None, @@ -43,7 +45,8 @@ description: "Retrieve relevant chunks from Morphik" - `k` (int, optional): Number of results. Defaults to 4. - `min_score` (float, optional): Minimum similarity threshold. Defaults to 0.0. - `use_colpali` (bool, optional): Whether to use ColPali-style embedding model to retrieve the chunks (only works for documents ingested with `use_colpali=True`). Defaults to True. -- `folder_name` (str | List[str], optional): Optional folder scope. Accepts a single folder name or a list of folder names. +- `folder_name` (str | List[str], optional): Optional folder scope. 
Accepts canonical paths (e.g., `/projects/alpha/specs`) or a list of paths/names. +- `folder_depth` (int, optional): Folder scope depth. `None`/`0` = exact match, `-1` = include all descendants, `n > 0` = include descendants up to `n` levels deep. - `padding` (int, optional): Number of additional chunks/pages to retrieve before and after matched chunks (ColPali only). Defaults to 0. - `output_format` (str, optional): Controls how image chunks are returned: - `"base64"` (default): Returns base64-encoded image data @@ -88,6 +91,12 @@ chunks = db.retrieve_chunks("delta status", filters=filters, k=6) padding=1, output_format="url", # Return image chunks as presigned URLs ) + + nested_chunks = db.retrieve_chunks( + "design decisions", + folder_name="/projects/alpha", + folder_depth=-1, # include nested child folders + ) for chunk in chunks: print(f"Score: {chunk.score}") @@ -112,6 +121,12 @@ chunks = db.retrieve_chunks("delta status", filters=filters, k=6) padding=1, output_format="url", # Return image chunks as presigned URLs ) + + nested_chunks = await db.retrieve_chunks( + "design decisions", + folder_name="/projects/alpha", + folder_depth=-1, + ) for chunk in chunks: print(f"Score: {chunk.score}") diff --git a/python-sdk/retrieve_chunks_grouped.mdx b/python-sdk/retrieve_chunks_grouped.mdx index aaa1142..101f072 100644 --- a/python-sdk/retrieve_chunks_grouped.mdx +++ b/python-sdk/retrieve_chunks_grouped.mdx @@ -14,6 +14,7 @@ description: "Retrieve relevant chunks with grouping for UI display" use_colpali: bool = True, use_reranking: Optional[bool] = None, folder_name: Optional[Union[str, List[str]]] = None, + folder_depth: Optional[int] = None, end_user_id: Optional[str] = None, padding: int = 0, output_format: Optional[str] = None, @@ -34,6 +35,7 @@ description: "Retrieve relevant chunks with grouping for UI display" use_colpali: bool = True, use_reranking: Optional[bool] = None, folder_name: Optional[Union[str, List[str]]] = None, + folder_depth: Optional[int] = 
None, end_user_id: Optional[str] = None, padding: int = 0, output_format: Optional[str] = None, @@ -54,7 +56,8 @@ description: "Retrieve relevant chunks with grouping for UI display" - `min_score` (float, optional): Minimum similarity threshold. Defaults to 0.0. - `use_colpali` (bool, optional): Whether to use ColPali-style embedding model. Defaults to True. - `use_reranking` (bool, optional): Override workspace reranking configuration for this request. -- `folder_name` (str | List[str], optional): Optional folder scope (single name or list of names) +- `folder_name` (str | List[str], optional): Optional folder scope. Accepts canonical paths (e.g., `/projects/alpha/specs`) or a list of paths/names. +- `folder_depth` (int, optional): Folder scope depth. `None`/`0` = exact match, `-1` = include all descendants, `n > 0` = include descendants up to `n` levels deep. - `end_user_id` (str, optional): Optional end-user scope - `padding` (int, optional): Number of additional chunks/pages to retrieve before and after matched chunks. Defaults to 0. - `output_format` (str, optional): Controls how image chunks are returned: @@ -108,7 +111,8 @@ Filters follow the same JSON syntax across the API. See the [Metadata Filtering query="quarterly results", k=3, padding=2, # Get 2 chunks before/after each match - folder_name="reports", + folder_name="/projects/reports", + folder_depth=-1, ) # With knowledge graph enhancement @@ -150,7 +154,8 @@ Filters follow the same JSON syntax across the API. 
See the [Metadata Filtering query="quarterly results", k=3, padding=2, # Get 2 chunks before/after each match - folder_name="reports", + folder_name="/projects/reports", + folder_depth=-1, ) # With knowledge graph enhancement diff --git a/python-sdk/retrieve_docs.mdx b/python-sdk/retrieve_docs.mdx index 6e7e601..55d9448 100644 --- a/python-sdk/retrieve_docs.mdx +++ b/python-sdk/retrieve_docs.mdx @@ -14,6 +14,7 @@ description: "Retrieve relevant documents from Morphik" use_colpali: bool = True, use_reranking: Optional[bool] = None, folder_name: Optional[Union[str, List[str]]] = None, + folder_depth: Optional[int] = None, ) -> List[DocumentResult] ``` @@ -27,6 +28,7 @@ description: "Retrieve relevant documents from Morphik" use_colpali: bool = True, use_reranking: Optional[bool] = None, folder_name: Optional[Union[str, List[str]]] = None, + folder_depth: Optional[int] = None, ) -> List[DocumentResult] ``` @@ -40,7 +42,8 @@ description: "Retrieve relevant documents from Morphik" - `min_score` (float, optional): Minimum similarity threshold. Defaults to 0.0. - `use_colpali` (bool, optional): Whether to use ColPali-style embedding model to retrieve the documents (only works for documents ingested with `use_colpali=True`). Defaults to True. - `use_reranking` (bool, optional): Override workspace reranking configuration for this request. -- `folder_name` (str | List[str], optional): Optional folder scope. Accepts a single folder name or a list of folder names. +- `folder_name` (str | List[str], optional): Optional folder scope. Accepts canonical paths (e.g., `/projects/alpha/specs`) or a list of paths/names. +- `folder_depth` (int, optional): Folder scope depth. `None`/`0` = exact match, `-1` = include all descendants, `n > 0` = include descendants up to `n` levels deep. 
## Metadata Filters @@ -76,6 +79,12 @@ docs = db.retrieve_docs("budget summary", filters=filters, k=5) k=5, min_score=0.5 ) + + nested_docs = db.retrieve_docs( + "design notes", + folder_name="/projects/alpha", + folder_depth=-1, + ) for doc in docs: print(f"Score: {doc.score}") @@ -95,6 +104,12 @@ docs = db.retrieve_docs("budget summary", filters=filters, k=5) k=5, min_score=0.5 ) + + nested_docs = await db.retrieve_docs( + "design notes", + folder_name="/projects/alpha", + folder_depth=-1, + ) for doc in docs: print(f"Score: {doc.score}") diff --git a/python-sdk/search_documents.mdx b/python-sdk/search_documents.mdx index 98b5655..f4f01c1 100644 --- a/python-sdk/search_documents.mdx +++ b/python-sdk/search_documents.mdx @@ -11,6 +11,7 @@ description: "Search for documents by name or filename" limit: int = 10, filters: Optional[Dict[str, Any]] = None, folder_name: Optional[Union[str, List[str]]] = None, + folder_depth: Optional[int] = None, end_user_id: Optional[str] = None, ) -> List[Document] ``` @@ -22,6 +23,7 @@ description: "Search for documents by name or filename" limit: int = 10, filters: Optional[Dict[str, Any]] = None, folder_name: Optional[Union[str, List[str]]] = None, + folder_depth: Optional[int] = None, end_user_id: Optional[str] = None, ) -> List[Document] ``` @@ -33,7 +35,8 @@ description: "Search for documents by name or filename" - `query` (str): Search query for document names/filenames - `limit` (int, optional): Maximum number of documents to return. Defaults to 10. - `filters` (Dict[str, Any], optional): Optional metadata filters -- `folder_name` (str | List[str], optional): Optional folder scope (single name or list of names) +- `folder_name` (str | List[str], optional): Optional folder scope (canonical path or list of paths/names) +- `folder_depth` (int, optional): Folder scope depth. `None`/`0` = exact match, `-1` = include all descendants, `n > 0` = include descendants up to `n` levels deep. 
- `end_user_id` (str, optional): Optional end-user scope ## Returns @@ -79,7 +82,8 @@ docs = db.search_documents("report", filters=filters) # Search within specific folders docs = db.search_documents( query="contract", - folder_name=["legal", "hr"], + folder_name="/projects/legal", + folder_depth=-1, ) # Search scoped to an end user @@ -109,7 +113,8 @@ docs = db.search_documents("report", filters=filters) # Search within specific folders docs = await db.search_documents( query="contract", - folder_name=["legal", "hr"], + folder_name="/projects/legal", + folder_depth=-1, ) # Search scoped to an end user @@ -124,5 +129,5 @@ docs = db.search_documents("report", filters=filters) ## Notes - This method searches document names and filenames, not document content. For content-based search, use [`retrieve_chunks`](./retrieve_chunks) or [`retrieve_docs`](./retrieve_docs). -- The `folder_name` parameter accepts either a single string or a list of folder names for multi-folder search. +- The `folder_name` parameter accepts a canonical path (leading slash optional) or a list of paths/names; combine with `folder_depth` to include descendants. - Results are returned sorted by relevance to the search query. 
diff --git a/python-sdk/update_graph.mdx b/python-sdk/update_graph.mdx index 52e78c2..965b6e8 100644 --- a/python-sdk/update_graph.mdx +++ b/python-sdk/update_graph.mdx @@ -16,7 +16,9 @@ description: "Update an existing knowledge graph with new documents" updated_graph = db.update_graph( name="research_graph", additional_filters={"category": "new_research"}, - additional_documents=["doc_123", "doc_456"] + additional_documents=["doc_123", "doc_456"], + folder_name="/projects/alpha", + folder_depth=-1, ) print(f"Graph now has {len(updated_graph.entities)} entities") @@ -33,7 +35,9 @@ description: "Update an existing knowledge graph with new documents" updated_graph = await db.update_graph( name="research_graph", additional_filters={"category": "new_research"}, - additional_documents=["doc_123", "doc_456"] + additional_documents=["doc_123", "doc_456"], + folder_name="/projects/alpha", + folder_depth=-1, ) print(f"Graph now has {len(updated_graph.entities)} entities") @@ -48,6 +52,9 @@ description: "Update an existing knowledge graph with new documents" - `additional_filters` (Dict[str, Any], optional): Optional additional metadata filters to determine which new documents to include - `additional_documents` (List[str], optional): Optional list of additional document IDs to include - `prompt_overrides` (GraphPromptOverrides | Dict[str, Any], optional): Optional customizations for entity extraction and resolution prompts +- `folder_name` (str | List[str], optional): Optional folder scope (canonical path or list of paths/names) +- `folder_depth` (int, optional): Folder scope depth. `None`/`0` = exact match, `-1` = include all descendants, `n > 0` = include descendants up to `n` levels deep. +- `end_user_id` (str, optional): Optional end-user scope ## Returns @@ -306,4 +313,4 @@ Return the extracted entities in JSON format with the following structure: - The graph name must match an existing graph that the user has access to. 
- Either `additional_filters` or `additional_documents` (or both) should be provided; otherwise, no new content will be added to the graph. - When using `additional_filters`, these are applied in addition to any filters used during graph creation. -- The `prompt_overrides` are applied only to this update operation and do not permanently change the configuration of the graph. \ No newline at end of file +- The `prompt_overrides` are applied only to this update operation and do not permanently change the configuration of the graph. diff --git a/self-hosting.mdx b/self-hosting.mdx index 13140e2..ab3aba6 100644 --- a/self-hosting.mdx +++ b/self-hosting.mdx @@ -14,6 +14,34 @@ For users who need to run Morphik on their own infrastructure, we provide two in Please ensure that you have Python 3.12 installed on your machine. Guides for installing Python can be found on the [Python website](https://www.python.org/downloads/release/python-3129/). + + Morphik requires the Rust toolchain for performance-critical operations (binary quantization, base64 encoding, text processing). Install Rust using rustup: + + + + ```bash + curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh + ``` + + After installation, restart your terminal or run: + ```bash + source $HOME/.cargo/env + ``` + + + Download and run the installer from [rustup.rs](https://rustup.rs/), or use winget: + ```powershell + winget install Rustlang.Rustup + ``` + + + + Verify the installation: + ```bash + rustc --version + cargo --version + ``` + Morphik requires PostgreSQL with the pgvector extension for vector storage and similarity search capabilities. Follow the installation instructions for your operating system: