From 3e5ea49b407910fd3ec62e3be29cba54799294b4 Mon Sep 17 00:00:00 2001 From: skyflow-himanshu Date: Tue, 17 Feb 2026 16:54:01 +0530 Subject: [PATCH 1/2] SK-1964: add sample for handling deidentify file response in async for python sdk --- samples/detect_api/deidentify_file_async.py | 130 ++++++++++++++++++++ 1 file changed, 130 insertions(+) create mode 100644 samples/detect_api/deidentify_file_async.py diff --git a/samples/detect_api/deidentify_file_async.py b/samples/detect_api/deidentify_file_async.py new file mode 100644 index 0000000..2c690f2 --- /dev/null +++ b/samples/detect_api/deidentify_file_async.py @@ -0,0 +1,130 @@ +from skyflow.error import SkyflowError +from skyflow import Env, Skyflow, LogLevel +from skyflow.utils.enums import DetectEntities, MaskingMethod, DetectOutputTranscriptions +from skyflow.vault.detect import DeidentifyFileRequest, TokenFormat, Transformations, DateTransformation, Bleep, FileInput +from concurrent.futures import ThreadPoolExecutor + +""" + * Skyflow Deidentify File Example + * + * This sample demonstrates how to use all available options for deidentifying files + * using an asynchronous approach. + * Supported file types: images (jpg, png, etc.), pdf, audio (mp3, wav), documents, + * spreadsheets, presentations, structured text. +""" + +def perform_file_deidentification_async(): + try: + # Step 1: Configure Credentials + credentials = { + 'path': '/path/to/credentials.json' # Path to credentials file + } + + # Step 2: Configure Vault + vault_config = { + 'vault_id': '', # Replace with your vault ID + 'cluster_id': '', # Replace with your cluster ID + 'env': Env.PROD, # Deployment environment + 'credentials': credentials + } + + # Step 3: Configure & Initialize Skyflow Client + skyflow_client = ( + Skyflow.builder() + .add_vault_config(vault_config) + .set_log_level(LogLevel.INFO) # Use LogLevel.ERROR in production + .build() + ) + + # Step 4: Create File Object + file_path = '' # Replace with your file path + file = open(file_path, 'rb') + # Step 5: Configure Deidentify File Request with all options + deidentify_request = DeidentifyFileRequest( + file=FileInput(file), # File to de-identify (can also provide a file path) + entities=[DetectEntities.SSN, DetectEntities.CREDIT_CARD], # Entities to detect + allow_regex_list=[''], # Optional: Patterns to allow + restrict_regex_list=[''], # Optional: Patterns to restrict + + # Token format configuration + token_format=TokenFormat( + vault_token=[DetectEntities.SSN], # Use vault tokens for these entities + ), + + # Optional: Custom transformations + # transformations=Transformations( + # shift_dates=DateTransformation( + # max_days=30, + # min_days=10, + # entities=[DetectEntities.DOB] + # ) + # ), + + # Output configuration + output_directory='', # Where to save processed file + wait_time=15, # Max wait time in seconds (max 64) + + # Image-specific options + # output_processed_image=True, # Include processed image in output + # output_ocr_text=True, # Include OCR text in response + # masking_method=MaskingMethod.BLACKBOX, # Masking method for images + + # PDF-specific options + # pixel_density=15, # Pixel density for PDF processing + # max_resolution=2000, # Max resolution for PDF + + # Audio-specific options + # output_processed_audio=True, # Include processed audio + # output_transcription=DetectOutputTranscriptions.PLAINTEXT_TRANSCRIPTION, # Transcription type + + # Audio bleep configuration + + # bleep=Bleep( + # gain=5, # Loudness in dB + # frequency=1000, # Pitch in Hz + # start_padding=0.1, # Padding at start (seconds) + # stop_padding=0.2 # Padding at end (seconds) + # ) + ) + + # Step 6: Call deidentifyFile API + + # Create a thread pool executor + executor = ThreadPoolExecutor(max_workers=1) + + # Wrapper function to call the SDK + def call_deidentify(): + try: + return skyflow_client.detect().deidentify_file(deidentify_request) + except Exception as e: + return e + + # Submit the deidentify_file call to run in background thread + future = executor.submit(call_deidentify) + + def handle_response(future): + result = future.result() + if isinstance(result, SkyflowError): + # Handle Skyflow-specific errors + print('\nSkyflow Error:', { + 'http_code': result.http_code, + 'grpc_code': result.grpc_code, + 'http_status': result.http_status, + 'message': result.message, + 'details': result.details + }) + elif isinstance(result, Exception): + # Handle unexpected errors + print('Unexpected Error:', result) + else: + # Handle Successful Response + print("\nDeidentify File Response:", result) + + future.add_done_callback(handle_response) + + executor.shutdown(wait=True) + + except Exception as error: + # Handle unexpected errors + print('Unexpected Error:', error) + From 14d41aa840347f4897f485fd22aa5530a3d27a54 Mon Sep 17 00:00:00 2001 From: skyflow-himanshu Date: Wed, 18 Feb 2026 17:38:22 +0530 Subject: [PATCH 2/2] SK-1964: addressed review comments --- samples/detect_api/deidentify_file_async.py | 74 ++++++++++----------- 1 file changed, 34 insertions(+), 40 deletions(-) diff --git a/samples/detect_api/deidentify_file_async.py b/samples/detect_api/deidentify_file_async.py index 2c690f2..579dab2 100644 --- a/samples/detect_api/deidentify_file_async.py +++ b/samples/detect_api/deidentify_file_async.py @@ -38,11 +38,10 @@ def perform_file_deidentification_async(): # Step 4: Create File Object file_path = '' # Replace with your file path - file = open(file_path, 'rb') - # Step 5: Configure Deidentify File Request with all options + deidentify_request = DeidentifyFileRequest( - file=FileInput(file), # File to de-identify (can also provide a file path) - entities=[DetectEntities.SSN, DetectEntities.CREDIT_CARD], # Entities to detect + file=FileInput(file_path=file_path), # File to de-identify + # entities=[DetectEntities.SSN, DetectEntities.CREDIT_CARD], # Entities to detect allow_regex_list=[''], # Optional: Patterns to allow restrict_regex_list=[''], # Optional: Patterns to restrict @@ -60,22 +59,22 @@ def perform_file_deidentification_async(): # ) # ), - # Output configuration - output_directory='', # Where to save processed file - wait_time=15, # Max wait time in seconds (max 64) + # Output configuration + output_directory='', # Where to save processed file + wait_time=15, # Max wait time in seconds (max 64) # Image-specific options - # output_processed_image=True, # Include processed image in output - # output_ocr_text=True, # Include OCR text in response - # masking_method=MaskingMethod.BLACKBOX, # Masking method for images + output_processed_image=True, # Include processed image in output + output_ocr_text=True, # Include OCR text in response + masking_method=MaskingMethod.BLACKBOX, # Masking method for images # PDF-specific options - # pixel_density=15, # Pixel density for PDF processing - # max_resolution=2000, # Max resolution for PDF + pixel_density=15, # Pixel density for PDF processing + max_resolution=2000, # Max resolution for PDF # Audio-specific options - # output_processed_audio=True, # Include processed audio - # output_transcription=DetectOutputTranscriptions.PLAINTEXT_TRANSCRIPTION, # Transcription type + output_processed_audio=True, # Include processed audio + output_transcription=DetectOutputTranscriptions.PLAINTEXT_TRANSCRIPTION, # Transcription type # Audio bleep configuration @@ -86,39 +85,34 @@ def perform_file_deidentification_async(): # stop_padding=0.2 # Padding at end (seconds) # ) ) - - # Step 6: Call deidentifyFile API # Create a thread pool executor executor = ThreadPoolExecutor(max_workers=1) - # Wrapper function to call the SDK - def call_deidentify(): - try: - return skyflow_client.detect().deidentify_file(deidentify_request) - except Exception as e: - return e - - # Submit the deidentify_file call to run in background thread - future = executor.submit(call_deidentify) + future = executor.submit( + lambda: skyflow_client.detect().deidentify_file(deidentify_request) + ) def handle_response(future): + exception = future.exception() + if exception is not None: + if isinstance(exception, SkyflowError): + # Handle Skyflow-specific errors + print('\nSkyflow Error:', { + 'http_code': exception.http_code, + 'grpc_code': exception.grpc_code, + 'http_status': exception.http_status, + 'message': exception.message, + 'details': exception.details + }) + else: + # Handle unexpected errors + print('Unexpected Error:', exception) + return + + # Handle Successful Response result = future.result() - if isinstance(result, SkyflowError): - # Handle Skyflow-specific errors - print('\nSkyflow Error:', { - 'http_code': result.http_code, - 'grpc_code': result.grpc_code, - 'http_status': result.http_status, - 'message': result.message, - 'details': result.details - }) - elif isinstance(result, Exception): - # Handle unexpected errors - print('Unexpected Error:', result) - else: - # Handle Successful Response - print("\nDeidentify File Response:", result) + print("\nDeidentify File Response:", result) future.add_done_callback(handle_response)