From 6a24ddd117a3a24ec2f5f38f43cdafb64b99c08d Mon Sep 17 00:00:00 2001 From: Naresh Kumar D Date: Thu, 20 Mar 2025 17:36:30 +0530 Subject: [PATCH] Readme and usage updated Updated the usage and readme for the project. --- README.md | 85 +++++++++++++++++++++-- src/services/osw_qm_calculator_service.py | 2 +- test.py | 34 ++------- 3 files changed, 87 insertions(+), 34 deletions(-) diff --git a/README.md b/README.md index b3151d4..4ccb932 100644 --- a/README.md +++ b/README.md @@ -1,22 +1,95 @@ # TDEI-python-osw-quality-metric -Quality metric calculator for OSW record +Quality metric calculator for OSW record. + +# Introduction: +This service calculates the specific quality metric of a given OSW dataset. There are two algorithms that it supports: +- fixed calculation (assumes a random score) +- xn calculation. Calculates the intersection quality metric based on either input sub-polygons or Voronoi-generated polygons. + +## Requirements +python 3.10 + +## How to run the project with Python3.10 +#### Create virtual env + +`python3.10 -m venv .venv` + +`source .venv/bin/activate` + +#### Install requirements + +`pip install -r requirements.txt` + +#### Set up env file, create a .env file at project root level + +```shell +QUALITY_REQ_TOPIC +QUALITY_REQ_SUB +QUALITY_RES_TOPIC +PROVIDER=Azure +QUEUECONNECTION=Endpoint=sb://xxxxxxxxxxxxx +STORAGECONNECTION=DefaultEndpointsProtocol=https;xxxxxxxxxxxxx +MAX_CONCURRENT_MESSAGES=xxx # Optional if not provided defaults to 1 +PARTITION_COUNT=xx # Optional number of partitions to use for dask library. defaults to 2 +``` +Note: Replace the endpoints with the actual endpoints of the environment you want to run the service in + +`MAX_CONCURRENT_MESSAGES` is the maximum number of concurrent messages that the service can handle. 
If not provided, defaults to 1 + +### Run the Server + +`uvicorn src.main:app --reload` + +remove `--reload` for non-debug mode + + +### Run the Server + +`uvicorn src.main:app --reload` + +### Run Unit tests + +#### Run Coverage +`python -m coverage run --source=src -m unittest discover -s tests/unit_tests` + +To run a single test use + +`python -m unittest tests.unit_tests.service.test_osw_confidence_metric_calculator.TestOSWConfidenceMetric.test_calculate_score` + +#### Run Coverage Report +`coverage report` + +#### Run Coverage HTML report +`coverage html` # Incoming message ```json { - "datasetId":"", - "intersection_file":"" -} + "messageType": "mettric-calculation", + "messageId": "message-id-from-msg", + "data": { + "jobId": "0b41ebc5-350c-42d3-90af-3af4ad3628fb", + "data_file": "https://tdeisamplestorage.blob.core.windows.net/osw/test/wenatchee.zip", + "algorithm": "fixed", + "sub_regions_file":"" + } + } ``` # Outgoing message ```json { - "datasetId":"", - "metrics_file":"" + "status":"", + "message":"", + "success":true/false, + "dataset_url":"", + "qm_dataset_url":"" } ``` + +# Run the metrics locally +- Use [test.py](./test.py) to run the metrics on any dataset locally. \ No newline at end of file diff --git a/src/services/osw_qm_calculator_service.py b/src/services/osw_qm_calculator_service.py index 8e4adb2..389fb43 100644 --- a/src/services/osw_qm_calculator_service.py +++ b/src/services/osw_qm_calculator_service.py @@ -45,7 +45,7 @@ def calculate_quality_metric(self, input_file, algorithm_names, output_path, ixn input_file (str): The path to the input file. (dataset.zip file) algorithm_names (list): A list of algorithm names to be used for calculating quality metrics. output_path (str): The path to the output zip file. - + ixn_file (str): The path to the sub-regions polygon file. 
Returns: None diff --git a/test.py b/test.py index 29465be..a31bfa5 100644 --- a/test.py +++ b/test.py @@ -1,33 +1,13 @@ # # Testing document for testing a simple osm input. # from src.config import Config -# from src.services.osw_qm_calculator_service import OswQmCalculator -# import os -# # import urllib.parse as urlparse -# from urllib.parse import urlparse +from src.services.osw_qm_calculator_service import OswQmCalculator -# config = Config() - -# osw_qm_calculator = OswQmCalculator() - -# asset_folder = config.get_assets_folder() -# download_folder = config.get_download_folder() -# print(asset_folder) -# print(download_folder) -# test_path = os.path.join(asset_folder, 'osm-input.zip') -# test_op_path = os.path.join(download_folder, 'osm-output.zip') -# # osw_qm_calculator.calculate_quality_metric(test_path, ['fixed'],test_op_path) - -# # def get_directory_path(remote_url:str)-> str: -# # # https://tdeisamplestorage.blob.core.windows.net/osw/test_upload/df/fff/500mb_file.zip -# # # should give output test_upload/df/fff/ -# # parsed_url = urlparse(remote_url) -# # dirname = os.path.dirname(parsed_url.path) -# # folder_path = dirname.split('/')[2:] -# # folder_path = '/'.join(folder_path) -# # return folder_path -# # print(get_directory_path('https://tdeisamplestorage.blob.core.windows.net/osw/test_upload/df/fff/500mb_file.zip')) -algorithms = "fixed" +osw_qm_calculator = OswQmCalculator(cores_to_use=1) # Configure the number of cores for dask here. +algorithms = "fixed" # Algorithms to use for calculating quality metrics. algorithm_names = algorithms.split(',') for algorithm in algorithm_names: - print(algorithm) \ No newline at end of file + print(algorithm) + +result = osw_qm_calculator.calculate_quality_metric('src/assets/osm-input.zip', ['fixed'],'downloads/osm-output.zip') # Calculation code. +print(result) \ No newline at end of file