Merge pull request #35 from TaskarCenterAtUW/feature-readme-update

susrisha · web-flow · commit 95ad04c12216 · 2025-03-20T17:39:14.000+05:30
Readme and usage updated
diff --git a/README.md b/README.md
@@ -1,22 +1,95 @@
 # TDEI-python-osw-quality-metric
-Quality metric calculator for OSW record
+Quality metric calculator for OSW record.
+
+# Introduction:
+This service calculates the specific quality metric of a given OSW dataset. There are two algorithms that it supports:
+- fixed calculation (assumes a random score)
+- xn calculation. Calculates the intersection quality metric based on either input sub-polygons or Voronoi-generated polygons.
+
+## Requirements
+python 3.10
+
+## How to run the project with Python3.10
+#### Create virtual env
+
+`python3.10 -m venv .venv`
+
+`source .venv/bin/activate`
+
+#### Install requirements
+
+`pip install -r requirements.txt`
+
+#### Set up env file, create a .env file at project root level 
+
+```shell
+QUALITY_REQ_TOPIC
+QUALITY_REQ_SUB
+QUALITY_RES_TOPIC
+PROVIDER=Azure
+QUEUECONNECTION=Endpoint=sb://xxxxxxxxxxxxx
+STORAGECONNECTION=DefaultEndpointsProtocol=https;xxxxxxxxxxxxx
+MAX_CONCURRENT_MESSAGES=xxx # Optional; if not provided, defaults to 1
+PARTITION_COUNT=xx # Optional; number of partitions to use for the dask library. Defaults to 2
+```
+Note: Replace the endpoints with the actual endpoints of the environment you want to run the service in
+
+`MAX_CONCURRENT_MESSAGES` is the maximum number of concurrent messages that the service can handle. If not provided, defaults to 1.
+
+### Run the Server 
+
+`uvicorn src.main:app --reload`
+
+remove `--reload` for non-debug mode
+
+
+### Run the Server
+
+`uvicorn src.main:app --reload`
+
+### Run Unit tests
+
+####  Run Coverage
+`python -m coverage run --source=src -m unittest discover -s tests/unit_tests`
+
+To run a single test use
+
+`python -m unittest tests.unit_tests.service.test_osw_confidence_metric_calculator.TestOSWConfidenceMetric.test_calculate_score`
+
+####  Run Coverage Report
+`coverage report`
+
+####  Run Coverage HTML report
+`coverage html`
 
 
 # Incoming message
 
 ```json
 {
-    "datasetId":"",
-    "intersection_file":""
-}
+    "messageType": "mettric-calculation",
+    "messageId": "message-id-from-msg",
+    "data": {
+      "jobId": "0b41ebc5-350c-42d3-90af-3af4ad3628fb",
+      "data_file": "https://tdeisamplestorage.blob.core.windows.net/osw/test/wenatchee.zip",
+      "algorithm": "fixed",
+      "sub_regions_file":""
+    }
+  }
 
 ```
 
 # Outgoing message
 ```json
 {
-    "datasetId":"",
-    "metrics_file":""
+    "status":"",
+    "message":"",
+    "success":true/false,
+    "dataset_url":"",
+    "qm_dataset_url":""
 }
 
 ```
+
+# Run the metrics locally
+- Use [test.py](./test.py) to run the metrics on any dataset locally.
diff --git a/src/services/osw_qm_calculator_service.py b/src/services/osw_qm_calculator_service.py
@@ -45,7 +45,7 @@ def calculate_quality_metric(self, input_file, algorithm_names, output_path, ixn
             input_file (str): The path to the input file. (dataset.zip file)
             algorithm_names (list): A list of algorithm names to be used for calculating quality metrics.
             output_path (str): The path to the output zip file.
-
+            ixn_file (str): The path to the sub-regions polygon file.
         Returns:
             None
 
diff --git a/test.py b/test.py
@@ -1,33 +1,13 @@
 # # Testing document for testing a simple osm input.
 
 # from src.config import Config
-# from src.services.osw_qm_calculator_service import OswQmCalculator
-# import os
-# # import urllib.parse as urlparse
-# from urllib.parse import urlparse
+from src.services.osw_qm_calculator_service import OswQmCalculator
 
-# config = Config()
-
-# osw_qm_calculator = OswQmCalculator()
-
-# asset_folder = config.get_assets_folder()
-# download_folder = config.get_download_folder()
-# print(asset_folder)
-# print(download_folder)
-# test_path = os.path.join(asset_folder, 'osm-input.zip')
-# test_op_path = os.path.join(download_folder, 'osm-output.zip')
-# # osw_qm_calculator.calculate_quality_metric(test_path, ['fixed'],test_op_path)
-
-# # def get_directory_path(remote_url:str)-> str:
-# #         # https://tdeisamplestorage.blob.core.windows.net/osw/test_upload/df/fff/500mb_file.zip
-# #         # should give output test_upload/df/fff/
-# #         parsed_url = urlparse(remote_url)
-# #         dirname = os.path.dirname(parsed_url.path)
-# #         folder_path = dirname.split('/')[2:]
-# #         folder_path = '/'.join(folder_path)
-# #         return folder_path
-# # print(get_directory_path('https://tdeisamplestorage.blob.core.windows.net/osw/test_upload/df/fff/500mb_file.zip'))
-algorithms = "fixed"
+osw_qm_calculator = OswQmCalculator(cores_to_use=1) # Configure the number of cores for dask here.
+algorithms = "fixed" # Algorithms to use for calculating quality metrics.
 algorithm_names = algorithms.split(',')
 for algorithm in algorithm_names:
-    print(algorithm)
+    print(algorithm)
+
+result = osw_qm_calculator.calculate_quality_metric('src/assets/osm-input.zip', ['fixed'],'downloads/osm-output.zip') # Calculation code.
+print(result)