diff --git a/reports/README.md b/reports/README.md index 0f03393..2364d06 100644 --- a/reports/README.md +++ b/reports/README.md @@ -8,6 +8,7 @@ The following reports are currently available: | --- | --- | | abundance_vs_frequency_scatter.ipynb | Abundance vs. frequency scatter plot for each species in a category at a location, indicating rarity at that location | | annual_category_location_heatmap.ipynb | Heatmap of number of sightings of each species in a category at a location during a specified year | +| category_composition_chart.ipynb | Pie chart showing the species composition of a category at a location | | category_life_list.ipynb | Life list for the species in a category, including total sightings and location count | | location_richness_map.ipynb | Interactive map of species richness (number of unique species sighted) by location | | year_on_year_species_location_trend.ipynb | Year-on-year sightings trend for a species, optionally limited to one location | diff --git a/reports/annual_category_location_heatmap.ipynb b/reports/annual_category_location_heatmap.ipynb index 2c4f134..2c94d01 100644 --- a/reports/annual_category_location_heatmap.ipynb +++ b/reports/annual_category_location_heatmap.ipynb @@ -117,7 +117,7 @@ "locations_list = \"-\".join(locations)\n", "clean_locations = re.sub(\"[^0-9a-zA-Z ]+\", \"\", locations_list).replace(\" \", \"-\")\n", "export_file_path = export_folder_path / f\"{year}-{category}-{clean_locations}-Heatmap.xlsx\"\n", - "heatmap_data.to_excel(export_file_path.absolute(), sheet_name=\"Sightings\", index=False)\n", + "heatmap_data.to_excel(export_file_path.absolute(), sheet_name=\"Sightings\", index=True)\n", "\n", "# Print the heatmap data\n", "with pd.option_context('display.max_rows', None,\n", @@ -176,7 +176,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.4" + "version": "3.13.2" }, "vscode": { "interpreter": { diff --git a/reports/category_composition_chart.ipynb b/reports/category_composition_chart.ipynb new file mode 100644 index 0000000..11dd246 --- /dev/null +++ b/reports/category_composition_chart.ipynb @@ -0,0 +1,187 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Category Composition Pie Chart\n", + "\n", + "This notebook generates and exports a pie chart showing the composition of a category at a location. To use it, update the location, category and required export format in the first code cell, below, before running the notebook." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Location to report on\n", + "location = \"\"\n", + "\n", + "# Category to report on\n", + "category = \"\"\n", + "\n", + "# Export format for the trend chart:\n", + "# PNG - export as PNG image\n", + "# PDF - export as PDF file\n", + "# - do not export\n", + "export_format = \"PNG\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from pathlib import Path\n", + "import sqlparse\n", + "\n", + "# Read the query file\n", + "query_file_path = Path(\"sql\") / \"category_composition.sql\"\n", + "with open(query_file_path.absolute(), \"r\") as f:\n", + " query = f.read().replace(\"\\n\", \" \")\n", + "\n", + "# Replace the location and year placeholders\n", + "query = query.replace(\"$LOCATION\", location) \\\n", + " .replace(\"$CATEGORY\", category)\n", + "\n", + "# Show a pretty-printed form of the query\n", + "print(sqlparse.format(query, reindent=True, keyword_case='upper'))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import sqlite3\n", + "import os\n", + "\n", + "# Connect to the database, execute the query and read the results into a dataframe\n", + "database_path = os.environ[\"NATURE_RECORDER_DB\"]\n", + "connection = sqlite3.connect(database_path)\n", + "df = pd.read_sql_query(query, connection, parse_dates=[\"Date\"])\n", + "\n", + "# Check there is some data\n", + "if not df.shape[0]:\n", + " message = f\"No data found for category '{category}' at location '{location}'\"\n", + " raise ValueError(message)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import re\n", + "\n", + "# Group by species and sum number of individuals\n", + "species_counts = df.groupby(\"Species\")[\"Count\"].sum()\n", + "\n", + "# Sort values (optional, for nicer plots)\n", + "species_counts = species_counts.sort_values(ascending=False)\n", + "\n", + "# Create the folder to hold exported reports\n", + "export_folder_path = Path(\"exported\")\n", + "export_folder_path.mkdir(parents=True, exist_ok=True)\n", + "\n", + "# Export the data to Excel\n", + "clean_location = re.sub(\"[^0-9a-zA-Z ]+\", \"\", location).replace(\" \", \"-\")\n", + "clean_category = re.sub(\"[^0-9a-zA-Z ]+\", \"\", category).replace(\" \", \"-\")\n", + "export_file_name = f\"{clean_category}-{clean_location}-Category-Composition\"\n", + "export_file_path = export_folder_path / f\"{export_file_name}.xlsx\"\n", + "species_counts.to_excel(export_file_path.absolute(), sheet_name=\"Category Composition\", index=True)\n", + "\n", + "# Print the scatter plot data\n", + "with pd.option_context('display.max_rows', None,\n", + " 'display.max_columns', None,\n", + " 'display.precision', 3,\n", + " ):\n", + " display(species_counts)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "import matplotlib.pyplot as plt\n", + "\n", + "# Prepare data\n", + "labels = species_counts.index\n", + "sizes = species_counts.values\n", + "percentages = sizes / sizes.sum() * 100\n", + "legend_labels = [f\"{label} ({pct:.1f}%)\" for label, pct in zip(labels, percentages)]\n", + "\n", + "# Create figure with extra vertical space for legend\n", + "fig, ax = plt.subplots(figsize=(8, 8))\n", + "\n", + "wedges, _ = ax.pie(sizes, startangle=90, counterclock=False)\n", + "\n", + "# Equal aspect ratio\n", + "ax.set_aspect('equal')\n", + "ax.set_title(f\"Composition of '{category}' Category by Species\")\n", + "\n", + "# Remove tight_layout, manually control layout instead\n", + "fig.subplots_adjust(bottom=0.3) # Increase this if needed\n", + "\n", + "# Place legend fully outside the plot\n", + "ax.legend(wedges, legend_labels,\n", + " title=\"Species\",\n", + " loc='upper center',\n", + " bbox_to_anchor=(0.5, -0.005),\n", + " ncol=3,\n", + " frameon=False)\n", + "\n", + "\n", + "# Export to PNG\n", + "if export_format.casefold() == \"png\":\n", + " export_file_path = export_folder_path / f\"{export_file_name}.png\"\n", + " plt.savefig(export_file_path.absolute(), format=\"png\", dpi=300, bbox_inches=\"tight\")\n", + "\n", + "# Export to PDF\n", + "if export_format.casefold() == \"pdf\":\n", + " export_file_path = export_folder_path / f\"{export_file_name}.pdf\"\n", + " plt.savefig(export_file_path.absolute(), format=\"pdf\", bbox_inches=\"tight\")\n", + "\n", + "# Show the plot\n", + "plt.show()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.13.2" + }, + "orig_nbformat": 4, + "vscode": { + "interpreter": { + "hash": "f085c86085609b1ab2f295d8cd5b519618e19fd591a6919f4ec2f9290a6745f6" + } + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/reports/category_life_list.ipynb b/reports/category_life_list.ipynb index cc75363..ae2fb34 100644 --- a/reports/category_life_list.ipynb +++ b/reports/category_life_list.ipynb @@ -115,7 +115,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "venv", "language": "python", "name": "python3" }, @@ -133,7 +133,7 @@ }, "vscode": { "interpreter": { - "hash": "7a792fcb311f9eb9f3c1b942a8c87ada8484712b89b670347c16a1088e0a1f69" + "hash": "f085c86085609b1ab2f295d8cd5b519618e19fd591a6919f4ec2f9290a6745f6" } } }, diff --git a/reports/location_richness_map.ipynb b/reports/location_richness_map.ipynb index 39b0fe0..6f43e86 100644 --- a/reports/location_richness_map.ipynb +++ b/reports/location_richness_map.ipynb @@ -170,7 +170,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "venv", "language": "python", "name": "python3" }, @@ -189,7 +189,7 @@ "orig_nbformat": 4, "vscode": { "interpreter": { - "hash": "7a792fcb311f9eb9f3c1b942a8c87ada8484712b89b670347c16a1088e0a1f69" + "hash": "f085c86085609b1ab2f295d8cd5b519618e19fd591a6919f4ec2f9290a6745f6" } } }, diff --git a/reports/sql/category_composition.sql b/reports/sql/category_composition.sql new file mode 100644 index 0000000..0aab716 --- /dev/null +++ b/reports/sql/category_composition.sql @@ -0,0 +1,7 @@ +SELECT l.Name AS 'Location', sp.Name AS 'Species', c.Name AS 'Category', DATE( s.Date ) AS 'Date', IFNULL( s.Number, 1 ) AS 'Count' +FROM SIGHTINGS s +INNER JOIN SPECIES sp ON sp.Id = s.SpeciesId +INNER JOIN CATEGORIES c ON c.Id = sp.CategoryId +INNER JOIN LOCATIONS l ON l.Id = s.LocationId +WHERE l.Name = '$LOCATION' +AND c.Name = "$CATEGORY"; diff --git a/reports/year_on_year_species_location_trend.ipynb b/reports/year_on_year_species_location_trend.ipynb index a5acf93..f08697c 100644 --- a/reports/year_on_year_species_location_trend.ipynb +++ b/reports/year_on_year_species_location_trend.ipynb @@ -188,7 +188,12 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.4" + "version": "3.13.2" + }, + "vscode": { + "interpreter": { + "hash": "f085c86085609b1ab2f295d8cd5b519618e19fd591a6919f4ec2f9290a6745f6" + } } }, "nbformat": 4,