diff --git a/deploy/playbooks/04_cron.yml b/deploy/playbooks/04_cron.yml
new file mode 100644
index 0000000..76284f1
--- /dev/null
+++ b/deploy/playbooks/04_cron.yml
@@ -0,0 +1,9 @@
+- name: Scheduled tasks using the bot user
+  hosts: intbot_app
+
+  tasks:
+    - name: "Download pretalx data every hour"
+      ansible.builtin.cron:
+        name: "Download pretalx data every hour"
+        minute: "5" # run on the 5th minute of every hour
+        job: "make prod/cron/pretalx"
diff --git a/deploy/templates/app/Makefile.app.j2 b/deploy/templates/app/Makefile.app.j2
index a000963..b247ea6 100644
--- a/deploy/templates/app/Makefile.app.j2
+++ b/deploy/templates/app/Makefile.app.j2
@@ -1,18 +1,24 @@
+# No quotes around the value: make keeps them literally, and the shell would
+# then look for a single program named "docker compose run app make".
+MAKE_APP=docker compose run app make
 
 echo:
 	"Dummy target, to not run something accidentally"
 
 prod/migrate:
-	docker compose run app make in-container/migrate
+	$(MAKE_APP) in-container/migrate
 
 prod/shell:
-	docker compose run app make in-container/shell
+	$(MAKE_APP) in-container/shell
 
 prod/db_shell:
-	docker compose run app make in-container/db_shell
+	$(MAKE_APP) in-container/db_shell
 
 prod/manage:
-	docker compose run app make in-container/manage ARG=$(ARG)
+	$(MAKE_APP) in-container/manage ARG=$(ARG)
+
+prod/cron/pretalx:
+	$(MAKE_APP) in-container/manage ARG="download_pretalx_data --event=europython-2025"
 
 logs:
 	docker compose logs -f
diff --git a/intbot/core/admin.py b/intbot/core/admin.py
index 25ada2e..5047065 100644
--- a/intbot/core/admin.py
+++ b/intbot/core/admin.py
@@ -1,6 +1,6 @@
 import json
 
-from core.models import DiscordMessage, Webhook
+from core.models import DiscordMessage, PretalxData, Webhook
 from django.contrib import admin
 from django.utils.html import format_html
 
@@ -26,12 +26,12 @@ class WebhookAdmin(admin.ModelAdmin):
         "processed_at",
     ]
 
-    def pretty_meta(self, obj):
+    def pretty_meta(self, obj: Webhook):
         return format_html("
{}", json.dumps(obj.meta, indent=4))
pretty_meta.short_description = "Meta"
- def pretty_content(self, obj):
+ def pretty_content(self, obj: Webhook):
return format_html("{}", json.dumps(obj.content, indent=4))
pretty_content.short_description = "Content"
@@ -61,11 +61,38 @@ class DiscordMessageAdmin(admin.ModelAdmin):
"sent_at",
]
- def content_short(self, obj):
+ def content_short(self, obj: DiscordMessage):
# NOTE(artcz) This can create false shortcuts, but for most messages is
# good enough, because most of them are longer than 20 chars
return f"{obj.content[:10]}...{obj.content[-10:]}"
+class PretalxDataAdmin(admin.ModelAdmin):
+    """Admin for raw pretalx snapshots; every field shown is read-only."""
+
+    # Columns of the changelist table.
+    list_display = [
+        "uuid",
+        "resource",
+        "created_at",
+        "modified_at",
+    ]
+    # Sidebar filters of the changelist.
+    list_filter = [
+        "created_at",
+        "resource",
+    ]
+    # The detail page shows exactly these entries and none of them is
+    # editable, so both options share one list object.
+    fields = [
+        "uuid",
+        "resource",
+        "pretty_content",
+        "created_at",
+        "modified_at",
+        "processed_at",
+    ]
+    readonly_fields = fields
+
+    @admin.display(description="Content")
+    def pretty_content(self, obj: PretalxData):
+        """Return ``obj.content`` dumped as JSON with a 4-space indent."""
+        dumped = json.dumps(obj.content, indent=4)
+        return format_html("{}", dumped)
+
+
+# Expose each model in the Django admin through its ModelAdmin class.
admin.site.register(Webhook, WebhookAdmin)
admin.site.register(DiscordMessage, DiscordMessageAdmin)
+admin.site.register(PretalxData, PretalxDataAdmin)
diff --git a/intbot/core/integrations/pretalx.py b/intbot/core/integrations/pretalx.py
new file mode 100644
index 0000000..17be122
--- /dev/null
+++ b/intbot/core/integrations/pretalx.py
@@ -0,0 +1,89 @@
+import logging
+from typing import Any
+
+import httpx
+from core.models import PretalxData
+from django.conf import settings
+
+logger = logging.getLogger(__name__)
+
+# Event slugs that may be queried; get_event_url() rejects anything else.
+PRETALX_EVENTS = [
+ "europython-2022",
+ "europython-2023",
+ "europython-2024",
+ "europython-2025",
+]
+
+# Path (including query string) of each resource, relative to the event URL.
+# NOTE: the model also declares a `schedule` resource, which has no entry
+# here, so looking it up raises KeyError.
+ENDPOINTS = {
+ # `questions=all` makes the API embed the answers in the same response,
+ # which saves us from joining the answers in later.
+ PretalxData.PretalxResources.submissions: "submissions/?questions=all",
+ PretalxData.PretalxResources.speakers: "speakers/?questions=all",
+}
+
+
+# A single decoded JSON object.
+JsonType = dict[str, Any]
+
+
+def get_event_url(event: str) -> str:
+    """Return the pretalx API base URL (with trailing slash) for ``event``.
+
+    Raises:
+        ValueError: if ``event`` is not listed in ``PRETALX_EVENTS``.
+    """
+    # Explicit check instead of ``assert``: asserts are stripped when Python
+    # runs with ``-O``, which would let an arbitrary slug into the URL.
+    if event not in PRETALX_EVENTS:
+        raise ValueError(f"Unknown pretalx event: {event!r}")
+
+    return f"https://pretalx.com/api/events/{event}/"
+
+
+def fetch_pretalx_data(
+    event: str, resource: PretalxData.PretalxResources
+) -> list[JsonType]:
+    """Download every page of ``resource`` for ``event`` from the pretalx API.
+
+    Each response carries a ``results`` list and a ``next`` link; the pages
+    are requested one after another and their ``results`` concatenated.
+
+    Returns:
+        The merged ``results`` entries of all pages, in page order.
+
+    Raises:
+        KeyError: if ``resource`` has no entry in ``ENDPOINTS``.
+        Exception: if any page answers with a status other than 200.
+        Anything ``get_event_url`` raises for an unknown ``event``.
+    """
+    headers = {
+        "Authorization": f"Token {settings.PRETALX_API_TOKEN}",
+        "Content-Type": "application/json",
+    }
+
+    url = f"{get_event_url(event)}{ENDPOINTS[resource]}"
+
+    results: list[JsonType] = []
+    page = 0
+
+    # One client for the whole download, so the connection is reused across
+    # pages instead of being re-established for every request.
+    with httpx.Client(headers=headers) as client:
+        # ``next`` holds the URL of the following page as long as there is
+        # more data; on the last page it is None (falsy), which ends the loop.
+        while url:
+            page += 1
+            # Log before the request so a failing page is visible in the logs.
+            logger.info("Fetching data from %s, page %s", url, page)
+            response = client.get(url)
+
+            if response.status_code != 200:
+                raise Exception(f"Error {response.status_code}: {response.text}")
+
+            data = response.json()
+            results.extend(data["results"])
+            url = data["next"]
+
+    return results
+
+
+def download_latest_submissions(event: str) -> PretalxData:
+    """Fetch all submissions of ``event`` and store them as one new row.
+
+    Returns the freshly created ``PretalxData`` instance.
+    """
+    resource = PretalxData.PretalxResources.submissions
+
+    # The download happens first; a row is only created once it succeeded.
+    return PretalxData.objects.create(
+        resource=resource,
+        content=fetch_pretalx_data(event, resource),
+    )
+
+
+def download_latest_speakers(event: str) -> PretalxData:
+    """Fetch all speakers of ``event`` and store them as one new row.
+
+    Returns the freshly created ``PretalxData`` instance.
+    """
+    resource = PretalxData.PretalxResources.speakers
+
+    # The download happens first; a row is only created once it succeeded.
+    return PretalxData.objects.create(
+        resource=resource,
+        content=fetch_pretalx_data(event, resource),
+    )
diff --git a/intbot/core/management/commands/download_pretalx_data.py b/intbot/core/management/commands/download_pretalx_data.py
new file mode 100644
index 0000000..86da3e5
--- /dev/null
+++ b/intbot/core/management/commands/download_pretalx_data.py
@@ -0,0 +1,28 @@
+from core.integrations.pretalx import (
+ PRETALX_EVENTS,
+ download_latest_speakers,
+ download_latest_submissions,
+)
+from django.core.management.base import BaseCommand
+
+
+class Command(BaseCommand):
+    """Management command that stores fresh pretalx downloads for one event."""
+
+    help = "Downloads latest pretalx data"
+
+    def add_arguments(self, parser):
+        """Register the mandatory ``--event`` option, limited to known slugs."""
+        parser.add_argument(
+            "--event",
+            required=True,
+            choices=PRETALX_EVENTS,
+            help="slug of the event (for example `europython-2025`)",
+        )
+
+    def handle(self, **kwargs):
+        """Download speakers, then submissions, announcing each step."""
+        event = kwargs["event"]
+        steps = (
+            (
+                f"Downloading latest speakers from pretalx... {event}",
+                download_latest_speakers,
+            ),
+            (
+                f"Downloading latest submissions from pretalx... {event}",
+                download_latest_submissions,
+            ),
+        )
+
+        for announcement, download in steps:
+            self.stdout.write(announcement)
+            download(event)
diff --git a/intbot/core/migrations/0005_add_pretalx_data_model.py b/intbot/core/migrations/0005_add_pretalx_data_model.py
new file mode 100644
index 0000000..e3d1a9c
--- /dev/null
+++ b/intbot/core/migrations/0005_add_pretalx_data_model.py
@@ -0,0 +1,43 @@
+# Generated by Django 5.1.4 on 2025-04-18 11:43
+
+import uuid
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+ # Creates the table for the PretalxData model; depends on core 0004.
+ dependencies = [
+ ("core", "0004_add_inbox_item_model"),
+ ]
+
+ operations = [
+ migrations.CreateModel(
+ name="PretalxData",
+ fields=[
+ (
+ "id",
+ models.BigAutoField(
+ auto_created=True,
+ primary_key=True,
+ serialize=False,
+ verbose_name="ID",
+ ),
+ ),
+ ("uuid", models.UUIDField(default=uuid.uuid4)),
+ (
+ "resource",
+ models.CharField(
+ # Same values as PretalxData.PretalxResources on the model.
+ choices=[
+ ("submissions", "Submissions"),
+ ("speakers", "Speakers"),
+ ("schedule", "Schedule"),
+ ],
+ max_length=255,
+ ),
+ ),
+ ("content", models.JSONField()),
+ ("created_at", models.DateTimeField(auto_now_add=True)),
+ ("modified_at", models.DateTimeField(auto_now=True)),
+ ("processed_at", models.DateTimeField(blank=True, null=True)),
+ ],
+ ),
+ ]
diff --git a/intbot/core/models.py b/intbot/core/models.py
index 5a09f63..3192ab0 100644
--- a/intbot/core/models.py
+++ b/intbot/core/models.py
@@ -81,3 +81,32 @@ def summary(self) -> str:
def __str__(self):
return f"{self.uuid} {self.author}: {self.content[:30]}"
+
+
+class PretalxData(models.Model):
+ """
+ Table to store raw data downloaded from pretalx for later parsing.
+
+ We first download data from pretalx to this table, and then fire a separate
+ background task that pulls data from this table and stores it in separate
+ "business" tables, like "Proposal" or "Speaker".
+ """
+
+ # Kinds of pretalx resource a row can hold.
+ class PretalxResources(models.TextChoices):
+ submissions = "submissions", "Submissions"
+ speakers = "speakers", "Speakers"
+ schedule = "schedule", "Schedule"
+
+ # NOTE(review): the event slug the data was downloaded for is not stored
+ # on the row.
+ uuid = models.UUIDField(default=uuid.uuid4)
+ resource = models.CharField(
+ max_length=255,
+ choices=PretalxResources.choices,
+ )
+ # The downloaded payload, stored as JSON.
+ content = models.JSONField()
+
+ created_at = models.DateTimeField(auto_now_add=True)
+ modified_at = models.DateTimeField(auto_now=True)
+ # NOTE(review): presumably set once the row has been parsed into the
+ # "business" tables; nothing in the visible code writes it. Confirm.
+ processed_at = models.DateTimeField(blank=True, null=True)
+
+ def __str__(self):
+ return f"{self.uuid}"
diff --git a/intbot/intbot/settings.py b/intbot/intbot/settings.py
index 1a7b907..a1d555a 100644
--- a/intbot/intbot/settings.py
+++ b/intbot/intbot/settings.py
@@ -197,6 +197,9 @@ def get(name) -> str:
ZAMMAD_GROUP_SPONSORS = get("ZAMMAD_GROUP_SPONSORS")
ZAMMAD_GROUP_GRANTS = get("ZAMMAD_GROUP_GRANTS")
+# Pretalx
+PRETALX_API_TOKEN = get("PRETALX_API_TOKEN")
+
if DJANGO_ENV == "dev":
DEBUG = True
@@ -282,6 +285,8 @@ def get(name) -> str:
ZAMMAD_GROUP_HELPDESK = "TestZammad Helpdesk"
ZAMMAD_GROUP_BILLING = "TestZammad Billing"
+ PRETALX_API_TOKEN = "Test-Pretalx-API-token"
+
elif DJANGO_ENV == "local_container":
DEBUG = False
diff --git a/intbot/tests/test_admin.py b/intbot/tests/test_admin.py
index d731edc..fd537ce 100644
--- a/intbot/tests/test_admin.py
+++ b/intbot/tests/test_admin.py
@@ -2,7 +2,7 @@
Sanity checks (mostly) if the admin resources are available
"""
-from core.models import DiscordMessage, Webhook
+from core.models import DiscordMessage, PretalxData, Webhook
def test_admin_for_webhooks_sanity_check(admin_client):
@@ -32,3 +32,35 @@ def test_admin_for_discordmessages_sanity_check(admin_client):
assert str(dm.uuid).encode() in response.content
assert dm.channel_id.encode() in response.content
assert dm.channel_name.encode() in response.content
+
+
+def test_admin_list_for_pretalx_data(admin_client):
+    """The changelist page loads and lists the stored row."""
+    snapshot = PretalxData.objects.create(
+        resource=PretalxData.PretalxResources.speakers,
+        content={},
+    )
+    assert snapshot.uuid
+
+    response = admin_client.get("/admin/core/pretalxdata/")
+
+    assert response.status_code == 200
+    page = response.content
+    assert str(snapshot.uuid).encode() in page
+    assert snapshot.get_resource_display().encode() in page
+
+
+def test_admin_change_for_pretalx_data(admin_client):
+    """The change page of a stored row loads and shows its data."""
+    snapshot = PretalxData.objects.create(
+        resource=PretalxData.PretalxResources.speakers,
+        content={},
+    )
+    assert snapshot.uuid
+
+    response = admin_client.get(f"/admin/core/pretalxdata/{snapshot.pk}/change/")
+
+    assert response.status_code == 200
+    page = response.content
+    assert str(snapshot.uuid).encode() in page
+    assert snapshot.get_resource_display().encode() in page
diff --git a/intbot/tests/test_integrations/test_pretalx.py b/intbot/tests/test_integrations/test_pretalx.py
new file mode 100644
index 0000000..382c53d
--- /dev/null
+++ b/intbot/tests/test_integrations/test_pretalx.py
@@ -0,0 +1,135 @@
+import pytest
+import respx
+from core.integrations import pretalx
+from core.models import PretalxData
+from httpx import Response
+
+
+def submissions_pages_generator(url):
+    """
+    Yield two fake paginated responses for the submissions endpoint.
+
+    The first page links to ``{url}&page=2``; the second one is the last.
+    """
+    pages = (
+        ([{"hello": "world"}], f"{url}&page=2"),
+        # The last page must not link anywhere, otherwise the code under
+        # test would keep paginating forever.
+        ([{"foo": "bar"}], None),
+    )
+
+    for results, next_url in pages:
+        yield Response(200, json={"results": results, "next": next_url})
+
+
+def speaker_pages_generator(url):
+    """
+    Yield two fake paginated responses for the speakers endpoint.
+
+    The first page links to ``{url}&page=2``; the second one is the last.
+    """
+    pages = (
+        ([{"hello": "world"}], f"{url}&page=2"),
+        # The last page must not link anywhere, otherwise the code under
+        # test would keep paginating forever.
+        ([{"foo": "bar"}], None),
+    )
+
+    for results, next_url in pages:
+        yield Response(200, json={"results": results, "next": next_url})
+
+
+@respx.mock
+def test_fetch_submissions_from_pretalx():
+    """Both mocked submission pages are fetched and merged in order."""
+    url = "https://pretalx.com/api/events/europython-2025/submissions/?questions=all"
+    first_page, last_page = submissions_pages_generator(url)
+    respx.get(url).mock(return_value=first_page)
+    respx.get(f"{url}&page=2").mock(return_value=last_page)
+
+    fetched = pretalx.fetch_pretalx_data(
+        "europython-2025",
+        PretalxData.PretalxResources.submissions,
+    )
+
+    assert fetched == [{"hello": "world"}, {"foo": "bar"}]
+
+
+@respx.mock
+def test_fetch_speakers_from_pretalx():
+    """Both mocked speaker pages are fetched and merged in order."""
+    url = "https://pretalx.com/api/events/europython-2025/speakers/?questions=all"
+    first_page, last_page = speaker_pages_generator(url)
+    respx.get(url).mock(return_value=first_page)
+    respx.get(f"{url}&page=2").mock(return_value=last_page)
+
+    speakers = pretalx.fetch_pretalx_data(
+        "europython-2025",
+        PretalxData.PretalxResources.speakers,
+    )
+
+    assert speakers == [{"hello": "world"}, {"foo": "bar"}]
+
+
+@respx.mock
+@pytest.mark.django_db
+def test_download_latest_submissions():
+    """The merged submission pages end up in a single PretalxData row."""
+    url = "https://pretalx.com/api/events/europython-2025/submissions/?questions=all"
+    first_page, last_page = submissions_pages_generator(url)
+    respx.get(url).mock(return_value=first_page)
+    respx.get(f"{url}&page=2").mock(return_value=last_page)
+
+    pretalx.download_latest_submissions("europython-2025")
+
+    stored = PretalxData.objects.get(
+        resource=PretalxData.PretalxResources.submissions
+    )
+    assert stored.resource == "submissions"
+    assert stored.content == [{"hello": "world"}, {"foo": "bar"}]
+
+
+@respx.mock
+@pytest.mark.django_db
+def test_download_latest_speakers():
+    """The merged speaker pages end up in a single PretalxData row."""
+    url = "https://pretalx.com/api/events/europython-2025/speakers/?questions=all"
+    first_page, last_page = speaker_pages_generator(url)
+    respx.get(url).mock(return_value=first_page)
+    respx.get(f"{url}&page=2").mock(return_value=last_page)
+
+    pretalx.download_latest_speakers("europython-2025")
+
+    stored = PretalxData.objects.get(
+        resource=PretalxData.PretalxResources.speakers
+    )
+    assert stored.resource == "speakers"
+    assert stored.content == [{"hello": "world"}, {"foo": "bar"}]