diff --git a/mission-control/docs/reference/crds/canary.mdx b/mission-control/docs/reference/crds/canary.mdx new file mode 100644 index 00000000..e25541ae --- /dev/null +++ b/mission-control/docs/reference/crds/canary.mdx @@ -0,0 +1,148 @@ +--- +title: Canary CRD +--- + +# Canary + +The `Canary` CRD allows you to define health checks for monitoring the availability and performance of your services and infrastructure. + +## Definition + +```yaml +apiVersion: canaries.flanksource.com/v1 +kind: Canary +metadata: + name: example-canary +spec: + # Schedule on which to execute the canary + interval: 30s + + # Specify the number of replicas for this canary + replicas: 1 + + # Checks to run for this canary + http: + - name: example-http-check + url: https://example.com + + dns: + - name: example-dns-check + server: 8.8.8.8 + query: example.com + querytype: A +``` + +## Schema + +The `Canary` resource supports the following top-level fields: + +| Field | Description | +|-------|-------------| +| `spec.interval` | The schedule on which to execute the canary checks | +| `spec.replicas` | The number of replicas for this canary | +| `spec.cloudwatch` | AWS CloudWatch checks | +| `spec.containerdPull` | Containerd image pull checks | +| `spec.containerdPush` | Containerd image push checks | +| `spec.dns` | DNS resolution checks | +| `spec.dockerPull` | Docker image pull checks | +| `spec.dockerPush` | Docker image push checks | +| `spec.elasticsearch` | Elasticsearch health and query checks | +| `spec.exec` | Command execution checks | +| `spec.folder` | File system and folder checks | +| `spec.git` | Git repository checks | +| `spec.github` | GitHub actions and repository checks | +| `spec.helm` | Helm chart checks | +| `spec.http` | HTTP endpoint checks | +| `spec.icmp` | ICMP/Ping checks | +| `spec.jmeter` | JMeter performance tests | +| `spec.junit` | JUnit test checks | +| `spec.kubernetes` | Kubernetes resource checks | +| `spec.ldap` | LDAP connectivity checks | +| 
`spec.mongodb` | MongoDB health and query checks | +| `spec.mssql` | Microsoft SQL Server checks | +| `spec.mysql` | MySQL database checks | +| `spec.postgres` | PostgreSQL database checks | +| `spec.redis` | Redis health checks | +| `spec.restic` | Restic backup checks | +| `spec.s3` | S3 bucket checks | +| `spec.tcp` | TCP connection checks | +| `spec.webhook` | Webhook checks | + +## Status + +The Canary controller updates the resource with status information, including: + +| Status Field | Description | +|--------------|-------------| +| `status.lastCheck` | The time of the last check | +| `status.status` | The overall status of the canary (Healthy, Unhealthy) | +| `status.message` | A descriptive message about the current status | +| `status.errorMessage` | Detailed error message when checks fail | +| `status.uptime1h` | Availability percentage over the last hour | +| `status.latency1h` | Average response time over the last hour | +| `status.lastTransitionedTime` | The time when the status last changed | + +## Examples + +### Basic HTTP Check + +```yaml +apiVersion: canaries.flanksource.com/v1 +kind: Canary +metadata: + name: website-check +spec: + interval: 1m + http: + - name: website + url: https://example.com + thresholdMillis: 500 + responseCodes: [200] + responseContent: "Welcome" + maxSSLExpiry: 7d +``` + +### DNS Check + +```yaml +apiVersion: canaries.flanksource.com/v1 +kind: Canary +metadata: + name: dns-check +spec: + interval: 2m + dns: + - name: google-dns + server: 8.8.8.8:53 + query: google.com + querytype: A + minrecords: 1 +``` + +### Multiple Check Types + +```yaml +apiVersion: canaries.flanksource.com/v1 +kind: Canary +metadata: + name: multi-check +spec: + interval: 5m + http: + - name: api-check + url: https://api.example.com/health + thresholdMillis: 200 + tcp: + - name: database-connection + host: db.example.com + port: 5432 + icmp: + - name: network-check + endpoint: 192.168.1.1 +``` + +## See Also + +- [Canary Checker 
Documentation](../../canary-checker/index.mdx) +- [HTTP Checks Reference](../../canary-checker/docs/reference/1-http.mdx) +- [TCP Checks Reference](../../canary-checker/docs/reference/1-tcp.mdx) \ No newline at end of file diff --git a/mission-control/docs/reference/crds/component.mdx b/mission-control/docs/reference/crds/component.mdx new file mode 100644 index 00000000..42c47ace --- /dev/null +++ b/mission-control/docs/reference/crds/component.mdx @@ -0,0 +1,152 @@ +--- +title: Component CRD +--- + +# Component + +The `Component` CRD allows you to define and organize components in your infrastructure, enabling you to model, visualize, and track various elements of your system. + +## Definition + +```yaml +apiVersion: canaries.flanksource.com/v1 +kind: Component +metadata: + name: example-component +spec: + # Human readable name of the component + name: Example Service + + # Optional type classification + type: service + + # Description of the component + description: An example component for demonstration purposes + + # Owner of the component + owner: platform-team + + # Icon to represent the component + icon: https://example.com/icons/service.svg + + # Labels for categorization + labels: + environment: production + tier: frontend +``` + +## Schema + +The `Component` resource supports the following fields: + +| Field | Description | +|-------|-------------| +| `spec.name` | Human-readable name of the component | +| `spec.displayName` | Alternative display name for the component | +| `spec.description` | Description of the component's purpose | +| `spec.type` | Type classification (e.g., service, database, app) | +| `spec.subtype` | Sub-classification of the component type | +| `spec.icon` | Icon URL or name to visually represent the component | +| `spec.owner` | Owner of the component (team/person) | +| `spec.labels` | Key-value pairs for categorizing the component | +| `spec.config` | Component-specific configuration | +| `spec.items` | Child items or 
sub-components | +| `spec.properties` | Additional properties as key-value pairs | +| `spec.checks` | Health checks to perform for this component | +| `spec.dashboards` | Associated dashboards for monitoring | +| `spec.costs` | Cost tracking configuration | +| `spec.ticket` | Ticketing system integration | +| `spec.slo` | Service Level Objectives | +| `spec.statusMessage` | Custom status message | +| `spec.statusReason` | Reason for the current status | + +## Status + +The Component controller updates the resource with status information, including: + +| Status Field | Description | +|--------------|-------------| +| `status.health` | Overall health status of the component | +| `status.costPerMonth` | Monthly cost of the component | +| `status.artifacts` | Associated artifacts | +| `status.dependencies` | Component dependencies | +| `status.incidents` | Active incidents related to the component | +| `status.lastScrape` | Timestamp of the last data scrape | +| `status.lastSeen` | Timestamp when the component was last seen | + +## Examples + +### Basic Component + +```yaml +apiVersion: canaries.flanksource.com/v1 +kind: Component +metadata: + name: frontend-web +spec: + name: Frontend Web Application + type: web-application + description: Customer-facing web application for product browsing and purchasing + owner: web-team + labels: + tier: frontend + environment: production + criticality: high +``` + +### Component with Checks + +```yaml +apiVersion: canaries.flanksource.com/v1 +kind: Component +metadata: + name: payment-service +spec: + name: Payment Processing Service + type: microservice + description: Handles all payment processing operations + owner: payments-team + checks: + http: + - name: api-health + url: https://payments.example.com/health + interval: 1m + thresholdMillis: 300 + tcp: + - name: db-connection + host: payments-db.example.com + port: 5432 + interval: 5m +``` + +### Hierarchical Component + +```yaml +apiVersion: canaries.flanksource.com/v1 
+kind: Component +metadata: + name: e-commerce-platform +spec: + name: E-Commerce Platform + type: platform + description: Complete e-commerce solution + owner: platform-team + items: + - name: Web Frontend + type: web-application + owner: web-team + - name: Authentication Service + type: microservice + owner: auth-team + - name: Payment Processing + type: microservice + owner: payments-team + - name: Order Management + type: microservice + owner: orders-team +``` + +## See Also + +- [Topology CRD](./topology.mdx) +- [Component Relationships](../topology/relationships.mdx) \ No newline at end of file diff --git a/mission-control/docs/reference/crds/connection.mdx b/mission-control/docs/reference/crds/connection.mdx new file mode 100644 index 00000000..620cfd26 --- /dev/null +++ b/mission-control/docs/reference/crds/connection.mdx @@ -0,0 +1,168 @@ +--- +title: Connection CRD +--- + +# Connection + +The `Connection` CRD allows you to define connections to external systems and services that can be referenced by other resources in Mission Control. + +## Definition + +```yaml +apiVersion: mission-control.flanksource.com/v1 +kind: Connection +metadata: + name: example-connection +spec: + # Type of connection + type: postgres + + # URL for the connection + url: postgres://user:password@host:5432/dbname + + # Optional name that can be used to reference this connection + name: My Database + + # Labels for categorization + labels: + environment: production + tier: database +``` + +## Schema + +The `Connection` resource supports the following fields: + +| Field | Description | +|-------|-------------| +| `spec.type` | Type of the connection (e.g., postgres, http, aws, etc.) 
| +| `spec.url` | Connection URL or endpoint | +| `spec.name` | Human-readable name for the connection | +| `spec.description` | Description of the connection | +| `spec.labels` | Key-value pairs for categorizing the connection | +| `spec.icon` | Icon URL or name to visually represent the connection | +| `spec.properties` | Additional connection properties | +| `spec.username` | Username for authentication | +| `spec.password` | Password for authentication (should use secretRef instead for production) | +| `spec.secretRef` | Reference to a Kubernetes secret containing credentials | +| `spec.caCert` | CA certificate for TLS validation | +| `spec.clientCert` | Client certificate for mutual TLS | +| `spec.clientKey` | Client key for mutual TLS | +| `spec.insecureSkipVerify` | Skip TLS verification (not recommended for production) | + +## Connection Types + +The Connection CRD supports various connection types including: + +| Type | Description | +|------|-------------| +| `postgres` | PostgreSQL database connection | +| `mysql` | MySQL database connection | +| `mssql` | Microsoft SQL Server connection | +| `mongodb` | MongoDB connection | +| `http` | HTTP/HTTPS endpoint | +| `aws` | AWS service connection | +| `azure` | Azure service connection | +| `gcp` | Google Cloud Platform connection | +| `kubernetes` | Kubernetes cluster connection | +| `prometheus` | Prometheus metrics connection | +| `elastic` | Elasticsearch connection | +| `ldap` | LDAP directory connection | +| `smtp` | Email server connection | +| `redis` | Redis database connection | +| `webhook` | Webhook connection | + +## Examples + +### Database Connection + +```yaml +apiVersion: mission-control.flanksource.com/v1 +kind: Connection +metadata: + name: production-db +spec: + type: postgres + name: Production Database + description: Main production database for the e-commerce platform + url: postgres://postgres:postgres@db.example.com:5432/production + labels: + environment: production + tier: database 
+ secretRef: + name: db-credentials + namespace: database +``` + +### Cloud Provider Connection + +```yaml +apiVersion: mission-control.flanksource.com/v1 +kind: Connection +metadata: + name: aws-production +spec: + type: aws + name: AWS Production + description: Production AWS account + properties: + region: us-west-2 + secretRef: + name: aws-credentials + key: credentials + labels: + environment: production + provider: aws +``` + +### HTTP API Connection + +```yaml +apiVersion: mission-control.flanksource.com/v1 +kind: Connection +metadata: + name: payment-gateway +spec: + type: http + name: Payment Gateway API + description: Connection to external payment processing service + url: https://api.payment-gateway.com/v1 + secretRef: + name: payment-api-key + key: api-key + properties: + headers: + Content-Type: application/json + Accept: application/json + insecureSkipVerify: false + labels: + service: payments + criticality: high +``` + +### Kubernetes Cluster Connection + +```yaml +apiVersion: mission-control.flanksource.com/v1 +kind: Connection +metadata: + name: production-cluster +spec: + type: kubernetes + name: Production Kubernetes Cluster + description: Main production Kubernetes cluster + properties: + server: https://k8s.example.com:6443 + secretRef: + name: kubeconfig + key: config + labels: + environment: production + infrastructure: kubernetes +``` + +## See Also + +- [Canary CRD](./canary.mdx) - Use connections in health checks +- [Component CRD](./component.mdx) - Associate components with connections +- [Playbook CRD](./playbook.mdx) - Use connections in automated workflows \ No newline at end of file diff --git a/mission-control/docs/reference/crds/incident-rule.mdx b/mission-control/docs/reference/crds/incident-rule.mdx new file mode 100644 index 00000000..616ab5d0 --- /dev/null +++ b/mission-control/docs/reference/crds/incident-rule.mdx @@ -0,0 +1,183 @@ +--- +title: IncidentRule CRD +--- + +# IncidentRule + +The `IncidentRule` CRD allows you to 
define rules for automatically creating, updating, and managing incidents based on events and conditions in your infrastructure. + +## Definition + +```yaml +apiVersion: mission-control.flanksource.com/v1 +kind: IncidentRule +metadata: + name: example-incident-rule +spec: + # Source of events to process + source: + type: canary + selector: + matchLabels: + app: frontend + + # Conditions that trigger the rule + condition: + status: unhealthy + duration: 10m + + # Incident creation settings + incident: + title: "Frontend Availability Issue" + severity: high + owner: platform-team + labels: + service: frontend + type: availability +``` + +## Schema + +The `IncidentRule` resource supports the following fields: + +| Field | Description | +|-------|-------------| +| `spec.source` | Source configuration for events | +| `spec.source.type` | Type of event source (canary, component, alert, etc.) | +| `spec.source.selector` | Kubernetes label selector for matching sources | +| `spec.condition` | Conditions that trigger the rule | +| `spec.condition.status` | Required status of the source (e.g., unhealthy) | +| `spec.condition.duration` | Time duration condition must be true before triggering | +| `spec.condition.count` | Number of occurrences required to trigger | +| `spec.condition.message` | Message pattern to match | +| `spec.condition.labels` | Labels that must be present on the source | +| `spec.condition.expression` | CEL expression for complex conditions | +| `spec.incident` | Incident configuration | +| `spec.incident.title` | Title template for the incident | +| `spec.incident.description` | Description template for the incident | +| `spec.incident.severity` | Severity level (critical, high, medium, low) | +| `spec.incident.type` | Type classification for the incident | +| `spec.incident.owner` | Default owner for the incident | +| `spec.incident.labels` | Labels to apply to the incident | +| `spec.incident.components` | Components to associate with the incident | +| 
`spec.incident.playbooks` | Playbooks to trigger when incident is created | +| `spec.incident.responders` | Initial responders to assign | +| `spec.jira` | JIRA integration settings | +| `spec.pagerduty` | PagerDuty integration settings | +| `spec.teams` | Microsoft Teams integration settings | +| `spec.slack` | Slack integration settings | + +## Examples + +### Basic Canary Failure Rule + +```yaml +apiVersion: mission-control.flanksource.com/v1 +kind: IncidentRule +metadata: + name: api-availability +spec: + source: + type: canary + selector: + matchLabels: + check: api-health + condition: + status: unhealthy + duration: 5m + incident: + title: "API Availability Issue" + severity: high + owner: api-team + labels: + service: api + type: availability +``` + +### Component Health Rule + +```yaml +apiVersion: mission-control.flanksource.com/v1 +kind: IncidentRule +metadata: + name: database-health +spec: + source: + type: component + selector: + matchLabels: + type: database + tier: production + condition: + status: unhealthy + duration: 2m + incident: + title: "Database Health Issue - {{.component.name}}" + description: "The database component {{.component.name}} is reporting unhealthy status.\n\nLast error: {{.component.status.message}}" + severity: critical + components: + - "{{.component.id}}" + playbooks: + - database-recovery +``` + +### Alert Manager Integration + +```yaml +apiVersion: mission-control.flanksource.com/v1 +kind: IncidentRule +metadata: + name: prometheus-alerts +spec: + source: + type: alertmanager + selector: + matchLabels: + severity: critical + condition: + status: firing + duration: 1m + incident: + title: "{{.alert.labels.alertname}}" + description: "{{.alert.annotations.description}}" + severity: "{{.alert.labels.severity}}" + labels: + source: prometheus + pagerduty: + integration: primary-pd-service + severity: critical + slack: + channel: "#incidents" + message: "Critical alert triggered: {{.alert.labels.alertname}}" +``` + +### Complex 
Condition with Expression + +```yaml +apiVersion: mission-control.flanksource.com/v1 +kind: IncidentRule +metadata: + name: advanced-rule +spec: + source: + type: component + condition: + expression: | + source.status == "unhealthy" && + (source.labels.tier == "production" || source.labels.criticality == "high") && + duration("10m") + incident: + title: "Service Disruption - {{.component.name}}" + severity: high + type: availability + components: + - "{{.component.id}}" + - "{{range .component.dependencies}}{{.id}}{{end}}" +``` + +## See Also + +- [Incident Management](../../incidents/index.mdx) +- [Canary CRD](./canary.mdx) +- [Component CRD](./component.mdx) +- [Playbook CRD](./playbook.mdx) \ No newline at end of file diff --git a/mission-control/docs/reference/crds/index.mdx b/mission-control/docs/reference/crds/index.mdx new file mode 100644 index 00000000..7b3f5459 --- /dev/null +++ b/mission-control/docs/reference/crds/index.mdx @@ -0,0 +1,36 @@ +--- +title: Custom Resource Definitions (CRDs) +--- + +# Custom Resource Definitions + +Mission Control uses several Custom Resource Definitions (CRDs) to provide extended functionality in Kubernetes. These CRDs allow you to define various resources that Mission Control can manage, monitor, and interact with. 
+ +## Available CRDs + +| CRD | Kind | Description | +|-----|------|-------------| +| [Canary](./canary.mdx) | Canary | Define health checks to monitor the availability of services | +| [Component](./component.mdx) | Component | Define components for organizing and visualizing your infrastructure | +| [Topology](./topology.mdx) | Topology | Define relationships and hierarchies between components | +| [Connection](./connection.mdx) | Connection | Define connections to external systems and services | +| [IncidentRule](./incident-rule.mdx) | IncidentRule | Define rules for incident detection and management | +| [Notification](./notification.mdx) | Notification | Define notification channels and rules | +| [NotificationSilence](./notification-silence.mdx) | NotificationSilence | Define periods when notifications should be silenced | +| [PermissionGroup](./permission-group.mdx) | PermissionGroup | Define groups of permissions for RBAC | +| [Permission](./permission.mdx) | Permission | Define individual permissions for RBAC | +| [Playbook](./playbook.mdx) | Playbook | Define automated workflows for operations | +| [ScrapeConfig](./scrape-config.mdx) | ScrapeConfig | Define configurations for scraping metrics | +| [ScrapePlugin](./scrape-plugin.mdx) | ScrapePlugin | Define plugins for scraping custom data sources | + +## Installation + +These CRDs are automatically installed when you deploy Mission Control using the Helm chart. If you need to install them manually, clone the chart repository and apply the CRD manifests from the local copy (`kubectl apply -f` cannot read a GitHub `tree` page URL, which serves HTML rather than YAML): + +```bash +git clone https://github.com/flanksource/mission-control-chart.git && kubectl apply -f mission-control-chart/crd-chart/templates +``` + +## Validation + +Mission Control uses OpenAPI validation schemas to ensure that the resources you create are valid. You can find the full schemas in the [duty repository](https://github.com/flanksource/duty/tree/main/schema/openapi). 
\ No newline at end of file diff --git a/mission-control/docs/reference/crds/notification-silence.mdx b/mission-control/docs/reference/crds/notification-silence.mdx new file mode 100644 index 00000000..5606cd46 --- /dev/null +++ b/mission-control/docs/reference/crds/notification-silence.mdx @@ -0,0 +1,150 @@ +--- +title: NotificationSilence CRD +--- + +# NotificationSilence + +The `NotificationSilence` CRD allows you to define periods when specific notifications should be silenced to prevent alert fatigue and manage maintenance windows. + +## Definition + +```yaml +apiVersion: mission-control.flanksource.com/v1 +kind: NotificationSilence +metadata: + name: example-silence +spec: + # Time period for the silence + start: "2023-06-01T10:00:00Z" + end: "2023-06-01T14:00:00Z" + + # Selector for events to silence + selector: + components: + - database-cluster + - api-service + labels: + environment: production + + # Reason for the silence + reason: "Scheduled database maintenance" +``` + +## Schema + +The `NotificationSilence` resource supports the following fields: + +| Field | Description | +|-------|-------------| +| `spec.start` | Start time of the silence period (ISO 8601 format) | +| `spec.end` | End time of the silence period (ISO 8601 format) | +| `spec.duration` | Alternative to end time - duration from start time | +| `spec.selector` | Selector for matching events to silence | +| `spec.selector.severity` | Severity levels to silence | +| `spec.selector.labels` | Labels to match on events | +| `spec.selector.components` | Components to silence | +| `spec.selector.expression` | CEL expression for complex matching | +| `spec.reason` | Reason for the silence period | +| `spec.creator` | Person or system that created the silence | +| `spec.comment` | Additional comments about the silence | +| `spec.jira` | Associated JIRA ticket information | +| `spec.createdBy` | User who created the silence | + +## Status + +The NotificationSilence controller updates the resource 
with status information: + +| Status Field | Description | +|--------------|-------------| +| `status.active` | Whether the silence is currently active | +| `status.created` | Timestamp when the silence was created | +| `status.updated` | Timestamp when the silence was last updated | +| `status.events` | Count of silenced events | + +## Examples + +### Maintenance Window + +```yaml +apiVersion: mission-control.flanksource.com/v1 +kind: NotificationSilence +metadata: + name: database-maintenance +spec: + start: "2023-06-01T10:00:00Z" + end: "2023-06-01T14:00:00Z" + selector: + components: + - database-cluster + - database-api + labels: + tier: database + reason: "Scheduled database upgrade" + creator: "Database Team" + comment: "Upgrading to PostgreSQL 15" + jira: "MAINT-1234" +``` + +### Silence by Duration + +```yaml +apiVersion: mission-control.flanksource.com/v1 +kind: NotificationSilence +metadata: + name: api-deployment +spec: + start: "2023-07-15T08:00:00Z" + duration: 2h + selector: + components: + - api-gateway + - auth-service + labels: + environment: production + reason: "API Gateway Deployment" + creator: "DevOps Team" +``` + +### Weekend Maintenance Window + +```yaml +apiVersion: mission-control.flanksource.com/v1 +kind: NotificationSilence +metadata: + name: weekend-maintenance +spec: + start: "2023-08-05T22:00:00Z" + end: "2023-08-06T06:00:00Z" + selector: + severity: + - low + - medium + expression: "!contains(labels.keys(), 'critical-path')" + reason: "Weekend system maintenance" + creator: "System Administrator" + comment: "Regular weekly maintenance window" +``` + +### Complex Selector with Expression + +```yaml +apiVersion: mission-control.flanksource.com/v1 +kind: NotificationSilence +metadata: + name: testing-environment +spec: + start: "2023-09-10T00:00:00Z" + end: "2023-09-12T00:00:00Z" + selector: + expression: | + labels.environment == 'testing' && + (severity == 'low' || severity == 'medium') + reason: "Extended testing period" + creator: 
"QA Team" +``` + +## See Also + +- [Notification CRD](./notification.mdx) +- [Incident Management](../../incidents/index.mdx) +- [Alert Management](../../incidents/alerts.mdx) \ No newline at end of file diff --git a/mission-control/docs/reference/crds/notification.mdx b/mission-control/docs/reference/crds/notification.mdx new file mode 100644 index 00000000..caaea793 --- /dev/null +++ b/mission-control/docs/reference/crds/notification.mdx @@ -0,0 +1,230 @@ +--- +title: Notification CRD +--- + +# Notification + +The `Notification` CRD allows you to configure notification channels and rules for alerts, incidents, and other events in Mission Control. + +## Definition + +```yaml +apiVersion: mission-control.flanksource.com/v1 +kind: Notification +metadata: + name: example-notification +spec: + # Type of notification channel + type: slack + + # Selector for events to match + selector: + severity: critical + labels: + service: api + + # Channel-specific configuration + slack: + channel: "#alerts" + username: "Mission Control" + icon: ":warning:" + + # Templates for notification content + templates: + title: "Critical Alert: {{.alert.name}}" + body: "Service: {{.component.name}}\nStatus: {{.status}}\nMessage: {{.message}}" +``` + +## Schema + +The `Notification` resource supports the following fields: + +| Field | Description | +|-------|-------------| +| `spec.type` | Type of notification channel (slack, email, teams, webhook, etc.) | +| `spec.selector` | Selector for matching events to notify about | +| `spec.selector.severity` | Severity level to match (critical, high, medium, low) | +| `spec.selector.status` | Status to match (healthy, unhealthy, warning, etc.) 
| +| `spec.selector.labels` | Labels to match on events | +| `spec.selector.components` | Components to match | +| `spec.selector.expression` | CEL expression for complex matching | +| `spec.templates` | Templates for notification content | +| `spec.templates.title` | Template for notification title | +| `spec.templates.body` | Template for notification body | +| `spec.templates.button` | Template for notification action button | +| `spec.templates.footer` | Template for notification footer | +| `spec.slack` | Slack-specific configuration | +| `spec.email` | Email-specific configuration | +| `spec.teams` | Microsoft Teams-specific configuration | +| `spec.webhook` | Webhook-specific configuration | +| `spec.pagerduty` | PagerDuty-specific configuration | +| `spec.connection` | Reference to a Connection resource | +| `spec.throttle` | Throttling configuration | +| `spec.throttle.period` | Time period for throttling (e.g., 1h, 24h) | +| `spec.throttle.count` | Maximum number of notifications in the period | + +## Notification Channel Types + +The Notification CRD supports various channel types: + +| Type | Description | +|------|-------------| +| `slack` | Slack channel notifications | +| `email` | Email notifications | +| `teams` | Microsoft Teams notifications | +| `webhook` | Custom webhook notifications | +| `pagerduty` | PagerDuty incidents | +| `sms` | SMS text messages | +| `discord` | Discord notifications | +| `telegram` | Telegram messages | + +## Examples + +### Slack Channel for Critical Alerts + +```yaml +apiVersion: mission-control.flanksource.com/v1 +kind: Notification +metadata: + name: critical-alerts-slack +spec: + type: slack + selector: + severity: critical + slack: + channel: "#critical-alerts" + username: "Alert Bot" + icon: ":rotating_light:" + templates: + title: ":red_circle: CRITICAL ALERT: {{.alert.name}}" + body: | + *Component:* {{.component.name}} + *Status:* {{.status}} + *Time:* {{.time | formatTime "Jan 02, 15:04:05 MST"}} + 
*Details:* {{.message}} + + {{if .runbook}}*Runbook:* {{.runbook}}{{end}} +``` + +### Email Notifications for Production Components + +```yaml +apiVersion: mission-control.flanksource.com/v1 +kind: Notification +metadata: + name: production-email-alerts +spec: + type: email + selector: + labels: + environment: production + email: + to: + - "ops-team@example.com" + - "{{.component.owner}}@example.com" + from: "mission-control@example.com" + subject: "[{{.severity | upper}}] Alert: {{.alert.name}}" + templates: + body: | +
Component: {{.component.name}}
+Status: {{.status}}
+Time: {{.time | formatTime "Jan 02, 15:04:05 MST"}}
+Details: {{.message}}
+ + {{if .dashboard}}{{end}} + throttle: + period: 1h + count: 10 +``` + +### Microsoft Teams Notification with Adaptive Card + +```yaml +apiVersion: mission-control.flanksource.com/v1 +kind: Notification +metadata: + name: teams-notifications +spec: + type: teams + selector: + severity: + - high + - critical + teams: + webhookUrl: "https://outlook.office.com/webhook/..." + templates: + title: "{{.severity | title}} Alert: {{.alert.name}}" + body: | + { + "type": "AdaptiveCard", + "body": [ + { + "type": "TextBlock", + "size": "Medium", + "weight": "Bolder", + "text": "{{.severity | title}} Alert: {{.alert.name}}" + }, + { + "type": "FactSet", + "facts": [ + { + "title": "Component", + "value": "{{.component.name}}" + }, + { + "title": "Status", + "value": "{{.status}}" + }, + { + "title": "Time", + "value": "{{.time | formatTime `Jan 02, 15:04:05 MST`}}" + } + ] + }, + { + "type": "TextBlock", + "text": "{{.message}}", + "wrap": true + } + ], + "actions": [ + { + "type": "Action.OpenUrl", + "title": "View Component", + "url": "{{.component.url}}" + } + ], + "$schema": "http://adaptivecards.io/schemas/adaptive-card.json", + "version": "1.2" + } +``` + +### PagerDuty Integration with Connection + +```yaml +apiVersion: mission-control.flanksource.com/v1 +kind: Notification +metadata: + name: pagerduty-critical +spec: + type: pagerduty + selector: + severity: critical + expression: "component.labels.tier == 'production' && status == 'unhealthy'" + connection: pagerduty-service + templates: + title: "{{.component.name}} - {{.alert.name}}" + body: "{{.message}}" + pagerduty: + severity: critical + component: "{{.component.name}}" + group: "{{.component.labels.team}}" + class: "{{.alert.labels.type}}" +``` + +## See Also + +- [NotificationSilence CRD](./notification-silence.mdx) +- [Connection CRD](./connection.mdx) +- [Incident Management](../../incidents/index.mdx) \ No newline at end of file diff --git a/mission-control/docs/reference/crds/permission-group.mdx 
b/mission-control/docs/reference/crds/permission-group.mdx new file mode 100644 index 00000000..2da5843f --- /dev/null +++ b/mission-control/docs/reference/crds/permission-group.mdx @@ -0,0 +1,234 @@ +--- +title: PermissionGroup CRD +--- + +# PermissionGroup + +The `PermissionGroup` CRD allows you to define groups of permissions for role-based access control (RBAC) in Mission Control. + +## Definition + +```yaml +apiVersion: mission-control.flanksource.com/v1 +kind: PermissionGroup +metadata: + name: example-permission-group +spec: + # Human-readable name of the permission group + name: API Team Access + + # Description of the permission group + description: Access permissions for the API team + + # List of permissions in this group + permissions: + - name: component.view + description: View component details + resources: + - components + verbs: + - get + - list + - name: component.edit + description: Edit components + resources: + - components + verbs: + - update + - patch +``` + +## Schema + +The `PermissionGroup` resource supports the following fields: + +| Field | Description | +|-------|-------------| +| `spec.name` | Human-readable name of the permission group | +| `spec.description` | Description of the permission group | +| `spec.labels` | Labels to categorize the permission group | +| `spec.icon` | Icon for the permission group | +| `spec.permissions` | List of permissions in this group | +| `spec.permissions[].name` | Name of the permission | +| `spec.permissions[].description` | Description of the permission | +| `spec.permissions[].resources` | Resources the permission applies to | +| `spec.permissions[].verbs` | Actions allowed on the resources | +| `spec.permissions[].resourceNames` | Specific resource names the permission applies to | +| `spec.permissions[].labels` | Label selectors for resources | +| `spec.permissions[].expression` | CEL expression for complex permission rules | +| `spec.groups` | User groups that have this permission group | +| 
`spec.users` | Individual users that have this permission group | + +## Examples + +### Basic Developer Access + +```yaml +apiVersion: mission-control.flanksource.com/v1 +kind: PermissionGroup +metadata: + name: developer-access +spec: + name: Developer Access + description: Standard access for developers + icon: code + permissions: + - name: component.view + description: View components + resources: + - components + verbs: + - get + - list + - name: canary.view + description: View canaries + resources: + - canaries + verbs: + - get + - list + - name: incident.view + description: View incidents + resources: + - incidents + verbs: + - get + - list + groups: + - developers +``` + +### Team-Specific Access + +```yaml +apiVersion: mission-control.flanksource.com/v1 +kind: PermissionGroup +metadata: + name: backend-team +spec: + name: Backend Team Access + description: Access for the backend development team + permissions: + - name: component.manage + description: Manage backend components + resources: + - components + verbs: + - get + - list + - create + - update + - delete + labels: + team: backend + - name: canary.manage + description: Manage backend canaries + resources: + - canaries + verbs: + - get + - list + - create + - update + - delete + labels: + team: backend + groups: + - backend-developers + - backend-ops + users: + - backend-lead@example.com +``` + +### Admin Access + +```yaml +apiVersion: mission-control.flanksource.com/v1 +kind: PermissionGroup +metadata: + name: admin-access +spec: + name: Admin Access + description: Full administrative access + permissions: + - name: admin.all + description: All administrative functions + resources: + - "*" + verbs: + - "*" + groups: + - system-administrators + users: + - admin@example.com +``` + +### Complex Resource-Specific Permissions + +```yaml +apiVersion: mission-control.flanksource.com/v1 +kind: PermissionGroup +metadata: + name: devops-team +spec: + name: DevOps Team Access + description: Access for DevOps 
engineers + permissions: + - name: component.manage + description: Manage all components + resources: + - components + verbs: + - get + - list + - create + - update + - delete + - name: canary.manage + description: Manage canaries + resources: + - canaries + verbs: + - get + - list + - create + - update + - delete + - name: connection.manage + description: Manage connections + resources: + - connections + verbs: + - get + - list + - create + - update + - delete + - name: notification.manage + description: Manage notifications + resources: + - notifications + - notificationsilences + verbs: + - get + - list + - create + - update + - delete + - name: incident.respond + description: Respond to incidents + resources: + - incidents + verbs: + - get + - list + - update + - patch + groups: + - devops-engineers +``` + +## See Also + +- [Permission CRD](./permission.mdx) +- [Role-Based Access Control](../rbac.mdx) \ No newline at end of file diff --git a/mission-control/docs/reference/crds/permission.mdx b/mission-control/docs/reference/crds/permission.mdx new file mode 100644 index 00000000..870c59f7 --- /dev/null +++ b/mission-control/docs/reference/crds/permission.mdx @@ -0,0 +1,205 @@ +--- +title: Permission CRD +--- + +# Permission + +The `Permission` CRD allows you to define individual permissions for role-based access control (RBAC) in Mission Control. 
+ +## Definition + +```yaml +apiVersion: mission-control.flanksource.com/v1 +kind: Permission +metadata: + name: example-permission +spec: + # Human-readable name of the permission + name: View Components + + # Description of the permission + description: Allows viewing component details + + # Resources the permission applies to + resources: + - components + + # Actions allowed on the resources + verbs: + - get + - list + + # User groups that have this permission + groups: + - viewers + - developers +``` + +## Schema + +The `Permission` resource supports the following fields: + +| Field | Description | +|-------|-------------| +| `spec.name` | Human-readable name of the permission | +| `spec.description` | Description of the permission | +| `spec.resources` | Resources the permission applies to | +| `spec.verbs` | Actions allowed on the resources | +| `spec.resourceNames` | Specific resource names the permission applies to | +| `spec.labels` | Label selectors for resources | +| `spec.expression` | CEL expression for complex permission rules | +| `spec.groups` | User groups that have this permission | +| `spec.users` | Individual users that have this permission | + +## Resource Types + +Common resource types include: + +| Resource | Description | +|----------|-------------| +| `components` | Component resources | +| `canaries` | Canary health check resources | +| `connections` | Connection resources | +| `incidents` | Incident resources | +| `notifications` | Notification resources | +| `notificationsilences` | Notification silence resources | +| `playbooks` | Playbook resources | +| `topologies` | Topology resources | + +## Verbs + +Allowed verbs (actions) include: + +| Verb | Description | +|------|-------------| +| `get` | Retrieve a specific resource | +| `list` | List resources | +| `watch` | Watch for changes to resources | +| `create` | Create new resources | +| `update` | Update existing resources | +| `patch` | Partially update resources | +| `delete` | 
Delete resources | +| `*` | All actions | + +## Examples + +### Basic View Permission + +```yaml +apiVersion: mission-control.flanksource.com/v1 +kind: Permission +metadata: + name: view-components +spec: + name: View Components + description: Allows viewing component details + resources: + - components + verbs: + - get + - list + groups: + - viewers + - developers +``` + +### Resource Management Permission + +```yaml +apiVersion: mission-control.flanksource.com/v1 +kind: Permission +metadata: + name: manage-canaries +spec: + name: Manage Canaries + description: Full management of canary checks + resources: + - canaries + verbs: + - get + - list + - create + - update + - delete + groups: + - operators + users: + - sre-lead@example.com +``` + +### Team-Specific Resource Permission + +```yaml +apiVersion: mission-control.flanksource.com/v1 +kind: Permission +metadata: + name: frontend-components +spec: + name: Manage Frontend Components + description: Manage components related to the frontend + resources: + - components + verbs: + - get + - list + - update + - patch + labels: + team: frontend + groups: + - frontend-team +``` + +### Specific Resource Names + +```yaml +apiVersion: mission-control.flanksource.com/v1 +kind: Permission +metadata: + name: manage-production-db +spec: + name: Manage Production Database + description: Manage the production database component + resources: + - components + resourceNames: + - production-database + - db-replica + verbs: + - get + - update + - patch + groups: + - database-admins +``` + +### Complex Expression + +```yaml +apiVersion: mission-control.flanksource.com/v1 +kind: Permission +metadata: + name: manage-non-production +spec: + name: Manage Non-Production Resources + description: Manage resources in non-production environments + resources: + - components + - canaries + - connections + verbs: + - get + - list + - create + - update + - delete + expression: "resource.labels.environment != 'production'" + groups: + - developers 
+ - testers +``` + +## See Also + +- [PermissionGroup CRD](./permission-group.mdx) +- [Role-Based Access Control](../rbac.mdx) \ No newline at end of file diff --git a/mission-control/docs/reference/crds/playbook.mdx b/mission-control/docs/reference/crds/playbook.mdx new file mode 100644 index 00000000..3337b37f --- /dev/null +++ b/mission-control/docs/reference/crds/playbook.mdx @@ -0,0 +1,325 @@ +--- +title: Playbook CRD +--- + +# Playbook + +The `Playbook` CRD allows you to define automated workflows and runbooks for handling operational tasks, incidents, and maintenance activities. + +## Definition + +```yaml +apiVersion: mission-control.flanksource.com/v1 +kind: Playbook +metadata: + name: example-playbook +spec: + # Human-readable name of the playbook + name: Database Failover + + # Description of the playbook + description: Automated process for database failover + + # Playbook execution steps + steps: + - name: Check Database Status + check: + type: sql + connection: primary-db + query: SELECT pg_is_in_recovery(); + + - name: Trigger Failover + if: $.steps[0].output == false + exec: + connection: primary-db + command: pg_ctl promote +``` + +## Schema + +The `Playbook` resource supports the following fields: + +| Field | Description | +|-------|-------------| +| `spec.name` | Human-readable name of the playbook | +| `spec.description` | Description of the playbook's purpose | +| `spec.icon` | Icon to represent the playbook | +| `spec.labels` | Labels to categorize the playbook | +| `spec.type` | Type classification of the playbook | +| `spec.schedule` | Schedule for automatic execution (cron format) | +| `spec.timeout` | Maximum execution time for the playbook | +| `spec.parameters` | Input parameters for the playbook | +| `spec.steps` | Execution steps of the playbook | +| `spec.steps[].name` | Name of the step | +| `spec.steps[].description` | Description of the step | +| `spec.steps[].if` | Conditional expression for step execution | +| 
`spec.steps[].exec` | Command execution action | +| `spec.steps[].http` | HTTP request action | +| `spec.steps[].approval` | Human approval action | +| `spec.steps[].kubernetes` | Kubernetes resource action | +| `spec.steps[].check` | Health check action | +| `spec.steps[].alert` | Alert creation/update action | +| `spec.steps[].script` | Script execution action | +| `spec.steps[].playbook` | Nested playbook execution | +| `spec.steps[].template` | Template rendering action | +| `spec.steps[].inputs` | User input collection | +| `spec.steps[].log` | Logging action | +| `spec.steps[].wait` | Wait for a condition | +| `spec.steps[].timeout` | Step-specific timeout | +| `spec.steps[].retries` | Retry configuration | +| `spec.onSuccess` | Actions to execute on successful completion | +| `spec.onFailure` | Actions to execute on failure | + +## Examples + +### Incident Response Playbook + +```yaml +apiVersion: mission-control.flanksource.com/v1 +kind: Playbook +metadata: + name: api-incident-response +spec: + name: API Service Incident Response + description: Automated steps for diagnosing and recovering API service + icon: medkit + type: incident + steps: + - name: Check API Status + check: + type: http + url: https://api.example.com/health + timeout: 5s + + - name: Restart API Service + if: $.steps[0].status == "failed" + kubernetes: + action: restart + resource: deployment + name: api-service + namespace: production + + - name: Verify Recovery + wait: 30s + check: + type: http + url: https://api.example.com/health + timeout: 5s + + - name: Escalate to On-Call + if: $.steps[2].status == "failed" + alert: + severity: critical + title: "API Service Failed to Recover" + description: "Automatic recovery of the API service failed after restart" + assignee: "oncall@example.com" +``` + +### Database Maintenance Playbook + +```yaml +apiVersion: mission-control.flanksource.com/v1 +kind: Playbook +metadata: + name: db-maintenance +spec: + name: Database Maintenance + 
description: Scheduled database maintenance tasks + type: maintenance + schedule: "0 1 * * 0" # Every Sunday at 1 AM + parameters: + - name: backup + type: boolean + default: true + description: Whether to perform a backup before maintenance + steps: + - name: Pre-maintenance Backup + if: $.parameters.backup == true + exec: + connection: database-server + command: pg_dump -Fc -f /backups/pre_maintenance_$(date +%Y%m%d).dump mydatabase + + - name: Notify Maintenance Start + notification: + channels: + - slack-ops + message: "Database maintenance starting" + + - name: Set Read-Only Mode + exec: + connection: database-server + command: psql -c "ALTER SYSTEM SET default_transaction_read_only = on;" + + - name: Run VACUUM ANALYZE + exec: + connection: database-server + command: psql -c "VACUUM ANALYZE;" + + - name: Run Index Maintenance + exec: + connection: database-server + command: psql -f /scripts/reindex.sql + + - name: Restore Read-Write Mode + exec: + connection: database-server + command: psql -c "ALTER SYSTEM SET default_transaction_read_only = off;" + + - name: Reload Configuration + exec: + connection: database-server + command: psql -c "SELECT pg_reload_conf();" + + - name: Verify Database Health + check: + type: sql + connection: database + query: "SELECT 1;" + onSuccess: + notification: + channels: + - slack-ops + message: "Database maintenance completed successfully" + onFailure: + notification: + channels: + - slack-ops + - pagerduty-dba + message: "Database maintenance failed: {{.error}}" +``` + +### Interactive Approval Workflow + +```yaml +apiVersion: mission-control.flanksource.com/v1 +kind: Playbook +metadata: + name: production-deployment +spec: + name: Production Deployment + description: Workflow for deploying to production with approvals + type: deployment + parameters: + - name: version + type: string + required: true + description: Version to deploy + steps: + - name: Deploy to Staging + kubernetes: + action: apply + manifest: | + apiVersion: 
apps/v1 + kind: Deployment + metadata: + name: app-staging + namespace: staging + spec: + template: + spec: + containers: + - name: app + image: myapp:{{$.parameters.version}} + + - name: Run Integration Tests + exec: + connection: ci-server + command: run-tests --env staging + + - name: Request Production Approval + approval: + title: "Approve Production Deployment" + description: "Version {{$.parameters.version}} is ready for production. Tests passed in staging." + approvers: + - team-leads + - operations + requiredApprovals: 2 + timeout: 24h + + - name: Deploy to Production + if: $.steps[2].approved == true + kubernetes: + action: apply + manifest: | + apiVersion: apps/v1 + kind: Deployment + metadata: + name: app + namespace: production + spec: + template: + spec: + containers: + - name: app + image: myapp:{{$.parameters.version}} + + - name: Verify Production + wait: 2m + check: + type: http + url: https://app.example.com/health + timeout: 10s +``` + +### Complex Conditional Workflow + +```yaml +apiVersion: mission-control.flanksource.com/v1 +kind: Playbook +metadata: + name: scaling-workflow +spec: + name: Auto-scaling Workflow + description: Dynamic scaling based on system metrics + type: operations + steps: + - name: Check CPU Usage + check: + type: prometheus + connection: monitoring + query: avg(container_cpu_usage_seconds_total{namespace="production"}) + + - name: Check Memory Usage + check: + type: prometheus + connection: monitoring + query: avg(container_memory_usage_bytes{namespace="production"}) + + - name: Scale Up Workers + if: > + $.steps[0].output > 0.8 || + $.steps[1].output > 0.85 + kubernetes: + action: scale + resource: deployment + name: workers + namespace: production + replicas: 10 + + - name: Scale Down Workers + if: > + $.steps[0].output < 0.3 && + $.steps[1].output < 0.4 + kubernetes: + action: scale + resource: deployment + name: workers + namespace: production + replicas: 3 + + - name: Notify Operations + if: $.steps[2].status == 
"success" || $.steps[3].status == "success" + notification: + channels: + - slack-ops + message: > + Automatic scaling applied: + {{if eq $.steps[2].status "success"}}Scaled UP to 10 replicas{{end}} + {{if eq $.steps[3].status "success"}}Scaled DOWN to 3 replicas{{end}} +``` + +## See Also + +- [Playbook Documentation](../playbooks/index.mdx) +- [Scripting](../scripting/index.mdx) +- [Incident Management](../../incidents/index.mdx) \ No newline at end of file diff --git a/mission-control/docs/reference/crds/scrape-config.mdx b/mission-control/docs/reference/crds/scrape-config.mdx new file mode 100644 index 00000000..71593f80 --- /dev/null +++ b/mission-control/docs/reference/crds/scrape-config.mdx @@ -0,0 +1,203 @@ +--- +title: ScrapeConfig CRD +--- + +# ScrapeConfig + +The `ScrapeConfig` CRD allows you to define configurations for scraping data from various sources to populate components, relationships, and other resources in Mission Control. + +## Definition + +```yaml +apiVersion: configs.flanksource.com/v1 +kind: ScrapeConfig +metadata: + name: example-scrape-config +spec: + # Source to scrape data from + source: + type: kubernetes + connection: production-cluster + + # How to transform the scraped data + transform: + components: + - name: "{{.metadata.name}}" + type: "kubernetes.{{.kind}}" + labels: + namespace: "{{.metadata.namespace}}" +``` + +## Schema + +The `ScrapeConfig` resource supports the following fields: + +| Field | Description | +|-------|-------------| +| `spec.schedule` | Schedule for the scrape job (cron format) | +| `spec.source` | Source configuration for data scraping | +| `spec.source.type` | Type of data source (kubernetes, aws, azure, etc.) 
| +| `spec.source.connection` | Connection to use for the source | +| `spec.source.resource` | Resource type to scrape | +| `spec.source.query` | Query to filter resources | +| `spec.source.selector` | Selector to filter resources | +| `spec.transform` | Transformation configuration | +| `spec.transform.components` | Component transformation rules | +| `spec.transform.relationships` | Relationship transformation rules | +| `spec.transform.properties` | Property transformation rules | +| `spec.transform.labels` | Label transformation rules | +| `spec.transform.template` | Custom transformation template | +| `spec.transform.script` | Custom transformation script | +| `spec.plugins` | Plugins to use for transformation | +| `spec.timeout` | Timeout for the scrape job | +| `spec.backoff` | Backoff configuration for retries | + +## Examples + +### Kubernetes Resources Scrape + +```yaml +apiVersion: configs.flanksource.com/v1 +kind: ScrapeConfig +metadata: + name: k8s-deployments +spec: + schedule: "*/10 * * * *" # Every 10 minutes + source: + type: kubernetes + connection: production-cluster + resource: deployments + transform: + components: + - name: "{{.metadata.name}}" + type: kubernetes.deployment + icon: kubernetes + description: "Kubernetes Deployment in {{.metadata.namespace}}" + labels: + namespace: "{{.metadata.namespace}}" + app: "{{index .metadata.labels \"app\" | default \"\"}}" + properties: + replicas: "{{.spec.replicas}}" + strategy: "{{.spec.strategy.type}}" + selector: "{{.spec.selector | toJson}}" + image: "{{(index .spec.template.spec.containers 0).image}}" +``` + +### AWS EC2 Instances Scrape + +```yaml +apiVersion: configs.flanksource.com/v1 +kind: ScrapeConfig +metadata: + name: aws-ec2-instances +spec: + schedule: "*/30 * * * *" # Every 30 minutes + source: + type: aws + connection: aws-production + resource: ec2 + transform: + components: + - name: "EC2 {{.InstanceId}}" + type: aws.ec2 + icon: ec2 + description: "{{index .Tags \"Name\" | default 
.InstanceId}}" + labels: + region: "{{.Region}}" + type: "{{.InstanceType}}" + environment: "{{index .Tags \"Environment\" | default \"\"}}" + properties: + state: "{{.State.Name}}" + privateIp: "{{.PrivateIpAddress}}" + publicIp: "{{.PublicIpAddress | default \"\"}}" + launchTime: "{{.LaunchTime | formatTime}}" + securityGroups: "{{range .SecurityGroups}}{{.GroupName}}, {{end}}" + ami: "{{.ImageId}}" +``` + +### Database Schema Scrape + +```yaml +apiVersion: configs.flanksource.com/v1 +kind: ScrapeConfig +metadata: + name: postgres-schema +spec: + schedule: "0 */6 * * *" # Every 6 hours + source: + type: sql + connection: production-db + query: | + SELECT + t.table_name, + t.table_schema, + obj_description((t.table_schema || '.' || t.table_name)::regclass) as description, + (SELECT COUNT(*) FROM information_schema.columns c WHERE c.table_name = t.table_name AND c.table_schema = t.table_schema) as column_count + FROM information_schema.tables t + WHERE t.table_schema NOT IN ('pg_catalog', 'information_schema') + ORDER BY t.table_schema, t.table_name + transform: + components: + - name: "{{.table_name}}" + type: database.table + icon: table + description: "{{.description | default (printf \"Table %s.%s\" .table_schema .table_name)}}" + labels: + schema: "{{.table_schema}}" + database: "production" + properties: + columnCount: "{{.column_count}}" +``` + +### API Service Scrape with Relationships + +```yaml +apiVersion: configs.flanksource.com/v1 +kind: ScrapeConfig +metadata: + name: api-services +spec: + schedule: "*/15 * * * *" # Every 15 minutes + source: + type: http + connection: service-registry + url: /api/services + transform: + components: + - name: "{{.name}}" + type: service.api + icon: api + description: "{{.description}}" + labels: + version: "{{.version}}" + team: "{{.team}}" + environment: "{{.environment}}" + properties: + endpoint: "{{.endpoint}}" + status: "{{.status}}" + lastDeployed: "{{.lastDeployTime | formatTime}}" + relationships: + - source: 
+ selector: + id: "{{.name}}" + target: + selector: + id: "{{.database}}" + relationship: dependsOn + properties: + connectionString: "{{.connectionDetails.type}}://{{.connectionDetails.host}}:{{.connectionDetails.port}}/{{.connectionDetails.database}}" + - source: + selector: + id: "{{.name}}" + target: + selector: + id: "{{range .dependencies}}{{.}},{{end}}" + relationship: dependsOn +``` + +## See Also + +- [ScrapePlugin CRD](./scrape-plugin.mdx) +- [Component CRD](./component.mdx) +- [Topology CRD](./topology.mdx) +- [Connection CRD](./connection.mdx) \ No newline at end of file diff --git a/mission-control/docs/reference/crds/scrape-plugin.mdx b/mission-control/docs/reference/crds/scrape-plugin.mdx new file mode 100644 index 00000000..3f318199 --- /dev/null +++ b/mission-control/docs/reference/crds/scrape-plugin.mdx @@ -0,0 +1,458 @@ +--- +title: ScrapePlugin CRD +--- + +# ScrapePlugin + +The `ScrapePlugin` CRD allows you to define custom plugins for scraping and transforming data from various sources into components, relationships, and other resources in Mission Control. 
+ +## Definition + +```yaml +apiVersion: configs.flanksource.com/v1 +kind: ScrapePlugin +metadata: + name: example-scrape-plugin +spec: + # Script that defines the plugin functionality + script: | + function fetch(source) { + // Custom data fetching logic + return fetchFromExternalAPI(source.url, source.headers); + } + + function transform(data) { + // Custom data transformation logic + return data.map(item => ({ + name: item.name, + type: 'custom.service', + properties: { + status: item.status, + version: item.version + } + })); + } +``` + +## Schema + +The `ScrapePlugin` resource supports the following fields: + +| Field | Description | +|-------|-------------| +| `spec.name` | Name of the plugin | +| `spec.description` | Description of the plugin's purpose | +| `spec.script` | JavaScript/TypeScript code for the plugin | +| `spec.language` | Script language (javascript or typescript) | +| `spec.dependencies` | External module dependencies | +| `spec.source` | Default source configuration | +| `spec.transform` | Default transformation configuration | +| `spec.parameters` | Plugin parameters and their defaults | +| `spec.schedule` | Default schedule for the plugin | +| `spec.timeout` | Default timeout for the plugin | + +## Plugin Script Functions + +A ScrapePlugin script can implement several functions: + +| Function | Description | +|----------|-------------| +| `fetch(source)` | Fetches data from the source | +| `transform(data, source)` | Transforms the fetched data | +| `validate(config)` | Validates the scrape configuration | +| `components(data, source)` | Generates components from the data | +| `relationships(data, source)` | Generates relationships from the data | +| `properties(data, source)` | Generates properties from the data | +| `labels(data, source)` | Generates labels from the data | +| `metrics(data, source)` | Generates metrics from the data | + +## Examples + +### Custom API Integration Plugin + +```yaml +apiVersion: configs.flanksource.com/v1 
+kind: ScrapePlugin +metadata: + name: custom-api-plugin +spec: + name: Custom API Integration + description: Scrapes data from a custom API service + language: javascript + parameters: + - name: apiKey + type: string + required: true + description: API Key for authentication + - name: region + type: string + default: us-east-1 + description: Region to fetch data from + script: | + async function fetch(source) { + const apiKey = source.parameters.apiKey; + const region = source.parameters.region || 'us-east-1'; + // Call the global HTTP fetch explicitly: a bare fetch() here resolves to this plugin function and would recurse + const response = await globalThis.fetch(`https://api.example.com/${region}/services`, { + headers: { + 'Authorization': `Bearer ${apiKey}`, + 'Content-Type': 'application/json' + } + }); + + if (!response.ok) { + throw new Error(`API request failed: ${response.statusText}`); + } + + return await response.json(); + } + + function components(data, source) { + return data.services.map(service => ({ + name: service.name, + type: 'custom.service', + icon: service.type === 'database' ? 'database' : 'service', + description: service.description, + labels: { + type: service.type, + region: source.parameters.region, + environment: service.environment + }, + properties: { + status: service.status, + version: service.version, + endpoint: service.endpoint, + lastUpdated: service.lastUpdatedAt + } + })); + } + + function relationships(data, source) { + const relationships = []; + + data.services.forEach(service => { + if (service.dependencies) { + service.dependencies.forEach(dep => { + relationships.push({ + source: { + selector: { + id: service.name + } + }, + target: { + selector: { + id: dep.name + } + }, + relationship: 'dependsOn', + properties: { + type: dep.type, + critical: dep.critical ? 
'true' : 'false' + } + }); + }); + } + }); + + return relationships; + } +``` + +### Log Analysis Plugin + +```yaml +apiVersion: configs.flanksource.com/v1 +kind: ScrapePlugin +metadata: + name: log-analysis-plugin +spec: + name: Log Analysis + description: Analyzes log files to extract component health and relationships + language: javascript + dependencies: + - lodash + script: | + const _ = require('lodash'); + + async function fetch(source) { + const logs = await readLogsFromSource(source.path, source.patterns); + return parseLogEntries(logs); + } + + function readLogsFromSource(path, patterns) { + // Implementation to read logs from files, S3, etc. + // This is a placeholder - actual implementation would depend on source type + return []; + } + + function parseLogEntries(logs) { + // Parse log entries into structured data + // This is a placeholder + return logs.map(log => { + try { + return JSON.parse(log); + } catch (e) { + return { + raw: log, + parsed: false, + timestamp: extractTimestamp(log) + }; + } + }); + } + + function extractTimestamp(logLine) { + // Extract timestamp from log line + // This is a placeholder + const match = logLine.match(/\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}/); + return match ? match[0] : null; + } + + function components(data, source) { + // Group logs by component + const componentLogs = _.groupBy(data, log => log.component || 'unknown'); + + return Object.entries(componentLogs).map(([componentName, logs]) => { + // Calculate error rate and other metrics + const errorLogs = logs.filter(log => log.level === 'error' || log.level === 'fatal'); + const errorRate = errorLogs.length / logs.length; + + return { + name: componentName, + type: 'application', + status: errorRate > 0.1 ? 
'unhealthy' : 'healthy', + properties: { + logCount: logs.length, + errorCount: errorLogs.length, + errorRate: errorRate.toFixed(2), + lastSeen: _.maxBy(logs, 'timestamp')?.timestamp + } + }; + }); + } + + function relationships(data, source) { + const relationships = []; + + // Find service calls in logs + data.forEach(log => { + if (log.caller && log.callee) { + relationships.push({ + source: { + selector: { + id: log.caller + } + }, + target: { + selector: { + id: log.callee + } + }, + relationship: 'calls', + properties: { + latency: log.latency, + status: log.status, + timestamp: log.timestamp + } + }); + } + }); + + return relationships; + } +``` + +### Infrastructure Discovery Plugin + +```yaml +apiVersion: configs.flanksource.com/v1 +kind: ScrapePlugin +metadata: + name: infrastructure-discovery +spec: + name: Infrastructure Discovery + description: Discovers and maps infrastructure components across multiple platforms + language: typescript + parameters: + - name: depth + type: number + default: 2 + description: Depth of relationship discovery + script: | + interface DiscoverySource { + type: string; + connection: string; + parameters: { + depth: number; + [key: string]: any; + }; + } + + interface Component { + id?: string; + name: string; + type: string; + icon?: string; + description?: string; + labels?: Record