diff --git a/modules/20-services-apps/crawl4ai/.env.example b/modules/20-services-apps/crawl4ai/.env.example
new file mode 100644
index 0000000..c1c3c4c
--- /dev/null
+++ b/modules/20-services-apps/crawl4ai/.env.example
@@ -0,0 +1,18 @@
+# Crawl4AI Configuration
+PORT=11235
+
+# API Keys for LLM providers used by Crawl4AI
+# OpenAI API key for GPT models
+OPENAI_API_KEY=
+# DeepSeek API key
+DEEPSEEK_API_KEY=
+# Anthropic API key for Claude models
+ANTHROPIC_API_KEY=
+# Groq API key
+GROQ_API_KEY=
+# Together API key
+TOGETHER_API_KEY=
+# Mistral API key
+MISTRAL_API_KEY=
+# Google Gemini API token
+GEMINI_API_TOKEN=
diff --git a/modules/20-services-apps/crawl4ai/README.md b/modules/20-services-apps/crawl4ai/README.md
new file mode 100644
index 0000000..71e3812
--- /dev/null
+++ b/modules/20-services-apps/crawl4ai/README.md
@@ -0,0 +1,91 @@
+# Crawl4AI Module
+
+This module deploys [Crawl4AI](https://github.com/unclecode/crawl4ai), a web crawling and AI analysis tool, as a Docker container in the homelab environment.
+
+## Overview
+
+The Crawl4AI module:
+
+- Deploys the `unclecode/crawl4ai` Docker container
+- Configures resource limits and reservations for memory
+- Provides shared memory access for Chrome/Chromium performance
+- Supports custom configuration through volume mounting
+- Provides service definition for integration with networking modules
+
+## Usage
+
+```hcl
+module "crawl4ai" {
+  source      = "./modules/20-services-apps/crawl4ai"
+  volume_path = "/path/to/volumes/crawl4ai"
+  networks    = ["homelab-network"]
+}
+```
+
+## Variables
+
+| Variable              | Description                                       | Type           | Default     |
+| --------------------- | ------------------------------------------------- | -------------- | ----------- |
+| `image_tag`           | Tag of the Crawl4AI image to use                  | `string`       | `"latest"`  |
+| `volume_path`         | Host path for Crawl4AI data volumes               | `string`       | -           |
+| `networks`            | List of networks to attach the container to       | `list(string)` | `[]`        |
+
+## Outputs
+
+| Output               | Description                                                |
+| -------------------- | ---------------------------------------------------------- |
+| `service_definition` | Service definition for integration with networking modules |
+
+## Service Definition
+
+This module outputs a service definition that is used by the networking modules to expose the service.
+
+```hcl
+{
+  name         = "crawl4ai"
+  primary_port = 11235
+  endpoint     = "http://crawl4ai:11235"
+}
+```
+
+## Environment Variables
+
+Crawl4AI requires API keys for various LLM providers. These, together with the `PORT` setting that the module reads for the service port, are configured through a `.env` file in the module directory. Create this file from the provided `.env.example` template:
+
+- `OPENAI_API_KEY`: OpenAI API key
+- `DEEPSEEK_API_KEY`: DeepSeek API key
+- `ANTHROPIC_API_KEY`: Anthropic API key
+- `GROQ_API_KEY`: Groq API key
+- `TOGETHER_API_KEY`: Together API key
+- `MISTRAL_API_KEY`: Mistral API key
+- `GEMINI_API_TOKEN`: Gemini API token
+
+## Configuration
+
+Crawl4AI requires a custom configuration file. This is mounted from `${volume_path}/config.yml` on the host to `/app/config.yml` in the container. The file must exist on the host before the container starts.
+
+## Ports
+
+Crawl4AI exposes one port, which is published on the host under the same number, as set by `PORT` in the `.env` file:
+1. HTTP interface (port 11235 by default) - The Crawl4AI HTTP interface, including the `/health` endpoint used by the container healthcheck
+
+## Example Integration in Main Configuration
+
+```hcl
+module "crawl4ai" {
+  source      = "./modules/20-services-apps/crawl4ai"
+  volume_path = "${module.system_globals.volume_host}/crawl4ai"
+  networks    = [module.services.homelab_docker_network_name]
+  image_tag   = "latest" # Optional: set a specific image tag instead
+}
+
+# The service definition is automatically included in the services output
+module "services" {
+  source = "./modules/services"
+  # ...
+  service_definitions = [
+    module.crawl4ai.service_definition,
+    # Other service definitions
+  ]
+}
+```
diff --git a/modules/20-services-apps/crawl4ai/main.tf b/modules/20-services-apps/crawl4ai/main.tf
new file mode 100644
index 0000000..61200be
--- /dev/null
+++ b/modules/20-services-apps/crawl4ai/main.tf
@@ -0,0 +1,99 @@
+terraform {
+  required_providers {
+    dotenv = {
+      source = "germanbrew/dotenv"
+    }
+  }
+}
+
+variable "image_tag" {
+  description = "Tag of the Crawl4AI image to use"
+  type        = string
+  default     = "latest"
+}
+
+variable "volume_path" {
+  description = "Host path for Crawl4AI data volumes"
+  type        = string
+}
+
+variable "networks" {
+  description = "List of networks to which the container should be attached"
+  type        = list(string)
+  default     = []
+}
+
+locals {
+  container_name = "crawl4ai"
+  image          = "unclecode/crawl4ai"
+  image_tag      = var.image_tag
+  monitoring     = true
+  service_port   = provider::dotenv::get_by_key("PORT", local.env_file) # PORT entry of the module's .env file
+  env_file       = "${path.module}/.env"                                # .env lives next to this module (see .env.example)
+
+  # Define volumes: the host's /dev/shm and the Crawl4AI config file
+  default_volumes = [
+    {
+      container_path = "/dev/shm"
+      host_path      = "/dev/shm"
+      read_only      = false
+    },
+    {
+      container_path = "/app/config.yml"
+      host_path      = "${var.volume_path}/config.yml" # var.volume_path is used as-is; no "crawl4ai" subdirectory is appended
+      read_only      = false
+    }
+  ]
+
+  # Define ports: the service port is published on the same host port number
+  ports = [
+    {
+      internal = local.service_port
+      external = local.service_port
+      protocol = "tcp"
+    }
+  ]
+
+  # Environment variables: LLM provider credentials, each read by key from the .env file
+  env_vars = {
+    OPENAI_API_KEY    = provider::dotenv::get_by_key("OPENAI_API_KEY", local.env_file)
+    DEEPSEEK_API_KEY  = provider::dotenv::get_by_key("DEEPSEEK_API_KEY", local.env_file)
+    ANTHROPIC_API_KEY = provider::dotenv::get_by_key("ANTHROPIC_API_KEY", local.env_file)
+    GROQ_API_KEY      = provider::dotenv::get_by_key("GROQ_API_KEY", local.env_file)
+    TOGETHER_API_KEY  = provider::dotenv::get_by_key("TOGETHER_API_KEY", local.env_file)
+    MISTRAL_API_KEY   = provider::dotenv::get_by_key("MISTRAL_API_KEY", local.env_file)
+    GEMINI_API_TOKEN  = provider::dotenv::get_by_key("GEMINI_API_TOKEN", local.env_file)
+  }
+
+  # Healthcheck configuration: curl the /health endpoint on the service port from inside the container
+  healthcheck = {
+    test         = ["CMD", "curl", "-f", "http://localhost:${local.service_port}/health"]
+    interval     = "30s"
+    timeout      = "10s"
+    retries      = 3
+    start_period = "40s"
+  }
+}
+
+module "crawl4ai" {
+  source         = "../../10-services-generic/docker-service"
+  container_name = local.container_name
+  image          = local.image
+  tag            = local.image_tag
+  volumes        = local.default_volumes
+  ports          = local.ports
+  env_vars       = local.env_vars
+  networks       = var.networks
+  monitoring     = local.monitoring
+  healthcheck    = local.healthcheck
+  user           = "appuser"
+}
+
+output "service_definition" {
+  description = "General service definition with optional ingress configuration"
+  value = {
+    name         = local.container_name
+    primary_port = local.service_port
+    endpoint     = "http://${local.container_name}:${local.service_port}"
+  }
+}
diff --git a/services/main.tf b/services/main.tf
index 2aa4620..4ec6c6b 100644
--- a/services/main.tf
+++ b/services/main.tf
@@ -36,6 +36,12 @@ module "calibre" {
   networks = [module.homelab_docker_network.name]
 }
 
+module "crawl4ai" {
+  source      = "${local.module_dir}/20-services-apps/crawl4ai"
+  volume_path = "${local.volume_host}/crawl4ai"
+  networks    = [module.homelab_docker_network.name]
+}
+
 module "emulatorjs" {
   source      = "${local.module_dir}/20-services-apps/emulatorjs"
   volume_path = "${local.volume_host}/emulatorjs"
@@ -53,6 +59,12 @@ module "linkwarden" {
   networks = [module.homelab_docker_network.name]
 }
 
+module "n8n" {
+  source      = "${local.module_dir}/20-services-apps/n8n"
+  volume_path = "${local.volume_host}/n8n"
+  networks    = [module.homelab_docker_network.name]
+}
+
 module "nocodb" {
   source      = "${local.module_dir}/20-services-apps/nocodb"
   volume_path = "${local.volume_host}/nocodb"
@@ -77,12 +89,6 @@ module "pterodactyl_wings" {
   networks = [module.homelab_docker_network.name]
 }
 
-module "n8n" {
-  source      = "${local.module_dir}/20-services-apps/n8n"
-  volume_path = "${local.volume_host}/n8n"
-  networks    = [module.homelab_docker_network.name]
-}
-
 module "searxng" {
   source      = "${local.module_dir}/20-services-apps/searxng"
   volume_path = "${local.volume_host}/searxng"
diff --git a/services/outputs.tf b/services/outputs.tf
index 25ff714..0726a0f 100644
--- a/services/outputs.tf
+++ b/services/outputs.tf
@@ -7,14 +7,15 @@ output "service_definitions" {
     module.actualbudget.service_definition,
     module.affine.service_definition,
     module.calibre.service_definition,
+    module.crawl4ai.service_definition,
     module.emulatorjs.service_definition,
     module.glance.service_definition,
     module.linkwarden.service_definition,
+    module.n8n.service_definition,
     module.nocodb.service_definition,
     module.ntfy.service_definition,
     module.pterodactyl_wings.service_definition,
     module.pterodactyl_panel.service_definition,
-    module.n8n.service_definition,
     module.searxng.service_definition
   ]
 }