feat: add crawl4ai

This commit is contained in:
Yuris Cakranegara
2025-06-30 22:22:08 +10:00
parent 82d8ca0463
commit a63f144bf1
5 changed files with 222 additions and 7 deletions

View File

@@ -0,0 +1,18 @@
# Crawl4AI Configuration
# Copy this file to `.env` in the module directory; the Terraform module reads
# every value below from that file via the dotenv provider.
# Port used for both the container port and the published host port, as well as
# for the healthcheck URL and the exported service endpoint.
PORT=11235
# API Keys for LLM providers used by Crawl4AI
# Each key is passed to the container as an environment variable of the same name.
# OpenAI API key for GPT models
OPENAI_API_KEY=
# DeepSeek API key
DEEPSEEK_API_KEY=
# Anthropic API key for Claude models
ANTHROPIC_API_KEY=
# Groq API key
GROQ_API_KEY=
# Together API key
TOGETHER_API_KEY=
# Mistral API key
MISTRAL_API_KEY=
# Google Gemini API token
GEMINI_API_TOKEN=

View File

@@ -0,0 +1,91 @@
# Crawl4AI Module
This module deploys [Crawl4AI](https://github.com/unclecode/crawl4ai), a web crawling and AI analysis tool, as a Docker container in the homelab environment.
## Overview
The Crawl4AI module:
- Deploys the `unclecode/crawl4ai` Docker container
- Configures resource limits and reservations for memory
- Provides shared memory access for Chrome/Chromium performance
- Supports custom configuration through volume mounting
- Provides service definition for integration with networking modules
## Usage
```hcl
module "crawl4ai" {
source = "./modules/20-services-apps/crawl4ai"
volume_path = "/path/to/volumes"
networks = ["homelab-network"]
}
```
## Variables
| Variable | Description | Type | Default |
| --------------------- | ------------------------------------------------- | -------------- | ----------- |
| `image_tag` | Tag of the Crawl4AI image to use | `string` | `"latest"` |
| `volume_path` | Host path for Crawl4AI data volumes | `string` | - |
| `networks` | List of networks to attach the container to | `list(string)` | `[]` |
## Outputs
| Output | Description |
| -------------------- | ---------------------------------------------------------- |
| `service_definition` | Service definition for integration with networking modules |
## Service Definition
This module outputs a service definition that is used by the networking modules to expose the service.
```hcl
{
name = "crawl4ai"
primary_port = 11235
endpoint = "http://crawl4ai:11235"
}
```
## Environment Variables
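Crawl4AI requires API keys for various LLM providers. These, together with the service port, are read from a `.env` file in the module directory. Create this file from the provided `.env.example` template:
- `PORT`: Port used for the container and the published host port (`11235` in the template)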
- `OPENAI_API_KEY`: OpenAI API key
- `DEEPSEEK_API_KEY`: DeepSeek API key
- `ANTHROPIC_API_KEY`: Anthropic API key
- `GROQ_API_KEY`: Groq API key
- `TOGETHER_API_KEY`: Together API key
- `MISTRAL_API_KEY`: Mistral API key
- `GEMINI_API_TOKEN`: Gemini API token
## Configuration
Crawl4AI requires a custom configuration file. This is mounted from `${volume_path}/config.yml` on the host to `/app/config.yml` in the container.
## Ports
Crawl4AI exposes one port, which is published on the same host port; the value is taken from `PORT` in the `.env` file:
1. API (port 11235) - The Crawl4AI HTTP server, including the `/health` endpoint used by the container healthcheck
## Example Integration in Main Configuration
```hcl
module "crawl4ai" {
source = "./modules/20-services-apps/crawl4ai"
volume_path = module.system_globals.volume_host
networks = [module.services.homelab_docker_network_name]
image_tag = "latest" # Optional: set a specific Crawl4AI image tag
}
# The service definition is automatically included in the services output
module "services" {
source = "./modules/services"
# ...
service_definitions = [
module.crawl4ai.service_definition,
# Other service definitions
]
}
```

View File

@@ -0,0 +1,99 @@
# Provider requirements for this module.
# The dotenv provider supplies the provider::dotenv::get_by_key() function that
# the locals below use to read values from the module's .env file.
terraform {
  required_providers {
    dotenv = { source = "germanbrew/dotenv" }
  }
}
# Image tag deployed for the Crawl4AI container; defaults to "latest".
variable "image_tag" {
  type        = string
  default     = "latest"
  description = "Tag of the Crawl4AI image to use"
}
# Host directory for Crawl4AI files (required, no default).
# config.yml is expected directly inside this directory.
variable "volume_path" {
  type        = string
  description = "Host path for Crawl4AI data volumes"
}
# Docker networks to attach the container to; an empty list by default.
variable "networks" {
  type        = list(string)
  default     = []
  description = "List of networks to which the container should be attached"
}
# Derived settings for the Crawl4AI container. Anything read through
# provider::dotenv::get_by_key() comes from the module-local .env file.
locals {
  # Module-local .env file that all dotenv lookups below read from.
  env_file = "${path.module}/.env"

  # Container identity.
  container_name = "crawl4ai"
  image          = "unclecode/crawl4ai"
  image_tag      = var.image_tag
  monitoring     = true

  # Service port, taken from PORT in the .env file. It is reused for the port
  # mapping, the healthcheck URL and the exported service definition.
  service_port = provider::dotenv::get_by_key("PORT", local.env_file)

  # Volume mounts: the host's /dev/shm and the config.yml found directly
  # under var.volume_path.
  default_volumes = [
    {
      host_path      = "/dev/shm"
      container_path = "/dev/shm"
      read_only      = false
    },
    {
      host_path      = "${var.volume_path}/config.yml"
      container_path = "/app/config.yml"
      read_only      = false
    },
  ]

  # Publish the service port on the identical host port.
  ports = [
    {
      protocol = "tcp"
      internal = local.service_port
      external = local.service_port
    },
  ]

  # LLM provider credentials: each listed key is looked up in the .env file and
  # exposed to the container under the same name.
  env_vars = {
    for key_name in [
      "OPENAI_API_KEY",
      "DEEPSEEK_API_KEY",
      "ANTHROPIC_API_KEY",
      "GROQ_API_KEY",
      "TOGETHER_API_KEY",
      "MISTRAL_API_KEY",
      "GEMINI_API_TOKEN",
    ] : key_name => provider::dotenv::get_by_key(key_name, local.env_file)
  }

  # Container healthcheck: curl the /health endpoint on the service port.
  healthcheck = {
    test         = ["CMD", "curl", "-f", "http://localhost:${local.service_port}/health"]
    start_period = "40s"
    interval     = "30s"
    timeout      = "10s"
    retries      = 3
  }
}
# Deploys the Crawl4AI container through the generic docker-service module,
# feeding it the values assembled in the locals block.
module "crawl4ai" {
  source = "../../10-services-generic/docker-service"

  # Image and container identity
  container_name = local.container_name
  image          = local.image
  tag            = local.image_tag
  user           = "appuser"

  # Connectivity
  networks = var.networks
  ports    = local.ports

  # Runtime configuration
  env_vars = local.env_vars
  volumes  = local.default_volumes

  # Health and monitoring
  healthcheck = local.healthcheck
  monitoring  = local.monitoring
}
# Exposes the container name, service port and an HTTP endpoint built from
# both, for consumption by other modules.
output "service_definition" {
  description = "General service definition with optional ingress configuration"

  value = {
    name         = local.container_name
    primary_port = local.service_port
    endpoint     = format("http://%s:%s", local.container_name, local.service_port)
  }
}

View File

@@ -36,6 +36,12 @@ module "calibre" {
networks = [module.homelab_docker_network.name]
}
# Crawl4AI service module. Its host files live under <volume_host>/crawl4ai and
# the container is attached to the homelab Docker network.
module "crawl4ai" {
source = "${local.module_dir}/20-services-apps/crawl4ai"
volume_path = "${local.volume_host}/crawl4ai"
networks = [module.homelab_docker_network.name]
}
module "emulatorjs" {
source = "${local.module_dir}/20-services-apps/emulatorjs"
volume_path = "${local.volume_host}/emulatorjs"
@@ -53,6 +59,12 @@ module "linkwarden" {
networks = [module.homelab_docker_network.name]
}
# n8n service module. Its host files live under <volume_host>/n8n and the
# container is attached to the homelab Docker network.
module "n8n" {
source = "${local.module_dir}/20-services-apps/n8n"
volume_path = "${local.volume_host}/n8n"
networks = [module.homelab_docker_network.name]
}
module "nocodb" {
source = "${local.module_dir}/20-services-apps/nocodb"
volume_path = "${local.volume_host}/nocodb"
@@ -77,12 +89,6 @@ module "pterodactyl_wings" {
networks = [module.homelab_docker_network.name]
}
module "n8n" {
source = "${local.module_dir}/20-services-apps/n8n"
volume_path = "${local.volume_host}/n8n"
networks = [module.homelab_docker_network.name]
}
module "searxng" {
source = "${local.module_dir}/20-services-apps/searxng"
volume_path = "${local.volume_host}/searxng"

View File

@@ -7,14 +7,15 @@ output "service_definitions" {
module.actualbudget.service_definition,
module.affine.service_definition,
module.calibre.service_definition,
module.crawl4ai.service_definition,
module.emulatorjs.service_definition,
module.glance.service_definition,
module.linkwarden.service_definition,
module.n8n.service_definition,
module.nocodb.service_definition,
module.ntfy.service_definition,
module.pterodactyl_wings.service_definition,
module.pterodactyl_panel.service_definition,
module.n8n.service_definition,
module.searxng.service_definition
]
}