feat: add crawl4ai
This commit is contained in:
18
modules/20-services-apps/crawl4ai/.env.example
Normal file
18
modules/20-services-apps/crawl4ai/.env.example
Normal file
@@ -0,0 +1,18 @@
|
|||||||
|
# Crawl4AI Configuration
|
||||||
|
PORT=11235
|
||||||
|
|
||||||
|
# API Keys for LLM providers used by Crawl4AI
|
||||||
|
# OpenAI API key for GPT models
|
||||||
|
OPENAI_API_KEY=
|
||||||
|
# DeepSeek API key
|
||||||
|
DEEPSEEK_API_KEY=
|
||||||
|
# Anthropic API key for Claude models
|
||||||
|
ANTHROPIC_API_KEY=
|
||||||
|
# Groq API key
|
||||||
|
GROQ_API_KEY=
|
||||||
|
# Together API key
|
||||||
|
TOGETHER_API_KEY=
|
||||||
|
# Mistral API key
|
||||||
|
MISTRAL_API_KEY=
|
||||||
|
# Google Gemini API token
|
||||||
|
GEMINI_API_TOKEN=
|
||||||
91
modules/20-services-apps/crawl4ai/README.md
Normal file
91
modules/20-services-apps/crawl4ai/README.md
Normal file
@@ -0,0 +1,91 @@
|
|||||||
|
# Crawl4AI Module
|
||||||
|
|
||||||
|
This module deploys [Crawl4AI](https://github.com/unclecode/crawl4ai), a web crawling and AI analysis tool, as a Docker container in the homelab environment.
|
||||||
|
|
||||||
|
## Overview
|
||||||
|
|
||||||
|
The Crawl4AI module:
|
||||||
|
|
||||||
|
- Deploys the `unclecode/crawl4ai` Docker container
|
||||||
|
- Configures resource limits and reservations for memory
|
||||||
|
- Provides shared memory access for Chrome/Chromium performance
|
||||||
|
- Supports custom configuration through volume mounting
|
||||||
|
- Provides service definition for integration with networking modules
|
||||||
|
|
||||||
|
## Usage
|
||||||
|
|
||||||
|
```hcl
|
||||||
|
module "crawl4ai" {
|
||||||
|
source = "./modules/20-services-apps/crawl4ai"
|
||||||
|
volume_path = "/path/to/volumes"
|
||||||
|
networks = ["homelab-network"]
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
## Variables
|
||||||
|
|
||||||
|
| Variable | Description | Type | Default |
|
||||||
|
| --------------------- | ------------------------------------------------- | -------------- | ----------- |
|
||||||
|
| `image_tag` | Tag of the Crawl4AI image to use | `string` | `"latest"` |
|
||||||
|
| `volume_path` | Host path for Crawl4AI data volumes | `string` | - |
|
||||||
|
| `networks` | List of networks to attach the container to | `list(string)` | `[]` |
|
||||||
|
|
||||||
|
## Outputs
|
||||||
|
|
||||||
|
| Output | Description |
|
||||||
|
| -------------------- | ---------------------------------------------------------- |
|
||||||
|
| `service_definition` | Service definition for integration with networking modules |
|
||||||
|
|
||||||
|
## Service Definition
|
||||||
|
|
||||||
|
This module outputs a service definition that is used by the networking modules to expose the service.
|
||||||
|
|
||||||
|
```hcl
|
||||||
|
{
|
||||||
|
name = "crawl4ai"
|
||||||
|
primary_port = 11235
|
||||||
|
endpoint = "http://crawl4ai:11235"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
## Environment Variables
|
||||||
|
|
||||||
|
Crawl4AI uses API keys for various LLM providers. These, together with the service `PORT`, are read from a `.env` file in the module directory. Create this file based on the provided `.env.example` template:
|
||||||
|
|
||||||
|
- `OPENAI_API_KEY`: OpenAI API key
|
||||||
|
- `DEEPSEEK_API_KEY`: DeepSeek API key
|
||||||
|
- `ANTHROPIC_API_KEY`: Anthropic API key
|
||||||
|
- `GROQ_API_KEY`: Groq API key
|
||||||
|
- `TOGETHER_API_KEY`: Together API key
|
||||||
|
- `MISTRAL_API_KEY`: Mistral API key
|
||||||
|
- `GEMINI_API_TOKEN`: Gemini API token
|
||||||
|
|
||||||
|
## Configuration
|
||||||
|
|
||||||
|
Crawl4AI requires a custom configuration file. This is mounted from `${volume_path}/config.yml` on the host to `/app/config.yml` in the container, so the file must exist directly inside the directory passed as `volume_path`.
|
||||||
|
|
||||||
|
## Ports
|
||||||
|
|
||||||
|
Crawl4AI exposes one port, which is published on the host under the same port number. The number is read from `PORT` in the `.env` file:
|
||||||
|
1. HTTP API (port 11235) - The Crawl4AI server endpoint, also used by the container health check (`/health`)
|
||||||
|
|
||||||
|
## Example Integration in Main Configuration
|
||||||
|
|
||||||
|
```hcl
|
||||||
|
module "crawl4ai" {
|
||||||
|
source = "./modules/20-services-apps/crawl4ai"
|
||||||
|
volume_path = module.system_globals.volume_host
|
||||||
|
networks = [module.services.homelab_docker_network_name]
|
||||||
|
# memory_limit = 8192 # 8GB if you need more memory (NOTE: not currently declared as a variable of this module; see the Variables table)
|
||||||
|
}
|
||||||
|
|
||||||
|
# The service definition is automatically included in the services output
|
||||||
|
module "services" {
|
||||||
|
source = "./modules/services"
|
||||||
|
# ...
|
||||||
|
service_definitions = [
|
||||||
|
module.crawl4ai.service_definition,
|
||||||
|
# Other service definitions
|
||||||
|
]
|
||||||
|
}
|
||||||
|
```
|
||||||
99
modules/20-services-apps/crawl4ai/main.tf
Normal file
99
modules/20-services-apps/crawl4ai/main.tf
Normal file
@@ -0,0 +1,99 @@
|
|||||||
|
# Provider requirements for this module.
# The dotenv provider supplies the provider::dotenv::get_by_key function that
# this file uses to read values out of the module-local .env file.
# NOTE(review): provider-defined functions need a recent Terraform/OpenTofu
# release (Terraform >= 1.8) - consider pinning required_version; confirm the
# tool version used by this homelab first.
terraform {
  required_providers {
    dotenv = { source = "germanbrew/dotenv" }
  }
}
|
||||||
|
|
||||||
|
# Tag of the unclecode/crawl4ai image to run; falls back to "latest" when the
# caller does not set it.
variable "image_tag" {
  type        = string
  default     = "latest"
  description = "Tag of the Crawl4AI image to use"
}
|
||||||
|
|
||||||
|
# Required input (no default): host directory for Crawl4AI files.
# config.yml is bind-mounted from directly inside this directory.
variable "volume_path" {
  type        = string
  description = "Host path for Crawl4AI data volumes"
}
|
||||||
|
|
||||||
|
# Names of the Docker networks the container is attached to; empty by default.
variable "networks" {
  type        = list(string)
  default     = []
  description = "List of networks to which the container should be attached"
}
|
||||||
|
|
||||||
|
# Static settings for the Crawl4AI container plus values read from the
# module-local .env file (service port and LLM provider API keys).
locals {
  container_name = "crawl4ai"
  image          = "unclecode/crawl4ai"
  image_tag      = var.image_tag
  monitoring     = true

  # dotenv file that lives next to this module; create it from .env.example.
  env_file = "${path.module}/.env"

  # The dotenv lookup yields text. Convert it once so that the port mappings
  # and the `primary_port` output are numeric, as documented in the README.
  service_port = tonumber(provider::dotenv::get_by_key("PORT", local.env_file))

  # Define volumes
  default_volumes = [
    {
      # Share the host's shared-memory filesystem with the container
      # (the README cites Chrome/Chromium performance as the reason).
      container_path = "/dev/shm"
      host_path      = "/dev/shm"
      read_only      = false
    },
    {
      # Custom Crawl4AI configuration file supplied by the operator.
      container_path = "/app/config.yml"
      host_path      = "${var.volume_path}/config.yml"
      read_only      = false
    }
  ]

  # Define ports: the service port is published under the same number on the host.
  ports = [
    {
      internal = local.service_port
      external = local.service_port
      protocol = "tcp"
    }
  ]

  # Environment variables: LLM provider credentials forwarded from the .env file.
  env_vars = {
    OPENAI_API_KEY    = provider::dotenv::get_by_key("OPENAI_API_KEY", local.env_file)
    DEEPSEEK_API_KEY  = provider::dotenv::get_by_key("DEEPSEEK_API_KEY", local.env_file)
    ANTHROPIC_API_KEY = provider::dotenv::get_by_key("ANTHROPIC_API_KEY", local.env_file)
    GROQ_API_KEY      = provider::dotenv::get_by_key("GROQ_API_KEY", local.env_file)
    TOGETHER_API_KEY  = provider::dotenv::get_by_key("TOGETHER_API_KEY", local.env_file)
    MISTRAL_API_KEY   = provider::dotenv::get_by_key("MISTRAL_API_KEY", local.env_file)
    GEMINI_API_TOKEN  = provider::dotenv::get_by_key("GEMINI_API_TOKEN", local.env_file)
  }

  # Healthcheck configuration: probe the service's /health endpoint from
  # inside the container.
  healthcheck = {
    test         = ["CMD", "curl", "-f", "http://localhost:${local.service_port}/health"]
    interval     = "30s"
    timeout      = "10s"
    retries      = 3
    start_period = "40s"
  }
}
|
||||||
|
|
||||||
|
# Instantiates the shared docker-service module with the Crawl4AI settings
# assembled in this file's locals.
module "crawl4ai" {
  source = "../../10-services-generic/docker-service"

  # Container identity and image
  container_name = local.container_name
  image          = local.image
  tag            = local.image_tag
  user           = "appuser"

  # Runtime wiring
  networks = var.networks
  ports    = local.ports
  volumes  = local.default_volumes
  env_vars = local.env_vars

  # Observability
  monitoring  = local.monitoring
  healthcheck = local.healthcheck
}
|
||||||
|
|
||||||
|
# Publishes the container name, its service port, and the in-network HTTP URL
# built from the two, for consumption by other modules.
output "service_definition" {
  value = {
    endpoint     = "http://${local.container_name}:${local.service_port}"
    name         = local.container_name
    primary_port = local.service_port
  }
  description = "General service definition with optional ingress configuration"
}
|
||||||
@@ -36,6 +36,12 @@ module "calibre" {
|
|||||||
networks = [module.homelab_docker_network.name]
|
networks = [module.homelab_docker_network.name]
|
||||||
}
|
}
|
||||||
|
|
||||||
|
module "crawl4ai" {
|
||||||
|
source = "${local.module_dir}/20-services-apps/crawl4ai"
|
||||||
|
volume_path = "${local.volume_host}/crawl4ai"
|
||||||
|
networks = [module.homelab_docker_network.name]
|
||||||
|
}
|
||||||
|
|
||||||
module "emulatorjs" {
|
module "emulatorjs" {
|
||||||
source = "${local.module_dir}/20-services-apps/emulatorjs"
|
source = "${local.module_dir}/20-services-apps/emulatorjs"
|
||||||
volume_path = "${local.volume_host}/emulatorjs"
|
volume_path = "${local.volume_host}/emulatorjs"
|
||||||
@@ -53,6 +59,12 @@ module "linkwarden" {
|
|||||||
networks = [module.homelab_docker_network.name]
|
networks = [module.homelab_docker_network.name]
|
||||||
}
|
}
|
||||||
|
|
||||||
|
module "n8n" {
|
||||||
|
source = "${local.module_dir}/20-services-apps/n8n"
|
||||||
|
volume_path = "${local.volume_host}/n8n"
|
||||||
|
networks = [module.homelab_docker_network.name]
|
||||||
|
}
|
||||||
|
|
||||||
module "nocodb" {
|
module "nocodb" {
|
||||||
source = "${local.module_dir}/20-services-apps/nocodb"
|
source = "${local.module_dir}/20-services-apps/nocodb"
|
||||||
volume_path = "${local.volume_host}/nocodb"
|
volume_path = "${local.volume_host}/nocodb"
|
||||||
@@ -77,12 +89,6 @@ module "pterodactyl_wings" {
|
|||||||
networks = [module.homelab_docker_network.name]
|
networks = [module.homelab_docker_network.name]
|
||||||
}
|
}
|
||||||
|
|
||||||
module "n8n" {
|
|
||||||
source = "${local.module_dir}/20-services-apps/n8n"
|
|
||||||
volume_path = "${local.volume_host}/n8n"
|
|
||||||
networks = [module.homelab_docker_network.name]
|
|
||||||
}
|
|
||||||
|
|
||||||
module "searxng" {
|
module "searxng" {
|
||||||
source = "${local.module_dir}/20-services-apps/searxng"
|
source = "${local.module_dir}/20-services-apps/searxng"
|
||||||
volume_path = "${local.volume_host}/searxng"
|
volume_path = "${local.volume_host}/searxng"
|
||||||
|
|||||||
@@ -7,14 +7,15 @@ output "service_definitions" {
|
|||||||
module.actualbudget.service_definition,
|
module.actualbudget.service_definition,
|
||||||
module.affine.service_definition,
|
module.affine.service_definition,
|
||||||
module.calibre.service_definition,
|
module.calibre.service_definition,
|
||||||
|
module.crawl4ai.service_definition,
|
||||||
module.emulatorjs.service_definition,
|
module.emulatorjs.service_definition,
|
||||||
module.glance.service_definition,
|
module.glance.service_definition,
|
||||||
module.linkwarden.service_definition,
|
module.linkwarden.service_definition,
|
||||||
|
module.n8n.service_definition,
|
||||||
module.nocodb.service_definition,
|
module.nocodb.service_definition,
|
||||||
module.ntfy.service_definition,
|
module.ntfy.service_definition,
|
||||||
module.pterodactyl_wings.service_definition,
|
module.pterodactyl_wings.service_definition,
|
||||||
module.pterodactyl_panel.service_definition,
|
module.pterodactyl_panel.service_definition,
|
||||||
module.n8n.service_definition,
|
|
||||||
module.searxng.service_definition
|
module.searxng.service_definition
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user