feat: production hardening + smart subpage scanning with layout dedup

Security:
- Add CRON_SECRET auth to /api/cron/* endpoints
- Add admin role verification to /api/admin/* routes
- Add org membership check to /api/billing/usage
- Add security headers (HSTS, X-Frame-Options, CSP, etc.)
- Add env variable validation at startup
- Add rate limiting to backend API (30 req/min per IP)

Infrastructure:
- Multi-stage Dockerfiles with non-root user + healthchecks
- Updated cron workflow to pass CRON_SECRET header
- Updated .env.example with all optional vars

Smart subpage scanning:
- Crawler now computes template_hash (DOM structure without content)
- Scanner scans ALL unique-layout pages, not just main page
- Pages with same layout (e.g. product pages) scanned only once
- Deduplication by template_hash, fallback to content_hash
- Main page always scanned with high priority
- Re-checks subscription limits before each page scan

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
Dennis
2026-03-06 07:44:32 +01:00
parent d8de0a973a
commit 1c545c93b4
18 changed files with 498 additions and 59 deletions
+27 -16
View File
@@ -1,25 +1,36 @@
# Use the official Node.js image.
FROM node:18
# --- Stage 1: Build ---
# Named stage "builder": its output is copied into the runtime stage via COPY --from=builder.
FROM node:20-slim AS builder
# OPTIONAL: If the base image does not ship with Chrome, you would additionally
# have to run "apt-get update" + "apt-get install chromium" (or similar) here,
# for example:
RUN apt-get update && apt-get install -y chromium
# Create and change to the app directory.
WORKDIR /app
# Copy application dependency manifests to the container image.
COPY package*.json ./
# Install dependencies with `npm ci` (clean install driven by the lockfile).
RUN npm ci
# Install dependencies (plain `npm install`; no production-only flag is passed here).
RUN npm install
# Copy local code to the container image.
COPY . .
# Build the TypeScript code
RUN npm run build
# Run the web service on container startup.
# --- Stage 2: Production ---
FROM node:20-slim AS runtime
# Install Chromium from the distribution packages (without recommended extras)
# and remove the apt package lists afterwards so they do not stay in the layer.
RUN apt-get update && apt-get install -y --no-install-recommends chromium \
&& rm -rf /var/lib/apt/lists/*
# Path of the Chromium binary installed above.
ENV CHROME_BIN=/usr/bin/chromium
# Presumably tells Puppeteer not to download its own bundled Chromium - confirm
# against the Puppeteer version in package.json.
ENV PUPPETEER_SKIP_CHROMIUM_DOWNLOAD=true
WORKDIR /app
# Create a system group and system user "app" (home directory /app) so the
# service does not run as root.
RUN groupadd -r app && useradd -r -g app -d /app app
# Copy only the build output, the installed modules and package.json from the
# builder stage, owned by the unprivileged user.
COPY --from=builder --chown=app:app /app/dist ./dist
COPY --from=builder --chown=app:app /app/node_modules ./node_modules
COPY --from=builder --chown=app:app /app/package.json ./
# Everything from here on (including CMD and the healthcheck) runs as "app".
USER app
EXPOSE 5000
# Probe GET http://localhost:5000/health every 30s; the probe exits 0 only on
# HTTP 200 and exits 1 on any other status or on a connection error.
HEALTHCHECK --interval=30s --timeout=5s --start-period=10s --retries=3 \
CMD node -e "const h=require('http');h.get('http://localhost:5000/health',(r)=>{process.exit(r.statusCode===200?0:1)}).on('error',()=>process.exit(1))"
# Start the compiled server.
CMD ["node", "dist/index.js"]
+26
View File
@@ -5,10 +5,36 @@ import lighthouseRouter from "./routes/lighthouse.js";
dotenv.config();
// Rate limiting (simple in-memory fixed window; state is per process, so this
// is only accurate for a single instance).
const rateLimitMap = new Map<string, { count: number; resetAt: number }>();
const RATE_LIMIT_WINDOW = 60_000; // 1 minute
const RATE_LIMIT_MAX = 30; // requests per window
// Timestamp (ms) from which the next sweep of expired entries may run.
let rateLimitNextSweepAt = 0;

/**
 * Express middleware that allows at most RATE_LIMIT_MAX requests per
 * RATE_LIMIT_WINDOW milliseconds for each client key.
 *
 * The client key is `req.ip`, falling back to the `x-forwarded-for` header and
 * finally to the literal "unknown". The first request of a window creates a
 * fresh counter; once the counter reaches RATE_LIMIT_MAX, further requests in
 * that window are answered with HTTP 429 and `next` is not called.
 *
 * @param req  Incoming request; only `ip` and `headers` are read.
 * @param res  Response; only used to send the 429 reply.
 * @param next Continuation invoked when the request is within the limit.
 */
function rateLimit(req: Request, res: Response, next: () => void) {
  const ip = req.ip || req.headers["x-forwarded-for"] || "unknown";
  const key = String(ip);
  const now = Date.now();

  // Drop expired windows at most once per RATE_LIMIT_WINDOW. Without this the
  // map keeps one entry per distinct client key forever and grows without bound.
  if (now >= rateLimitNextSweepAt) {
    for (const [storedKey, storedEntry] of rateLimitMap) {
      if (now > storedEntry.resetAt) {
        rateLimitMap.delete(storedKey);
      }
    }
    rateLimitNextSweepAt = now + RATE_LIMIT_WINDOW;
  }

  const entry = rateLimitMap.get(key);

  // No entry yet, or the previous window is over: start a new window.
  if (!entry || now > entry.resetAt) {
    rateLimitMap.set(key, { count: 1, resetAt: now + RATE_LIMIT_WINDOW });
    return next();
  }

  if (entry.count >= RATE_LIMIT_MAX) {
    // Tell the client how many whole seconds remain until the window resets.
    const retryAfterSeconds = Math.max(1, Math.ceil((entry.resetAt - now) / 1000));
    res
      .status(429)
      .set("Retry-After", String(retryAfterSeconds))
      .json({ error: "Too many requests" });
    return;
  }

  entry.count++;
  next();
}
// Create the Express application. Middleware below is registered in order,
// so the sequence of these app.use() calls matters.
const app = express();
// CORS: allow the origin given in CORS_ORIGIN; when it is unset or empty the
// fallback "*" is used.
app.use(cors({ origin: process.env.CORS_ORIGIN || "*" }));
// Parse JSON request bodies.
app.use(express.json());
// Apply the in-memory rate limiter. It is registered before the /health route,
// so health probes presumably count against the limit as well - confirm that is intended.
app.use(rateLimit);
app.get("/health", (_req: Request, res: Response) => {
res.status(200).json({ status: "ok", timestamp: new Date().toISOString() });