feat: production hardening + smart subpage scanning with layout dedup
Security:
- Add CRON_SECRET auth to /api/cron/* endpoints
- Add admin role verification to /api/admin/* routes
- Add org membership check to /api/billing/usage
- Add security headers (HSTS, X-Frame-Options, CSP, etc.)
- Add env variable validation at startup
- Add rate limiting to backend API (30 req/min per IP)

Infrastructure:
- Multi-stage Dockerfiles with non-root user + healthchecks
- Updated cron workflow to pass CRON_SECRET header
- Updated .env.example with all optional vars

Smart subpage scanning:
- Crawler now computes template_hash (DOM structure without content)
- Scanner scans ALL unique-layout pages, not just main page
- Pages with same layout (e.g. product pages) scanned only once
- Deduplication by template_hash, fallback to content_hash
- Main page always scanned with high priority
- Re-checks subscription limits before each page scan

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
@@ -18,3 +18,14 @@ CORS_ORIGIN=http://localhost:3000
|
|||||||
NEXT_PUBLIC_SUPABASE_URL=https://your-project.supabase.co
|
NEXT_PUBLIC_SUPABASE_URL=https://your-project.supabase.co
|
||||||
NEXT_PUBLIC_SUPABASE_ANON_KEY=your-anon-key
|
NEXT_PUBLIC_SUPABASE_ANON_KEY=your-anon-key
|
||||||
SUPABASE_SERVICE_ROLE_KEY=your-service-role-key
|
SUPABASE_SERVICE_ROLE_KEY=your-service-role-key
|
||||||
|
|
||||||
|
# ── Security ────────────────────────────────
|
||||||
|
# Required in production: protects /api/cron/* endpoints
|
||||||
|
CRON_SECRET=generate-a-random-secret-here
|
||||||
|
|
||||||
|
# ── Optional Services ───────────────────────
|
||||||
|
# Email notifications (Resend — free tier: 3000 emails/mo)
|
||||||
|
RESEND_API_KEY=re_your_resend_key
|
||||||
|
|
||||||
|
# Lighthouse backend URL (for automated scans)
|
||||||
|
LIGHTHOUSE_SERVICE_URL=http://localhost:5000
|
||||||
|
|||||||
@@ -1,25 +1,36 @@
|
|||||||
# Use the official Node.js image.
|
# --- Stage 1: Build ---
|
||||||
FROM node:18
|
FROM node:20-slim AS builder
|
||||||
|
|
||||||
# OPTIONAL: Falls in der Base kein Chrome enthalten ist,
|
|
||||||
# müsstest du hier noch "apt-get update" + "apt-get install chromium" oder ähnliches ausführen,
|
|
||||||
# z. B.:
|
|
||||||
RUN apt-get update && apt-get install -y chromium
|
|
||||||
|
|
||||||
# Create and change to the app directory.
|
|
||||||
WORKDIR /app
|
WORKDIR /app
|
||||||
|
|
||||||
# Copy application dependency manifests to the container image.
|
|
||||||
COPY package*.json ./
|
COPY package*.json ./
|
||||||
|
RUN npm ci
|
||||||
|
|
||||||
# Install production dependencies.
|
|
||||||
RUN npm install
|
|
||||||
|
|
||||||
# Copy local code to the container image.
|
|
||||||
COPY . .
|
COPY . .
|
||||||
|
|
||||||
# Build the TypeScript code
|
|
||||||
RUN npm run build
|
RUN npm run build
|
||||||
|
|
||||||
# Run the web service on container startup.
|
# --- Stage 2: Production ---
|
||||||
|
FROM node:20-slim AS runtime
|
||||||
|
|
||||||
|
RUN apt-get update && apt-get install -y --no-install-recommends chromium \
|
||||||
|
&& rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
|
ENV CHROME_BIN=/usr/bin/chromium
|
||||||
|
ENV PUPPETEER_SKIP_CHROMIUM_DOWNLOAD=true
|
||||||
|
|
||||||
|
WORKDIR /app
|
||||||
|
|
||||||
|
RUN groupadd -r app && useradd -r -g app -d /app app
|
||||||
|
|
||||||
|
COPY --from=builder --chown=app:app /app/dist ./dist
|
||||||
|
COPY --from=builder --chown=app:app /app/node_modules ./node_modules
|
||||||
|
COPY --from=builder --chown=app:app /app/package.json ./
|
||||||
|
|
||||||
|
USER app
|
||||||
|
|
||||||
|
EXPOSE 5000
|
||||||
|
|
||||||
|
HEALTHCHECK --interval=30s --timeout=5s --start-period=10s --retries=3 \
|
||||||
|
CMD node -e "const h=require('http');h.get('http://localhost:5000/health',(r)=>{process.exit(r.statusCode===200?0:1)}).on('error',()=>process.exit(1))"
|
||||||
|
|
||||||
CMD ["node", "dist/index.js"]
|
CMD ["node", "dist/index.js"]
|
||||||
|
|||||||
@@ -5,10 +5,36 @@ import lighthouseRouter from "./routes/lighthouse.js";
|
|||||||
|
|
||||||
dotenv.config();
|
dotenv.config();
|
||||||
|
|
||||||
|
// Rate limiting (simple in-memory fixed window, suitable for a single instance).
// State lives in this process only: limits are per instance and reset on restart.
const rateLimitMap = new Map<string, { count: number; resetAt: number }>();
const RATE_LIMIT_WINDOW = 60_000; // 1 minute
const RATE_LIMIT_MAX = 30; // requests per window

// Timestamp (ms since epoch) of the last purge of expired entries.
let rateLimitLastSweep = 0;

/**
 * Express middleware enforcing RATE_LIMIT_MAX requests per RATE_LIMIT_WINDOW
 * for each client key.
 *
 * The client key is `req.ip`, falling back to the first address listed in the
 * `x-forwarded-for` header, then to the literal "unknown".
 * NOTE(review): `x-forwarded-for` is client-controlled unless a trusted proxy
 * overwrites it — confirm the deployment's proxy setup.
 *
 * @param req  Incoming request.
 * @param res  Response; receives a 429 JSON body plus `Retry-After` when the limit is hit.
 * @param next Continuation invoked when the request is within the limit.
 */
function rateLimit(req: Request, res: Response, next: () => void) {
  const ip = req.ip || req.headers["x-forwarded-for"] || "unknown";
  // A forwarded-for value may be an array and/or a comma-separated proxy chain;
  // only the first hop identifies the client.
  const rawClient = Array.isArray(ip) ? ip.join(",") : String(ip);
  const key = rawClient.split(",")[0]?.trim() || "unknown";
  const now = Date.now();

  // Purge expired windows at most once per window so the map cannot grow
  // without bound as new client keys keep arriving.
  if (now - rateLimitLastSweep >= RATE_LIMIT_WINDOW) {
    rateLimitLastSweep = now;
    for (const [storedKey, stored] of rateLimitMap) {
      if (now > stored.resetAt) rateLimitMap.delete(storedKey);
    }
  }

  const entry = rateLimitMap.get(key);

  // First request from this key, or its previous window has ended: start a new window.
  if (!entry || now > entry.resetAt) {
    rateLimitMap.set(key, { count: 1, resetAt: now + RATE_LIMIT_WINDOW });
    return next();
  }

  if (entry.count >= RATE_LIMIT_MAX) {
    // Tell well-behaved clients how long to back off (whole seconds, at least 1).
    const retryAfterSeconds = Math.max(1, Math.ceil((entry.resetAt - now) / 1000));
    res
      .status(429)
      .set("Retry-After", String(retryAfterSeconds))
      .json({ error: "Too many requests" });
    return;
  }

  entry.count++;
  next();
}
|
||||||
|
|
||||||
const app = express();
|
const app = express();
|
||||||
|
|
||||||
app.use(cors({ origin: process.env.CORS_ORIGIN || "*" }));
|
app.use(cors({ origin: process.env.CORS_ORIGIN || "*" }));
|
||||||
app.use(express.json());
|
app.use(express.json());
|
||||||
|
app.use(rateLimit);
|
||||||
|
|
||||||
app.get("/health", (_req: Request, res: Response) => {
|
app.get("/health", (_req: Request, res: Response) => {
|
||||||
res.status(200).json({ status: "ok", timestamp: new Date().toISOString() });
|
res.status(200).json({ status: "ok", timestamp: new Date().toISOString() });
|
||||||
|
|||||||
@@ -26,7 +26,7 @@ jobs:
|
|||||||
DEPLOYMENT_URL="${DEPLOYMENT_URL:-https://your-domain.com}"
|
DEPLOYMENT_URL="${DEPLOYMENT_URL:-https://your-domain.com}"
|
||||||
echo "Running uptime checks at: $DEPLOYMENT_URL/api/cron/uptime"
|
echo "Running uptime checks at: $DEPLOYMENT_URL/api/cron/uptime"
|
||||||
|
|
||||||
response=$(curl -s -w "\n%{http_code}" "$DEPLOYMENT_URL/api/cron/uptime")
|
response=$(curl -s -w "\n%{http_code}" -H "Authorization: Bearer $CRON_SECRET" "$DEPLOYMENT_URL/api/cron/uptime")
|
||||||
http_code=$(echo "$response" | tail -n1)
|
http_code=$(echo "$response" | tail -n1)
|
||||||
response_body=$(echo "$response" | head -n -1)
|
response_body=$(echo "$response" | head -n -1)
|
||||||
|
|
||||||
@@ -41,6 +41,7 @@ jobs:
|
|||||||
fi
|
fi
|
||||||
env:
|
env:
|
||||||
DEPLOYMENT_URL: ${{ secrets.DEPLOYMENT_URL }}
|
DEPLOYMENT_URL: ${{ secrets.DEPLOYMENT_URL }}
|
||||||
|
CRON_SECRET: ${{ secrets.CRON_SECRET }}
|
||||||
|
|
||||||
scan:
|
scan:
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
@@ -51,7 +52,7 @@ jobs:
|
|||||||
DEPLOYMENT_URL="${DEPLOYMENT_URL:-https://your-domain.com}"
|
DEPLOYMENT_URL="${DEPLOYMENT_URL:-https://your-domain.com}"
|
||||||
echo "Triggering scan at: $DEPLOYMENT_URL/api/cron/scan?mode=all"
|
echo "Triggering scan at: $DEPLOYMENT_URL/api/cron/scan?mode=all"
|
||||||
|
|
||||||
response=$(curl -s -w "\n%{http_code}" -X POST "$DEPLOYMENT_URL/api/cron/scan?mode=all")
|
response=$(curl -s -w "\n%{http_code}" -X POST -H "Authorization: Bearer $CRON_SECRET" "$DEPLOYMENT_URL/api/cron/scan?mode=all")
|
||||||
http_code=$(echo "$response" | tail -n1)
|
http_code=$(echo "$response" | tail -n1)
|
||||||
response_body=$(echo "$response" | head -n -1)
|
response_body=$(echo "$response" | head -n -1)
|
||||||
|
|
||||||
|
|||||||
@@ -1,16 +1,38 @@
|
|||||||
FROM node:18
|
# --- Stage 1: Dependencies ---
|
||||||
|
FROM node:20-slim AS deps
|
||||||
WORKDIR /app
|
WORKDIR /app
|
||||||
|
|
||||||
COPY package.json package-lock.json ./
|
COPY package.json package-lock.json ./
|
||||||
|
RUN npm ci
|
||||||
|
|
||||||
# Disable the oxide engine so it falls back to JS
|
# --- Stage 2: Build ---
|
||||||
ENV TAILWIND_DISABLE_OXIDE=1
|
FROM node:20-slim AS builder
|
||||||
|
WORKDIR /app
|
||||||
RUN npm install
|
COPY --from=deps /app/node_modules ./node_modules
|
||||||
|
|
||||||
COPY . .
|
COPY . .
|
||||||
|
ENV NEXT_TELEMETRY_DISABLED=1
|
||||||
|
ENV TAILWIND_DISABLE_OXIDE=1
|
||||||
RUN npm run build
|
RUN npm run build
|
||||||
|
|
||||||
CMD ["npm", "run", "start"]
|
# --- Stage 3: Production ---
|
||||||
|
FROM node:20-slim AS runtime
|
||||||
|
WORKDIR /app
|
||||||
|
|
||||||
|
RUN groupadd -r app && useradd -r -g app -d /app app
|
||||||
|
|
||||||
|
COPY --from=builder --chown=app:app /app/.next/standalone ./
|
||||||
|
COPY --from=builder --chown=app:app /app/.next/static ./.next/static
|
||||||
|
COPY --from=builder --chown=app:app /app/public ./public
|
||||||
|
|
||||||
|
USER app
|
||||||
|
|
||||||
|
EXPOSE 3000
|
||||||
|
|
||||||
|
ENV PORT=3000
|
||||||
|
ENV HOSTNAME="0.0.0.0"
|
||||||
|
ENV NODE_ENV=production
|
||||||
|
ENV NEXT_TELEMETRY_DISABLED=1
|
||||||
|
|
||||||
|
HEALTHCHECK --interval=30s --timeout=5s --start-period=30s --retries=3 \
|
||||||
|
CMD node -e "const h=require('http');h.get('http://localhost:3000/api/health',(r)=>{process.exit(r.statusCode===200?0:1)}).on('error',()=>process.exit(1))"
|
||||||
|
|
||||||
|
CMD ["node", "server.js"]
|
||||||
|
|||||||
@@ -203,4 +203,8 @@ CREATE TABLE IF NOT EXISTS alert_configurations (
|
|||||||
created_at timestamp with time zone DEFAULT now(),
|
created_at timestamp with time zone DEFAULT now(),
|
||||||
updated_at timestamp with time zone DEFAULT now()
|
updated_at timestamp with time zone DEFAULT now()
|
||||||
);
|
);
|
||||||
|
|
||||||
|
-- Add template_hash to pages table for layout deduplication
|
||||||
|
ALTER TABLE pages ADD COLUMN IF NOT EXISTS template_hash VARCHAR;
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_pages_template_hash ON pages(template_hash) WHERE template_hash IS NOT NULL;
|
||||||
);
|
);
|
||||||
@@ -1,10 +1,27 @@
|
|||||||
import type { NextConfig } from "next";
|
import type { NextConfig } from "next";
|
||||||
|
|
||||||
|
// HTTP response headers attached to every route via `headers()` below.
const securityHeaders = [
  { key: "X-DNS-Prefetch-Control", value: "on" },
  // Two years, including subdomains, eligible for browser preload lists.
  { key: "Strict-Transport-Security", value: "max-age=63072000; includeSubDomains; preload" },
  { key: "X-Frame-Options", value: "SAMEORIGIN" },
  // CSP counterpart of X-Frame-Options for modern browsers. Deliberately limited
  // to framing so it cannot block the app's own scripts or styles.
  { key: "Content-Security-Policy", value: "frame-ancestors 'self'" },
  { key: "X-Content-Type-Options", value: "nosniff" },
  { key: "Referrer-Policy", value: "origin-when-cross-origin" },
  // Empty allowlists: these features are disabled for this origin and any embedded frames.
  { key: "Permissions-Policy", value: "camera=(), microphone=(), geolocation=()" },
];
|
||||||
|
|
||||||
const nextConfig: NextConfig = {
|
const nextConfig: NextConfig = {
|
||||||
eslint: {
|
eslint: {
|
||||||
// Do not fail production builds due to ESLint errors
|
// Do not fail production builds due to ESLint errors
|
||||||
ignoreDuringBuilds: true,
|
ignoreDuringBuilds: true,
|
||||||
},
|
},
|
||||||
|
async headers() {
|
||||||
|
return [
|
||||||
|
{
|
||||||
|
source: "/(.*)",
|
||||||
|
headers: securityHeaders,
|
||||||
|
},
|
||||||
|
];
|
||||||
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
export default nextConfig;
|
export default nextConfig;
|
||||||
|
|||||||
@@ -207,6 +207,7 @@ CREATE TABLE IF NOT EXISTS pages (
|
|||||||
title VARCHAR,
|
title VARCHAR,
|
||||||
description TEXT,
|
description TEXT,
|
||||||
content_hash VARCHAR,
|
content_hash VARCHAR,
|
||||||
|
template_hash VARCHAR,
|
||||||
content_type VARCHAR,
|
content_type VARCHAR,
|
||||||
status_code INTEGER,
|
status_code INTEGER,
|
||||||
is_active BOOLEAN DEFAULT true,
|
is_active BOOLEAN DEFAULT true,
|
||||||
|
|||||||
@@ -1,12 +1,17 @@
|
|||||||
import { NextResponse } from "next/server";
|
import { NextResponse } from "next/server";
|
||||||
import { getSupabaseAdmin } from "@/lib/admin";
|
import { getSupabaseAdmin } from "@/lib/admin";
|
||||||
|
import { requireAdmin } from "@/lib/apiAuth";
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* GET /api/admin/organizations
|
* GET /api/admin/organizations
|
||||||
*
|
*
|
||||||
* List all organizations with usage stats.
|
* List all organizations with usage stats.
|
||||||
|
* Requires admin or owner role.
|
||||||
*/
|
*/
|
||||||
export async function GET(request: Request) {
|
export async function GET(request: Request) {
|
||||||
|
const auth = await requireAdmin(request);
|
||||||
|
if (auth instanceof NextResponse) return auth;
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const supabase = getSupabaseAdmin();
|
const supabase = getSupabaseAdmin();
|
||||||
const url = new URL(request.url);
|
const url = new URL(request.url);
|
||||||
@@ -68,6 +73,9 @@ export async function GET(request: Request) {
|
|||||||
* Update organization: change tier, deactivate, etc.
|
* Update organization: change tier, deactivate, etc.
|
||||||
*/
|
*/
|
||||||
export async function PATCH(request: Request) {
|
export async function PATCH(request: Request) {
|
||||||
|
const auth = await requireAdmin(request);
|
||||||
|
if (auth instanceof NextResponse) return auth;
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const supabase = getSupabaseAdmin();
|
const supabase = getSupabaseAdmin();
|
||||||
const { organizationId, updates } = await request.json();
|
const { organizationId, updates } = await request.json();
|
||||||
|
|||||||
@@ -1,12 +1,17 @@
|
|||||||
import { NextResponse } from "next/server";
|
import { NextResponse } from "next/server";
|
||||||
import { getSupabaseAdmin } from "@/lib/admin";
|
import { getSupabaseAdmin } from "@/lib/admin";
|
||||||
|
import { requireAdmin } from "@/lib/apiAuth";
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* GET /api/admin/stats
|
* GET /api/admin/stats
|
||||||
*
|
*
|
||||||
* Returns system-wide statistics for the admin dashboard.
|
* Returns system-wide statistics for the admin dashboard.
|
||||||
|
* Requires admin or owner role.
|
||||||
*/
|
*/
|
||||||
export async function GET() {
|
export async function GET(request: Request) {
|
||||||
|
const auth = await requireAdmin(request);
|
||||||
|
if (auth instanceof NextResponse) return auth;
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const supabase = getSupabaseAdmin();
|
const supabase = getSupabaseAdmin();
|
||||||
|
|
||||||
|
|||||||
@@ -1,13 +1,18 @@
|
|||||||
import { NextResponse } from "next/server";
|
import { NextResponse } from "next/server";
|
||||||
import { getSupabaseAdmin } from "@/lib/admin";
|
import { getSupabaseAdmin } from "@/lib/admin";
|
||||||
|
import { requireAdmin } from "@/lib/apiAuth";
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* GET /api/admin/users
|
* GET /api/admin/users
|
||||||
*
|
*
|
||||||
* List all users with their organization memberships and usage stats.
|
* List all users with their organization memberships and usage stats.
|
||||||
* Query params: ?page=1&limit=20&search=keyword
|
* Query params: ?page=1&limit=20&search=keyword
|
||||||
|
* Requires admin or owner role.
|
||||||
*/
|
*/
|
||||||
export async function GET(request: Request) {
|
export async function GET(request: Request) {
|
||||||
|
const auth = await requireAdmin(request);
|
||||||
|
if (auth instanceof NextResponse) return auth;
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const supabase = getSupabaseAdmin();
|
const supabase = getSupabaseAdmin();
|
||||||
const url = new URL(request.url);
|
const url = new URL(request.url);
|
||||||
@@ -79,6 +84,9 @@ export async function GET(request: Request) {
|
|||||||
* Body: { userId, action, value }
|
* Body: { userId, action, value }
|
||||||
*/
|
*/
|
||||||
export async function PATCH(request: Request) {
|
export async function PATCH(request: Request) {
|
||||||
|
const auth = await requireAdmin(request);
|
||||||
|
if (auth instanceof NextResponse) return auth;
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const supabase = getSupabaseAdmin();
|
const supabase = getSupabaseAdmin();
|
||||||
const { userId, action, value } = await request.json();
|
const { userId, action, value } = await request.json();
|
||||||
@@ -152,6 +160,9 @@ export async function PATCH(request: Request) {
|
|||||||
* Body: { userId }
|
* Body: { userId }
|
||||||
*/
|
*/
|
||||||
export async function DELETE(request: Request) {
|
export async function DELETE(request: Request) {
|
||||||
|
const auth = await requireAdmin(request);
|
||||||
|
if (auth instanceof NextResponse) return auth;
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const supabase = getSupabaseAdmin();
|
const supabase = getSupabaseAdmin();
|
||||||
const { userId } = await request.json();
|
const { userId } = await request.json();
|
||||||
|
|||||||
@@ -1,16 +1,17 @@
|
|||||||
import { NextResponse } from "next/server";
|
import { NextResponse } from "next/server";
|
||||||
import { getSupabaseAdmin } from "@/lib/admin";
|
import { getSupabaseAdmin } from "@/lib/admin";
|
||||||
import { TIER_LIMITS } from "@/services/tierLimits";
|
import { TIER_LIMITS } from "@/services/tierLimits";
|
||||||
|
import { requireOrgMembership } from "@/lib/apiAuth";
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* GET /api/billing/usage
|
* GET /api/billing/usage
|
||||||
*
|
*
|
||||||
* Returns current usage vs tier limits for an organization.
|
* Returns current usage vs tier limits for an organization.
|
||||||
|
* Requires authenticated user who is a member of the organization.
|
||||||
* Query params: ?organizationId=xxx
|
* Query params: ?organizationId=xxx
|
||||||
*/
|
*/
|
||||||
export async function GET(request: Request) {
|
export async function GET(request: Request) {
|
||||||
try {
|
try {
|
||||||
const supabase = getSupabaseAdmin();
|
|
||||||
const url = new URL(request.url);
|
const url = new URL(request.url);
|
||||||
const organizationId = url.searchParams.get("organizationId");
|
const organizationId = url.searchParams.get("organizationId");
|
||||||
|
|
||||||
@@ -18,6 +19,12 @@ export async function GET(request: Request) {
|
|||||||
return NextResponse.json({ error: "organizationId required" }, { status: 400 });
|
return NextResponse.json({ error: "organizationId required" }, { status: 400 });
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Verify caller belongs to this organization
|
||||||
|
const auth = await requireOrgMembership(organizationId, request);
|
||||||
|
if (auth instanceof NextResponse) return auth;
|
||||||
|
|
||||||
|
const supabase = getSupabaseAdmin();
|
||||||
|
|
||||||
// Get organization with tier info
|
// Get organization with tier info
|
||||||
const { data: org, error: orgError } = await supabase
|
const { data: org, error: orgError } = await supabase
|
||||||
.from("organizations")
|
.from("organizations")
|
||||||
|
|||||||
@@ -2,8 +2,12 @@ import { NextResponse } from "next/server";
|
|||||||
import { scanScheduler } from "@/services/scanScheduler";
|
import { scanScheduler } from "@/services/scanScheduler";
|
||||||
import { lighthouseScanner } from "@/services/lighthouseScanner";
|
import { lighthouseScanner } from "@/services/lighthouseScanner";
|
||||||
import { logError } from "@/utils/errorUtils";
|
import { logError } from "@/utils/errorUtils";
|
||||||
|
import { verifyCronSecret } from "@/lib/apiAuth";
|
||||||
|
|
||||||
export async function GET(request: Request) {
|
export async function GET(request: Request) {
|
||||||
|
const authError = verifyCronSecret(request);
|
||||||
|
if (authError) return authError;
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const url = new URL(request.url);
|
const url = new URL(request.url);
|
||||||
const mode = url.searchParams.get("mode") || "all"; // "scheduled", "change_detection", "all"
|
const mode = url.searchParams.get("mode") || "all"; // "scheduled", "change_detection", "all"
|
||||||
|
|||||||
@@ -1,16 +1,21 @@
|
|||||||
import { NextResponse } from "next/server";
|
import { NextResponse } from "next/server";
|
||||||
import { performUptimeChecks, evaluateUptimeAlerts } from "@/services/uptimeService";
|
import { performUptimeChecks, evaluateUptimeAlerts } from "@/services/uptimeService";
|
||||||
|
import { verifyCronSecret } from "@/lib/apiAuth";
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* GET /api/cron/uptime
|
* GET /api/cron/uptime
|
||||||
*
|
*
|
||||||
* Performs uptime checks on all active websites and evaluates alert rules.
|
* Performs uptime checks on all active websites and evaluates alert rules.
|
||||||
* Designed to be called by a cron job (e.g., GitHub Actions, Vercel Cron, or external scheduler).
|
* Designed to be called by a cron job (e.g., GitHub Actions, Vercel Cron, or external scheduler).
|
||||||
|
* Requires CRON_SECRET authorization in production.
|
||||||
*
|
*
|
||||||
* Query params:
|
* Query params:
|
||||||
* - alerts=true (default) — also evaluate alert rules after checks
|
* - alerts=true (default) — also evaluate alert rules after checks
|
||||||
*/
|
*/
|
||||||
export async function GET(request: Request) {
|
export async function GET(request: Request) {
|
||||||
|
const authError = verifyCronSecret(request);
|
||||||
|
if (authError) return authError;
|
||||||
|
|
||||||
const startTime = Date.now();
|
const startTime = Date.now();
|
||||||
|
|
||||||
try {
|
try {
|
||||||
|
|||||||
@@ -0,0 +1,148 @@
|
|||||||
|
import { createClient } from "@supabase/supabase-js";
|
||||||
|
import { NextResponse } from "next/server";
|
||||||
|
import { getSupabaseAdmin } from "./admin";
|
||||||
|
|
||||||
|
/**
 * Compare two strings without exiting early on the first mismatch, so response
 * time does not reveal how much of a guessed secret was correct.
 * Best-effort: written without imports; it does not depend on Node's `crypto` module.
 */
function constantTimeEqual(a: string, b: string): boolean {
  const length = Math.max(a.length, b.length);
  let diff = a.length ^ b.length;
  for (let i = 0; i < length; i++) {
    // charCodeAt past the end yields NaN, which `|| 0` turns into 0.
    diff |= (a.charCodeAt(i) || 0) ^ (b.charCodeAt(i) || 0);
  }
  return diff === 0;
}

/**
 * Verify CRON_SECRET for cron endpoints.
 *
 * Expects the header `Authorization: Bearer <CRON_SECRET>`.
 *
 * @param request Incoming request whose `authorization` header is checked.
 * @returns `null` when the request is authorized; otherwise a NextResponse:
 *          500 when CRON_SECRET is not configured (except when NODE_ENV is
 *          "development", where the request is allowed), or 401 when the
 *          header does not match.
 */
export function verifyCronSecret(request: Request): NextResponse | null {
  const authHeader = request.headers.get("authorization");
  const cronSecret = process.env.CRON_SECRET;

  if (!cronSecret) {
    // If no secret configured, allow in development only
    if (process.env.NODE_ENV === "development") return null;
    return NextResponse.json(
      { error: "CRON_SECRET not configured" },
      { status: 500 }
    );
  }

  // A missing header is compared as "" and therefore never matches.
  if (!constantTimeEqual(authHeader ?? "", `Bearer ${cronSecret}`)) {
    return NextResponse.json(
      { error: "Unauthorized" },
      { status: 401 }
    );
  }

  return null;
}
|
||||||
|
|
||||||
|
/** Identity and authorization data resolved for an authenticated caller. */
interface AuthResult {
  /** Supabase auth user id. */
  userId: string;
  /** Role from the `users` table, else from the auth user's app_metadata; null when neither is set. */
  role: string | null;
  /** `organization_id` from the caller's `users` row, or null. */
  organizationId: string | null;
}

/**
 * Pull an access token out of a request: first from `Authorization: Bearer …`,
 * then from a cookie named `sb-<something>-auth-token`.
 * The cookie value may be a JSON array (token first), a JSON object with
 * `access_token`, a JSON string, or the raw token itself.
 */
function extractAccessToken(request: Request): string | null {
  const authHeader = request.headers.get("authorization");
  const bearer = authHeader?.startsWith("Bearer ") ? authHeader.slice(7) : "";
  if (bearer) return bearer;

  const cookieHeader = request.headers.get("cookie") || "";
  const rawValue = cookieHeader.match(/sb-[^=]+-auth-token=([^;]+)/)?.[1];
  if (!rawValue) return null;

  try {
    const parsed: unknown = JSON.parse(decodeURIComponent(rawValue));
    let candidate: unknown = null;
    if (Array.isArray(parsed)) {
      candidate = parsed[0];
    } else if (typeof parsed === "string") {
      // A JSON-quoted token: use the whole string rather than indexing into it.
      candidate = parsed;
    } else if (parsed !== null && typeof parsed === "object") {
      candidate = (parsed as { access_token?: unknown }).access_token;
    }
    return typeof candidate === "string" && candidate ? candidate : null;
  } catch {
    // Cookie might be the token directly
    return rawValue;
  }
}

/**
 * Authenticate the current user from the request cookies or Authorization header.
 *
 * The token is verified with `supabase.auth.getUser`, then role and organization
 * are read from the `users` table via the admin client.
 *
 * @param request Incoming request; when omitted the call is rejected with 401.
 * @returns User info, or a 401 NextResponse when no valid token is present or
 *          any step throws.
 */
export async function authenticateUser(request?: Request): Promise<AuthResult | NextResponse> {
  try {
    const accessToken = request ? extractAccessToken(request) : null;

    if (!accessToken) {
      return NextResponse.json({ error: "Unauthorized" }, { status: 401 });
    }

    // Verify the token using Supabase
    const supabase = createClient(
      process.env.NEXT_PUBLIC_SUPABASE_URL!,
      process.env.NEXT_PUBLIC_SUPABASE_ANON_KEY!
    );

    const { data: { user }, error } = await supabase.auth.getUser(accessToken);

    if (error || !user) {
      return NextResponse.json({ error: "Unauthorized" }, { status: 401 });
    }

    // Get user details (role, org)
    const admin = getSupabaseAdmin();
    const { data: userData } = await admin
      .from("users")
      .select("organization_id, role")
      .eq("id", user.id)
      .single();

    // SECURITY: never derive a role from user_metadata — users can edit it
    // themselves. app_metadata can only be written with service-role credentials.
    const metadataRole: unknown = user.app_metadata?.role;

    return {
      userId: user.id,
      role: (userData?.role as string) || (typeof metadataRole === "string" && metadataRole ? metadataRole : null),
      organizationId: (userData?.organization_id as string) || null,
    };
  } catch {
    return NextResponse.json({ error: "Authentication failed" }, { status: 401 });
  }
}
|
||||||
|
|
||||||
|
/**
 * Gate a route on the caller having the "owner" or "admin" role.
 *
 * @param request Incoming request, forwarded to `authenticateUser`.
 * @returns The caller's AuthResult when privileged; otherwise the error response
 *          from authentication, or a 403 response when the role is insufficient.
 */
export async function requireAdmin(request?: Request): Promise<AuthResult | NextResponse> {
  const caller = await authenticateUser(request);
  if (caller instanceof NextResponse) {
    return caller;
  }

  const isPrivileged = caller.role === "owner" || caller.role === "admin";
  return isPrivileged
    ? caller
    : NextResponse.json({ error: "Forbidden: admin access required" }, { status: 403 });
}
|
||||||
|
|
||||||
|
/**
 * Gate a route on the caller belonging to `organizationId`.
 *
 * Callers whose role is "owner" or "admin" pass without a membership lookup.
 * Everyone else needs a row in `organization_members` matching their user id
 * and the organization id.
 * NOTE(review): if `role` is scoped to a single organization, the bypass lets an
 * owner/admin of one org read another org's data — confirm roles are global.
 *
 * @param organizationId Organization the caller wants to access.
 * @param request        Incoming request, forwarded to `authenticateUser`.
 * @returns The caller's AuthResult when allowed; otherwise the authentication
 *          error response or a 403 response.
 */
export async function requireOrgMembership(
  organizationId: string,
  request?: Request
): Promise<AuthResult | NextResponse> {
  const caller = await authenticateUser(request);
  if (caller instanceof NextResponse) {
    return caller;
  }

  // Owners and admins are allowed through for every organization.
  const bypassesMembership = caller.role === "owner" || caller.role === "admin";
  if (bypassesMembership) {
    return caller;
  }

  // Look for this user's membership row in the requested organization.
  const adminClient = getSupabaseAdmin();
  const lookup = await adminClient
    .from("organization_members")
    .select("id")
    .eq("user_id", caller.userId)
    .eq("organization_id", organizationId)
    .single();

  if (lookup.data) {
    return caller;
  }

  return NextResponse.json(
    { error: "Forbidden: not a member of this organization" },
    { status: 403 }
  );
}
|
||||||
@@ -0,0 +1,52 @@
|
|||||||
|
/**
 * Environment variable validation.
 * Call `validateEnv()` at server startup or in API routes.
 */

// Variables the app cannot run without.
const REQUIRED_SERVER_VARS = [
  "NEXT_PUBLIC_SUPABASE_URL",
  "NEXT_PUBLIC_SUPABASE_ANON_KEY",
] as const;

// Variables needed by code paths that use the Supabase service-role client.
const REQUIRED_FOR_ADMIN = [
  "SUPABASE_SERVICE_ROLE_KEY",
] as const;

// Variables whose absence only produces a warning.
const OPTIONAL_VARS = [
  "CRON_SECRET",
  "RESEND_API_KEY",
  "LIGHTHOUSE_SERVICE_URL",
] as const;

/**
 * Validates that required environment variables are present.
 *
 * @returns `valid` — true when no required variable is missing;
 *          `missing` — names of unset required variables, in declaration order;
 *          `warnings` — one human-readable message per unset optional variable.
 */
export function validateEnv(): { valid: boolean; missing: string[]; warnings: string[] } {
  const missing: string[] = [];
  const warnings: string[] = [];
  const isProduction = process.env.NODE_ENV === "production";

  for (const name of [...REQUIRED_SERVER_VARS, ...REQUIRED_FOR_ADMIN]) {
    if (!process.env[name]) missing.push(name);
  }

  for (const name of OPTIONAL_VARS) {
    if (process.env[name]) continue;

    if (name === "CRON_SECRET" && isProduction) {
      // Without a secret the cron routes answer 500 outside development — they
      // are unusable, not open — so say that, and say it only once.
      warnings.push(
        "CRON_SECRET not set — /api/cron/* endpoints will reject every request in production!"
      );
      continue;
    }

    warnings.push(`${name} not set — related features will be disabled`);
  }

  return { valid: missing.length === 0, missing, warnings };
}
|
||||||
|
|
||||||
|
// Run the validation once when this module is first imported. The `window`
// check keeps it from running where a browser global exists.
if (typeof window === "undefined") {
  const report = validateEnv();

  if (!report.valid) {
    console.error(`[ENV] Missing required environment variables: ${report.missing.join(", ")}`);
  }

  report.warnings.forEach((warning) => {
    console.warn(`[ENV] ${warning}`);
  });
}
|
||||||
@@ -219,6 +219,7 @@ export class NewCrawlerService {
|
|||||||
try {
|
try {
|
||||||
const urlObj = new URL(url);
|
const urlObj = new URL(url);
|
||||||
const contentHash = await this.computeContentHash(html);
|
const contentHash = await this.computeContentHash(html);
|
||||||
|
const templateHash = await this.computeTemplateHash(html);
|
||||||
|
|
||||||
// Check if page already exists
|
// Check if page already exists
|
||||||
const { data: existingPage } = await getSupabaseAdmin()
|
const { data: existingPage } = await getSupabaseAdmin()
|
||||||
@@ -236,6 +237,7 @@ export class NewCrawlerService {
|
|||||||
title,
|
title,
|
||||||
description,
|
description,
|
||||||
content_hash: contentHash,
|
content_hash: contentHash,
|
||||||
|
template_hash: templateHash,
|
||||||
last_crawled_at: new Date().toISOString(),
|
last_crawled_at: new Date().toISOString(),
|
||||||
metadata: {
|
metadata: {
|
||||||
crawl_session_id: this.sessionId,
|
crawl_session_id: this.sessionId,
|
||||||
@@ -254,6 +256,7 @@ export class NewCrawlerService {
|
|||||||
title,
|
title,
|
||||||
description,
|
description,
|
||||||
content_hash: contentHash,
|
content_hash: contentHash,
|
||||||
|
template_hash: templateHash,
|
||||||
content_type: "text/html",
|
content_type: "text/html",
|
||||||
status_code: 200,
|
status_code: 200,
|
||||||
depth: this.currentDepth,
|
depth: this.currentDepth,
|
||||||
@@ -272,6 +275,51 @@ export class NewCrawlerService {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Hash the layout of a page rather than its content.
 *
 * The HTML is parsed with jsdom, the body is reduced to a skeleton by
 * `extractDomSkeleton`, and that skeleton is hashed with `computeContentHash`.
 * If anything in that path throws, the raw HTML is hashed instead.
 *
 * @param html Full HTML source of the page.
 * @returns The hash of the DOM skeleton, or of the raw HTML on failure.
 */
private async computeTemplateHash(html: string): Promise<string> {
  try {
    const jsdomModule = await import("jsdom");
    const { document } = new jsdomModule.JSDOM(html).window;
    const layoutSkeleton = this.extractDomSkeleton(document.body);
    return this.computeContentHash(layoutSkeleton);
  } catch {
    // Parsing failed — fall back to hashing the markup as-is.
    return this.computeContentHash(html);
  }
}
|
||||||
|
|
||||||
|
/**
 * Extract a structural skeleton of the DOM: tag names + class names only.
 * Text content, ids, data attributes and other attributes are ignored.
 *
 * Output is one line per element, indented by nesting depth, in the form
 * `<tag.classA.classB>` with class names sorted. `script`, `style`, `svg` and
 * `noscript` subtrees are skipped, and elements nested deeper than 20 levels
 * are left out.
 *
 * @param element Root element to walk (typically `document.body`); null yields "".
 * @returns The newline-joined skeleton.
 */
private extractDomSkeleton(element: Element | null): string {
  if (!element) return "";

  // Depth cap: bounds the work and the output size on very deep documents.
  const MAX_DEPTH = 20;
  // Skip script, style, svg, noscript — they're not layout
  const NON_LAYOUT_TAGS = new Set(["script", "style", "svg", "noscript"]);

  const parts: string[] = [];
  const walk = (el: Element, depth: number): void => {
    if (depth > MAX_DEPTH) return;
    const tag = el.tagName.toLowerCase();
    if (NON_LAYOUT_TAGS.has(tag)) return;

    // classList is already whitespace-normalised and free of duplicates, so
    // ` a  b ` and `b a a` produce the same skeleton line as `a b`.
    const classes = Array.from(el.classList).sort().join(".");
    parts.push(`${" ".repeat(depth)}<${tag}${classes ? "." + classes : ""}>`);

    for (const child of Array.from(el.children)) {
      walk(child, depth + 1);
    }
  };

  walk(element, 0);
  return parts.join("\n");
}
|
||||||
|
|
||||||
private extractLinks(document: Document, baseUrl: string): string[] {
|
private extractLinks(document: Document, baseUrl: string): string[] {
|
||||||
const links = Array.from(document.querySelectorAll("a[href]"))
|
const links = Array.from(document.querySelectorAll("a[href]"))
|
||||||
.map((link) => {
|
.map((link) => {
|
||||||
|
|||||||
@@ -105,7 +105,8 @@ export class ScanScheduler {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Process a single scheduled scan
|
* Process a single scheduled scan — scans ALL unique-layout pages, not just the main page.
|
||||||
|
* Groups pages by template_hash to avoid scanning duplicate layouts (e.g., product pages).
|
||||||
*/
|
*/
|
||||||
private async processScheduledScan(scheduledScan: ScheduledScan): Promise<void> {
|
private async processScheduledScan(scheduledScan: ScheduledScan): Promise<void> {
|
||||||
try {
|
try {
|
||||||
@@ -123,36 +124,55 @@ export class ScanScheduler {
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Get the main page for this website
|
// Get ALL active pages for this website, grouped by template_hash
|
||||||
const { data: page, error: pageError } = await this.supabase
|
const { data: pages, error: pageError } = await this.supabase
|
||||||
.from('pages')
|
.from('pages')
|
||||||
.select('id')
|
.select('id, url, path, template_hash, depth, content_hash')
|
||||||
.eq('website_id', scheduledScan.websiteId)
|
.eq('website_id', scheduledScan.websiteId)
|
||||||
.eq('is_active', true)
|
.eq('is_active', true)
|
||||||
.order('created_at', { ascending: false })
|
.order('depth', { ascending: true })
|
||||||
.limit(1)
|
.order('created_at', { ascending: true });
|
||||||
.single();
|
|
||||||
|
|
||||||
if (pageError || !page) {
|
if (pageError || !pages || pages.length === 0) {
|
||||||
logError('No active page found for scheduled scan', pageError, {
|
logError('No active pages found for scheduled scan', pageError, {
|
||||||
websiteId: scheduledScan.websiteId,
|
websiteId: scheduledScan.websiteId,
|
||||||
});
|
});
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Perform scans for each device type
|
// Deduplicate pages by template_hash — scan only one page per unique layout
|
||||||
|
const uniquePages = this.deduplicateByLayout(pages);
|
||||||
|
|
||||||
|
console.info(JSON.stringify({
|
||||||
|
level: 'info',
|
||||||
|
event: 'scan_pages_selected',
|
||||||
|
websiteId: scheduledScan.websiteId,
|
||||||
|
totalPages: pages.length,
|
||||||
|
uniqueLayouts: uniquePages.length,
|
||||||
|
timestamp: new Date().toISOString(),
|
||||||
|
}));
|
||||||
|
|
||||||
|
// Scan each unique page
|
||||||
|
for (const page of uniquePages) {
|
||||||
|
// Re-check limits before each scan
|
||||||
|
const { canScan: stillCanScan } = await lighthouseScanner.checkSubscriptionLimits(
|
||||||
|
scheduledScan.organizationId
|
||||||
|
);
|
||||||
|
if (!stillCanScan) break;
|
||||||
|
|
||||||
for (const deviceType of scheduledScan.deviceTypes) {
|
for (const deviceType of scheduledScan.deviceTypes) {
|
||||||
const scanConfig: ScanConfig = {
|
const scanConfig: ScanConfig = {
|
||||||
websiteId: scheduledScan.websiteId,
|
websiteId: scheduledScan.websiteId,
|
||||||
pageId: page.id as string,
|
pageId: page.id as string,
|
||||||
deviceType,
|
deviceType,
|
||||||
categories: scheduledScan.categories,
|
categories: scheduledScan.categories,
|
||||||
priority: 'medium',
|
priority: (page.depth as number) === 0 ? 'high' : 'medium',
|
||||||
triggeredBy: 'scheduled',
|
triggeredBy: 'scheduled',
|
||||||
};
|
};
|
||||||
|
|
||||||
await lighthouseScanner.performScan(scanConfig);
|
await lighthouseScanner.performScan(scanConfig);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Update the last run time
|
// Update the last run time
|
||||||
await this.updateLastRunTime(scheduledScan.websiteId);
|
await this.updateLastRunTime(scheduledScan.websiteId);
|
||||||
@@ -161,6 +181,45 @@ export class ScanScheduler {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Deduplicate pages by layout so each distinct layout is scanned only once.
 *
 * - A page's layout key is its `template_hash`, falling back to `content_hash`
 *   when no template hash is stored.
 * - Pages with depth 0 (or no depth value) are treated as main pages and are
 *   always included; their layout key is still recorded so that deeper pages
 *   sharing it are skipped.
 * - Pages with neither hash are always included (treated as unique).
 * - For every other page, only the first occurrence of a layout key is kept.
 *   Pass `pages` ordered by ascending depth to keep the shallowest one.
 *
 * @param pages Page rows exposing `depth`, `template_hash` and `content_hash`.
 * @returns The subset of `pages` to scan, in their original order.
 */
private deduplicateByLayout(pages: Record<string, unknown>[]): Record<string, unknown>[] {
  const seenHashes = new Set<string>();
  const result: Record<string, unknown>[] = [];

  // Only non-empty strings count as usable hashes.
  const asHash = (value: unknown): string | null =>
    typeof value === "string" && value !== "" ? value : null;

  for (const page of pages) {
    const depth = Number(page.depth ?? 0);
    // Prefer template_hash; fall back to content_hash for dedup.
    const hash = asHash(page.template_hash) ?? asHash(page.content_hash);

    // Always include the main page, and remember its layout key (template OR
    // content hash) so an identical subpage is not scanned a second time.
    if (depth === 0) {
      if (hash) seenHashes.add(hash);
      result.push(page);
      continue;
    }

    if (!hash) {
      // No hash at all — include it (unique by default)
      result.push(page);
      continue;
    }

    if (!seenHashes.has(hash)) {
      seenHashes.add(hash);
      result.push(page);
    }
  }

  return result;
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Check for website changes and trigger scans if needed
|
* Check for website changes and trigger scans if needed
|
||||||
*/
|
*/
|
||||||
@@ -213,24 +272,23 @@ export class ScanScheduler {
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Get the main page
|
// Get all unique-layout pages (not just main)
|
||||||
const { data: page, error: pageError } = await this.supabase
|
const { data: pages, error: pageError } = await this.supabase
|
||||||
.from('pages')
|
.from('pages')
|
||||||
.select('id')
|
.select('id, url, path, template_hash, depth, content_hash')
|
||||||
.eq('website_id', website.id)
|
.eq('website_id', website.id)
|
||||||
.eq('is_active', true)
|
.eq('is_active', true)
|
||||||
.order('created_at', { ascending: false })
|
.order('depth', { ascending: true });
|
||||||
.limit(1)
|
|
||||||
.single();
|
|
||||||
|
|
||||||
if (pageError || !page) {
|
if (pageError || !pages || pages.length === 0) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Trigger a high-priority scan due to changes
|
// Scan main page with high priority on change
|
||||||
|
const mainPage = pages[0];
|
||||||
const scanConfig: ScanConfig = {
|
const scanConfig: ScanConfig = {
|
||||||
websiteId: website.id,
|
websiteId: website.id,
|
||||||
pageId: page.id as string,
|
pageId: mainPage.id as string,
|
||||||
deviceType: 'desktop', // Start with desktop for change detection
|
deviceType: 'desktop', // Start with desktop for change detection
|
||||||
categories: ['performance', 'accessibility', 'seo', 'best_practices'],
|
categories: ['performance', 'accessibility', 'seo', 'best_practices'],
|
||||||
priority: 'high',
|
priority: 'high',
|
||||||
|
|||||||
Reference in New Issue
Block a user