These instructions have been tested on an M1 MacBook with Podman; your mileage may vary. Note that running Playwright/Chrome as root can be dangerous, so don’t use this for scraping or untrusted content unless you know what you’re doing.
Actually, never feed untrusted content into your LLM and always sandbox it as much as possible. Otherwise you will sooner or later be a sad panda.
Instruction
Put this into .devcontainer/devcontainer.json:
// Dev container definition: a Playwright base image with the Claude Code CLI
// and its VS Code extension added on top.
{
"name": "Playwright Dev Environment",
// Pinned image tag (v1.55.0-noble) so the environment is reproducible.
"image": "mcr.microsoft.com/playwright:v1.55.0-noble",
// Installs the Claude Code CLI globally inside the container.
"postCreateCommand": "npm install -g @anthropic-ai/claude-code",
"customizations": {
"vscode": {
// VS Code extensions to install in the container.
"extensions": [
"Anthropic.claude-code"
]
}
},
// remoteUser is disabled, so the container runs as the image's default user.
// NOTE(review): the mount target and CLAUDE_CONFIG_DIR below still point at
// /home/pwuser/.claude — confirm that is intended when not running as pwuser.
//"remoteUser": "pwuser",
// Extra container runtime flags: share the host IPC namespace and turn off
// the default seccomp profile. Both loosen container isolation.
"runArgs": [
"--ipc=host",
"--security-opt=seccomp=unconfined"
],
// Grants the SYS_ADMIN capability to the container.
// NOTE(review): presumably for Chromium's sandbox, but the screenshot script
// launches with --no-sandbox — confirm this capability is still needed.
"capAdd": [
"SYS_ADMIN"
],
// Named volumes, keyed by dev container id, that survive container rebuilds:
// one mounted at /commandhistory and one holding the Claude Code config.
// NOTE(review): nothing in this file points the shell history at
// /commandhistory (no HISTFILE is set) — confirm the volume is actually used.
"mounts": [
"source=claude-code-bashhistory-${devcontainerId},target=/commandhistory,type=volume",
"source=claude-code-config-${devcontainerId},target=/home/pwuser/.claude,type=volume"
],
// Bind-mount the local project folder at /workspace and open it there.
"workspaceMount": "source=${localWorkspaceFolder},target=/workspace,type=bind,consistency=delegated",
"workspaceFolder": "/workspace",
"containerEnv": {
// Raises the V8 old-space heap limit to 4096 MB for Node processes.
"NODE_OPTIONS": "--max-old-space-size=4096",
// Same path as the config volume mounted above.
"CLAUDE_CONFIG_DIR": "/home/pwuser/.claude"
}
}
Then create a script like the following, e.g. pw-screenshot.mjs (or ask your LLM to write it for you):
#!/usr/bin/env node
// Playwright-based headless renderer with console capture
// Usage:
// node pw-screenshot.mjs --url https://example.com --out out.png
// node pw-screenshot.mjs --html-file page.html --out out.png
// echo "<h1>Hello</h1>" | node pw-screenshot.mjs --html-stdin --out out.png
import fs from 'node:fs';
import path from 'node:path';
import process from 'node:process';
import { chromium } from 'playwright';
/**
 * Parse command-line options into a settings object.
 *
 * Unrecognized arguments are ignored. `--help` / `-h` print the usage text and
 * exit the process with status 0.
 *
 * @param {string[]} argv - Full argument vector (e.g. `process.argv`); the
 *   first two entries (runtime and script path) are skipped.
 * @returns {{url: ?string, htmlFile: ?string, htmlStdin: boolean, out: string,
 *   wait: string, fullPage: boolean, timeout: number, viewport: string,
 *   console: ?string, emulateMedia: ?string}} Parsed options with defaults applied.
 * @throws {Error} If an option that needs a value is the last argument, if
 *   `--timeout` is not a non-negative finite number, or if `--wait` is invalid.
 */
function parseArgs(argv) {
  const args = {
    url: null,
    htmlFile: null,
    htmlStdin: false,
    out: 'screenshot.png',
    wait: 'networkidle', // playwright: load | domcontentloaded | networkidle
    fullPage: true,
    timeout: 60000,
    viewport: '1280x800',
    console: null,
    emulateMedia: null,
  };

  // Fail loudly instead of storing `undefined` when an option is the final
  // argument (previously `--out` alone led to "Saved screenshot to undefined").
  const requireValue = (flag, value) => {
    if (value === undefined) throw new Error(`Missing value for ${flag}`);
    return value;
  };

  for (let i = 2; i < argv.length; i++) {
    const k = argv[i];
    const v = argv[i + 1];
    switch (k) {
      case '--url': args.url = requireValue(k, v); i++; break;
      case '--html-file': args.htmlFile = requireValue(k, v); i++; break;
      case '--html-stdin': args.htmlStdin = true; break;
      case '--out': args.out = requireValue(k, v); i++; break;
      case '--wait': args.wait = normalizeWait(requireValue(k, v)); i++; break;
      case '--fullpage':
        // The BOOL operand is optional: a bare `--fullpage` means true.
        // Consume the operand only when it really is a boolean literal, so a
        // following option is never swallowed and `true` is not left behind
        // to be re-read as an unknown argument.
        args.fullPage = v !== 'false';
        if (v === 'true' || v === 'false') i++;
        break;
      case '--no-fullpage': args.fullPage = false; break;
      case '--timeout': {
        const ms = Number(requireValue(k, v));
        if (!Number.isFinite(ms) || ms < 0) {
          throw new Error(`Invalid --timeout '${v}'. Expected a non-negative number of milliseconds`);
        }
        args.timeout = ms;
        i++;
        break;
      }
      case '--viewport': args.viewport = requireValue(k, v); i++; break;
      case '--console': args.console = requireValue(k, v); i++; break;
      case '--emulate-media': args.emulateMedia = requireValue(k, v); i++; break;
      case '--help':
      case '-h':
        printHelp();
        process.exit(0);
        break; // not reached; process.exit() does not return
      default: break;
    }
  }
  return args;
}
/**
 * Map a user-supplied wait mode onto one of Playwright's `waitUntil` values.
 *
 * Matching is case-insensitive. The Puppeteer-style names `networkidle0` and
 * `networkidle2` are accepted as aliases for `networkidle`.
 *
 * @param {*} v - Raw value of the `--wait` option.
 * @returns {string} `load`, `domcontentloaded`, or `networkidle`.
 * @throws {Error} If the value is none of the recognized modes.
 */
function normalizeWait(v) {
  const mode = String(v).toLowerCase();
  switch (mode) {
    case 'networkidle0':
    case 'networkidle2':
      return 'networkidle';
    case 'load':
    case 'domcontentloaded':
    case 'networkidle':
      return mode;
    default:
      throw new Error(`Invalid --wait '${v}'. Use load|domcontentloaded|networkidle`);
  }
}
/**
 * Write the command-line usage summary to stderr.
 *
 * The text starts with a blank line and ends with a trailing newline.
 */
function printHelp() {
  const usageLines = [
    '',
    'Usage:',
    'node pw-screenshot.mjs [--url URL | --html-file FILE | --html-stdin] [--out PATH]',
    'Options:',
    '--url URL Navigate to the URL and render.',
    '--html-file FILE Load HTML from file and render.',
    '--html-stdin Read HTML from stdin and render.',
    '--out PATH Screenshot output path (default: screenshot.png).',
    '--wait MODE load | domcontentloaded | networkidle (default: networkidle).',
    '--fullpage BOOL true/false; or --no-fullpage (default: true).',
    '--timeout MS Navigation/content timeout in ms (default: 60000).',
    '--viewport WxH Viewport, e.g., 1280x800 (default: 1280x800).',
    "--console PATH Write page console JSONL to PATH or '-' for stdout (default: stderr only).",
    '--emulate-media TYPE Emulate media type: screen | print.',
    '--help Show this help.',
    '',
  ];
  process.stderr.write(usageLines.join('\n'));
}
/**
 * Convert a `WIDTHxHEIGHT` string into a viewport size object.
 *
 * Surrounding whitespace is ignored and the separator may be `x` or `X`.
 *
 * @param {*} spec - Viewport specification, e.g. `1280x800`.
 * @returns {{width: number, height: number}} Parsed dimensions.
 * @throws {Error} If the value is not two digit runs separated by `x`.
 */
function parseViewport(spec) {
  const cleaned = String(spec).toLowerCase().trim();
  const parts = /^(\d+)x(\d+)$/.exec(cleaned);
  if (parts === null) {
    throw new Error(`Invalid --viewport '${spec}'. Expected WxH, e.g., 1280x800`);
  }
  const [, widthText, heightText] = parts;
  return { width: Number(widthText), height: Number(heightText) };
}
/**
 * Choose where captured page console records are written.
 *
 * @param {?string} dest - Value of `--console`: a file path, `-` for stdout,
 *   or a falsy value when the option was not given.
 * @returns {?import('node:stream').Writable} `process.stdout` for `-`, an
 *   append-mode file stream for a path, or `null` when no destination is set.
 */
function openConsoleStream(dest) {
  if (dest === '-') {
    return process.stdout;
  }
  return dest ? fs.createWriteStream(dest, { flags: 'a' }) : null;
}
/**
 * Emit one captured console record.
 *
 * With a stream, the record is written as a single JSON line. Without one, a
 * short `[console] <type> <text>` line goes to stderr instead.
 *
 * @param {?{write: Function}} stream - Destination stream, or a falsy value to
 *   fall back to stderr.
 * @param {{type: string, text: string}} record - Console record to emit.
 */
function writeConsole(stream, record) {
  // Serialized before the destination is chosen, so serialization happens
  // (and can fail) the same way on both paths.
  const jsonLine = `${JSON.stringify(record)}\n`;
  if (!stream) {
    process.stderr.write(`[console] ${record.type} ${record.text}\n`);
    return;
  }
  stream.write(jsonLine);
}
/**
 * Read all of standard input as UTF-8 text.
 *
 * @returns {Promise<string>} Resolves with the concatenated input once stdin
 *   emits `end`.
 */
async function readStdin() {
  const { stdin } = process;
  const pieces = [];
  const whenEnded = new Promise((resolve) => {
    stdin.setEncoding('utf8');
    stdin.on('data', (piece) => {
      pieces.push(piece);
    });
    stdin.on('end', () => {
      resolve(pieces.join(''));
    });
  });
  return whenEnded;
}
/**
 * Entry point: render a URL or an HTML document in headless Chromium, forward
 * the page's console output, and save a screenshot.
 *
 * The source is chosen in this order: `--url`, then `--html-file`, then stdin.
 * With no source at all, the usage text is printed and the process exits
 * with status 2.
 *
 * @returns {Promise<void>} Resolves once the screenshot is saved, the browser
 *   is closed, and any console log file has been flushed.
 * @throws {Error} On invalid options or any navigation, rendering, or
 *   screenshot failure.
 */
async function main() {
  const args = parseArgs(process.argv);
  if (!(args.url || args.htmlFile || args.htmlStdin)) {
    printHelp();
    process.exit(2);
  }
  const viewport = parseViewport(args.viewport);
  const consoleStream = openConsoleStream(args.console);
  let browser;
  try {
    // Chromium's own sandbox is switched off and /dev/shm usage is disabled
    // via these launch flags.
    browser = await chromium.launch({ headless: true, args: ['--no-sandbox', '--disable-dev-shm-usage'] });
    const context = await browser.newContext({ viewport });
    const page = await context.newPage();

    // Capture console messages and uncaught page errors as timestamped records.
    page.on('console', (msg) => {
      writeConsole(consoleStream, { ts: new Date().toISOString(), type: msg.type(), text: msg.text() });
    });
    page.on('pageerror', (err) => {
      writeConsole(consoleStream, { ts: new Date().toISOString(), type: 'pageerror', text: String(err) });
    });

    if (args.emulateMedia) await page.emulateMedia({ media: args.emulateMedia });

    if (args.url) {
      await page.goto(args.url, { waitUntil: args.wait, timeout: args.timeout });
    } else {
      const html = args.htmlFile ? fs.readFileSync(path.resolve(args.htmlFile), 'utf8') : await readStdin();
      await page.setContent(html, { waitUntil: args.wait, timeout: args.timeout });
    }

    await page.screenshot({ path: args.out, fullPage: !!args.fullPage });
    process.stdout.write(`Saved screenshot to ${args.out}\n`);
  } finally {
    // Best-effort shutdown: a failure to close must not mask the real error.
    if (browser) await browser.close().catch(() => {});
    if (consoleStream && consoleStream !== process.stdout) {
      // Wait until buffered records are flushed. On failure the caller below
      // calls process.exit(1), which would otherwise drop pending writes and
      // lose the console log exactly when it is most useful.
      await new Promise((resolve) => consoleStream.end(resolve));
    }
  }
}

// Run the script; any failure is reported on stderr with exit status 1.
main().catch((err) => {
  process.stderr.write(String(err.stack || err) + '\n');
  process.exit(1);
});
After that, you can instruct Claude or another coding agent that can view images (so, currently, not Codex) to view the result and describe it.
Hopefully this makes the agents more useful as they can inspect and see if the change they did actually had the expected result.
I have not tested this very much yet, but it addresses one of my pet peeves when having agents make changes to a web page: the lack of a feedback loop for them means I’ve had to view the result and explain it to them myself.