myinternetarchive/worker (sha256:90dbaf4b176bfd08154a36ed5779990ba87441fd684cf6fb1002568ad170bcc7)
Published 2026-03-20 00:54:57 +00:00 by nico
Installation
docker pull code.hej.cloud/nico/myinternetarchive/worker@sha256:90dbaf4b176bfd08154a36ed5779990ba87441fd684cf6fb1002568ad170bcc7
Digest: sha256:90dbaf4b176bfd08154a36ed5779990ba87441fd684cf6fb1002568ad170bcc7
Image layers
| ARG RELEASE |
| ARG LAUNCHPAD_BUILD_ARCH |
| LABEL org.opencontainers.image.ref.name=ubuntu |
| LABEL org.opencontainers.image.version=24.04 |
| ADD file:1ae27d2ef4369361104b699712f3897141e394785df5d193d67b44626f57eb87 in / |
| CMD ["/bin/bash"] |
| ARG BROWSER=brave |
| ENV BROWSER=brave |
| ARG BROWSER_VERSION=1.87.192 |
| ENV BROWSER_VERSION=1.87.192 |
| RUN |2 BROWSER=brave BROWSER_VERSION=1.87.192 /bin/sh -c echo "ttf-mscorefonts-installer msttcorefonts/accepted-mscorefonts-eula select true" | debconf-set-selections # buildkit |
| RUN |2 BROWSER=brave BROWSER_VERSION=1.87.192 /bin/sh -c apt-get update -y && apt-get install --no-install-recommends -qqy software-properties-common && apt-get update -y && DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -qqy build-essential locales-all redis-server apt-transport-https curl git socat jq xvfb x11vnc gosu gpg gpg-agent ca-certificates libnss3 libatk1.0-0 libatk-bridge2.0-0 libcups2 libxkbcommon0 libxcomposite1 libxdamage1 libxrandr2 libgbm1 libpango-1.0-0 libcairo2 libasound2t64 libgtk-3-0 libxtst6 xdg-utils libc-bin hicolor-icon-theme python3-pip python3-dev python3-venv fonts-arphic-ukai fonts-arphic-uming fonts-freefont-ttf fonts-gfs-neohellenic fonts-indic fonts-ipafont-mincho fonts-ipafont-gothic fonts-kacst fonts-liberation fonts-noto-cjk fonts-noto-color-emoji fonts-roboto fonts-stix fonts-thai-tlwg fonts-sil-padauk fonts-ubuntu fonts-unfonts-core fonts-wqy-zenhei msttcorefonts libu2f-udev libvulkan1 openssh-client sshpass autossh # buildkit |
| RUN |2 BROWSER=brave BROWSER_VERSION=1.87.192 /bin/sh -c curl -sS https://dl.yarnpkg.com/debian/pubkey.gpg | apt-key add - && echo "deb https://dl.yarnpkg.com/debian/ stable main" | tee /etc/apt/sources.list.d/yarn.list && curl -sL https://deb.nodesource.com/setup_20.x -o /tmp/nodesource_setup.sh && bash /tmp/nodesource_setup.sh && apt-get update -y && apt-get install -qqy nodejs yarn && apt-get clean && rm -rf /var/lib/apt/lists/* # buildkit |
| ARG TARGETARCH=amd64 |
| ARG TARGETPLATFORM=linux/amd64 |
| RUN |4 BROWSER=brave BROWSER_VERSION=1.87.192 TARGETARCH=amd64 TARGETPLATFORM=linux/amd64 /bin/sh -c curl -fsSLo /usr/share/keyrings/brave-browser-archive-keyring.gpg https://brave-browser-apt-release.s3.brave.com/brave-browser-archive-keyring.gpg && echo "deb [signed-by=/usr/share/keyrings/brave-browser-archive-keyring.gpg arch=amd64] https://brave-browser-apt-release.s3.brave.com/ stable main"|tee /etc/apt/sources.list.d/brave-browser-release.list && apt-get update # buildkit |
| RUN |4 BROWSER=brave BROWSER_VERSION=1.87.192 TARGETARCH=amd64 TARGETPLATFORM=linux/amd64 /bin/sh -c if [ "$BROWSER_VERSION" = "latest" ] ; then debname=$(curl -sL "https://api.github.com/repos/brave/brave-browser/releases/latest" | grep "$TARGETARCH.deb\",$" | cut -d : -f 2,3 | tr -d \",\,,\[:space:]) && tagname=$(curl -sL "https://api.github.com/repos/brave/brave-browser/releases/latest" | grep "tag_name" | cut -d : -f 2,3 | tr -d \",\,,\[:space:]) && curl -sL "https://github.com/brave/brave-browser/releases/download/$tagname/$debname" -o brave.deb ; else debname=$(curl -sL "https://api.github.com/repos/brave/brave-browser/releases/tags/v${BROWSER_VERSION}" | grep "$TARGETARCH.deb\",$" | cut -d : -f 2,3 | tr -d \",\,,\[:space:]) && curl -sL "https://github.com/brave/brave-browser/releases/download/v${BROWSER_VERSION}/$debname" -o brave.deb ; fi # buildkit |
| RUN |4 BROWSER=brave BROWSER_VERSION=1.87.192 TARGETARCH=amd64 TARGETPLATFORM=linux/amd64 /bin/sh -c echo "installing Brave from $TARGETPLATFORM"; dpkg -i brave.deb; apt-get -f install -y; rm -f brave.deb # buildkit |
| RUN |4 BROWSER=brave BROWSER_VERSION=1.87.192 TARGETARCH=amd64 TARGETPLATFORM=linux/amd64 /bin/sh -c ln -s /usr/bin/brave-browser /usr/bin/chromium-browser # buildkit |
| RUN |4 BROWSER=brave BROWSER_VERSION=1.87.192 TARGETARCH=amd64 TARGETPLATFORM=linux/amd64 /bin/sh -c /usr/bin/brave-browser --version # buildkit |
| RUN |4 BROWSER=brave BROWSER_VERSION=1.87.192 TARGETARCH=amd64 TARGETPLATFORM=linux/amd64 /bin/sh -c apt-get clean && rm -rf /var/lib/apt/lists/* # buildkit |
| LABEL org.opencontainers.image.vendor=Webrecorder <https://webrecorder.net/> |
| LABEL org.opencontainers.image.source=https://github.com/webrecorder/browsertrix-crawler |
| LABEL org.opencontainers.image.documentation=https://crawler.docs.browsertrix.com/ |
| LABEL org.opencontainers.image.licenses=AGPL-3.0-or-later |
| ARG BROWSER_VERSION=1.87.192 |
| ENV GEOMETRY=1360x1020x16 BROWSER_VERSION=1.87.192 BROWSER_BIN=google-chrome OPENSSL_CONF=/app/openssl.conf VNC_PASS=vncpassw0rd! DETACHED_CHILD_PROC=1 |
| EXPOSE [6080/tcp 9222/tcp 9223/tcp] |
| WORKDIR /app |
| ADD package.json yarn.lock /app/ # buildkit |
| ARG REBUILD |
| RUN |2 BROWSER_VERSION=1.87.192 REBUILD= /bin/sh -c mkdir -p /tmp/ads && cd /tmp/ads && curl -vs -o ad-hosts.txt https://raw.githubusercontent.com/StevenBlack/hosts/master/hosts && cat ad-hosts.txt | grep '^0.0.0.0 '| awk '{ print $2; }' | grep -v '0.0.0.0' | jq --raw-input --slurp 'split("\n")' > /app/ad-hosts.json && rm /tmp/ads/ad-hosts.txt # buildkit |
| RUN |2 BROWSER_VERSION=1.87.192 REBUILD= /bin/sh -c yarn install --network-timeout 1000000 # buildkit |
| ADD tsconfig.json /app/ # buildkit |
| ADD src /app/src # buildkit |
| RUN |2 BROWSER_VERSION=1.87.192 REBUILD= /bin/sh -c yarn run tsc # buildkit |
| ADD config/ /app/ # buildkit |
| ADD html/ /app/html/ # buildkit |
| ARG RWP_VERSION=2.4.3 |
| ADD https://cdn.jsdelivr.net/npm/replaywebpage@2.4.3/ui.js /app/html/rwp/ # buildkit |
| ADD https://cdn.jsdelivr.net/npm/replaywebpage@2.4.3/sw.js /app/html/rwp/ # buildkit |
| ADD https://cdn.jsdelivr.net/npm/replaywebpage@2.4.3/adblock/adblock.gz /app/html/rwp/adblock.gz # buildkit |
| RUN |3 BROWSER_VERSION=1.87.192 REBUILD= RWP_VERSION=2.4.3 /bin/sh -c chmod a+x /app/dist/main.js /app/dist/create-login-profile.js /app/dist/indexer.js && chmod a+r /app/html/rwp/* # buildkit |
| RUN |3 BROWSER_VERSION=1.87.192 REBUILD= RWP_VERSION=2.4.3 /bin/sh -c ln -s /app/dist/main.js /usr/bin/crawl; ln -s /app/dist/main.js /usr/bin/qa; ln -s /app/dist/create-login-profile.js /usr/bin/create-login-profile; ln -s /app/dist/indexer.js /usr/bin/indexer; # buildkit |
| RUN |3 BROWSER_VERSION=1.87.192 REBUILD= RWP_VERSION=2.4.3 /bin/sh -c mkdir -p /app/behaviors # buildkit |
| WORKDIR /crawls |
| RUN |3 BROWSER_VERSION=1.87.192 REBUILD= RWP_VERSION=2.4.3 /bin/sh -c mkdir -p /etc/brave/policies/managed/ # buildkit |
| ADD config/policies /etc/brave/policies/managed/ # buildkit |
| ADD docker-entrypoint.sh /docker-entrypoint.sh # buildkit |
| ENTRYPOINT ["/docker-entrypoint.sh"] |
| CMD ["crawl"] |
| WORKDIR /svc |
| COPY package*.json ./ # buildkit |
| COPY apps/api/package.json apps/api/package.json # buildkit |
| COPY apps/worker/package.json apps/worker/package.json # buildkit |
| COPY apps/web/package.json apps/web/package.json # buildkit |
| COPY packages/shared/package.json packages/shared/package.json # buildkit |
| RUN /bin/sh -c set -eux; node --version; if command -v npm >/dev/null 2>&1; then :; elif command -v apt-get >/dev/null 2>&1; then apt-get update && apt-get install -y npm && rm -rf /var/lib/apt/lists/*; else echo "npm not found"; exit 1; fi # buildkit |
| RUN /bin/sh -c npm install # buildkit |
| COPY . . # buildkit |
| RUN /bin/sh -c npm run -w @mia/shared build && npm run -w @mia/worker build # buildkit |
| ENV NODE_ENV=production BROWSERTRIX_CRAWL_DIR=/crawls |
| WORKDIR /crawls |
| CMD ["node" "/svc/apps/worker/dist/index.js"] |
Labels
| Key | Value |
|---|---|
| org.opencontainers.image.documentation | https://crawler.docs.browsertrix.com/ |
| org.opencontainers.image.licenses | AGPL-3.0-or-later |
| org.opencontainers.image.ref.name | ubuntu |
| org.opencontainers.image.source | https://github.com/webrecorder/browsertrix-crawler |
| org.opencontainers.image.vendor | Webrecorder <https://webrecorder.net/> |
| org.opencontainers.image.version | 24.04 |
Details
2026-03-20 00:54:57 +00:00
Versions (16)
View all
Container
1
OCI / Docker
linux/amd64
AGPL-3.0-or-later
1.5 GiB
es-search
2026-03-20
pagination-v2
2026-03-20
pagination
2026-03-20
async-spawn
2026-03-20
error-logging
2026-03-20