myinternetarchive/worker (sha256:e81c44ec15b9a5d361ae7cbc359fadcd59ba36f58cd1e3686f4ddb6462870c16)

Published 2026-03-20 10:23:49 +00:00 by nico

Installation

docker pull code.hej.cloud/nico/myinternetarchive/worker@sha256:e81c44ec15b9a5d361ae7cbc359fadcd59ba36f58cd1e3686f4ddb6462870c16
sha256:e81c44ec15b9a5d361ae7cbc359fadcd59ba36f58cd1e3686f4ddb6462870c16

Image layers

ARG RELEASE
ARG LAUNCHPAD_BUILD_ARCH
LABEL org.opencontainers.image.ref.name=ubuntu
LABEL org.opencontainers.image.version=24.04
ADD file:1ae27d2ef4369361104b699712f3897141e394785df5d193d67b44626f57eb87 in /
CMD ["/bin/bash"]
ARG BROWSER=brave
ENV BROWSER=brave
ARG BROWSER_VERSION=1.87.192
ENV BROWSER_VERSION=1.87.192
RUN |2 BROWSER=brave BROWSER_VERSION=1.87.192 /bin/sh -c echo "ttf-mscorefonts-installer msttcorefonts/accepted-mscorefonts-eula select true" | debconf-set-selections # buildkit
RUN |2 BROWSER=brave BROWSER_VERSION=1.87.192 /bin/sh -c apt-get update -y && apt-get install --no-install-recommends -qqy software-properties-common && apt-get update -y && DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -qqy build-essential locales-all redis-server apt-transport-https curl git socat jq xvfb x11vnc gosu gpg gpg-agent ca-certificates libnss3 libatk1.0-0 libatk-bridge2.0-0 libcups2 libxkbcommon0 libxcomposite1 libxdamage1 libxrandr2 libgbm1 libpango-1.0-0 libcairo2 libasound2t64 libgtk-3-0 libxtst6 xdg-utils libc-bin hicolor-icon-theme python3-pip python3-dev python3-venv fonts-arphic-ukai fonts-arphic-uming fonts-freefont-ttf fonts-gfs-neohellenic fonts-indic fonts-ipafont-mincho fonts-ipafont-gothic fonts-kacst fonts-liberation fonts-noto-cjk fonts-noto-color-emoji fonts-roboto fonts-stix fonts-thai-tlwg fonts-sil-padauk fonts-ubuntu fonts-unfonts-core fonts-wqy-zenhei msttcorefonts libu2f-udev libvulkan1 openssh-client sshpass autossh # buildkit
RUN |2 BROWSER=brave BROWSER_VERSION=1.87.192 /bin/sh -c curl -sS https://dl.yarnpkg.com/debian/pubkey.gpg | apt-key add - && echo "deb https://dl.yarnpkg.com/debian/ stable main" | tee /etc/apt/sources.list.d/yarn.list && curl -sL https://deb.nodesource.com/setup_20.x -o /tmp/nodesource_setup.sh && bash /tmp/nodesource_setup.sh && apt-get update -y && apt-get install -qqy nodejs yarn && apt-get clean && rm -rf /var/lib/apt/lists/* # buildkit
ARG TARGETARCH=amd64
ARG TARGETPLATFORM=linux/amd64
RUN |4 BROWSER=brave BROWSER_VERSION=1.87.192 TARGETARCH=amd64 TARGETPLATFORM=linux/amd64 /bin/sh -c curl -fsSLo /usr/share/keyrings/brave-browser-archive-keyring.gpg https://brave-browser-apt-release.s3.brave.com/brave-browser-archive-keyring.gpg && echo "deb [signed-by=/usr/share/keyrings/brave-browser-archive-keyring.gpg arch=amd64] https://brave-browser-apt-release.s3.brave.com/ stable main"|tee /etc/apt/sources.list.d/brave-browser-release.list && apt-get update # buildkit
RUN |4 BROWSER=brave BROWSER_VERSION=1.87.192 TARGETARCH=amd64 TARGETPLATFORM=linux/amd64 /bin/sh -c if [ "$BROWSER_VERSION" = "latest" ] ; then debname=$(curl -sL "https://api.github.com/repos/brave/brave-browser/releases/latest" | grep "$TARGETARCH.deb\",$" | cut -d : -f 2,3 | tr -d \",\,,\[:space:]) && tagname=$(curl -sL "https://api.github.com/repos/brave/brave-browser/releases/latest" | grep "tag_name" | cut -d : -f 2,3 | tr -d \",\,,\[:space:]) && curl -sL "https://github.com/brave/brave-browser/releases/download/$tagname/$debname" -o brave.deb ; else debname=$(curl -sL "https://api.github.com/repos/brave/brave-browser/releases/tags/v${BROWSER_VERSION}" | grep "$TARGETARCH.deb\",$" | cut -d : -f 2,3 | tr -d \",\,,\[:space:]) && curl -sL "https://github.com/brave/brave-browser/releases/download/v${BROWSER_VERSION}/$debname" -o brave.deb ; fi # buildkit
RUN |4 BROWSER=brave BROWSER_VERSION=1.87.192 TARGETARCH=amd64 TARGETPLATFORM=linux/amd64 /bin/sh -c echo "installing Brave from $TARGETPLATFORM"; dpkg -i brave.deb; apt-get -f install -y; rm -f brave.deb # buildkit
RUN |4 BROWSER=brave BROWSER_VERSION=1.87.192 TARGETARCH=amd64 TARGETPLATFORM=linux/amd64 /bin/sh -c ln -s /usr/bin/brave-browser /usr/bin/chromium-browser # buildkit
RUN |4 BROWSER=brave BROWSER_VERSION=1.87.192 TARGETARCH=amd64 TARGETPLATFORM=linux/amd64 /bin/sh -c /usr/bin/brave-browser --version # buildkit
RUN |4 BROWSER=brave BROWSER_VERSION=1.87.192 TARGETARCH=amd64 TARGETPLATFORM=linux/amd64 /bin/sh -c apt-get clean && rm -rf /var/lib/apt/lists/* # buildkit
LABEL org.opencontainers.image.vendor=Webrecorder <https://webrecorder.net/>
LABEL org.opencontainers.image.source=https://github.com/webrecorder/browsertrix-crawler
LABEL org.opencontainers.image.documentation=https://crawler.docs.browsertrix.com/
LABEL org.opencontainers.image.licenses=AGPL-3.0-or-later
ARG BROWSER_VERSION=1.87.192
ENV GEOMETRY=1360x1020x16 BROWSER_VERSION=1.87.192 BROWSER_BIN=google-chrome OPENSSL_CONF=/app/openssl.conf VNC_PASS=vncpassw0rd! DETACHED_CHILD_PROC=1
EXPOSE 6080/tcp 9222/tcp 9223/tcp
WORKDIR /app
ADD package.json yarn.lock /app/ # buildkit
ARG REBUILD
RUN |2 BROWSER_VERSION=1.87.192 REBUILD= /bin/sh -c mkdir -p /tmp/ads && cd /tmp/ads && curl -vs -o ad-hosts.txt https://raw.githubusercontent.com/StevenBlack/hosts/master/hosts && cat ad-hosts.txt | grep '^0.0.0.0 '| awk '{ print $2; }' | grep -v '0.0.0.0' | jq --raw-input --slurp 'split("\n")' > /app/ad-hosts.json && rm /tmp/ads/ad-hosts.txt # buildkit
RUN |2 BROWSER_VERSION=1.87.192 REBUILD= /bin/sh -c yarn install --network-timeout 1000000 # buildkit
ADD tsconfig.json /app/ # buildkit
ADD src /app/src # buildkit
RUN |2 BROWSER_VERSION=1.87.192 REBUILD= /bin/sh -c yarn run tsc # buildkit
ADD config/ /app/ # buildkit
ADD html/ /app/html/ # buildkit
ARG RWP_VERSION=2.4.3
ADD https://cdn.jsdelivr.net/npm/replaywebpage@2.4.3/ui.js /app/html/rwp/ # buildkit
ADD https://cdn.jsdelivr.net/npm/replaywebpage@2.4.3/sw.js /app/html/rwp/ # buildkit
ADD https://cdn.jsdelivr.net/npm/replaywebpage@2.4.3/adblock/adblock.gz /app/html/rwp/adblock.gz # buildkit
RUN |3 BROWSER_VERSION=1.87.192 REBUILD= RWP_VERSION=2.4.3 /bin/sh -c chmod a+x /app/dist/main.js /app/dist/create-login-profile.js /app/dist/indexer.js && chmod a+r /app/html/rwp/* # buildkit
RUN |3 BROWSER_VERSION=1.87.192 REBUILD= RWP_VERSION=2.4.3 /bin/sh -c ln -s /app/dist/main.js /usr/bin/crawl; ln -s /app/dist/main.js /usr/bin/qa; ln -s /app/dist/create-login-profile.js /usr/bin/create-login-profile; ln -s /app/dist/indexer.js /usr/bin/indexer; # buildkit
RUN |3 BROWSER_VERSION=1.87.192 REBUILD= RWP_VERSION=2.4.3 /bin/sh -c mkdir -p /app/behaviors # buildkit
WORKDIR /crawls
RUN |3 BROWSER_VERSION=1.87.192 REBUILD= RWP_VERSION=2.4.3 /bin/sh -c mkdir -p /etc/brave/policies/managed/ # buildkit
ADD config/policies /etc/brave/policies/managed/ # buildkit
ADD docker-entrypoint.sh /docker-entrypoint.sh # buildkit
ENTRYPOINT ["/docker-entrypoint.sh"]
CMD ["crawl"]
WORKDIR /svc
COPY package*.json ./ # buildkit
COPY apps/api/package.json apps/api/package.json # buildkit
COPY apps/worker/package.json apps/worker/package.json # buildkit
COPY apps/web/package.json apps/web/package.json # buildkit
COPY packages/shared/package.json packages/shared/package.json # buildkit
RUN /bin/sh -c set -eux; node --version; if command -v npm >/dev/null 2>&1; then :; elif command -v apt-get >/dev/null 2>&1; then apt-get update && apt-get install -y npm && rm -rf /var/lib/apt/lists/*; else echo "npm not found"; exit 1; fi # buildkit
RUN /bin/sh -c npm install # buildkit
COPY . . # buildkit
RUN /bin/sh -c npm run -w @mia/shared build && npm run -w @mia/worker build # buildkit
ENV NODE_ENV=production BROWSERTRIX_CRAWL_DIR=/crawls
WORKDIR /crawls
CMD ["node", "/svc/apps/worker/dist/index.js"]

Labels

Key Value
org.opencontainers.image.documentation https://crawler.docs.browsertrix.com/
org.opencontainers.image.licenses AGPL-3.0-or-later
org.opencontainers.image.ref.name ubuntu
org.opencontainers.image.source https://github.com/webrecorder/browsertrix-crawler
org.opencontainers.image.vendor Webrecorder <https://webrecorder.net/>
org.opencontainers.image.version 24.04
Details
Container
2026-03-20 10:23:49 +00:00
1
OCI / Docker
linux/amd64
AGPL-3.0-or-later
1.5 GiB
Versions (18) View all
extract-fix 2026-03-20
warm-v1 2026-03-20
es-search 2026-03-20
pagination-v2 2026-03-20
pagination 2026-03-20