From 6cb20272e41f3d2e3533c9737bd01615c871687f Mon Sep 17 00:00:00 2001 From: Chris Evich Date: Fri, 6 Jan 2023 11:53:11 -0500 Subject: [PATCH] Fix podman leaking conmon processes When running in the background without a full-blown init system, `podman system service` will leak `conmon` processes for every gitlab-runner job that executes via the docker socket API. These `conmon` processes almost immediately becomes zombies, and are never cleaned up. Eventually the zombies will consume all available PIDs. Many attempts to fix this in various ways have all failed. In all cases the GitLab Runner process will start behaving strangely (or fail completely) after an amount of time dependent on its usage executing jobs. Fix this by entirely reimplementing *pipglr* to utilize systemd and a pair of lingering user-slices. One for podman, another for the gitlab runner. Include a systemd timer service to affect runner cleanup, periodically. Also update documentation and examples accordingly. Signed-off-by: Chris Evich --- Containerfile | 222 ++++++++++++----------------------- README.md | 213 +++++++++++++++------------------ config.toml | 7 -- containers.conf | 13 ++ gitlab-runner-wrapper | 28 ----- kmsglog.conf | 3 + podman-in-podman-maintenance | 26 ---- podman.service | 18 +++ podman.socket | 11 ++ prune.service | 6 + prune.timer | 6 + runner.service | 7 ++ setup.sh | 109 +++++++++++++++++ xpackages.txt | 21 ++++ 14 files changed, 361 insertions(+), 329 deletions(-) delete mode 100644 config.toml create mode 100644 containers.conf delete mode 100644 gitlab-runner-wrapper create mode 100644 kmsglog.conf delete mode 100644 podman-in-podman-maintenance create mode 100644 podman.service create mode 100644 podman.socket create mode 100644 prune.service create mode 100644 prune.timer create mode 100644 runner.service create mode 100644 setup.sh create mode 100644 xpackages.txt diff --git a/Containerfile b/Containerfile index 8db73b4..a1219d6 100644 --- a/Containerfile +++ 
b/Containerfile @@ -1,157 +1,81 @@ -# pipglr/Containerfile -# -# Builds a Podman-in-Podman Gitlab-Runner image for -# executing Gitlab CI/CD jobs. Requires configuration -# steps specific to Gitlab projects. For more info. see -# https://docs.gitlab.com/runner/executors/docker.html#use-podman-to-run-docker-commands -# +FROM quay.io/centos/centos:stream9 -FROM quay.io/podman/stable:v4.3.1 +COPY /setup.sh /xpackages.txt /root/ +COPY /containers.conf /home/podman/.config/containers/containers.conf +COPY /podman.service /podman.socket /prune.service /prune.timer /home/podman/.config/systemd/user/ +COPY /runner.service /home/runner/.config/systemd/user/ +COPY kmsglog.conf /etc/systemd/system.conf.d/ -# This is a list of packages to remove and/or exclude from the image. -# Primarily this is done for security reasons, should a runner process -# escape confinement. Having fewer things to poke, lowers the attack -# surface-area. -# -# This list was formed manually by running these commands in the base image: -# for package in $(rpm -qa); do \ -# if dnf erase $package; then echo "$package" >> exclude; fi; \ -# done; \ -# cat exclude -# -# After adding those packages to this file, the container build was run -# and package list adjusted, untill no dependency errors were raised. -ARG EXCLUDE_PACKAGES="\ - fedora-repos-modular \ - findutils \ - libxcrypt-compat \ - openldap-compat \ - podman-gvproxy \ - rootfiles \ - sudo \ - vim-minimal \ - yum" +# Allow image-builders to choose another version besides "latest" should +# an incompatible change be introduced. +ARG RUNNER_VERSION=latest -# Base-image runs as user 'podman', temporarily switch to root -# for installation/setup. 
-USER root -# Helper for comparison in future RUN operations (DO NOT USE) -ARG _DNFCMD="dnf --setopt=tsflags=nodocs -y" -# Set this instead, if (for example) you want to volume-mount in /var/cache/dnf -ARG DNFCMD="${_DNFCMD}" -# Avoid installing any documentation to keep image small -# During install, excluding packages is meaningless if already installed -RUN set -x && \ - rm -f /etc/dnf/protected.d/sudo.conf && \ - rm -f /etc/dnf/protected.d/yum.conf && \ - $DNFCMD remove ${EXCLUDE_PACKAGES} +# Permit building containers for alternate architectures. At the time +# of this commit, only 'arm64' is available. +ARG TARGETARCH=amd64 -# Enable callers to customize the runner version as needed, otherwise -# assume this image will be version-tagged, so it's fine to grab the latest. -ARG RUNNER_VERSION="latest" -# When building a multi-arch manifest-list, this buid-arg is set automatically. -ARG TARGETARCH="amd64" -ENV RUNNER_RPM_URL=https://gitlab-runner-downloads.s3.amazonaws.com/${RUNNER_VERSION}/rpm/gitlab-runner_${TARGETARCH}.rpm -RUN for rpm in ${EXCLUDE_PACKAGES}; do x+="--exclude=$rpm "; done && \ - set -x && \ - $DNFCMD update && \ - $DNFCMD install $x $RUNNER_RPM_URL && \ - $DNFCMD upgrade && \ - $DNFCMD reinstall shadow-utils && \ - if [[ "${DNFCMD}" == "${_DNFCMD}" ]]; then \ - dnf clean all && \ - rm -rf /var/cache/dnf; \ - fi +# Allow image-builders to choose an alternate nested-container pruning cycle +# (see systemd.timer for allowable values). For most people the default is +# probably fine. It depends on the number and frequency of jobs run, and the +# disk-space available for both /cache and /home/podman/.local/share/containers volumes. +ARG PRUNE_INTERVAL=daily -# In case of a runner escape, prevent easy installation of packages. -RUN rm -f /etc/dnf/protected.d/* && \ - rpm -e dnf && \ - rm -f $(type -P rpm) +# All-in-one packaging/image-setup script to keep things simple. 
+RUN PRUNE_INTERVAL=${PRUNE_INTERVAL} \ + RUNNER_VERSION=${RUNNER_VERSION} \ + bash /root/setup.sh -ADD /config.toml /home/podman/.gitlab-runner/config.toml -# The global "listen_address" option is used for metrics and -# debugging. Disable it by default since use requires special/ -# additional host configuration. -# Ref: https://docs.gitlab.com/runner/configuration/advanced-configuration.html#the-global-section -ARG RUNNER_LISTEN_ADDRESS="disabled" -ENV RUNNER_LISTEN_ADDRESS=$RUNNER_LISTEN_ADDRESS -RUN if [[ "$RUNNER_LISTEN_ADDRESS" == "disabled" ]]; then \ - sed -i -r \ - -e "s/.*@@RUNNER_LISTEN_ADDRESS@@.*//g" \ - /home/podman/.gitlab-runner/config.toml; \ - else \ - sed -i -r \ - -e "s/@@RUNNER_LISTEN_ADDRESS@@/$RUNNER_LISTEN_ADDRESS/g" \ - /home/podman/.gitlab-runner/config.toml; \ - fi +VOLUME /cache /home/podman/.local/share/containers +ENTRYPOINT ["/lib/systemd/systemd"] -# A small wrapper is needed to launch a background podman system service -# process for the gitlab-runner to connect to. -ADD /gitlab-runner-wrapper /podman-in-podman-maintenance /usr/local/bin/ -# Base image UTS NS configuration causes runner to break when launching -# nested rootless containers. -RUN sed -i -r \ - -e 's/^utsns.+host.*/utsns="private"/' \ - /etc/containers/containers.conf && \ - chmod +x /usr/local/bin/gitlab-runner-wrapper && \ - chmod +x /usr/local/bin/podman-in-podman-maintenance && \ - chown -R podman:podman /home/podman && \ - chmod u+s /usr/bin/new{uid,gid}map && \ - rm -f /home/podman/.bash* && \ - echo DOCKER_HOST="unix:///tmp/podman-run-1000/podman/podman.sock" > /etc/profile.d/podman.sh && \ - echo "podman:10000:10000" | tee /etc/subuid > /etc/subgid && \ - setcap -n 10000 cap_setuid+ep /usr/bin/newuidmap && \ - setcap -n 10000 cap_setuid+ep /usr/bin/newgidmap +# Gitlab-runner configuration options; these may be freely overridden at +# container image build time. 
+ARG DEFAULT_JOB_IMAGE=registry.fedoraproject.org/fedora-minimal:latest +# Run nested containers in --privileged mode. This is required to allow building +# container images using podman or buildah; otherwise it may be set to 'false'. +ARG NESTED_PRIVILEGED=true -# Runtime rootless-mode configuration -USER podman -# N/B: Volumes are cumulative with the base image -VOLUME ["/home/podman/.gitlab-runner/", "/cache"] -WORKDIR /home/podman -ENTRYPOINT ["/usr/local/bin/gitlab-runner-wrapper"] - -# Ensure root storage directory exists with correct permissions -RUN mkdir -p .local/share/containers/storage - -# Gitlab-runner configuration options. Default to unprivileged (nested) -# runner. Privileged is required to permit nested container image building. -ARG RUNNER_NAME="qontainers-pipglr" -# Running inner-podman privileged is necessary at the time of this commit. -ARG PRIVILEGED_RUNNER="true" -# Tags allow pinning jobs to specific runners, comma-separated list of -# tags to add to runner (no spaces!) -ARG RUNNER_TAGS="podman-in-podman" -# Permit running jobs without any tag at all -ARG RUNNER_UNTAGGED="true" -# Adjust based on usage and storage size to prevent ENOSPACE problems -ARG CLEAN_INTERVAL="24h" -ENV CLEAN_INTERVAL="$CLEAN_INTERVAL" \ - REGISTER_NON_INTERACTIVE="true" \ - RUNNER_TAG_LIST="$RUNNER_TAGS" \ - REGISTER_RUN_UNTAGGED="$RUNNER_UNTAGGED" \ - REGISTER_ACCESS_LEVEL="ref_protected" \ - REGISTER_MAXIMUM_TIMEOUT="3600" \ - CI_SERVER_URL="https://gitlab.com/" \ - RUNNER_NAME="${RUNNER_NAME}" \ - RUNNER_EXECUTOR="docker" \ - RUNNER_SHELL="bash" \ - REGISTER_MAINTENANCE_NOTE="Podman-in-Podman containerized runner" \ - DOCKER_HOST="unix:///tmp/podman-run-1000/podman/podman.sock" \ - DOCKER_DEVICES="/dev/fuse" \ - DOCKER_IMAGE="registry.fedoraproject.org/fedora-minimal:latest" \ - DOCKER_CACHE_DIR="/cache" \ - DOCKER_VOLUMES="/cache" \ - DOCKER_NETWORK_MODE="host" \ - DOCKER_PRIVILEGED="$PRIVILEGED_RUNNER" - -# Not a real build-arg. Simply here to save lots of typing. 
-ARG _pm="--systemd=true --device=/dev/fuse --security-opt label=disable --user podman --volume pipglr-podman-root:/home/podman/.local/share/containers --volume pipglr-config:/home/podman/.gitlab-runner -v pipglr-podman-cache:/cache --tmpfs /var/lib/containers,ro,size=1k -e PODMAN_RUNNER_DEBUG -e LOG_LEVEL" - -# These labels simply make it easier to register and execute the runner. -# Define them last so they are absent should a image-build failure occur. -LABEL register="podman run -it --rm $_pm --secret REGISTRATION_TOKEN,type=env \$IMAGE register" -# Note: Privileged mode is required to permit building container images with inner-podman -LABEL run="podman run -d --privileged --name pipglr $_pm \$IMAGE run" - -# In case it's helpful, include the documentation -ADD /README.md /home/podman/ +# The registration runlabel may be called multiple times to register more than +# one runner. Each expects a REGISTRATION_TOKEN secret to be pre-defined and +# the file './config.toml' to exist (may be empty). A local-cache volume +# '/cache' is configured for bind-mounting into all interior containers +# for container-runtime use, as recommended by the docs. Other settings +# may be changed if you know what you're doing. 
+LABEL register="podman run -it --rm \ + --secret=REGISTRATION_TOKEN,type=env \ + -v ./config.toml:/home/runner/.gitlab-runner//config.toml:Z \ + -e REGISTER_NON_INTERACTIVE=true \ + -e CI_SERVER_URL=https://gitlab.com/ \ + -e RUNNER_NAME=pipglr \ + -e RUNNER_EXECUTOR=docker \ + -e RUNNER_SHELL=bash \ + -e REGISTER_MAINTENANCE_NOTE=Podman-In-Podman-GitLab-Runner \ + -e DOCKER_HOST=unix:///home/runner/podman.sock \ + -e DOCKER_IMAGE=${DEFAULT_JOB_IMAGE} \ + -e DOCKER_CACHE_DIR=/cache \ + -e DOCKER_VOLUMES=/cache \ + -e DOCKER_NETWORK_MODE=host \ + -e DOCKER_PRIVILEGED=${NESTED_PRIVILEGED} \ + --user runner \ + --entrypoint=/usr/bin/gitlab-runner \$IMAGE register" +# Additionally, the nested-podman storage volume (pipglr-storage) must be pre-created +# with the 'podman' UID/GID values so that nested containers can access it. +LABEL setupstorage="podman volume create --opt o=uid=1000,gid=1000 pipglr-storage" +# Lastly, the gitlab-runner will manage its container-cache in the pipglr-cache +# volume, which will also be bind-mounted into every container. So it must be +# writable by both the 'podman' user and the 'runner' group. +LABEL setupcache="podman volume create --opt o=uid=1000,gid=1001 pipglr-cache" +# Helper that prints the current 'config.toml' secret so that it can be edited. +LABEL dumpconfig="podman run -it --rm \ + --secret config.toml --entrypoint=/bin/cat \ + \$IMAGE /var/run/secrets/config.toml" +# Executing the runner container depends on the config.toml secret being +# set (see above) and two volumes existing with correct permissions set. +# Note: The contents of the volumes are not critical; they may be removed +# and re-created (see above) to quickly free up disk space. 
+LABEL run="podman run -dt --name pipglr \ + --secret config.toml,uid=1001,gid=1001 \ + -v pipglr-storage:/home/podman/.local/share/containers \ + -v pipglr-cache:/cache \ + --systemd true --privileged \ + --device /dev/fuse \$IMAGE" diff --git a/README.md b/README.md index ef11f8a..c5b7716 100644 --- a/README.md +++ b/README.md @@ -22,112 +22,122 @@ configuration relative to their own security situation/environment. ### Operation -This image supports `podman container runlabel`, or if your version -lacks this feature, Several labels are set on the image to support -easy registration and execution of a runner container using a special -bash command. See the examples below for more information. +This image leverages the podman `runlabel` feature heavily. Several +labels are set on the image to support easy registration and execution +of the runner container. While it's possible to use the container +with your own command-line, it's highly recommended to base them +off of one of the labels. See the examples below for more information. -#### [Volume setup] - -Since podman inside the container runs as user `podman`, the volumes -used by it need to be pre-created with ownership information. While, -we're at it, might as well add the performance-improving `noatime`, -option as well. +***Note:*** Some older versions of podman don't support the +`container runlabel` sub-command. If this is the case, you may simulate +it with the following, substituting `