Fix podman leaking conmon processes
When running in the background without a full-blown init system, `podman system service` will leak `conmon` processes for every gitlab-runner job that executes via the docker socket API. These `conmon` processes almost immediately become zombies, and are never cleaned up. Eventually the zombies will consume all available PIDs. Many attempts to fix this in various ways have all failed. In all cases the GitLab Runner process will start behaving strangely (or fail completely) after an amount of time dependent on its usage executing jobs. Fix this by entirely reimplementing *pipglr* to utilize systemd and a pair of lingering user-slices: one for podman, another for the gitlab runner. Include a systemd timer service to effect runner cleanup periodically. Also update documentation and examples accordingly. Signed-off-by: Chris Evich <chris_gitlab@icuc.me>
This commit is contained in:
109
setup.sh
Normal file
109
setup.sh
Normal file
@@ -0,0 +1,109 @@
|
||||
|
||||
|
||||
# This script is intended to be run during container-image build. Any
|
||||
# other usage outside this context is likely to cause harm.
|
||||
|
||||
# Abort on any command failure, including a failure inside a pipeline.
set -eo pipefail

# These variables are consumed later in this script, so refuse to continue
# when any of them is empty or unset.  Every offending variable is reported
# (on stderr) before aborting, so one build attempt reveals all problems.
missing_vars=0
for varname in PRUNE_INTERVAL RUNNER_VERSION TARGETARCH; do
    # ${!varname} is indirect expansion: the value of the variable whose
    # name is held in $varname.
    if [[ -z "${!varname}" ]]; then
        echo "Error: \$$varname must be non-empty." >&2
        missing_vars=1
    fi
done
if [[ "$missing_vars" -ne 0 ]]; then
    exit 1
fi
|
||||
|
||||
# Base dnf command line: answer "yes" to prompts and skip documentation files
# to make the image smaller.  This is deliberately a plain string; it is
# expanded unquoted (word-split) wherever it is used.
DNF="dnf --setopt=tsflags=nodocs -y"

# Build one "--exclude=<name> " option per entry of /root/xpackages.txt,
# skipping lines that begin with "# ".  Start from an empty string so a
# stray 'x' inherited from the environment cannot leak into the dnf command
# line.  'grep -E' replaces the obsolescent 'egrep' alias.
x=""
for rpm in $(grep -E -v '^(# )+' < /root/xpackages.txt); do
    x+="--exclude=$rpm "
done
|
||||
|
||||
# Trace every command from here on; it makes a failed build easier to debug.
set -x

# dnf itself, or one of its dependencies, may need upgrading.  Do that
# before installing anything else.
$DNF upgrade

# Install podman and systemd, applying the --exclude options collected in $x.
$DNF $x install podman systemd
|
||||
|
||||
# The gitlab-runner package contains scriptlets which do not function
# properly inside a container-build environment where systemd is not
# active/running, so install it with scriptlets disabled (tsflags=noscripts).
# The URL is assembled in a quoted string so the expanded version and
# architecture values are never word-split or glob-expanded.
runner_rpm_url="https://gitlab-runner-downloads.s3.amazonaws.com/${RUNNER_VERSION}/rpm/gitlab-runner_${TARGETARCH}.rpm"
$DNF $x --setopt=tsflags=noscripts install "$runner_rpm_url"
|
||||
|
||||
# Allow removing dnf, sudo, etc. packages by deleting dnf's protected-package
# entries.  Also don't start unnecessary or broken systemd services, like
# anything kernel related or login gettys.
#
# NOTE: the administrator unit directory is /etc/systemd (not "/etc/sytemd").
# With the misspelled path the globs matched nothing and 'rm -rf' silently
# skipped them, leaving those units enabled.
rm -rf \
    /etc/dnf/protected.d/* \
    /etc/systemd/system/getty.target.wants/* \
    /etc/systemd/system/multi-user.target.wants/* \
    /etc/systemd/system/sysinit.target.wants/* \
    /etc/systemd/system/timers.target.wants/* \
    /lib/systemd/system/graphical.target.wants/* \
    /lib/systemd/system/multi-user.target.wants/{getty.target,systemd-ask-password-wall.path} \
    /lib/systemd/system/sys-kernel*.mount
|
||||
|
||||
# Remove unnecessary packages, see xpackages.txt to learn how this list was
# generated.  This makes the image smaller and reduces the attack-surface.
# The command substitution is intentionally unquoted: each package name must
# become a separate argument.  'grep -E' replaces the obsolescent 'egrep'.
dnf remove -y $(grep -E -v '^(# )+' /root/xpackages.txt)

# Wipe out the DNF cache, then remove its directories and logs, and finally
# dnf itself, again to make the image smaller.
$DNF clean all
rm -rf /var/cache/dnf /var/log/dnf* /var/log/yum.*
rpm -e dnf

# Workaround https://bugzilla.redhat.com/show_bug.cgi?id=1995337
rpm --setcaps shadow-utils
|
||||
|
||||
# Create both home directories up front (useradd below is told not to create
# them).  This prevents copying of skel, which can interfere with the
# gitlab-runner.
mkdir -p /home/podman /home/runner

# Guarantee uid/gid 1000 for user 'podman' and 1001 for user 'runner'.
groupadd --gid 1000 podman
groupadd --gid 1001 runner

# Separate users for the two services to increase process isolation.
# 'podman' is additionally a member of group 'runner' because its socket
# service writes /home/runner/podman.socket.
useradd --no-create-home --uid 1000 --gid podman --groups runner podman
useradd --no-create-home --uid 1001 --gid runner runner

# Group-writable so user 'podman' can create the socket file under
# /home/runner.
chmod 770 /home/runner
|
||||
|
||||
# Overwrite the defaults: only user 'podman' is permitted to have a
# user-namespace.  The namespaced IDs are split into two ranges that skip the
# container's root (ID 0) and the user IDs 1000 and 1001 (defined above), to
# prevent hijacking from a nested container.  The same content is written to
# both /etc/subuid and /etc/subgid.
subid_ranges="podman:1:999\npodman:1002:64533"
echo -e "$subid_ranges" | tee /etc/subuid > /etc/subgid

# /home/podman/.local/share/containers: host volume mount point, necessary
#   for nested-podman to use overlayfs2 for container & volume storage.
# /cache: the nested-container's local container-cache volume mount,
#   recommended by the gitlab-runner docs.
mkdir -p /home/podman/.local/share/containers /cache

# Both the gitlab-runner and podman need access to the cache directory /
# volume mount.
chown podman:runner /cache
|
||||
|
||||
# Print $1 escaped for safe use as the replacement text of a '/'-delimited
# sed 's' command: '\', '/' and '&' each get a leading backslash.
_sed_escape_replacement() {
    printf '%s\n' "$1" | sed -e 's|[\\/&]|\\&|g'
}

# Setup persistent 'podman' user services to start & run without a login.
mkdir -p /var/lib/systemd/linger
touch /var/lib/systemd/linger/podman

# Setup 'podman' socket and a container-storage pruning service for the
# 'podman' user.  The podman.socket and prune.timer unit files themselves
# are added from the Containerfile.
mkdir -p /home/podman/.config/systemd/user/{sockets.target.wants,default.target.wants}
cd /home/podman/.config/systemd/user/
ln -s "$PWD/podman.socket" ./sockets.target.wants/
ln -s "$PWD/prune.timer" ./default.target.wants/

# Substitute value from --build-arg if specified, otherwise use default from
# Containerfile.  The value is escaped first because timer expressions may
# legitimately contain '/' (e.g. "*:0/15"), which would otherwise terminate
# the sed expression early.
prune_interval_escaped=$(_sed_escape_replacement "$PRUNE_INTERVAL")
sed -i -e "s/@@@PRUNE_INTERVAL@@@/$prune_interval_escaped/" ./prune.timer

# Containerfile ADD instruction does not properly set ownership/permissions.
chown -R 1000:1000 /home/podman
chmod 700 /home/podman
|
||||
|
||||
# Setup persistent 'runner' user services to start & run without a login.
touch /var/lib/systemd/linger/runner

runner_home=/home/runner
runner_units=$runner_home/.config/systemd/user

# Unit directory for the 'runner' user, plus the gitlab-runner config
# directory.  gitlab-runner will create a side-car '.runner_system_id' file
# next to 'config.toml' on first startup, so that directory must exist and
# be accessible.
mkdir -p "$runner_units/default.target.wants" "$runner_home/.gitlab-runner"

# Enable runner.service for the user's default target.  It does not depend
# on podman.socket file availability; it will retry if not present.
cd /home/runner/.config/systemd/user/
ln -s "$runner_units/runner.service" ./default.target.wants/

# Link to the future config file, presented as a container-secret.
ln -s /var/run/secrets/config.toml "$runner_home/.gitlab-runner/config.toml"

# Containerfile ADD instruction does not properly set ownership/permissions.
chown -R runner:runner "$runner_home"
chmod -R 700 "$runner_home/.gitlab-runner"
|
||||
Reference in New Issue
Block a user