media tools
This blog post walks through the topics below, for both local and hosted setups, using distroless/hardened images.
- Archiving Online Media w/re-encoding
- Pipeline for identifying common faces in images and videos, then tagging and reorganizing them
- How to set up GitHub Actions to automatically build and smoke-test the images
Prerequisites: Docker; ideally a decent iGPU, an M-series MacBook or, better, a dGPU. OS: Unix (sorry, PowerShell is too hard).
Archiving online media content for offline viewing through NAS
Hardened distroless images are safer because they have a smaller attack surface, whether they run on a local machine or on a hosted instance.
For example, say you want to have https://www.youtube.com/watch?v=tGXV-ZRwcUM available locally.
# Builder stage: installs yt-dlp into a virtualenv and fetches a static FFmpeg build.
# The -dev variant is required here: the plain :latest image is the minimal runtime
# variant and ships neither a shell nor pip, so shell-form RUN steps cannot execute in it.
# For reproducible builds, pin by digest:
#   cgr.dev/chainguard/python:latest-dev@sha256:<digest>
FROM cgr.dev/chainguard/python:latest-dev AS builder

# Root is needed to create /venv and /ffmpeg-build at the filesystem root.
USER root

# Self-contained virtualenv; the runtime stage copies /venv as a whole.
RUN python -m venv /venv
ENV PATH="/venv/bin:$PATH"
RUN pip install --no-cache-dir yt-dlp

# TARGETARCH is filled in by BuildKit with the architecture currently being built.
ARG TARGETARCH
WORKDIR /ffmpeg-build

# Map the Docker architecture name to the BtbN release naming, download the archive
# and the published checksum list, verify, then unpack.
# The checksum entry is extracted with a separate `grep -F ... &&` step (not a pipe)
# so that a missing entry aborts the build instead of handing sha256sum an empty list.
RUN ARCH=$(case "${TARGETARCH}" in \
        amd64) echo "linux64" ;; \
        arm64) echo "linuxarm64" ;; \
        *) echo "Unsupported: ${TARGETARCH}" >&2; exit 1 ;; \
    esac) && \
    FFMPEG_BASENAME="ffmpeg-master-latest-${ARCH}-gpl" && \
    echo "Downloading FFmpeg for ${TARGETARCH} (${ARCH})" && \
    wget "https://github.com/BtbN/FFmpeg-Builds/releases/download/latest/${FFMPEG_BASENAME}.tar.xz" && \
    wget "https://github.com/BtbN/FFmpeg-Builds/releases/download/latest/checksums.sha256" && \
    grep -F "${FFMPEG_BASENAME}.tar.xz" checksums.sha256 > ffmpeg.sha256 && \
    sha256sum -c ffmpeg.sha256 && \
    tar -xf "${FFMPEG_BASENAME}.tar.xz" && \
    chmod +x "${FFMPEG_BASENAME}/bin/ffmpeg" "${FFMPEG_BASENAME}/bin/ffprobe" && \
    ln -s "${FFMPEG_BASENAME}" ffmpeg-bundle
# Runtime stage: minimal image that only carries what yt-dlp needs at run time.
# For reproducible builds, pin by digest:
#   cgr.dev/chainguard/python:latest@sha256:<digest>
FROM cgr.dev/chainguard/python:latest

# Virtualenv with yt-dlp, built in the builder stage; put it first on PATH.
COPY --from=builder /venv /venv
ENV PATH="/venv/bin:$PATH"

# FFmpeg binaries downloaded and checksum-verified in the builder stage.
COPY --from=builder /ffmpeg-build/ffmpeg-bundle/bin/ffmpeg /usr/local/bin/
COPY --from=builder /ffmpeg-build/ffmpeg-bundle/bin/ffprobe /usr/local/bin/

# CA bundle for HTTPS downloads; SSL_CERT_FILE points TLS clients at it.
COPY --from=builder /etc/ssl/cert.pem /etc/ssl/cert.pem
ENV SSL_CERT_FILE=/etc/ssl/cert.pem

# Downloads land in /downloads; mount a host directory there.
WORKDIR /downloads
USER nonroot
ENTRYPOINT ["yt-dlp"]

Now you can download:
Video:

# NOTE(review): the container runs as the image's nonroot user; on Linux the
# bind-mounted directory must be writable by that user (or pass --user) — confirm for your host.
docker run --rm \
  -v "$(pwd):/downloads" \
  <your image> \
  "https://www.youtube.com/watch?v=dQw4w9WgXcQ"

Audio:

docker run --rm -v "$(pwd):/downloads" <your image> -f bestaudio "URL"

ffmpeg also allows conversion to AV1 codec

docker run --rm \
  -v "$(pwd):/downloads" \
  <your image> \
  -f "bestvideo*+bestaudio/best" \
  --recode-video mkv \
  --postprocessor-args "FFmpeg:-c:v libaom-av1 -crf 18 -cpu-used 8 -c:a copy" \
  "https://www.youtube.com/watch?v=dQw4w9WgXcQ"

Image Building:
_build-image.yml
# Reusable workflow: builds a container image, smoke-tests and vulnerability-scans
# the amd64 build locally, and only then pushes the multi-arch image to GHCR and
# signs it with cosign.
name: Build Image (Reusable)

on:
  workflow_call:
    inputs:
      image_name:
        required: true
        type: string
        description: "Image name used for tags and metadata"
      context:
        required: true
        type: string
        description: "Docker build context path"
      smoke_cmd:
        required: false
        type: string
        default: "--version"
        description: "Command to pass to the image entrypoint for smoke testing"
      platforms:
        required: false
        type: string
        default: "linux/amd64"
        description: "Build target platforms (e.g. linux/amd64,linux/arm64)"
      trivy_severity:
        required: false
        type: string
        default: "CRITICAL,HIGH"
        description: "CVSS severity levels that fail the build"

permissions:
  packages: write  # push the image to GHCR
  id-token: write  # OIDC token used by `cosign sign` (no key is supplied below)
  attestations: write
  contents: read

jobs:
  build:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Install cosign
        uses: sigstore/cosign-installer@v3

      - name: Login to GHCR
        uses: docker/login-action@v3
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      # Produces three tags per build: latest, sha-<commit> and the build date.
      - name: Docker metadata
        id: meta
        uses: docker/metadata-action@v5
        with:
          images: ghcr.io/${{ github.repository }}/${{ inputs.image_name }}
          tags: |
            type=raw,value=latest
            type=sha,prefix=sha-
            type=raw,value={{date 'YYYY-MM-DD'}}

      - name: Set up QEMU
        uses: docker/setup-qemu-action@v3

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      # `load: true` only supports a single platform, hence the amd64-only build.
      - name: Build for smoke test (amd64 only, local load)
        uses: docker/build-push-action@v6
        with:
          context: ${{ inputs.context }}
          platforms: linux/amd64
          push: false
          load: true
          tags: smoke-test:${{ inputs.image_name }}
          cache-from: type=gha
          cache-to: type=gha,mode=max

      # Inputs are passed through env instead of being interpolated into the
      # script, so their content is never parsed as shell code.
      - name: Smoke test
        env:
          IMAGE_NAME: ${{ inputs.image_name }}
          SMOKE_CMD: ${{ inputs.smoke_cmd }}
        run: |
          # SMOKE_CMD is deliberately unquoted so it is split into separate
          # arguments on whitespace (quotes inside the value are not interpreted).
          docker run --rm "smoke-test:${IMAGE_NAME}" ${SMOKE_CMD}

      # Scan the locally loaded image BEFORE anything is pushed, so a failing
      # scan actually prevents publication (including the `latest` tag).
      # Pinned to a release instead of the mutable `master` branch; pin to a
      # full commit SHA for the strongest guarantee.
      - name: Vulnerability scan
        uses: aquasecurity/trivy-action@0.33.1
        with:
          image-ref: smoke-test:${{ inputs.image_name }}
          format: 'table'
          exit-code: '1'
          severity: ${{ inputs.trivy_severity }}
          scanners: 'vuln'
          vuln-type: 'os,library'

      - name: Build and push (multi-arch)
        id: build
        uses: docker/build-push-action@v6
        with:
          context: ${{ inputs.context }}
          platforms: ${{ inputs.platforms }}
          push: true
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}
          provenance: true
          sbom: true
          cache-from: type=gha
          cache-to: type=gha,mode=max

      # Sign by digest so the signature is bound to the exact pushed manifest.
      - name: Sign image
        env:
          IMAGE_NAME: ${{ inputs.image_name }}
          DIGEST: ${{ steps.build.outputs.digest }}
        run: |
          # Registry references must be lowercase, but GITHUB_REPOSITORY keeps
          # the owner/repo casing, so normalise it before building the reference.
          image_ref="ghcr.io/${GITHUB_REPOSITORY}/${IMAGE_NAME}"
          cosign sign --yes "${image_ref,,}@${DIGEST}"

With this reusable workflow, we can consume it for yt-dlp:
# Caller workflow: builds the yt-dlp image by delegating to the shared
# reusable build workflow.
name: Build yt-dlp

on:
  # Manual runs from the Actions tab.
  workflow_dispatch:
  # Weekly rebuild (minute 0, hour 0, day-of-week 0).
  schedule:
    - cron: '0 0 * * 0'
  # Rebuild when the image sources or either workflow file changes on main.
  push:
    branches:
      - main
    paths:
      - 'yt-dlp/**'
      - '.github/workflows/build-yt-dlp.yml'
      - '.github/workflows/_build-image.yml'

# The called workflow cannot receive more permissions than are granted here.
permissions:
  contents: read
  packages: write
  id-token: write
  attestations: write

jobs:
  build:
    uses: ./.github/workflows/_build-image.yml
    with:
      image_name: yt-dlp
      context: yt-dlp
      platforms: linux/amd64,linux/arm64
      smoke_cmd: '--version'
      trivy_severity: CRITICAL,HIGH