Kfp Movie -

Kfp Movie -

# Hold out 20% of `df` as the test partition; the fixed random_state keeps
# the split identical from run to run.
# NOTE(review): `train_test_split` is presumably scikit-learn's and `df` a
# pandas DataFrame -- neither is defined in this snippet; confirm.
train, test = train_test_split(df, test_size=0.2, random_state=42)

# Write both partitions as Parquet files under OUT_DIR.
train.to_parquet(os.path.join(OUT_DIR, "train.parquet"))
test.to_parquet(os.path.join(OUT_DIR, "test.parquet"))

# Component spec for the ingestion step. It lives at module level because the
# YAML is whitespace-sensitive and is easier to read and check on its own.
# The container installs pandas, then runs an inline Python script (fed to
# `python -` through a heredoc) that downloads the MovieLens
# "ml-latest-small" archive into /mnt/data and extracts it there.
# NOTE: this is a normal (non-raw) string, so Python joins the line ending in
# "&& \" with the following one; the shell therefore receives
# `pip install pandas && python - <<'PY'` as a single line.
# NOTE(review): `volumeMounts` may not be a field the KFP component container
# spec accepts -- confirm the loader takes it, or mount the volume on the task
# in the pipeline definition instead.
INGEST_COMPONENT_SPEC = """
name: Ingest MovieLens
implementation:
  container:
    image: python:3.9-slim
    command:
    - sh
    - -c
    - |
      pip install pandas && \
      python - <<'PY'
      import os, urllib.request, zipfile
      DATA_URL = "https://files.grouplens.org/datasets/movielens/ml-latest-small.zip"
      DEST="/mnt/data"
      os.makedirs(DEST, exist_ok=True)
      zip_path=os.path.join(DEST,"ml-latest-small.zip")
      urllib.request.urlretrieve(DATA_URL, zip_path)
      with zipfile.ZipFile(zip_path) as z:
          z.extractall(DEST)
      print("✅ Ingestion done")
      PY
    args: []
    volumeMounts:
    - mountPath: /mnt/data
      name: data-volume
"""


def ingest_op():
    """Build the "Ingest MovieLens" component from its inline YAML spec.

    Returns:
        The result of ``components.load_component_from_text`` applied to
        ``INGEST_COMPONENT_SPEC`` (presumably a KFP component factory --
        ``components`` is imported outside this snippet; confirm).
    """
    return components.load_component_from_text(INGEST_COMPONENT_SPEC)

Goal: Pull the latest MovieLens dataset (e.g., `ml-latest-small`) from the public URL and store it in a shared volume (`/mnt/data`). train, test = train_test_split(df, test_size=0

# Directories under /mnt/data used by this step.
DATA_DIR = "/mnt/data/processed"
MODEL_DIR = "/mnt/data/model"

# Create the model directory up front; exist_ok=True makes this a no-op when
# it already exists.
os.makedirs(MODEL_DIR, exist_ok=True)

# NOTE(review): the source breaks off mid-statement here. The truncated text
# is kept verbatim below instead of being completed by guesswork:
# test = train_test_split(df