I am developing a Kubernetes (k8s) controller with kubebuilder that reconciles my custom resource 'test-pod-monitor-cr'. It is a cluster-scoped resource, and I get the error below when the controller goes through the reconcile process. Judging from the stack trace, the failure occurs inside the controller-runtime code itself.
The service account the controller pod uses has the privileges needed to get the CR (I tested this with a kubectl GET using the service account token).
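For reference, the access check I ran was along these lines (the API server address, token, and CA path are placeholders):

kubectl get podlogmonitors.monitoring.mydomain test-pod-monitor-cr \
  --server=https://<api-server> \
  --token=<service-account-token> \
  --certificate-authority=<path-to-ca.crt>

That request succeeds, yet the controller logs the following: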
2025-02-15T23:13:32Z INFO starting server {"name": "health probe", "addr": "[::]:8081"}
2025-02-15T23:13:32Z INFO Starting EventSource {"controller": "podlogmonitor", "controllerGroup": "monitoring.mydomain", "controllerKind": "PodLogMonitor", "source": "kind source: *v1.PodLogMonitor"}
2025-02-15T23:13:32Z INFO Starting Controller {"controller": "podlogmonitor", "controllerGroup": "monitoring.mydomain", "controllerKind": "PodLogMonitor"}
2025-02-15T23:13:32Z INFO Starting workers {"controller": "podlogmonitor", "controllerGroup": "monitoring.mydomain", "controllerKind": "PodLogMonitor", "worker count": 1}
2025-02-15T23:13:32Z ERROR Reconciler error {"controller": "podlogmonitor", "controllerGroup": "monitoring.mydomain", "controllerKind": "PodLogMonitor", "PodLogMonitor": {"name":"test-pod-monitor-cr"}, "namespace": "", "name": "test-pod-monitor-cr", "reconcileID": "66f143a5-a211-4eab-911c-f0109f0661d7", "error": "podlogmonitors.monitoring.mydomain \"test-pod-monitor-cr\" not found"}
sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).reconcileHandler
/go/pkg/mod/sigs.k8s.io/[email protected]/pkg/internal/controller/controller.go:332
sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).processNextWorkItem
/go/pkg/mod/sigs.k8s.io/[email protected]/pkg/internal/controller/controller.go:279
sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).Start.func2.2
/go/pkg/mod/sigs.k8s.io/[email protected]/pkg/internal/controller/controller.go:240
Below is my controller code. Could you help me troubleshoot this issue?
package controller

import (
	"context"
	"strings"
	"time"

	"github.com/go-logr/logr"
	corev1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/runtime"
	"k8s.io/client-go/kubernetes"
	"k8s.io/client-go/rest"
	ctrl "sigs.k8s.io/controller-runtime"
	"sigs.k8s.io/controller-runtime/pkg/client"
	"sigs.k8s.io/controller-runtime/pkg/log"
	"sigs.k8s.io/controller-runtime/pkg/reconcile"

	monitoringv1 "mydomain/m/api/v1"
)
// PodLogMonitorReconciler reconciles a PodLogMonitor object
type PodLogMonitorReconciler struct {
	client.Client
	Scheme    *runtime.Scheme
	Log       logr.Logger
	clientset *kubernetes.Clientset
}
// +kubebuilder:rbac:groups=monitoring.mydomain,resources=podlogmonitors,verbs=get;list;watch;create;update;patch;delete
// +kubebuilder:rbac:groups=monitoring.mydomain,resources=podlogmonitors/status,verbs=get;update;patch
// +kubebuilder:rbac:groups=monitoring.mydomain,resources=podlogmonitors/finalizers,verbs=update
// Reconcile is part of the main kubernetes reconciliation loop which aims to
// move the current state of the cluster closer to the desired state.
// TODO(user): Modify the Reconcile function to compare the state specified by
// the PodLogMonitor object against the actual cluster state, and then
// perform operations to make the cluster state reflect the state specified by
// the user.
//
// For more details, check Reconcile and its Result here:
// - https://pkg.go.dev/sigs.k8s.io/[email protected]/pkg/reconcile
func (r *PodLogMonitorReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
	_ = log.FromContext(ctx)
	log := r.Log.WithValues("podlogmonitor", req.NamespacedName)
	log.Info("Line - 1")

	// Fetch the PodLogMonitor resource
	var podLogMonitor monitoringv1.PodLogMonitor
	if err := r.Get(ctx, req.NamespacedName, &podLogMonitor); err != nil {
		log.Error(err, "unable to fetch PodLogMonitor")
		return reconcile.Result{}, client.IgnoreNotFound(err)
	}
	log.Info("Line - 2")

	// Fetch Pod logs from the specified namespace
	podList := &corev1.PodList{}
	if err := r.List(ctx, podList, client.MatchingLabels{"someLabel": podLogMonitor.Spec.Namespace}); err != nil {
		//if err := r.List(ctx, podList, client.InNamespace(podLogMonitor.Spec.Namespace)); err != nil {
		log.Error(err, "unable to list pods")
		return reconcile.Result{}, err
	}
	log.Info("Line - 3")

	// Iterate over the pods and check logs
	for _, pod := range podList.Items {
		log.Info("In the for loop")
		if err := r.checkPodLogs(ctx, &pod, podLogMonitor.Spec.LogMessage, &podLogMonitor); err != nil {
			log.Error(err, "unable to check pod logs", "pod", pod.Name)
			continue
		}
	}
	log.Info("Line - 4")

	// Save the updated status
	if err := r.Status().Update(ctx, &podLogMonitor); err != nil {
		log.Error(err, "unable to update PodLogMonitor status")
		return reconcile.Result{}, err
	}
	log.Info("Line - 5")

	// Return after processing
	return reconcile.Result{}, nil
}
func (r *PodLogMonitorReconciler) checkPodLogs(ctx context.Context, pod *corev1.Pod, logMessage string, podLogMonitor *monitoringv1.PodLogMonitor) error {
	log := r.Log.WithValues("pod", pod.Name)
	log.Info("Line - 6")

	// Check if we have the clientset; if not, create it
	if r.clientset == nil {
		log.Info("Creating a new clientset...")
		config, err := rest.InClusterConfig()
		if err != nil {
			log.Error(err, "unable to create in-cluster config")
			return err
		}
		clientset, err := kubernetes.NewForConfig(config)
		if err != nil {
			log.Error(err, "unable to create Kubernetes clientset")
			return err
		}
		r.clientset = clientset
	}

	// Get the logs of the pod using the Kubernetes API
	req := r.clientset.CoreV1().Pods(pod.Namespace).GetLogs(pod.Name, &corev1.PodLogOptions{})
	podLogs, err := req.Stream(ctx)
	if err != nil {
		log.Error(err, "unable to stream pod logs")
		return err
	}
	defer podLogs.Close()

	// Check if the log message exists in the pod logs
	buf := make([]byte, 2000)
	_, err = podLogs.Read(buf)
	if err != nil {
		log.Error(err, "error reading pod logs")
		return err
	}
	logStr := string(buf)
	if strings.Contains(logStr, logMessage) {
		log.Info("Found the specified log message, restarting pod")

		// Restart the pod by deleting it (it will be recreated by its deployment/statefulset/etc.)
		err = r.clientset.CoreV1().Pods(pod.Namespace).Delete(ctx, pod.Name, metav1.DeleteOptions{})
		if err != nil {
			log.Error(err, "unable to delete pod", "pod", pod.Name)
			return err
		}
		log.Info("Pod restarted", "pod", pod.Name)

		// Update the PodLogMonitor status
		podLogMonitor.Status.LastRestartedPodName = pod.Name
		podLogMonitor.Status.LastRestartTime = metav1.NewTime(time.Now())
		if err := r.Status().Update(ctx, podLogMonitor); err != nil {
			log.Error(err, "unable to update PodLogMonitor status")
			return err
		}
	}
	return nil
}
// SetupWithManager sets up the controller with the Manager.
func (r *PodLogMonitorReconciler) SetupWithManager(mgr ctrl.Manager) error {
	return ctrl.NewControllerManagedBy(mgr).
		For(&monitoringv1.PodLogMonitor{}).
		Complete(r)
}
Below is the CRD definition for 'podlogmonitors.monitoring.mydomain':
apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
  name: podlogmonitors.monitoring.mydomain
spec:
  group: monitoring.mydomain
  names:
    kind: PodLogMonitor
    listKind: PodLogMonitorList
    plural: podlogmonitors
    singular: podlogmonitor
  scope: Cluster
  versions:
    - name: v1
      served: true
      storage: true
      schema:
        openAPIV3Schema:
          type: object
          properties:
            spec:
              type: object
              properties:
                namespace:
                  type: string
                logMessage:
                  type: string
            status:
              type: object
              properties:
                lastRestartedPodName:
                  type: string
                lastRestartTime:
                  type: string
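FWIW, the scope of the CRD as installed in the cluster can be double-checked with:

kubectl get crd podlogmonitors.monitoring.mydomain -o jsonpath='{.spec.scope}'

which prints Cluster for this definition.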
Below is the CR definition for 'test-pod-monitor-cr':
apiVersion: monitoring.mydomain/v1
kind: PodLogMonitor
metadata:
  name: test-pod-monitor-cr
spec:
  namespace: hello-devops
  logMessage: "error restart test"