diff --git a/cmd/machine-config-controller/start.go b/cmd/machine-config-controller/start.go index f22c4dfee0..36b424f2e4 100644 --- a/cmd/machine-config-controller/start.go +++ b/cmd/machine-config-controller/start.go @@ -40,6 +40,8 @@ var ( templates string promMetricsListenAddress string resourceLockNamespace string + tlsCipherSuites []string + tlsMinVersion string } ) @@ -48,6 +50,8 @@ func init() { startCmd.PersistentFlags().StringVar(&startOpts.kubeconfig, "kubeconfig", "", "Kubeconfig file to access a remote cluster (testing only)") startCmd.PersistentFlags().StringVar(&startOpts.resourceLockNamespace, "resourcelock-namespace", metav1.NamespaceSystem, "Path to the template files used for creating MachineConfig objects") startCmd.PersistentFlags().StringVar(&startOpts.promMetricsListenAddress, "metrics-listen-address", "127.0.0.1:8797", "Listen address for prometheus metrics listener") + startCmd.PersistentFlags().StringSliceVar(&startOpts.tlsCipherSuites, "tls-cipher-suites", nil, "Comma-separated list of cipher suites for the metrics server") + startCmd.PersistentFlags().StringVar(&startOpts.tlsMinVersion, "tls-min-version", "VersionTLS12", "Minimum TLS version supported for the metrics server") } func runStartCmd(_ *cobra.Command, _ []string) { @@ -73,7 +77,7 @@ func runStartCmd(_ *cobra.Command, _ []string) { ctrlctx := ctrlcommon.CreateControllerContext(ctx, cb) - go ctrlcommon.StartMetricsListener(startOpts.promMetricsListenAddress, ctrlctx.Stop, ctrlcommon.RegisterMCCMetrics) + go ctrlcommon.StartMetricsListener(startOpts.promMetricsListenAddress, ctrlctx.Stop, ctrlcommon.RegisterMCCMetrics, startOpts.tlsMinVersion, startOpts.tlsCipherSuites) controllers := createControllers(ctrlctx) draincontroller := drain.New( diff --git a/cmd/machine-config-daemon/start.go b/cmd/machine-config-daemon/start.go index 535a9f4427..09e6061c40 100644 --- a/cmd/machine-config-daemon/start.go +++ b/cmd/machine-config-daemon/start.go @@ -43,6 +43,8 @@ var ( 
kubeletHealthzEnabled bool kubeletHealthzEndpoint string promMetricsURL string + tlsCipherSuites []string + tlsMinVersion string } ) @@ -57,6 +59,8 @@ func init() { startCmd.PersistentFlags().BoolVar(&startOpts.kubeletHealthzEnabled, "kubelet-healthz-enabled", true, "kubelet healthz endpoint monitoring") startCmd.PersistentFlags().StringVar(&startOpts.kubeletHealthzEndpoint, "kubelet-healthz-endpoint", "http://localhost:10248/healthz", "healthz endpoint to check health") startCmd.PersistentFlags().StringVar(&startOpts.promMetricsURL, "metrics-url", "127.0.0.1:8797", "URL for prometheus metrics listener") + startCmd.PersistentFlags().StringSliceVar(&startOpts.tlsCipherSuites, "tls-cipher-suites", nil, "Comma-separated list of cipher suites for the metrics server") + startCmd.PersistentFlags().StringVar(&startOpts.tlsMinVersion, "tls-min-version", "VersionTLS12", "Minimum TLS version supported for the metrics server") } //nolint:gocritic @@ -177,7 +181,7 @@ func runStartCmd(_ *cobra.Command, _ []string) { } // Start local metrics listener - go ctrlcommon.StartMetricsListener(startOpts.promMetricsURL, stopCh, daemon.RegisterMCDMetrics) + go ctrlcommon.StartMetricsListener(startOpts.promMetricsURL, stopCh, daemon.RegisterMCDMetrics, startOpts.tlsMinVersion, startOpts.tlsCipherSuites) ctrlctx := ctrlcommon.CreateControllerContext(ctx, cb) diff --git a/cmd/machine-config-operator/start.go b/cmd/machine-config-operator/start.go index 22d7323dea..86c7c9fd30 100644 --- a/cmd/machine-config-operator/start.go +++ b/cmd/machine-config-operator/start.go @@ -25,9 +25,8 @@ var ( } startOpts struct { - kubeconfig string - imagesFile string - promMetricsURL string + kubeconfig string + imagesFile string } ) @@ -35,7 +34,6 @@ func init() { rootCmd.AddCommand(startCmd) startCmd.PersistentFlags().StringVar(&startOpts.kubeconfig, "kubeconfig", "", "Kubeconfig file to access a remote cluster (testing only)") startCmd.PersistentFlags().StringVar(&startOpts.imagesFile, "images-json", "", 
"images.json file for MCO.") - startCmd.PersistentFlags().StringVar(&startOpts.promMetricsURL, "metrics-listen-address", "127.0.0.1:8797", "Listen address for prometheus metrics listener") } func runStartCmd(_ *cobra.Command, _ []string) { @@ -45,8 +43,6 @@ func runStartCmd(_ *cobra.Command, _ []string) { // This is 'main' context that we thread through the controller context and // the leader elections. Cancelling this is "stop everything, we are shutting down". runContext, runCancel := context.WithCancel(context.Background()) - stopCh := make(chan struct{}) - defer close(stopCh) // To help debugging, immediately log version klog.Infof("Version: %s (Raw: %s, Hash: %s)", version.ReleaseVersion, version.Raw, version.Hash) @@ -60,9 +56,6 @@ func runStartCmd(_ *cobra.Command, _ []string) { klog.Fatalf("error creating clients: %v", err) } - // start metrics listener - go ctrlcommon.StartMetricsListener(startOpts.promMetricsURL, stopCh, operator.RegisterMCOMetrics) - run := func(ctx context.Context) { go common.SignalHandler(runCancel) ctrlctx := ctrlcommon.CreateControllerContext(ctx, cb) diff --git a/install/0000_80_machine-config_00_service.yaml b/install/0000_80_machine-config_00_service.yaml index 68e5009562..764ac230d2 100644 --- a/install/0000_80_machine-config_00_service.yaml +++ b/install/0000_80_machine-config_00_service.yaml @@ -1,26 +1,5 @@ apiVersion: v1 kind: Service -metadata: - name: machine-config-operator - namespace: openshift-machine-config-operator - labels: - k8s-app: machine-config-operator - annotations: - include.release.openshift.io/ibm-cloud-managed: "true" - include.release.openshift.io/self-managed-high-availability: "true" - include.release.openshift.io/single-node-developer: "true" - service.beta.openshift.io/serving-cert-secret-name: mco-proxy-tls -spec: - type: ClusterIP - selector: - k8s-app: machine-config-operator - ports: - - name: metrics - port: 9001 - protocol: TCP ---- -apiVersion: v1 -kind: Service metadata: name: 
machine-config-controller namespace: openshift-machine-config-operator diff --git a/install/0000_80_machine-config_04_deployment.yaml b/install/0000_80_machine-config_04_deployment.yaml index 42bddaa8de..7ff8c9d6a1 100644 --- a/install/0000_80_machine-config_04_deployment.yaml +++ b/install/0000_80_machine-config_04_deployment.yaml @@ -43,30 +43,6 @@ spec: volumeMounts: - name: images mountPath: /etc/mco/images - - name: kube-rbac-proxy - image: placeholder.url.oc.will.replace.this.org/placeholdernamespace:kube-rbac-proxy - ports: - - containerPort: 9001 - name: metrics - protocol: TCP - args: - - --secure-listen-address=0.0.0.0:9001 - - --config-file=/etc/kube-rbac-proxy/config-file.yaml - - --tls-cipher-suites=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305,TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305 - - --upstream=http://127.0.0.1:8797 - - --logtostderr=true - - --tls-cert-file=/etc/tls/private/tls.crt - - --tls-private-key-file=/etc/tls/private/tls.key - resources: - requests: - cpu: 20m - memory: 50Mi - terminationMessagePolicy: FallbackToLogsOnError - volumeMounts: - - mountPath: /etc/tls/private - name: proxy-tls - - mountPath: /etc/kube-rbac-proxy - name: auth-proxy-config serviceAccountName: machine-config-operator nodeSelector: node-role.kubernetes.io/master: "" @@ -91,10 +67,4 @@ spec: - name: images configMap: name: machine-config-operator-images - - name: proxy-tls - secret: - secretName: mco-proxy-tls - - configMap: - name: kube-rbac-proxy - name: auth-proxy-config diff --git a/install/0000_80_machine-config_04_kube_rbac_proxy_config.yaml b/install/0000_80_machine-config_04_kube_rbac_proxy_config.yaml deleted file mode 100644 index 2da25d6d9a..0000000000 --- a/install/0000_80_machine-config_04_kube_rbac_proxy_config.yaml +++ /dev/null @@ -1,17 +0,0 @@ -apiVersion: v1 -kind: ConfigMap -metadata: - name: 
kube-rbac-proxy - namespace: openshift-machine-config-operator - annotations: - include.release.openshift.io/ibm-cloud-managed: "true" - include.release.openshift.io/self-managed-high-availability: "true" - include.release.openshift.io/single-node-developer: "true" -data: - config-file.yaml: |+ - authorization: - resourceAttributes: - apiVersion: v1 - resource: namespace - subresource: metrics - namespace: openshift-machine-config-operator \ No newline at end of file diff --git a/install/0000_90_machine-config_00_servicemonitor.yaml b/install/0000_90_machine-config_00_servicemonitor.yaml index 80b886ace2..a622883dd2 100644 --- a/install/0000_90_machine-config_00_servicemonitor.yaml +++ b/install/0000_90_machine-config_00_servicemonitor.yaml @@ -1,41 +1,5 @@ apiVersion: monitoring.coreos.com/v1 kind: ServiceMonitor -metadata: - name: machine-config-operator - namespace: openshift-machine-config-operator - labels: - k8s-app: machine-config-operator - annotations: - include.release.openshift.io/self-managed-high-availability: "true" - include.release.openshift.io/single-node-developer: "true" -spec: - endpoints: - - interval: 30s - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token - port: metrics - scheme: https - path: /metrics - relabelings: - - action: replace - regex: ;(.*) - replacement: $1 - separator: ";" - sourceLabels: - - node - - __meta_kubernetes_pod_node_name - targetLabel: node - tlsConfig: - caFile: /etc/prometheus/configmaps/serving-certs-ca-bundle/service-ca.crt - serverName: machine-config-operator.openshift-machine-config-operator.svc - namespaceSelector: - matchNames: - - openshift-machine-config-operator - selector: - matchLabels: - k8s-app: machine-config-operator ---- -apiVersion: monitoring.coreos.com/v1 -kind: ServiceMonitor metadata: name: machine-config-controller namespace: openshift-machine-config-operator diff --git a/manifests/machineconfigcontroller/deployment.yaml b/manifests/machineconfigcontroller/deployment.yaml 
index 6bc75c8ccb..002b1881e6 100644 --- a/manifests/machineconfigcontroller/deployment.yaml +++ b/manifests/machineconfigcontroller/deployment.yaml @@ -24,6 +24,8 @@ spec: - "--resourcelock-namespace={{.TargetNamespace}}" - "--v={{.LogLevel}}" - "--payload-version={{.ReleaseVersion}}" + - "--tls-cipher-suites={{join .TLSCipherSuites ","}}" + - "--tls-min-version={{.TLSMinVersion}}" resources: requests: cpu: 20m diff --git a/manifests/machineconfigdaemon/daemonset.yaml b/manifests/machineconfigdaemon/daemonset.yaml index f17d2068c9..65816e9820 100644 --- a/manifests/machineconfigdaemon/daemonset.yaml +++ b/manifests/machineconfigdaemon/daemonset.yaml @@ -32,6 +32,8 @@ spec: - "start" - "--payload-version={{.ReleaseVersion}}" - "--v={{.LogLevel}}" + - "--tls-cipher-suites={{join .TLSCipherSuites ","}}" + - "--tls-min-version={{.TLSMinVersion}}" resources: requests: cpu: 20m diff --git a/pkg/controller/common/metrics.go b/pkg/controller/common/metrics.go index cbf119452b..2e7f138858 100644 --- a/pkg/controller/common/metrics.go +++ b/pkg/controller/common/metrics.go @@ -5,7 +5,6 @@ import ( "crypto/tls" "fmt" "net/http" - "strings" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promhttp" @@ -32,18 +31,56 @@ var ( Name: "mcc_drain_err", Help: "logs failed drain", }, []string{"node"}) - // MCCPoolAlert logs when the pool configuration changes in a way the user should know. 
+ + // MCCPoolAlert logs when the pool configuration changes in a way the user should know MCCPoolAlert = prometheus.NewGaugeVec( prometheus.GaugeOpts{ Name: "mcc_pool_alert", Help: "pool status alert", }, []string{"node"}) + // MCCSubControllerState logs the state of the subcontrollers of the MCC MCCSubControllerState = prometheus.NewGaugeVec( prometheus.GaugeOpts{ Name: "mcc_sub_controller_state", Help: "state of sub-controllers in the MCC", }, []string{"subcontroller", "state", "object"}) + + // MCCState is the state of the machine config controller + // pause, updated, updating, degraded + MCCState = prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Name: "mco_state", + Help: "state of a specified pool", + }, []string{"node", "pool", "state", "reason"}) + + // MCCMachineCount is the total number of nodes in the pool + MCCMachineCount = prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Name: "mco_machine_count", + Help: "total number of machines in a specified pool", + }, []string{"pool"}) + + // MCCUpdatedMachineCount is the updated machines in the pool + MCCUpdatedMachineCount = prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Name: "mco_updated_machine_count", + Help: "total number of updated machines in specified pool", + }, []string{"pool"}) + + // MCCDegradedMachineCount is the degraded machines in the pool + MCCDegradedMachineCount = prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Name: "mco_degraded_machine_count", + Help: "total number of degraded machines in specified pool", + }, []string{"pool"}) + + // MCCUnavailableMachineCount is the unavailable machines in the pool + MCCUnavailableMachineCount = prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Name: "mco_unavailable_machine_count", + Help: "total number of unavailable machines in specified pool", + }, []string{"pool"}) ) func RegisterMCCMetrics() error { @@ -52,13 +89,18 @@ func RegisterMCCMetrics() error { MCCDrainErr, MCCPoolAlert, MCCSubControllerState, + MCCState, + MCCMachineCount, + 
MCCUpdatedMachineCount, + MCCDegradedMachineCount, + MCCUnavailableMachineCount, }) if err != nil { return fmt.Errorf("could not register machine-config-controller metrics: %w", err) } - // Initilize GuageVecs to ensure that metrics of type GuageVec are accessible from the dashboard even if without a logged value + // Initialize GaugeVecs to ensure that metrics of type GaugeVec are accessible from the dashboard even if without a logged value // Solution to OCPBUGS-20427: https://issues.redhat.com/browse/OCPBUGS-20427 OSImageURLOverride.WithLabelValues("initialize").Set(0) MCCDrainErr.WithLabelValues("initialize").Set(0) @@ -84,7 +126,7 @@ func RegisterMetrics(metrics []prometheus.Collector) error { } // StartMetricsListener is metrics listener via http on localhost -func StartMetricsListener(addr string, stopCh <-chan struct{}, registerFunc func() error) { +func StartMetricsListener(addr string, stopCh <-chan struct{}, registerFunc func() error, tlsMinVersion string, tlsCipherSuites []string) { if addr == "" { addr = DefaultBindAddress } @@ -96,15 +138,14 @@ func StartMetricsListener(addr string, stopCh <-chan struct{}, registerFunc func return } - klog.Infof("Starting metrics listener on %s", addr) + // Get TLS config from provided settings, or use defaults + tlsConfig := GetGoTLSConfig(tlsMinVersion, tlsCipherSuites) + + klog.Infof("Starting metrics listener on %s with TLS min version: %s", addr, tlsMinVersion) mux := http.NewServeMux() mux.Handle("/metrics", promhttp.Handler()) s := http.Server{ - TLSConfig: &tls.Config{ - MinVersion: tls.VersionTLS12, - NextProtos: []string{"http/1.1"}, - CipherSuites: cipherOrder(), - }, + TLSConfig: tlsConfig, TLSNextProto: make(map[string]func(*http.Server, *tls.Conn, http.Handler)), Addr: addr, Handler: mux} @@ -123,43 +164,3 @@ func StartMetricsListener(addr string, stopCh <-chan struct{}, registerFunc func klog.Infof("Metrics listener successfully stopped") } } - -func cipherOrder() []uint16 { - var first []uint16 - var 
second []uint16 - - allowable := func(c *tls.CipherSuite) bool { - // Disallow block ciphers using straight SHA1 - // See: https://tools.ietf.org/html/rfc7540#appendix-A - if strings.HasSuffix(c.Name, "CBC_SHA") { - return false - } - // 3DES is considered insecure - if strings.Contains(c.Name, "3DES") { - return false - } - return true - } - - for _, c := range tls.CipherSuites() { - for _, v := range c.SupportedVersions { - if v == tls.VersionTLS13 { - first = append(first, c.ID) - } - if v == tls.VersionTLS12 && allowable(c) { - inFirst := false - for _, id := range first { - if c.ID == id { - inFirst = true - break - } - } - if !inFirst { - second = append(second, c.ID) - } - } - } - } - - return append(first, second...) -} diff --git a/pkg/controller/node/node_controller.go b/pkg/controller/node/node_controller.go index 7aabcf24ee..2bd832be22 100644 --- a/pkg/controller/node/node_controller.go +++ b/pkg/controller/node/node_controller.go @@ -1319,7 +1319,13 @@ func (ctrl *Controller) syncMachineConfigPool(key string) error { } ctrlcommon.UpdateStateMetric(ctrlcommon.MCCSubControllerState, "machine-config-controller-node", "Sync Machine Config Pool", pool.Name) } - return ctrl.syncStatusOnly(pool) + + if err := ctrl.syncStatusOnly(pool); err != nil { + return err + } + + // Update metrics after syncing the pool status + return ctrl.syncMetrics() } // checkIfNodeHasInProgressTaint checks if the given node has in progress taint @@ -1708,3 +1714,35 @@ func (ctrl *Controller) isConfigOrBuildPresent(mosc *mcfgv1.MachineOSConfig, mos func (ctrl *Controller) isConfigAndBuildPresent(mosc *mcfgv1.MachineOSConfig, mosb *mcfgv1.MachineOSBuild) bool { return (mosc != nil && mosb != nil) } + +// syncMetrics refreshes the per-pool metrics (state and machine counts) for all pools. +// It returns an error only when the pools cannot be listed. +func (ctrl *Controller) syncMetrics() error { + pools, err := ctrl.mcpLister.List(labels.Everything()) + if err != nil { + return err + } + for _, pool := range pools { + // Find this pool's most recently transitioned True condition; it is logged as the pool state. + // Both variables are scoped to the iteration so one pool's condition never leaks into the next. + var
latestTime metav1.Time + var cond mcfgv1.MachineConfigPoolCondition + for _, condition := range pool.Status.Conditions { + if condition.Status == corev1.ConditionTrue && condition.LastTransitionTime.After(latestTime.Time) { + cond = condition + latestTime = cond.LastTransitionTime + } + } + + // Best effort: the lookup error is ignored; the count gauges below are updated regardless. + nodes, _ := helpers.GetNodesForPool(ctrl.mcpLister, ctrl.nodeLister, pool) + for _, node := range nodes { + ctrlcommon.MCCState.WithLabelValues(node.Name, pool.Name, string(cond.Type), cond.Reason).SetToCurrentTime() + } + ctrlcommon.MCCMachineCount.WithLabelValues(pool.Name).Set(float64(pool.Status.MachineCount)) + ctrlcommon.MCCUpdatedMachineCount.WithLabelValues(pool.Name).Set(float64(pool.Status.UpdatedMachineCount)) + ctrlcommon.MCCDegradedMachineCount.WithLabelValues(pool.Name).Set(float64(pool.Status.DegradedMachineCount)) + ctrlcommon.MCCUnavailableMachineCount.WithLabelValues(pool.Name).Set(float64(pool.Status.UnavailableMachineCount)) + } + return nil +} diff --git a/pkg/operator/metrics.go b/pkg/operator/metrics.go deleted file mode 100644 index 284025279c..0000000000 --- a/pkg/operator/metrics.go +++ /dev/null @@ -1,64 +0,0 @@ -package operator - -import ( - "fmt" - - ctrlcommon "github.com/openshift/machine-config-operator/pkg/controller/common" - - "github.com/prometheus/client_golang/prometheus" -) - -const ( - DefaultBindAddress = ":8797" -) - -// MCO Metrics -var ( - // mcoState is the state of the machine config operator - // pause, updated, updating, degraded - mcoState = prometheus.NewGaugeVec( - prometheus.GaugeOpts{ - Name: "mco_state", - Help: "state of a specified pool", - }, []string{"node", "pool", "state", "reason"}) - // mcoMachineCount is the total number of nodes in the pool - mcoMachineCount = prometheus.NewGaugeVec( - prometheus.GaugeOpts{ - Name: "mco_machine_count", - Help: "total number of machines in a specified pool", - }, []string{"pool"}) - // mcoUpdatedMachineCount is the
updated machines in the pool - mcoUpdatedMachineCount = prometheus.NewGaugeVec( - prometheus.GaugeOpts{ - Name: "mco_updated_machine_count", - Help: "total number of updated machines in specified pool", - }, []string{"pool"}) - // mcoDegradedMachineCount is the degraded machines in the pool - mcoDegradedMachineCount = prometheus.NewGaugeVec( - prometheus.GaugeOpts{ - Name: "mco_degraded_machine_count", - Help: "total number of degraded machines in specified pool", - }, []string{"pool"}) - // mcoUnavailableMachineCount is the degraded machines in the pool - mcoUnavailableMachineCount = prometheus.NewGaugeVec( - prometheus.GaugeOpts{ - Name: "mco_unavailable_machine_count", - Help: "total number of unavailable machines in specified pool", - }, []string{"pool"}) -) - -func RegisterMCOMetrics() error { - err := ctrlcommon.RegisterMetrics([]prometheus.Collector{ - mcoState, - mcoMachineCount, - mcoUpdatedMachineCount, - mcoDegradedMachineCount, - mcoUnavailableMachineCount, - }) - - if err != nil { - return fmt.Errorf("could not register machine-config-operator metrics: %w", err) - } - - return nil -} diff --git a/pkg/operator/operator_test.go b/pkg/operator/operator_test.go deleted file mode 100644 index cbc3389e7d..0000000000 --- a/pkg/operator/operator_test.go +++ /dev/null @@ -1,98 +0,0 @@ -package operator - -import ( - "fmt" - "testing" - - configv1 "github.com/openshift/api/config/v1" - features "github.com/openshift/api/features" - mcfgv1 "github.com/openshift/api/machineconfiguration/v1" - fakeconfigclientset "github.com/openshift/client-go/config/clientset/versioned/fake" - configlistersv1 "github.com/openshift/client-go/config/listers/config/v1" - ctrlcommon "github.com/openshift/machine-config-operator/pkg/controller/common" - "github.com/openshift/machine-config-operator/test/helpers" - "github.com/prometheus/client_golang/prometheus/testutil" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" - corev1 "k8s.io/api/core/v1" - 
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/util/uuid" - corelisterv1 "k8s.io/client-go/listers/core/v1" - "k8s.io/client-go/tools/cache" - "k8s.io/client-go/tools/record" -) - -func TestMetrics(t *testing.T) { - optr := &Operator{ - eventRecorder: &record.FakeRecorder{}, - fgHandler: ctrlcommon.NewFeatureGatesHardcodedHandler( - []configv1.FeatureGateName{features.FeatureGatePinnedImages}, []configv1.FeatureGateName{}, - ), - } - optr.vStore = newVersionStore() - - p1, p2 := helpers.NewMachineConfigPool("master", nil, helpers.MasterSelector, "v0"), helpers.NewMachineConfigPool("worker", nil, helpers.WorkerSelector, "v0") - p2.Status.MachineCount = 2 - p2.Status.UpdatedMachineCount = 1 - p2.Status.DegradedMachineCount = 1 - optr.mcpLister = &mockMCPLister{ - pools: []*mcfgv1.MachineConfigPool{p1, p2}, - } - - nodeIndexer := cache.NewIndexer(cache.MetaNamespaceKeyFunc, cache.Indexers{cache.NamespaceIndex: cache.MetaNamespaceIndexFunc}) - optr.nodeLister = corelisterv1.NewNodeLister(nodeIndexer) - nodeIndexer.Add(&corev1.Node{ - ObjectMeta: metav1.ObjectMeta{Name: "first-node", Labels: map[string]string{"node-role/worker": ""}}, - Status: corev1.NodeStatus{ - NodeInfo: corev1.NodeSystemInfo{ - KubeletVersion: "v1.21", - }, - }, - }) - - configMapIndexer := cache.NewIndexer(cache.MetaNamespaceKeyFunc, cache.Indexers{cache.NamespaceIndex: cache.MetaNamespaceIndexFunc}) - optr.mcoCmLister = corelisterv1.NewConfigMapLister(configMapIndexer) - - coName := fmt.Sprintf("test-%s", uuid.NewUUID()) - co := &configv1.ClusterOperator{ObjectMeta: metav1.ObjectMeta{Name: coName}} - optr.name = coName - kasOperator := &configv1.ClusterOperator{ - ObjectMeta: metav1.ObjectMeta{Name: "kube-apiserver"}, - Status: configv1.ClusterOperatorStatus{ - Versions: []configv1.OperandVersion{ - {Name: "kube-apiserver", Version: "1.21"}, - }, - }, - } - - operatorIndexer := cache.NewIndexer(cache.MetaNamespaceKeyFunc, cache.Indexers{cache.NamespaceIndex: 
cache.MetaNamespaceIndexFunc}) - optr.clusterOperatorLister = configlistersv1.NewClusterOperatorLister(operatorIndexer) - operatorIndexer.Add(co) - operatorIndexer.Add(kasOperator) - - configNode := &configv1.Node{ - ObjectMeta: metav1.ObjectMeta{Name: ctrlcommon.ClusterNodeInstanceName}, - Spec: configv1.NodeSpec{}, - } - configNodeIndexer := cache.NewIndexer(cache.MetaNamespaceKeyFunc, cache.Indexers{cache.NamespaceIndex: cache.MetaNamespaceIndexFunc}) - optr.nodeClusterLister = configlistersv1.NewNodeLister(configNodeIndexer) - configNodeIndexer.Add(configNode) - - optr.configClient = fakeconfigclientset.NewSimpleClientset(co, kasOperator) - err := optr.syncAll([]syncFunc{ - {name: "fn1", - fn: func(config *renderConfig, co *configv1.ClusterOperator) error { return nil }, - }, - }) - require.Nil(t, err) - - metric := testutil.ToFloat64(mcoMachineCount.WithLabelValues("worker")) - assert.Equal(t, metric, float64(2)) - - metric = testutil.ToFloat64(mcoUpdatedMachineCount.WithLabelValues("worker")) - assert.Equal(t, metric, float64(1)) - - metric = testutil.ToFloat64(mcoDegradedMachineCount.WithLabelValues("worker")) - assert.Equal(t, metric, float64(1)) - -} diff --git a/pkg/operator/status.go b/pkg/operator/status.go index c41fd949cb..99f9d0c49d 100644 --- a/pkg/operator/status.go +++ b/pkg/operator/status.go @@ -7,7 +7,6 @@ import ( "reflect" "sort" "strings" - "time" mcfgv1 "github.com/openshift/api/machineconfiguration/v1" @@ -26,7 +25,6 @@ import ( "github.com/openshift/machine-config-operator/pkg/apihelpers" ctrlcommon "github.com/openshift/machine-config-operator/pkg/controller/common" kcc "github.com/openshift/machine-config-operator/pkg/controller/kubelet-config" - "github.com/openshift/machine-config-operator/pkg/helpers" ) // syncVersion handles reporting the version to the clusteroperator @@ -296,35 +294,6 @@ func (optr *Operator) syncUpgradeableStatus(co *configv1.ClusterOperator) error return nil } -func (optr *Operator) syncMetrics() error { - 
pools, err := optr.mcpLister.List(labels.Everything()) - if err != nil { - return err - } - // set metrics per pool, we need to get the latest condition to log for the state - var latestTime metav1.Time - latestTime.Time = time.Time{} - var cond mcfgv1.MachineConfigPoolCondition - for _, pool := range pools { - for _, condition := range pool.Status.Conditions { - if condition.Status == corev1.ConditionTrue && condition.LastTransitionTime.After(latestTime.Time) { - cond = condition - latestTime = cond.LastTransitionTime - } - } - - nodes, _ := helpers.GetNodesForPool(optr.mcpLister, optr.nodeLister, pool) - for _, node := range nodes { - mcoState.WithLabelValues(node.Name, pool.Name, string(cond.Type), cond.Reason).SetToCurrentTime() - } - mcoMachineCount.WithLabelValues(pool.Name).Set(float64(pool.Status.MachineCount)) - mcoUpdatedMachineCount.WithLabelValues(pool.Name).Set(float64(pool.Status.UpdatedMachineCount)) - mcoDegradedMachineCount.WithLabelValues(pool.Name).Set(float64(pool.Status.DegradedMachineCount)) - mcoUnavailableMachineCount.WithLabelValues(pool.Name).Set(float64(pool.Status.UnavailableMachineCount)) - } - return nil -} - func (optr *Operator) syncClusterFleetEvaluation(co *configv1.ClusterOperator) error { unexpectedEvaluations, err := optr.generateClusterFleetEvaluations() diff --git a/pkg/operator/sync.go b/pkg/operator/sync.go index 5cb26773d5..caadde4c58 100644 --- a/pkg/operator/sync.go +++ b/pkg/operator/sync.go @@ -231,10 +231,6 @@ func (optr *Operator) syncAll(syncFuncs []syncFunc) error { return fmt.Errorf("error updating cluster operator status: %w", syncClusterFleetEvaluationErr) } - if err := optr.syncMetrics(); err != nil { - return fmt.Errorf("error syncing metrics: %w", err) - } - if optr.inClusterBringup && syncErr.err == nil { klog.Infof("Initialization complete") optr.inClusterBringup = false @@ -1673,10 +1669,6 @@ func (optr *Operator) syncRequiredMachineConfigPools(config *renderConfig, co *c // Let's start with a 10 minute 
timeout per "required" node. if err := wait.PollUntilContextTimeout(ctx, time.Second, time.Duration(requiredMachineCount*10)*time.Minute, false, func(_ context.Context) (bool, error) { - if err := optr.syncMetrics(); err != nil { - return false, err - } - if lastErr != nil { // In this case, only the status extension field is updated. newCOStatus := co.Status.DeepCopy() diff --git a/test/e2e-2of2/mco_test.go b/test/e2e-2of2/mco_test.go index e4c5b28c4d..856e6caf04 100644 --- a/test/e2e-2of2/mco_test.go +++ b/test/e2e-2of2/mco_test.go @@ -160,7 +160,7 @@ func TestMetrics(t *testing.T) { t.Cleanup(e2e_shared_test.MutateNodeAndWait(t, cs, &node, mcp)) if err := wait.Poll(5*time.Second, 5*time.Minute, func() (bool, error) { - svc, err := cs.Services("openshift-machine-config-operator").Get(context.TODO(), "machine-config-operator", metav1.GetOptions{}) + svc, err := cs.Services("openshift-machine-config-operator").Get(context.TODO(), "machine-config-controller", metav1.GetOptions{}) require.Nil(t, err) // Extract the IP and port and build the URL