Unknown error when installing on self-managed cluster via Ansible

Hello,

We are trying to install and run Kafka and its components using Ansible.
Following the GitHub source for playbooks and inventories defined in the 6.2.2-post version.
https://github.com/confluentinc/cp-ansible/tree/6.2.2-post

The infrastructure for Kafka nodes is as follows:
Brokers nodes: 3
Zookeeper nodes: 3
Kafka connect nodes: 2
Kafka control center node: 1
Kafka REST proxy nodes: 2
Kafka schema registry nodes: 2

We are currently using a self-signed SSL certificate, and LDAP and RBAC settings have been set up. We have disabled the SSL part for the brokers to attempt to get the environment up and running, as we are struggling to sort out some of the security parts (certs and keystores).

Our inventory is as follows:

---
# Production Inventory
all:
  vars:
    #Ansible
    # For debugging OpenLdap
    controller_mds_private_key_stat:
      stat:
        exists: false
#    mask_sensitive_logs: false

    ansible_connection: ssh
    ansible_user: root
    ansible_become: true
    ansible_ssh_private_key_file: ~/.ssh/id_rsa

    #Cluster config
    kafka_broker_cluster_name: prod_broker
    schema_registry_cluster_name: prod_schema
    kafka_connect_cluster_name: prod_connect

    #These are not needed when we are using a specific branch to install
    #confluent_package_version: 6.2.2
    #confluent_server_enabled: true

    #Monitoring
    jmxexporter_enabled: true
    jolokia_enabled: true

    #Proxy
    proxy_server: '<proxy_address>'
    proxy_env:
      http_proxy: '{{ proxy_server }}'
      https_proxy: '{{ proxy_server }}'
      no_proxy: '<list of hosts>'

    inventory: 'production'
    beat_tags:
      - 'kafka'
    reboot: true

    # License
    confluent_license: ''
    schema_registry_custom_properties:
      confluent.license: "{{ confluent_license }}"
    kafka_connect_custom_properties:
      confluent.license: "{{ confluent_license }}"
    control_center_custom_properties:
      confluent.license: "{{ confluent_license }}"
    kafka_rest_custom_properties:
      confluent.license: "{{ confluent_license }}"

    #SSL Config
    regenerate_ca: true
    regenerate_keystore_and_truststore: true
    ssl_enabled: true
    sasl_protocol: plain

    #Broker SSL config
    #kafka_broker_custom_listeners:
      #client_listener:
        #name: CLIENT
        #port: 9093
        #ssl_enabled: true
        #ssl_mutual_auth_enabled: true
        #sasl_protocol: plain

    #For LDAP Authentication
    kafka_broker_custom_properties:
      confluent.license: "{{ confluent_license }}"
      listener.name.client.plain.sasl.jaas.config: 'org.apache.kafka.common.security.plain.PlainLoginModule required username="broker" password="<password>";'

      listener.name.client.plain.sasl.server.callback.handler.class: 'io.confluent.security.auth.provider.ldap.LdapAuthenticateCallbackHandler'

      # Authentication of confluent server to ldap server
      ldap.java.naming.factory.initial: com.sun.jndi.ldap.LdapCtxFactory
      ldap.com.sun.jndi.ldap.read.timeout: 3000
      #For active directory the port might be different
      ldap.java.naming.provider.url: <ldapURL>
      # this is the auth you use in ldapsearch. This user should be able to search in the entire
      # LDAP tree
      ldap.java.naming.security.principal: cn=confluent_auth,ou=system-ids,o=nwu
      # The password for that user
      ldap.java.naming.security.credentials: <password>
      ldap.java.naming.security.authentication: simple

      # User Configuration
      ldap.user.search.scope: 2
      ldap.user.search.base: o=nwu
      ldap.user.name.attribute: cn
      ldap.user.object.class: inetOrgPerson

      # Group Configuration
      ldap.search.mode: GROUPS
      ldap.group.search.base: ou=groups,o=nwu
      ldap.group.name.attribute: cn
      ldap.group.object.class: groupOfNames
      ldap.group.member.attribute: member
      ldap.group.member.attribute.pattern: cn=(\\w+),.*
      ldap.group.search.scope: 2

      # Filters
      ldap.user.search.filter: '(|(memberOf=cn=confluent_administrators,ou=orgunits,ou=groups,o=nwu)(memberOf=cn=confluent_components,ou=groups,o=nwu)(memberOf=cn=producers,ou=groups,o=nwu))'
      ldap.group.search.filter: '(description=confluent)'

    # RBAC usernames & passwords for kafka
    rbac_enabled: true
    #regenerate_token_pem: false
    #create_mds_certs: true
    mds_super_user: <username>
    mds_super_user_password: <password>
    kafka_broker_ldap_user: <username>
    kafka_broker_ldap_password: <password>
    schema_registry_ldap_user: <username>
    schema_registry_ldap_password: <password>
    kafka_connect_ldap_user: <username>
    kafka_connect_ldap_password: <password>
    control_center_ldap_user: <username>
    control_center_ldap_password: <password>
    kafka_rest_ldap_user: <username>
    kafka_rest_ldap_password: <password>

    token_services_public_pem_file: ../files/public.pem
    token_services_private_pem_file: ../files/tokenKeypair.pem

zookeeper:
  vars:
    vmLVMs:
      - { vg: data, lv: data, path: /var/lib/kafka/data, pvs: '/dev/sdb1', owner: 'cp-kafka', group: 'confluent' }
  hosts:
    <zookeeper_fqdn>:
      ip_addr: <ip>
    <zookeeper_fqdn>:
      ip_addr: <ip>
    <zookeeper_fqdn>:
      ip_addr: <ip>

kafka_broker:
  vars:
    vmLVMs:
      - { vg: data, lv: data, path: /var/lib/kafka/data, pvs: '/dev/sdb1', owner: 'cp-kafka', group: 'confluent' }
    filebeat_modules:
      - 'system'
      - 'kafka'
    metricbeat_modules:
      - 'system'
      - 'kafka'
  hosts:
    <broker_fqdn>:
      ip_addr: <ip>
      broker_id: 1
      kafka_broker_custom_properties:
        broker.rack: 'dc1'
    <broker_fqdn>:
      ip_addr: <ip>
      broker_id: 2
      kafka_broker_custom_properties:
        broker.rack: 'dc1'
    <broker_fqdn>:
      ip_addr: <ip>
      broker_id: 3
      kafka_broker_custom_properties:
        broker.rack: 'dc2'

schema_registry:
  hosts:
    <schema_fqdn>:
      ip_addr: <ip>
    <schema_fqdn>:
      ip_addr: <ip>

kafka_rest:
  hosts:
    <rest_fqdn>:
      ip_addr: <ip>
    <rest_fqdn>:
      ip_addr: <ip>

kafka_connect:
  hosts:
    <connect_fqdn>:
      ip_addr: <ip>
    <connect_fqdn>:
      ip_addr: <ip>

control_center:
  vars:
    vmLVMs:
      - { vg: data, lv: data, path: /var/lib/kafka/data, pvs: '/dev/sdb1', owner: 'cp-kafka', group: 'confluent' }
  hosts:
    <control_fqdn>:
      ip_addr: <ip>

Task on which Ansible fails:

TASK [confluent.kafka_broker : Register Kafka Cluster] *****************************************************************
fatal: [<BROKER_URL>]: FAILED! => {
    "changed": false,
    "connection": "close",
    "content": "{\"status_code\":500,\"message\":\"Error updating Cluster Registry\",\"type\":\"CLUSTER REGISTRY UPDATE\"}",
    "content_length": "96",
    "content_type": "application/json",
    "date": "Tue, 17 May 2022 16:21:06 GMT",
    "elapsed": 0,
    "invocation": {
        "module_args": {
            "attributes": null,
            "backup": null,
            "body": [
                {
                    "clusterName": "prod_broker",
                    "hosts": [
                        {
                            "host": "<BROKER_URL>", // Broker 1
                            "port": 9092
                        },
                        {
                            "host": "<BROKER_URL>", // broker 2
                            "port": 9092
                        },
                        {
                            "host": "<BROKER_URL>", // broker 3
                            "port": 9092
                        }
                    ],
                    "protocol": "SASL_SSL",
                    "scope": {
                        "clusters": {
                            "kafka-cluster": "XXXXXXXXXXXXXX"
                        }
                    }
                }
            ],
            "body_format": "json",
            "client_cert": null,
            "client_key": null,
            "content": null,
            "creates": null,
            "delimiter": null,
            "dest": null,
            "directory_mode": null,
            "follow": false,
            "follow_redirects": "safe",
            "force": false,
            "force_basic_auth": true,
            "group": null,
            "headers": {
                "Content-Type": "application/json"
            },
            "http_agent": "ansible-httpget",
            "method": "POST",
            "mode": null,
            "owner": null,
            "regexp": null,
            "remote_src": null,
            "removes": null,
            "return_content": false,
            "selevel": null,
            "serole": null,
            "setype": null,
            "seuser": null,
            "src": null,
            "status_code": [
                "204"
            ],
            "timeout": 30,
            "unix_socket": null,
            "unsafe_writes": null,
            "url": "<BROKER_URL>:8090/security/1.0/registry/clusters",
            "url_password": "VALUE_SPECIFIED_IN_NO_LOG_PARAMETER",
            "url_username": "<USER_NAME>",
            "use_proxy": true,
            "validate_certs": false
        }
    },
    "json": {
        "message": "Error updating Cluster Registry",
        "status_code": 500,
        "type": "CLUSTER REGISTRY UPDATE"
    },
    "msg": "Status code was 500 and not [204]: HTTP Error 500: Internal Server Error",
    "redirected": false,
    "status": 500,
    "url": "<BROKER_URL>:8090/security/1.0/registry/clusters"
}

If anyone has any idea what is going wrong or where we can look for an issue that would be greatly appreciated. Thanks

hey @carlv
welcome to the forum :slight_smile:

any logfiles available?
did you try to call the uri manually?

best,
michael

Hi @mmuehlbeyer

Thanks, hope I can help and get help where possible :)

I can add the Ansible logs and logs from the broker nodes, so I will do so shortly.

If I navigate to the uri in a browser I get a 401 error, to be expected. When I try via postman with login credentials I get back a 200 OK