5 Replies Latest reply on Mar 4, 2019 2:24 PM by haohong

    WARN  [org.jgroups.protocols.TCP] JGRP000012: discarded message from different cluster xsite(our cluster is cluster).

    haohong

      When I set up Infinispan in cross-site mode, I get this error:

      10:19:38,082 WARN [org.jgroups.protocols.TCP] (jgroups-4,cache-am-v2-0.node.rcdn-dev.coi) JGRP000012: discarded message from different cluster xsite2 (our cluster is cluster2). Sender was null:rcdn-dev (received 3 identical messages from null:rcdn-dev in the last 61063 ms)

      The "xsite2" channel is used for communication between different sites.

      The "cluster2" channel is used for communication between nodes within the same site.

      The following is my config:

       


        <subsystem xmlns="urn:infinispan:server:jgroups:9.3">
        <channels default="cluster2">
        <channel name="cluster2"/>
        <channel name="xsite2" stack="tcp2"/>
        </channels>
        <stacks default="${jboss.default.jgroups.stack:kubernetes2}">
        <stack name="tcp2">
        <transport type="TCP" socket-binding="jgroups-tcp">
        <property name="external_addr">${jgroups.tcp.external_addr:}</property>
        </transport>
        <protocol type="TCPPING">
        <property name="initial_hosts">${jgroups.tcpping.initial_hosts:}</property>
        <property name="ergonomics">false</property>
        </protocol>
        <protocol type="MERGE3">
        <property name="min_interval">10000</property>
        <property name="max_interval">30000</property>
        </protocol>
        <protocol type="FD_SOCK" socket-binding="jgroups-tcp-fd"/>
        <protocol type="FD_ALL">
        <property name="timeout">60000</property>
        <property name="interval">15000</property>
        <property name="timeout_check_interval">5000</property>
        </protocol>
        <protocol type="VERIFY_SUSPECT">
        <property name="timeout">5000</property>
        </protocol>
        <protocol type="SYM_ENCRYPT">
        <property name="keystore_type">pkcs12</property>
        <property name="sym_algorithm">AES</property>
        <property name="keystore_name">${env.ISPNKEYSTORE_REPLICATION}</property>
        <property name="store_password">${env.KEYSTORE_PASSWORD}</property>
        <property name="alias">replication</property>
        </protocol>
        <protocol type="pbcast.NAKACK2">
        <property name="use_mcast_xmit">false</property>
        <property name="xmit_interval">100</property>
        <property name="xmit_table_num_rows">50</property>
        <property name="xmit_table_msgs_per_row">1024</property>
        <property name="xmit_table_max_compaction_time">30000</property>
        <property name="resend_last_seqno">true</property>
        </protocol>
        <protocol type="UNICAST3">
        <property name="xmit_interval">100</property>
        <property name="xmit_table_num_rows">50</property>
        <property name="xmit_table_msgs_per_row">1024</property>
        <property name="xmit_table_max_compaction_time">30000</property>
        <property name="conn_expiry_timeout">0</property>
        </protocol>
        <protocol type="pbcast.STABLE">
        <property name="stability_delay">500</property>
        <property name="desired_avg_gossip">5000</property>
        <property name="max_bytes">1M</property>
        </protocol>
        <protocol type="pbcast.GMS">
        <property name="print_local_addr">true</property>
        <property name="join_timeout">${jgroups.join_timeout:5000}</property>
        </protocol>
        <protocol type="MFC">
        <property name="max_credits">2m</property>
        <property name="min_threshold">0.40</property>
        </protocol>
        <protocol type="FRAG3"/>
        <protocol type="RSVP"/>
        </stack>
        <stack name="kubernetes2">
        <transport type="TCP" socket-binding="jgroups-tcp">
        <property name="logical_addr_cache_expiration">360000</property>
        </transport>
        <protocol type="kubernetes.KUBE_PING"/>
        <protocol type="MERGE3">
        <property name="min_interval">10000</property>
        <property name="max_interval">30000</property>
        </protocol>
        <protocol type="FD_SOCK" socket-binding="jgroups-tcp-fd"/>
        <protocol type="FD_ALL">
        <property name="timeout">60000</property>
        <property name="interval">15000</property>
        <property name="timeout_check_interval">5000</property>
        </protocol>
        <protocol type="VERIFY_SUSPECT">
        <property name="timeout">5000</property>
        </protocol>
        <protocol type="SYM_ENCRYPT">
        <property name="keystore_type">pkcs12</property>
        <property name="sym_algorithm">AES</property>
        <property name="keystore_name">${env.ISPNKEYSTORE_REPLICATION}</property>
        <property name="store_password">${env.KEYSTORE_PASSWORD}</property>
        <property name="alias">replication</property>
        </protocol>
        <protocol type="pbcast.NAKACK2">
        <property name="use_mcast_xmit">false</property>
        <property name="xmit_interval">100</property>
        <property name="xmit_table_num_rows">50</property>
        <property name="xmit_table_msgs_per_row">1024</property>
        <property name="xmit_table_max_compaction_time">30000</property>
        <property name="resend_last_seqno">true</property>
        </protocol>
        <protocol type="UNICAST3">
        <property name="xmit_interval">100</property>
        <property name="xmit_table_num_rows">50</property>
        <property name="xmit_table_msgs_per_row">1024</property>
        <property name="xmit_table_max_compaction_time">30000</property>
        <property name="conn_expiry_timeout">0</property>
        </protocol>
        <protocol type="pbcast.STABLE">
        <property name="stability_delay">500</property>
        <property name="desired_avg_gossip">5000</property>
        <property name="max_bytes">1M</property>
        </protocol>
        <protocol type="pbcast.GMS">
        <property name="print_local_addr">true</property>
        <property name="join_timeout">${jgroups.join_timeout:5000}</property>
        </protocol>
        <protocol type="MFC">
        <property name="max_credits">2m</property>
        <property name="min_threshold">0.40</property>
        </protocol>
        <protocol type="FRAG3"/>
        <relay site="${env.LOCAL_SITE}">
        <property name="relay_multicasts">false</property>
        <property name="max_site_masters">3</property>
        </relay>
        </stack>
        </stacks>
        </subsystem>
        ...........
        <interfaces>
        <interface name="management">
        <inet-address value="${jboss.bind.address.management:127.0.0.1}"/>
        </interface>
        <interface name="public">
        <inet-address value="${jboss.bind.address:127.0.0.1}"/>
        </interface>
        </interfaces>
        <socket-binding-group name="standard-sockets" default-interface="public" port-offset="${jboss.socket.binding.port-offset:0}">
        <socket-binding name="management-http" interface="management" port="${jboss.management.http.port:9990}"/>
        <socket-binding name="management-https" interface="management" port="${jboss.management.https.port:9993}"/>
        <socket-binding name="hotrod" port="11222"/>
        <socket-binding name="hotrod-internal" port="11223"/>
        <socket-binding name="hotrod-multi-tenancy" port="11224"/>
        <socket-binding name="jgroups-mping" port="0" multicast-address="${jboss.default.multicast.address:234.99.54.14}" multicast-port="45700"/>
        <socket-binding name="jgroups-tcp" port="7660"/>
        <socket-binding name="jgroups-tcp-fd" port="57660"/>
        <socket-binding name="jgroups-udp" port="55200" multicast-address="${jboss.default.multicast.address:234.99.54.14}" multicast-port="45688"/>
        <socket-binding name="jgroups-udp-fd" port="54200"/>
        <socket-binding name="memcached" port="11211"/>
        <socket-binding name="rest" port="8080"/>
        <socket-binding name="rest-multi-tenancy" port="8081"/>
        <socket-binding name="rest-ssl" port="8443"/>
        <socket-binding name="txn-recovery-environment" port="4712"/>
        <socket-binding name="txn-status-manager" port="4713"/>
        <socket-binding name="websocket" port="8181"/>
        <outbound-socket-binding name="remote-store-hotrod-server">
        <remote-destination host="remote-host" port="11222"/>
        </outbound-socket-binding>
        <outbound-socket-binding name="remote-store-rest-server">
        <remote-destination host="remote-host" port="8080"/>
        </outbound-socket-binding>
        </socket-binding-group>
      </server>

      If you have any suggestions, please let me know.

      Thanks,

      Haoran Hong

        • 1. Re: WARN  [org.jgroups.protocols.TCP] JGRP000012: discarded message from different cluster xsite(our cluster is cluster).
          dan.berindei

          I would guess it's because both channels are configured with the same bind address and port.

           

          In your tcp2 config you're setting TCP.external_addr, but that only tells TCP to tell the other nodes that they should connect to that external address. The channel still binds to bind_addr, and relies on a port forwarding rule to send packets from the external address to the bind address.

           

          The 2 channels can't bind to the same port, but TCP can move to the next free port (up to port_range). If the kubernetes2 stack starts first and binds to the first port in the range, it will stay in TCPPING's initial_hosts list, and other instances that try to connect to the xsite2 cluster will connect to it first.

          • 2. Re: WARN  [org.jgroups.protocols.TCP] JGRP000012: discarded message from different cluster xsite(our cluster is cluster).
            haohong

            Thanks Dan,

            You mean that I need to expose another port for the kubernetes2 channel, right?

            like:

            This is for tcp2 channel.

            <socket-binding name="jgroups-tcp" port="7660"/>
            <socket-binding name="jgroups-tcp-fd" port="57660"/>

            This is for kubernetes2 channel

            <socket-binding name="jgroups-tcp2" port="7670"/>
            <socket-binding name="jgroups-tcp2-fd" port="57670"/>

            • 3. Re: WARN  [org.jgroups.protocols.TCP] JGRP000012: discarded message from different cluster xsite(our cluster is cluster).
              haohong

              The following is my previous config. It uses an older version of Infinispan, but the JGroups subsystem config is the same, and it has been working normally.

              So I am very confused about why this error started happening after I upgraded Infinispan.

               

               

                  <subsystem

                    xmlns="urn:infinispan:server:jgroups:9.0">

                    <channels default="cluster">

                      <channel name="cluster"/>

                      <channel name="xsite" stack="tcp"/>

                    </channels>

                    <stacks default="${jboss.default.jgroups.stack:kubernetes}">

                      <stack name="tcp">

                        <transport type="TCP" socket-binding="jgroups-tcp">

                          <property name="external_addr">${jgroups.tcp.external_addr:}</property>

                        </transport>

                        <protocol type="TCPPING">

                          <property name="initial_hosts">${jgroups.tcpping.initial_hosts:}</property>

                          <property name="ergonomics">false</property>

                        </protocol>

                        <protocol type="MERGE3">

                          <property name="min_interval">10000</property>

                          <property name="max_interval">30000</property>

                        </protocol>

                        <protocol type="FD_SOCK" socket-binding="jgroups-tcp-fd"/>

                        <protocol type="FD_ALL">

                          <property name="timeout">60000</property>

                          <property name="interval">15000</property>

                          <property name="timeout_check_interval">5000</property>

                        </protocol>

                        <protocol type="VERIFY_SUSPECT">

                          <property name="timeout">5000</property>

                        </protocol>

                        <protocol type="SYM_ENCRYPT">

                          <property name="keystore_type">pkcs12</property>

                          <property name="sym_algorithm">AES</property>

                          <property name="encrypt_entire_message">true</property>

                          <property name="keystore_name">${env.ISPNKEYSTORE_REPLICATION}</property>

                          <property name="store_password">${env.KEYSTORE_PASSWORD}</property>

                          <property name="alias">replication</property>

                        </protocol>

                        <protocol type="pbcast.NAKACK2">

                          <property name="use_mcast_xmit">false</property>

                          <property name="xmit_interval">100</property>

                          <property name="xmit_table_num_rows">50</property>

                          <property name="xmit_table_msgs_per_row">1024</property>

                          <property name="xmit_table_max_compaction_time">30000</property>

                          <property name="resend_last_seqno">true</property>

                        </protocol>

                        <protocol type="UNICAST3">

                          <property name="xmit_interval">100</property>

                          <property name="xmit_table_num_rows">50</property>

                          <property name="xmit_table_msgs_per_row">1024</property>

                          <property name="xmit_table_max_compaction_time">30000</property>

                          <property name="conn_expiry_timeout">0</property>

                        </protocol>

                        <protocol type="pbcast.STABLE">

                          <property name="stability_delay">500</property>

                          <property name="desired_avg_gossip">5000</property>

                          <property name="max_bytes">1M</property>

                        </protocol>

                        <protocol type="pbcast.GMS">

                          <property name="print_local_addr">true</property>

                          <property name="install_view_locally_first">true</property>

                          <property name="join_timeout">${jgroups.join_timeout:5000}</property>

                        </protocol>

                        <protocol type="MFC">

                          <property name="max_credits">2m</property>

                          <property name="min_threshold">0.40</property>

                        </protocol>

                        <protocol type="FRAG3"/>

                        <protocol type="RSVP"/>

                      </stack>

                      <stack name="kubernetes">

                        <transport type="TCP" socket-binding="jgroups-tcp">

                          <property name="logical_addr_cache_expiration">360000</property>

                        </transport>

                        <protocol type="kubernetes.KUBE_PING"/>

                        <protocol type="MERGE3">

                          <property name="min_interval">10000</property>

                          <property name="max_interval">30000</property>

                        </protocol>

                        <protocol type="FD_SOCK" socket-binding="jgroups-tcp-fd"/>

                        <protocol type="FD_ALL">

                          <property name="timeout">60000</property>

                          <property name="interval">15000</property>

                          <property name="timeout_check_interval">5000</property>

                        </protocol>

                        <protocol type="VERIFY_SUSPECT">

                          <property name="timeout">5000</property>

                        </protocol>

                        <protocol type="SYM_ENCRYPT">

                          <property name="keystore_type">pkcs12</property>

                          <property name="sym_algorithm">AES</property>

                          <property name="encrypt_entire_message">true</property>

                          <property name="keystore_name">${env.ISPNKEYSTORE_REPLICATION}</property>

                          <property name="store_password">${env.KEYSTORE_PASSWORD}</property>

                          <property name="alias">replication</property>

                        </protocol>

                        <protocol type="pbcast.NAKACK2">

                          <property name="use_mcast_xmit">false</property>

                          <property name="xmit_interval">100</property>

                          <property name="xmit_table_num_rows">50</property>

                          <property name="xmit_table_msgs_per_row">1024</property>

                          <property name="xmit_table_max_compaction_time">30000</property>

                          <property name="resend_last_seqno">true</property>

                        </protocol>

                        <protocol type="UNICAST3">

                          <property name="xmit_interval">100</property>

                          <property name="xmit_table_num_rows">50</property>

                          <property name="xmit_table_msgs_per_row">1024</property>

                          <property name="xmit_table_max_compaction_time">30000</property>

                          <property name="conn_expiry_timeout">0</property>

                        </protocol>

                        <protocol type="pbcast.STABLE">

                          <property name="stability_delay">500</property>

                          <property name="desired_avg_gossip">5000</property>

                          <property name="max_bytes">1M</property>

                        </protocol>

                        <protocol type="pbcast.GMS">

                          <property name="print_local_addr">true</property>

                          <property name="install_view_locally_first">true</property>

                          <property name="join_timeout">${jgroups.join_timeout:5000}</property>

                        </protocol>

                        <protocol type="MFC">

                          <property name="max_credits">2m</property>

                          <property name="min_threshold">0.40</property>

                        </protocol>

                        <protocol type="FRAG3"/>

                        <relay site="${env.LOCAL_SITE}">

                          <property name="relay_multicasts">false</property>

                          <property name="max_site_masters">3</property>

                        </relay>

                      </stack>

                    </stacks>

                  </subsystem>

                ................

                  <interfaces>

                    <interface name="management">

                      <inet-address value="${jboss.bind.address.management:127.0.0.1}"/>

                    </interface>

                    <interface name="public">

                      <inet-address value="${jboss.bind.address:127.0.0.1}"/>

                    </interface>

                  </interfaces>

                  <socket-binding-group name="standard-sockets" default-interface="public" port-offset="${jboss.socket.binding.port-offset:0}">

                    <socket-binding name="management-http" interface="management" port="${jboss.management.http.port:9990}"/>

                    <socket-binding name="management-https" interface="management" port="${jboss.management.https.port:9993}"/>

                    <socket-binding name="hotrod" port="11222"/>

                    <socket-binding name="hotrod-internal" port="11223"/>

                    <socket-binding name="hotrod-multi-tenancy" port="11224"/>

                    <socket-binding name="jgroups-mping" port="0" multicast-address="${jboss.default.multicast.address:234.99.54.14}" multicast-port="45700"/>

                    <socket-binding name="jgroups-tcp" port="7600"/>

                    <socket-binding name="jgroups-tcp-fd" port="57600"/>

                    <socket-binding name="jgroups-udp" port="55200" multicast-address="${jboss.default.multicast.address:234.99.54.14}" multicast-port="45688"/>

                    <socket-binding name="jgroups-udp-fd" port="54200"/>

                    <socket-binding name="memcached" port="11211"/>

                    <socket-binding name="rest" port="8080"/>

                    <socket-binding name="rest-multi-tenancy" port="8081"/>

                    <socket-binding name="rest-ssl" port="8443"/>

                    <socket-binding name="txn-recovery-environment" port="4712"/>

                    <socket-binding name="txn-status-manager" port="4713"/>

                    <socket-binding name="websocket" port="8181"/>

                    <outbound-socket-binding name="remote-store-hotrod-server">

                      <remote-destination host="remote-host" port="11222"/>

                    </outbound-socket-binding>

                    <outbound-socket-binding name="remote-store-rest-server">

                      <remote-destination host="remote-host" port="8080"/>

                    </outbound-socket-binding>

                  </socket-binding-group>

                </server>

               

              Thanks Dan!

              • 4. Re: WARN  [org.jgroups.protocols.TCP] JGRP000012: discarded message from different cluster xsite(our cluster is cluster).
                dan.berindei

                haohong, yes, you just need to define different socket bindings for the two stacks.

                 

                I have no idea why the warning did not appear with the older version, but it could be that the old version started the `xsite` channel before the `cluster` channel, so `xsite` bound to the default port listed in TCPPING's `initial_hosts`. Because `cluster` uses `KUBE_PING`, it sends messages directly to the correct port, without trying the default port first. Note that it's just a warning, not an error, and you should not see it when the cluster is stable, only when new nodes are starting.

                • 5. Re: WARN  [org.jgroups.protocols.TCP] JGRP000012: discarded message from different cluster xsite(our cluster is cluster).
                  haohong

                  Thanks so much!!!