1 Reply Latest reply on Nov 2, 2008 6:50 PM by mogy

    Clustering Novice

      Hi all,
      I have been allocated an issue to resolve on JBoss clustering. I'd like to understand what is supposed to happen before I dive into our application issues.

      My testing environment.
      -----------------------------
      Machine (a) WinXP
      Virtual Machine (b) VMWare Instance running RedHat 4 (on same machine)

      I have successfully managed to get the two JBoss instances clustering after initial startup (no applications installed) and all is happy until I disable the network card in the virtual machine (trying to simulate a network outage).

      When I bring it back online they fail to re-establish a cluster, with
      "[FD] I was suspected by 192.168.56.130:1123; ignoring the SUSPECT message and sending back a HEARTBEAT_ACK" messages until I restart either of the JBoss instances.

      If anyone could shed some light on why this happens, I'd be grateful.
      thank you Mogy ...





      <UDP mcast_addr="${jboss.partition.udpGroup:228.1.2.3}"
      mcast_port="${jboss.hapartition.mcast_port:45566}"
      tos="8"
      ucast_recv_buf_size="20000000"
      ucast_send_buf_size="640000"
      mcast_recv_buf_size="25000000"
      mcast_send_buf_size="640000"
      loopback="true"
      discard_incompatible_packets="true"
      enable_bundling="false"
      max_bundle_size="64000"
      max_bundle_timeout="30"
      use_incoming_packet_handler="true"
      use_outgoing_packet_handler="false"
      ip_ttl="${jgroups.udp.ip_ttl:2}"
      down_thread="false" up_thread="false"/>
      <PING timeout="2000"
      down_thread="false" up_thread="false" num_initial_members="3"/>
      <MERGE2 max_interval="100000"
      down_thread="false" up_thread="false" min_interval="20000"/>
      <FD_SOCK down_thread="false" up_thread="false"/>
      <FD timeout="5000" max_tries="5" down_thread="false" up_thread="false" shun="false"/>
      <VERIFY_SUSPECT timeout="1500" down_thread="false" up_thread="false"/>
      <pbcast.NAKACK max_xmit_size="60000"
      use_mcast_xmit="false" gc_lag="0"
      retransmit_timeout="300,600,1200,2400,4800"
      down_thread="false" up_thread="false"
      discard_delivered_msgs="true"/>
      <UNICAST timeout="300,600,1200,2400,3600"
      down_thread="false" up_thread="false"/>
      <pbcast.STABLE stability_delay="1000" desired_avg_gossip="50000"
      down_thread="false" up_thread="false"
      max_bytes="400000"/>
      <pbcast.GMS print_local_addr="true" join_timeout="3000"
      down_thread="false" up_thread="false"
      join_retry_timeout="2000" shun="true"
      view_bundling="true"/>
      <FRAG2 frag_size="60000" down_thread="false" up_thread="false"/>
      <pbcast.STATE_TRANSFER down_thread="false" up_thread="false" use_flush="false"/>