Problems with JMS failover/reconnect

hellweg Nov 6, 2007 7:14 AM

hello,

We have encountered some problems with JMS reconnects/failovers.

This is our set-up*:
Messaging nodes A and B, with no application deployed on them;
an application node that connects to a messaging node.

This is the scenario:
The application connects to A; everything is good.
When A dies, the application connects to B (everything is still good).
When A comes back up and B then dies, all JMS connections are gone.

According to the logs, when A comes up again it rejoins the partition,
but the connections are dead.

* Configuration:
cluster-service.xml, copied and pasted from the example:

<!-- JGroups TCP protocol stack (fragment of cluster-service.xml).
     XXX marks values redacted by the poster. Protocols are listed bottom-up:
     transport first, then discovery, merging, failure detection,
     reliability, membership and state transfer. -->

<!-- Transport: point-to-point TCP connections between cluster members. -->
<TCP bind_addr="XXX" start_port="XXX" loopback="true"
     tcp_nodelay="true"
     recv_buf_size="20000000"
     send_buf_size="640000"
     discard_incompatible_packets="true"
     enable_bundling="false"
     max_bundle_size="64000"
     max_bundle_timeout="30"
     use_incoming_packet_handler="true"
     use_outgoing_packet_handler="false"
     down_thread="false" up_thread="false"
     use_send_queues="false"
     sock_conn_timeout="300"
     skip_suspected_members="true"/>

<!-- Discovery: contacts the static list in initial_hosts to find members. -->
<TCPPING initial_hosts="XXX" port_range="3"
         timeout="3000"
         down_thread="false" up_thread="false"
         num_initial_members="3"/>

<!-- Merging: periodically looks for separated subgroups and merges them;
     the check interval is chosen between min_interval and max_interval (ms). -->
<MERGE2 max_interval="100000"
        down_thread="false" up_thread="false" min_interval="20000"/>

<!-- Failure detection over sockets. The server-socket bind address is set
     with srv_sock_bind_addr. -->
<FD_SOCK down_thread="false" up_thread="false" srv_sock_bind_addr="XXX"/>

<!-- Double-checks a suspected member before it is excluded. -->
<VERIFY_SUSPECT timeout="1500" down_thread="false" up_thread="false"/>

<!-- Reliable delivery: retransmission of missing messages. -->
<pbcast.NAKACK max_xmit_size="60000"
               use_mcast_xmit="false" gc_lag="0"
               retransmit_timeout="300,600,1200,2400,4800"
               down_thread="false" up_thread="false"
               discard_delivered_msgs="true"/>

<!-- Message stability: purges messages that all members have received. -->
<pbcast.STABLE stability_delay="1000" desired_avg_gossip="50000"
               down_thread="false" up_thread="false"
               max_bytes="400000"/>

<!-- Group membership: joins, leaves and view installation. -->
<pbcast.GMS print_local_addr="true" join_timeout="3000"
            down_thread="false" up_thread="false"
            join_retry_timeout="2000" shun="true"
            view_bundling="true"/>

<!-- State transfer to members that join the group. -->
<pbcast.STATE_TRANSFER down_thread="false" up_thread="false" use_flush="false"/>