I have a cluster member attempting to join a replicated cache populated with some 100,000 entries and the fetchInMemoryState=true when it is getting the following StateTransferException. Ideas?
2010-05-26 14:18:16,917 472255 TRACE [org.jgroups.protocols.UNICAST] (Incoming-45,infinispan-cluster-OHServerHybrid-test16,test16a_GCBC-4894:) te
st16a_GCBC-4894 <-- ACK(test16b_GCBC-40743: #2)
2010-05-26 14:18:17,082 472420 TRACE [org.jgroups.protocols.TCP] (Incoming-45,infinispan-cluster-OHServerHybrid-test16,test16a_GCBC-4894:) receiv
ed [dst: test16a_GCBC-4894, src: test16b_GCBC-40743 (3 headers), size=0 bytes], headers are FLUSH: FLUSH[type=ABORT_FLUSH,viewId=3], UNICAST: DAT
A, seqno=137, conn_id=2, TCP: [channel_name=infinispan-cluster-OHServerHybrid-test16]
2010-05-26 14:18:17,082 472420 TRACE [org.jgroups.protocols.UNICAST] (Incoming-45,infinispan-cluster-OHServerHybrid-test16,test16a_GCBC-4894:) te
st16a_GCBC-4894 <-- DATA(test16b_GCBC-40743: #137, conn_id=2)
2010-05-26 14:18:17,083 472421 TRACE [org.jgroups.protocols.UNICAST] (Incoming-45,infinispan-cluster-OHServerHybrid-test16,test16a_GCBC-4894:) te
st16a_GCBC-4894 --> ACK(test16b_GCBC-40743: #151)
2010-05-26 14:18:17,083 472421 TRACE [org.jgroups.protocols.TCP] (Incoming-45,infinispan-cluster-OHServerHybrid-test16,test16a_GCBC-4894:) sendin
g msg to test16b_GCBC-40743, src=test16a_GCBC-4894, headers are UNICAST: ACK, seqno=151, TCP: [channel_name=infinispan-cluster-OHServerHybrid-tes
t16]
2010-05-26 14:18:17,083 472421 TRACE [org.jgroups.protocols.TCP] (Incoming-45,infinispan-cluster-OHServerHybrid-test16,test16a_GCBC-4894:) dest=1
70.137.237.81:44446 (102 bytes)
2010-05-26 14:18:17,083 472421 DEBUG [org.jgroups.protocols.pbcast.FLUSH] (Incoming-45,infinispan-cluster-OHServerHybrid-test16,test16a_GCBC-4894
test16a_GCBC-4894: received ABORT_FLUSH from flush coordinator test16b_GCBC-40743, am I flush participant=true
2010-05-26 14:18:17,083 472421 DEBUG [org.jgroups.protocols.pbcast.FLUSH] (Incoming-45,infinispan-cluster-OHServerHybrid-test16,test16a_GCBC-4894
test16a_GCBC-4894: received ABORT_FLUSH from flush coordinator test16b_GCBC-40743, am I flush participant=true
2010-05-26 14:18:17,083 472421 DEBUG [org.jgroups.protocols.pbcast.FLUSH] (Incoming-45,infinispan-cluster-OHServerHybrid-test16,test16a_GCBC-4894
test16a_GCBC-4894: received ABORT_FLUSH from flush coordinator test16b_GCBC-40743, am I flush participant=true
2010-05-26 14:18:17,083 472421 DEBUG [org.jgroups.protocols.pbcast.FLUSH] (Incoming-45,infinispan-cluster-OHServerHybrid-test16,test16a_GCBC-4894
test16a_GCBC-4894: received ABORT_FLUSH from flush coordinator test16b_GCBC-40743, am I flush participant=true
2010-05-26 14:18:17,083 472421 DEBUG [org.jgroups.protocols.pbcast.GMS] (Incoming-45,infinispan-cluster-OHServerHybrid-test16,test16a_GCBC-4894:)
test16a_GCBC-4894: sending RESUME event
2010-05-26 14:18:17,083 472421 DEBUG [org.jgroups.protocols.pbcast.FLUSH] (Incoming-45,infinispan-cluster-OHServerHybrid-test16,test16a_GCBC-4894
test16a_GCBC-4894: received RESUME, sending STOP_FLUSH to all
2010-05-26 14:18:17,083 472421 TRACE [org.jgroups.protocols.pbcast.NAKACK] (Incoming-45,infinispan-cluster-OHServerHybrid-test16,test16a_GCBC-489
4:) sending test16a_GCBC-4894#79
2010-05-26 14:18:17,083 472421 TRACE [org.jgroups.protocols.TCP] (Incoming-45,infinispan-cluster-OHServerHybrid-test16,test16a_GCBC-4894:) sendin
g msg to null, src=test16a_GCBC-4894, headers are FLUSH: FLUSH[type=STOP_FLUSH,viewId=4], NAKACK: [MSG, seqno=79], TCP: [channel_name=infinispan-
cluster-OHServerHybrid-test16]
2010-05-26 14:18:17,084 472422 TRACE [org.jgroups.protocols.TCP] (Incoming-45,infinispan-cluster-OHServerHybrid-test16,test16a_GCBC-4894:) loopin
g back message [dst: <null>, src: test16a_GCBC-4894 (3 headers), size=0 bytes]
2010-05-26 14:18:17,085 472423 TRACE [org.jgroups.protocols.TCP] (Incoming-47,infinispan-cluster-OHServerHybrid-test16,test16a_GCBC-4894:) receiv
ed [dst: <null>, src: test16a_GCBC-4894 (3 headers), size=0 bytes], headers are FLUSH: FLUSH[type=STOP_FLUSH,viewId=4], NAKACK: [MSG, seqno=79],
TCP: [channel_name=infinispan-cluster-OHServerHybrid-test16]
2010-05-26 14:18:17,085 472423 TRACE [org.jgroups.protocols.pbcast.STABLE] (Incoming-45,infinispan-cluster-OHServerHybrid-test16,test16a_GCBC-489
4:) test16a_GCBC-4894: resetting digest from NAKACK: [test16a_GCBC-4894#78]
...skipping...
2010-05-26 14:17:30,004 425342 WARN [org.jgroups.protocols.pbcast.NAKACK] (Incoming-42,infinispan-cluster-OHServerHybrid-test16,test16a_GCBC-489
4:) test16a_GCBC-4894: dropped message from test16b_GCBC-40743 (not in xmit_table), keys are [test16a_GCBC-4894], view=[test16a_GCBC-4894|4] [tes
t16a_GCBC-4894]
2010-05-26 14:17:30,014 425352 DEBUG [org.jgroups.protocols.pbcast.GMS] (ViewHandler,infinispan-cluster-OHServerHybrid-test16,test16a_GCBC-4894:)
determining merge leader from [test16a_GCBC-4894, test16b_GCBC-40743]
2010-05-26 14:17:30,014 425352 DEBUG [org.jgroups.protocols.pbcast.GMS] (ViewHandler,infinispan-cluster-OHServerHybrid-test16,test16a_GCBC-4894:)
I (test16a_GCBC-4894) am not the merge leader, waiting for merge leader (test16b_GCBC-40743) to initiate merge
2010-05-26 14:17:30,215 425553 TRACE [org.jgroups.protocols.TCP] (OOB-116,infinispan-cluster-OHServerHybrid-test16,test16a_GCBC-4894:) received [
dst: test16a_GCBC-4894, src: test16b_GCBC-40743 (2 headers), size=0 bytes, flags=OOB], headers are FD: heartbeat, TCP: [channel_name=infinispan-c
luster-OHServerHybrid-test16]
2010-05-26 14:17:30,215 425553 TRACE [org.jgroups.protocols.FD] (OOB-116,infinispan-cluster-OHServerHybrid-test16,test16a_GCBC-4894:) received ar
e-you-alive from test16b_GCBC-40743, sending response
2010-05-26 14:17:30,215 425553 TRACE [org.jgroups.protocols.TCP] (OOB-116,infinispan-cluster-OHServerHybrid-test16,test16a_GCBC-4894:) sending ms
g to test16b_GCBC-40743, src=test16a_GCBC-4894, headers are FD: heartbeat ack, TCP: [channel_name=infinispan-cluster-OHServerHybrid-test16]
2010-05-26 14:17:30,215 425553 TRACE [org.jgroups.protocols.TCP] (OOB-116,infinispan-cluster-OHServerHybrid-test16,test16a_GCBC-4894:) dest=170.1
37.237.81:44446 (114 bytes)
2010-05-26 14:17:31,338 426676 ERROR [org.infinispan.remoting.transport.jgroups.JGroupsTransport] (Incoming-13,infinispan-cluster-OHServerHybrid-
test16,test16a_GCBC-4894:) Caught while requesting or applying state
org.infinispan.statetransfer.StateTransferException: java.io.EOFException: Read past end of file
at org.infinispan.statetransfer.StateTransferManagerImpl.applyState(StateTransferManagerImpl.java:329)
at org.infinispan.remoting.InboundInvocationHandlerImpl.applyState(InboundInvocationHandlerImpl.java:78)
at org.infinispan.remoting.transport.jgroups.JGroupsTransport.setState(JGroupsTransport.java:582)
at org.jgroups.blocks.MessageDispatcher$ProtocolAdapter.handleUpEvent(MessageDispatcher.java:670)
at org.jgroups.blocks.MessageDispatcher$ProtocolAdapter.up(MessageDispatcher.java:730)
at org.jgroups.JChannel.up(JChannel.java:1427)
at org.jgroups.stack.ProtocolStack.up(ProtocolStack.java:855)
at org.jgroups.protocols.pbcast.FLUSH.up(FLUSH.java:483)
at org.jgroups.protocols.pbcast.STREAMING_STATE_TRANSFER.connectToStateProvider(STREAMING_STATE_TRANSFER.java:524)
at org.jgroups.protocols.pbcast.STREAMING_STATE_TRANSFER.handleStateRsp(STREAMING_STATE_TRANSFER.java:463)
at org.jgroups.protocols.pbcast.STREAMING_STATE_TRANSFER.up(STREAMING_STATE_TRANSFER.java:225)
at org.jgroups.protocols.FRAG2.up(FRAG2.java:188)
at org.jgroups.protocols.FC.up(FC.java:474)
at org.jgroups.protocols.pbcast.GMS.up(GMS.java:890)
at org.jgroups.protocols.pbcast.STABLE.up(STABLE.java:234)
at org.jgroups.protocols.UNICAST.handleDataReceived(UNICAST.java:614)
at org.jgroups.protocols.UNICAST.up(UNICAST.java:294)
at org.jgroups.protocols.pbcast.NAKACK.up(NAKACK.java:707)
at org.jgroups.protocols.VERIFY_SUSPECT.up(VERIFY_SUSPECT.java:132)
at org.jgroups.protocols.FD.up(FD.java:261)
at org.jgroups.protocols.FD_SOCK.up(FD_SOCK.java:270)
at org.jgroups.protocols.MERGE2.up(MERGE2.java:210)
at org.jgroups.protocols.Discovery.up(Discovery.java:277)
at org.jgroups.protocols.PING.up(PING.java:67)
at org.jgroups.protocols.MPING.up(MPING.java:175)
at org.jgroups.protocols.TP.passMessageUp(TP.java:997)
at org.jgroups.protocols.TP.access$100(TP.java:56)
at org.jgroups.protocols.TP$IncomingPacket.handleMyMessage(TP.java:1537)
at org.jgroups.protocols.TP$IncomingPacket.run(TP.java:1519)
at java.util.concurrent.ThreadPoolExecutor$Worker.runTask(ThreadPoolExecutor.java:886)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:908)
at java.lang.Thread.run(Thread.java:619)
Caused by: java.io.EOFException: Read past end of file
at org.jboss.marshalling.AbstractUnmarshaller.eofOnRead(AbstractUnmarshaller.java:184)
at org.jboss.marshalling.AbstractUnmarshaller.readUnsignedByteDirect(AbstractUnmarshaller.java:319)
at org.jboss.marshalling.AbstractUnmarshaller.readUnsignedByte(AbstractUnmarshaller.java:280)
at org.jboss.marshalling.river.RiverUnmarshaller.doReadObject(RiverUnmarshaller.java:207)
at org.jboss.marshalling.AbstractUnmarshaller.readObject(AbstractUnmarshaller.java:85)
And, Relevant jgroups file I am using:
<config xmlns="urn:org:jgroups"
xsi:schemaLocation="urn:org:jgroups file:schema/JGroups-2.8.xsd">
<TCP loopback="true"
port_range="30"
recv_buf_size="20000000"
send_buf_size="640000"
discard_incompatible_packets="true"
max_bundle_size="64000"
max_bundle_timeout="30"
enable_bundling="true"
use_send_queues="true"
sock_conn_timeout="300"
enable_diagnostics="false"
thread_pool.enabled="true"
thread_pool.min_threads="2"
thread_pool.max_threads="30"
thread_pool.keep_alive_time="5000"
thread_pool.queue_enabled="false"
thread_pool.queue_max_size="100"
thread_pool.rejection_policy="Discard"
oob_thread_pool.enabled="true"
oob_thread_pool.min_threads="2"
oob_thread_pool.max_threads="30"
oob_thread_pool.keep_alive_time="5000"
oob_thread_pool.queue_enabled="false"
oob_thread_pool.queue_max_size="100"
oob_thread_pool.rejection_policy="Discard"
/>
<MPING receive_on_all_interfaces="true" break_on_coord_rsp="true"
mcast_addr="${jgroups.udp.mcast_addr:228.6.7.8}" mcast_port="${jgroups.udp.mcast_port:42610}" ip_ttl="${jgroups.udp.ip_ttl:16}"
num_initial_members="2" num_ping_requests="1"/>
<MERGE2 max_interval="30000"
min_interval="10000"/>
<FD_SOCK/>
<FD timeout="3000" max_tries="3"/>
<VERIFY_SUSPECT timeout="1500"/>
<pbcast.NAKACK
use_mcast_xmit="false" gc_lag="0"
retransmit_timeout="300,600,1200,2400,4800"
discard_delivered_msgs="false"/>
<UNICAST timeout="300,600,1200"/>
<pbcast.STABLE stability_delay="1000" desired_avg_gossip="50000"
max_bytes="400000"/>
<pbcast.GMS print_local_addr="false" join_timeout="7000" view_bundling="true"/>
<FC max_credits="2000000"
min_threshold="0.10"/>
<FRAG2 frag_size="60000"/>
<pbcast.STREAMING_STATE_TRANSFER/>
<!-- <pbcast.STATE_TRANSFER/> -->
<pbcast.FLUSH timeout="0"/>
</config>