load test problems
dmitry123 Aug 29, 2011 6:11 PMHi guys,
I ran a load test on a cluster with a replicated cache spread over two machines with each machine hosting 2 or 3 nodes(4 or 5 nodes total in the cluster). One node is a dedicated listener and after a startup it doesn't send messages to the cache. The other nodes( 3 or 4 of them) in the cluster write non overlapping keys to the cache every 1000ms. This works well when i start the coordinator node(the one that doesn't write) first and then bring up the other nodes(that write at 1000ms intervals) every 40 seconds. Problems begin when I bring up another node that writes at 1000ms intervals. The symptoms are dropped messages, and the coordinator(non writing node) locks up and becomes a SUSPECT. Is this normal behavior? Doesn't seem like 1 listener node + 4 write nodes of mutually exclusive data at 1000ms intervals should cause problems. And the symptoms are worse when the write interval is less than 1000ms in each node. Many thanks for looking at this.
jdk 1.6_24
infinispan 5.0.Final
JGroups version: 2.12.1.1.Final
Here's my config and snippet of code:
jgroups-tcp.xml
**********************************************************
<config xmlns="urn:org:jgroups"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="urn:org:jgroups file:schema/JGroups-2.8.xsd">
<TCP
bind_addr="${jgroups.tcp.address:123.12.101.42}"
bind_port="${jgroups.tcp.port:46522}"
loopback="true"
port_range="30"
recv_buf_size="20000000"
send_buf_size="640000"
discard_incompatible_packets="true"
max_bundle_size="64000"
max_bundle_timeout="30"
enable_bundling="true"
use_send_queues="true"
sock_conn_timeout="300"
enable_diagnostics="false"
thread_pool.enabled="true"
thread_pool.min_threads="2"
thread_pool.max_threads="30"
thread_pool.keep_alive_time="5000"
thread_pool.queue_enabled="false"
thread_pool.queue_max_size="100"
thread_pool.rejection_policy="Discard"
oob_thread_pool.enabled="true"
oob_thread_pool.min_threads="2"
oob_thread_pool.max_threads="30"
oob_thread_pool.keep_alive_time="5000"
oob_thread_pool.queue_enabled="false"
oob_thread_pool.queue_max_size="100"
oob_thread_pool.rejection_policy="Discard"
/>
<MPING bind_addr="${jgroups.bind_addr:123.12.101.42}" break_on_coord_rsp="true"
mcast_addr="${jgroups.udp.mcast_addr:228.6.7.8}" mcast_port="${jgroups.udp.mcast_port:46655}" ip_ttl="${jgroups.udp.ip_ttl:2}"
num_initial_members="3"/>
<MERGE2 max_interval="30000"
min_interval="10000"/>
<FD_SOCK/>
<FD timeout="3000" max_tries="10"/>
<VERIFY_SUSPECT timeout="5000"/>
<pbcast.NAKACK
use_mcast_xmit="false" gc_lag="0"
retransmit_timeout="300,600,1200,2400,4800"
discard_delivered_msgs="false"/>
<UNICAST timeout="300,600,1200"/>
<pbcast.STABLE stability_delay="1000" desired_avg_gossip="50000"
max_bytes="400000" />
<pbcast.GMS print_local_addr="true" join_timeout="10000" view_bundling="true"
merge_timeout="20000" print_physical_addrs="true"/>
<UFC max_credits="2000000" min_threshold="0.10"/>
<MFC max_credits="2000000" min_threshold="0.10"/>
<FRAG2 frag_size="60000"/>
<pbcast.STREAMING_STATE_TRANSFER/>
</config>
cache.xml
********************************************************
<?xml version="1.0" encoding="UTF-8"?>
<infinispan xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="urn:infinispan:config:5.0" xsi:schemaLocation="urn:infinispan:config:5.0 http://www.infinispan.org/schemas/infinispan-config-5.0.xsd">
<global>
<transport clusterName="infinispan-test-cluster" >
<properties>
<property name="configurationFile" value="jgroups-tcp.xml"/>
</properties>
</transport>
</global>
<default>
<transaction
transactionManagerLookupClass="org.infinispan.transaction.lookup.JBossStandaloneJTAManagerLookup"
syncRollbackPhase="false"
syncCommitPhase="false"
useEagerLocking="true"/>
<jmxStatistics enabled="true"/>
<clustering mode="replication">
<async asyncMarshalling="false"/>
</clustering>
<indexing enabled="true" indexLocalOnly="true" >
<properties>
<property name="hibernate.search.default.directory_provider" value="ram" />
<property name="hibernate.search.default.indexBase" value="/var/lucene/indexes"/>
</properties>
</indexing>
<!-- <deadlockDetection enabled="true" spinDuration="100" /> -->
<locking isolationLevel="READ_UNCOMMITTED"/>
</default>
<namedCache name="testCache">
</namedCache>
</infinispan>
relevant code
*************************************************
public static void populateCache(){
p = new Thread(new Runnable(){
public void run() {
for (int i = startIndex; i < startIndex+ 1000; i++) {
Book b = getUniqueBook();
cache.putAsync(i, b);
try {
Thread.sleep(1000);
} catch (InterruptedException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
}
});
}