0 Replies Latest reply on Jan 14, 2016 8:33 AM by alejandro.serna

    Problem with cluster when restarting node

    alejandro.serna

      I have two hosts with embedded Infinispan 8.0.1 instances that form a cluster using unicast (sa3HOFVPL42WNC4, sa3P9K409HXEX0H). It works fine, but when I restart sa3P9K409HXEX0H I get the output below. It seems that it does not remove the stale member and replaces it with efdfe6fe-748e-64f0-5db8-02960e590a7e. Then it seems to try to connect, producing a timeout and stopping the server from starting.

       

      ...

      -------------------------------------------------------------------

      GMS: address=sa3P9K409HXEX0H-62086, cluster=HOTEL_API_CLUSTER_INFINISPAN-TEST, physical address=10.175.16.224:3807

      -------------------------------------------------------------------

      DEBUG|2016-01-14T11:30:00,070||JGroupsTransport|New view accepted: [sa3HOFVPL42WNC4-44408|2] (3) [sa3HOFVPL42WNC4-44408, efdfe6fe-748e-64f0-5db8-02960e590a7e, sa3P9K409HXEX0H-62086]

      ...

      DEBUG|2016-01-14T11:30:00,578||LocalTopologyManagerImpl|Starting local rebalance for cache HotelMap_4, topology = CacheTopology{id=3, rebalanceId=2, currentCH=ReplicatedConsistentHash{ns = 60, owners = (2)[sa3HOFVPL42WNC4-44408: 30, efdfe6fe-748e-64f0-5db8-02960e590a7e: 30]}, pendingCH=ReplicatedConsistentHash{ns = 60, owners = (3)[sa3HOFVPL42WNC4-44408: 20, efdfe6fe-748e-64f0-5db8-02960e590a7e: 20, sa3P9K409HXEX0H-62086: 20]}, unionCH=null, actualMembers=[sa3HOFVPL42WNC4-44408, efdfe6fe-748e-64f0-5db8-02960e590a7e, sa3P9K409HXEX0H-62086]}

      ...

      Caused by: org.infinispan.commons.CacheException: Unable to invoke method public void org.infinispan.statetransfer.StateTransferManagerImpl.waitForInitialStateTransferToComplete() throws java.lang.Exception on object of type StateTransferManagerImpl

              at org.infinispan.commons.util.ReflectionUtil.invokeAccessibly(ReflectionUtil.java:172)

              at org.infinispan.factories.AbstractComponentRegistry$PrioritizedMethod.invoke(AbstractComponentRegistry.java:869)

              at org.infinispan.factories.AbstractComponentRegistry.invokeStartMethods(AbstractComponentRegistry.java:638)

              at org.infinispan.factories.AbstractComponentRegistry.internalStart(AbstractComponentRegistry.java:627)

              at org.infinispan.factories.AbstractComponentRegistry.start(AbstractComponentRegistry.java:530)

              at org.infinispan.factories.ComponentRegistry.start(ComponentRegistry.java:218)

              at org.infinispan.cache.impl.CacheImpl.start(CacheImpl.java:850)

              at org.infinispan.manager.DefaultCacheManager.wireAndStartCache(DefaultCacheManager.java:629)

              at org.infinispan.manager.DefaultCacheManager.createCache(DefaultCacheManager.java:580)

              at org.infinispan.manager.DefaultCacheManager.getCache(DefaultCacheManager.java:445)

              at org.infinispan.manager.DefaultCacheManager.getCache(DefaultCacheManager.java:431)

              at com.hotelbeds.distribution.cache.spring.InfinispanRedisCacheServiceImplementation.lambda$3(InfinispanRedisCacheServiceImplementation.java:137)

              at com.hotelbeds.distribution.cache.spring.InfinispanRedisCacheServiceImplementation$$Lambda$39/1989375473.accept(Unknown Source)

              at java.util.stream.ForEachOps$ForEachOp$OfRef.accept(ForEachOps.java:184)

              at java.util.stream.ReferencePipeline$2$1.accept(ReferencePipeline.java:175)

              at java.util.Spliterators$ArraySpliterator.forEachRemaining(Spliterators.java:948)

              at java.util.stream.AbstractPipeline.copyInto(AbstractPipeline.java:512)

              at java.util.stream.AbstractPipeline.wrapAndCopyInto(AbstractPipeline.java:502)

              at java.util.stream.ForEachOps$ForEachOp.evaluateSequential(ForEachOps.java:151)

              at java.util.stream.ForEachOps$ForEachOp$OfRef.evaluateSequential(ForEachOps.java:174)

              at java.util.stream.AbstractPipeline.evaluate(AbstractPipeline.java:234)

              at java.util.stream.ReferencePipeline.forEach(ReferencePipeline.java:418)

              at com.hotelbeds.distribution.cache.spring.InfinispanRedisCacheServiceImplementation.initLocalCaches(InfinispanRedisCacheServiceImplementation.java:137)

              at com.hotelbeds.distribution.cache.spring.InfinispanRedisCacheServiceImplementation.initMaps(InfinispanRedisCacheServiceImplementation.java:101)

              at com.hotelbeds.distribution.cache.configuration.CacheServiceConfiguration.selectDataProviderCacheHandler(CacheServiceConfiguration.java:261)

              at com.hotelbeds.distribution.cache.configuration.CacheServiceConfiguration$$EnhancerBySpringCGLIB$$38df2187.CGLIB$selectDataProviderCacheHandler$5(<generated>)

              at com.hotelbeds.distribution.cache.configuration.CacheServiceConfiguration$$EnhancerBySpringCGLIB$$38df2187$$FastClassBySpringCGLIB$$77769e34.invoke(<generated>)

              at org.springframework.cglib.proxy.MethodProxy.invokeSuper(MethodProxy.java:228)

              at org.springframework.context.annotation.ConfigurationClassEnhancer$BeanMethodInterceptor.intercept(ConfigurationClassEnhancer.java:309)

              at com.hotelbeds.distribution.cache.configuration.CacheServiceConfiguration$$EnhancerBySpringCGLIB$$38df2187.selectDataProviderCacheHandler(<generated>)

              at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)

              at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)

              at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)

              at java.lang.reflect.Method.invoke(Method.java:497)

              at org.springframework.beans.factory.support.SimpleInstantiationStrategy.instantiate(SimpleInstantiationStrategy.java:162)

              ... 78 more

      Caused by: org.infinispan.commons.CacheException: Initial state transfer timed out for cache HotelMap_4 on sa3P9K409HXEX0H-62086

              at org.infinispan.statetransfer.StateTransferManagerImpl.waitForInitialStateTransferToComplete(StateTransferManagerImpl.java:225)

              at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)

              at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)

              at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)

              at java.lang.reflect.Method.invoke(Method.java:497)

              at org.infinispan.commons.util.ReflectionUtil.invokeAccessibly(ReflectionUtil.java:168)

       

       

      On the other host that has not been restarted (sa3HOFVPL42WNC4) I get this message:

       

      WARN |2016-01-14T11:33:58,066||ClusterTopologyManagerImpl|ISPN000197: Error updating cluster member list org.infinispan.util.concurrent.TimeoutException: Replication timeout for sa3P9K409HXEX0H-43917

              at org.infinispan.remoting.transport.jgroups.JGroupsTransport.checkRsp(JGroupsTransport.java:755)

              at org.infinispan.remoting.transport.jgroups.JGroupsTransport.lambda$invokeRemotelyAsync$81(JGroupsTransport.java:602)

              at org.infinispan.remoting.transport.jgroups.JGroupsTransport$$Lambda$189/1162411583.apply(Unknown Source)

              at java.util.concurrent.CompletableFuture.uniApply(CompletableFuture.java:602)

              at java.util.concurrent.CompletableFuture$UniApply.tryFire(CompletableFuture.java:577)

              at java.util.concurrent.CompletableFuture.postComplete(CompletableFuture.java:474)

              at java.util.concurrent.CompletableFuture.complete(CompletableFuture.java:1954)

              at org.infinispan.remoting.transport.jgroups.RspListFuture.call(RspListFuture.java:47)

              at org.infinispan.remoting.transport.jgroups.RspListFuture.call(RspListFuture.java:16)

              at java.util.concurrent.FutureTask.run(FutureTask.java:266)

              at java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.access$201(ScheduledThreadPoolExecutor.java:180)

              at java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.run(ScheduledThreadPoolExecutor.java:293)

              at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)

              at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)

              at java.lang.Thread.run(Thread.java:745)

      Java configuration:

       

      
      public EmbeddedCacheManager getEmbeddedCacheManager() throws Exception {
              reloadVMProperties();
            
              TransportConfigurationBuilder transport = GlobalConfigurationBuilder.defaultClusteredBuilder().transport();
              GlobalConfiguration globalConfiguration =
                  transport.defaultTransport()
                          .addProperty(CONFIGURATION_FILE, "jgroups_tcpunicast.xml")
                          .clusterName(getGroupName(isLocal()))
                          .build();
              ConfigurationBuilder configurationBuilder = new ConfigurationBuilder();
              configurationBuilder.clustering().cacheMode(CacheMode.REPL_ASYNC);
            
              EmbeddedCacheManager cacheManager = new DefaultCacheManager(globalConfiguration, configurationBuilder.build());
      
              return cacheManager;
          }
      
      

       

      jgroups_tcpunicast.xml:

       

      <!--
        JGroups protocol stack: TCP transport with a static TCPPING host list.
        NOTE(review): no failure-detection protocol (e.g. FD_SOCK or FD_ALL) is declared
        in this stack; VERIFY_SUSPECT is the only suspicion-related entry. This may be
        related to the stale member staying in the view after a restart. Confirm against
        the JGroups protocol documentation.
      -->
      <config xmlns="urn:org:jgroups" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="urn:org:jgroups http://www.jgroups.org/schema/JGroups-3.6.xsd">

        <!-- Transport: bind port comes from jgroups.tcp.port, falling back to 7800. -->
        <TCP bind_port="${jgroups.tcp.port:7800}"
             enable_diagnostics="false"
             thread_naming_pattern="pl"
             send_buf_size="640k"
             sock_conn_timeout="300"

             thread_pool.min_threads="${jgroups.thread_pool.min_threads:2}"
             thread_pool.max_threads="${jgroups.thread_pool.max_threads:30}"
             thread_pool.keep_alive_time="60000"
             thread_pool.queue_enabled="false"

             internal_thread_pool.min_threads="${jgroups.internal_thread_pool.min_threads:5}"
             internal_thread_pool.max_threads="${jgroups.internal_thread_pool.max_threads:20}"
             internal_thread_pool.keep_alive_time="60000"
             internal_thread_pool.queue_enabled="true"
             internal_thread_pool.queue_max_size="500"

             oob_thread_pool.min_threads="${jgroups.oob_thread_pool.min_threads:20}"
             oob_thread_pool.max_threads="${jgroups.oob_thread_pool.max_threads:200}"
             oob_thread_pool.keep_alive_time="60000"
             oob_thread_pool.queue_enabled="false"
        />

        <!-- Discovery: fixed host list, overridable through jgroups.tcpping.initial_hosts. -->
        <TCPPING timeout="3000" initial_hosts="${jgroups.tcpping.initial_hosts:sa3HOFVPL42WNC4[3806],sa3P9K409HXEX0H[3807]}" port_range="30" />

        <VERIFY_SUSPECT timeout="1500" />

        <pbcast.NAKACK use_mcast_xmit="false" retransmit_timeout="300,600,1200,2400,4800" discard_delivered_msgs="true" />

        <pbcast.STABLE stability_delay="1000" desired_avg_gossip="50000" max_bytes="400000" />

        <!-- Group membership; print_local_addr="true" matches the "GMS: address=..." banner in the log above. -->
        <pbcast.GMS print_local_addr="true" join_timeout="3000" view_bundling="true" />

        <FRAG2 />

      </config>