Spring Cloud Sleuth服务链路追踪

Zipkin服务追踪原理

创造一些追踪标识符(tracingId,spanId,parentId),最终将一个request的流程树构建出来,各业务系统在彼此调用时,将特定的跟踪消息传递至zipkin,zipkin在收集到跟踪信息后将其聚合处理、存储、展示等,用户可通过web UI方便获得网络延迟、调用链路、系统依赖等等。

https://images2015.cnblogs.com/blog/140563/201706/140563-20170613131120462-1419921670.png

transport作用:收集被trace的services的spans,并将它们转化为zipkin common Span,之后把这些Spans传递的存储层

collector会对一个到来的被trace的数据(span)进行验证、存储并设置索引(Cassandra/ES-search/Memory)

Zipkin基本概念&核心数据结构
  • Annotation(用途:用于定位一个request的开始和结束,cs/sr/ss/cr含有额外的信息,比如说时间点):
    • cs:Client Start,表示客户端发起请求一个span的开始
    • sr:Server Receive,表示服务端收到请求
    • ss:Server Send,表示服务端完成处理,并将结果发送给客户端
    • cr:Client Received,表示客户端获取到服务端返回信息一个span的结束,当这个annotation被记录了,这个RPC也被认为完成。客户端调用时间=cr-cs,服务端处理时间=sr-ss。
  • Span:一个请求(包含一组Annotation和BinaryAnnotation);它是基本工作单元,一次链路调用(可以是RPC,DB等没有特定的限制)创建一个span,通过一个64位ID标识它。
    • span通过还有其他的数据,例如描述信息,时间戳,key-value对的(Annotation)tag信息,parent-id等,其中parent-id可以表示span调用链路来源,通俗的理解span就是一次请求信息
  • Trace:类似于树结构的Span集合,表示一条调用链路,存在唯一标识通过traceId(全局的跟踪ID,是跟踪的入口点,根据需求来决定在哪生成traceId)、spanId(请求跟踪ID,比如一次rpc等)和parentId(上一次 请求跟踪ID,用来将前后的请求串联起来),被收集到的span会汇聚成一个tree,从而提供出一个request的整体流程。

整个描述:在一次Trace中,每个服务的每一次调用,就是一个基本工作单元,就像上图中的每一个树节点,称之为span。每一个span都有一个id作为唯一标识,同样每一次Trace都会生成一个traceId在span中作为追踪标识,另外再通过一个parentId标明本次调用的发起者(就是发起者的span-id)。当span有了上面三个标识后,就可以很清晰的将多个span进行梳理串联,最终归纳出一条完整的跟踪链路。

代码实践操作

无存储方式,只需要控制台

本文的源代码在:git@github.com:chengcheng222e/springcloud-learn.git 欢迎大家去fork。

/Users/chenyuan/Workspaces/Github/springcloud-learn/eureka-server

@EnableEurekaServer
@SpringBootApplication
public class EurekaApplication {  
    public static void main(String[] args) {
        SpringApplication.run(EurekaApplication.class,args);
    }
}

application.properties

spring.application.name=eureka-server  
server.port=8001  
eureka.instance.hostname=localhost  
eureka.client.service-url.defaultZone=http://${eureka.instance.hostname}:${server.port}/eureka  
eureka.client.register-with-eureka=false  
eureka.client.fetch-registry=false  

/Users/chenyuan/Workspaces/Github/springcloud-learn/zipkin-server

package com.allei;

import org.springframework.boot.SpringApplication;  
import org.springframework.boot.autoconfigure.SpringBootApplication;  
import org.springframework.cloud.netflix.eureka.EnableEurekaClient;  
import zipkin.server.internal.EnableZipkinServer;

@EnableZipkinServer
@EnableEurekaClient
@SpringBootApplication
public class ZipkinApplication {  
    public static void main(String[] args) {
        SpringApplication.run(ZipkinApplication.class, args);
    }
}

application.properties

spring.application.name=zipkin-server  
server.port=10000

eureka.client.serviceUrl.defaultZone=http://localhost:8001/eureka/

zipkin.instance.hostname=localhost  
spring.zipkin.base-url=http://${zipkin.instance.hostname}:${server.port}/

spring.sleuth.enabled=false  
spring.sleuth.sampler.probability=1.0  
spring.sleuth.hystrix.strategy.enabled=true

spring.main.allow-bean-definition-overriding=true  

/Users/chenyuan/Workspaces/Github/springcloud-learn/trace-a

package com.allei;

import java.util.concurrent.TimeUnit;

import lombok.extern.slf4j.Slf4j;  
import org.springframework.boot.SpringApplication;  
import org.springframework.boot.autoconfigure.SpringBootApplication;  
import org.springframework.cloud.client.discovery.EnableDiscoveryClient;  
import org.springframework.cloud.client.loadbalancer.LoadBalanced;  
import org.springframework.context.annotation.Bean;  
import org.springframework.web.bind.annotation.RequestMapping;  
import org.springframework.web.bind.annotation.RequestMethod;  
import org.springframework.web.bind.annotation.RestController;  
import org.springframework.web.client.RestTemplate;

@RestController
@EnableDiscoveryClient
@SpringBootApplication
@Slf4j
public class TraceAApplication {

    @Bean
    @LoadBalanced
    RestTemplate restTemplate() {
        return new RestTemplate();
    }

    @RequestMapping(value = "/trace-a", method = RequestMethod.GET)
    public String trace() throws InterruptedException {
        log.info("call trace-a----->");
        TimeUnit.SECONDS.sleep(1l);
        return restTemplate().getForEntity("http://trace-b/trace-b", String.class).getBody();
    }

    public static void main(String[] args) {
        SpringApplication.run(TraceAApplication.class, args);
    }
}

application.properties

spring.application.name=trace-a  
server.port=10001

eureka.client.serviceUrl.defaultZone=http://localhost:8001/eureka/

spring.zipkin.base-url=http://localhost:10000  
spring.sleuth.sampler.percentage=1  
# log trace detail
logging.level.org.springframework.web.servlet.DispatcherServlet=DEBUG  

/Users/chenyuan/Workspaces/Github/springcloud-learn/trace-b

package com.allei;

import java.util.concurrent.TimeUnit;

import lombok.extern.slf4j.Slf4j;  
import org.apache.commons.lang.math.RandomUtils;  
import org.springframework.boot.SpringApplication;  
import org.springframework.boot.autoconfigure.SpringBootApplication;  
import org.springframework.cloud.client.discovery.EnableDiscoveryClient;  
import org.springframework.cloud.client.loadbalancer.LoadBalanced;  
import org.springframework.context.annotation.Bean;  
import org.springframework.web.bind.annotation.RequestMapping;  
import org.springframework.web.bind.annotation.RequestMethod;  
import org.springframework.web.bind.annotation.RestController;  
import org.springframework.web.client.RestTemplate;

@RestController
@EnableDiscoveryClient
@SpringBootApplication
@Slf4j
public class TraceBApplication {

    @Bean
    @LoadBalanced
    RestTemplate restTemplate() {
        return new RestTemplate();
    }

    @RequestMapping(value = "/trace-b", method = RequestMethod.GET)
    public String trace() throws InterruptedException {
        log.info("call trace-b----->");
        TimeUnit.SECONDS.sleep(RandomUtils.nextInt(2));
        return restTemplate().getForEntity("http://trace-c/trace-c", String.class).getBody();
    }

    public static void main(String[] args) {
        SpringApplication.run(TraceBApplication.class, args);
    }
}

application.properties

spring.application.name=trace-b  
server.port=10002

eureka.client.serviceUrl.defaultZone=http://localhost:8001/eureka/

spring.zipkin.base-url=http://localhost:10000

spring.sleuth.sampler.percentage=1

# log trace detail
logging.level.org.springframework.web.servlet.DispatcherServlet=DEBUG  

/Users/chenyuan/Workspaces/Github/springcloud-learn/trace-c

package com.allei;

import java.util.concurrent.TimeUnit;

import lombok.extern.slf4j.Slf4j;  
import org.apache.commons.lang.math.RandomUtils;  
import org.springframework.boot.SpringApplication;  
import org.springframework.boot.autoconfigure.SpringBootApplication;  
import org.springframework.cloud.client.discovery.EnableDiscoveryClient;  
import org.springframework.cloud.client.loadbalancer.LoadBalanced;  
import org.springframework.context.annotation.Bean;  
import org.springframework.web.bind.annotation.RequestMapping;  
import org.springframework.web.bind.annotation.RequestMethod;  
import org.springframework.web.bind.annotation.RestController;  
import org.springframework.web.client.RestTemplate;

@RestController
@EnableDiscoveryClient
@SpringBootApplication
@Slf4j
public class TraceCApplication {  
    @Bean
    @LoadBalanced
    RestTemplate restTemplate() {
        return new RestTemplate();
    }

    @RequestMapping(value = "/trace-c", method = RequestMethod.GET)
    public String trace() throws InterruptedException {
        log.info("call trace-c----->");
        TimeUnit.SECONDS.sleep(RandomUtils.nextInt(2));
        return "hello trace";
    }

    public static void main(String[] args) {
        SpringApplication.run(TraceCApplication.class, args);
    }
}

application.properties

spring.application.name=trace-c  
server.port=10003  
eureka.client.serviceUrl.defaultZone=http://localhost:8001/eureka/  
spring.zipkin.base-url=http://localhost:10000  
spring.sleuth.sampler.percentage=1  
# log trace detail
logging.level.org.springframework.web.servlet.DispatcherServlet=DEBUG  

全部把服务启动起来,然后我们去注册中心看一下启动的服务情况。

http://localhost:8001/eureka/

http://static.cyblogs.com/WX20200110-120329@2x.png

我们从trace-a服务触发一个服务:

http://static.cyblogs.com/WX20200110-120512@2x.png

查阅一下日志

/Users/chenyuan/Workspaces/Github/springcloud-learn/trace-a 日志

http://static.cyblogs.com/WX20200110-120630.png

2020-01-10 12:04:51.226 DEBUG [trace-a,c8df468b969a0917,c8df468b969a0917,false] 2626 --- [io-10001-exec-1] o.s.web.servlet.DispatcherServlet        : GET "/trace-a", parameters={}  
2020-01-10 12:04:51.240  INFO [trace-a,c8df468b969a0917,c8df468b969a0917,false] 2626 --- [io-10001-exec-1] com.allei.TraceAApplication              : call trace-a----->  
2020-01-10 12:04:52.528  INFO [trace-a,c8df468b969a0917,a4c77f10471bb209,false] 2626 --- [io-10001-exec-1] c.netflix.config.ChainedDynamicProperty  : Flipping property: trace-b.ribbon.ActiveConnectionsLimit to use NEXT property: niws.loadbalancer.availabilityFilteringRule.activeConnectionsLimit = 2147483647  
2020-01-10 12:04:52.558  INFO [trace-a,c8df468b969a0917,a4c77f10471bb209,false] 2626 --- [io-10001-exec-1] c.n.u.concurrent.ShutdownEnabledTimer    : Shutdown hook installed for: NFLoadBalancer-PingTimer-trace-b  
2020-01-10 12:04:52.558  INFO [trace-a,c8df468b969a0917,a4c77f10471bb209,false] 2626 --- [io-10001-exec-1] c.netflix.loadbalancer.BaseLoadBalancer  : Client: trace-b instantiated a LoadBalancer: DynamicServerListLoadBalancer:{NFLoadBalancer:name=trace-b,current list of Servers=[],Load balancer stats=Zone stats: {},Server stats: []}ServerList:null  
2020-01-10 12:04:52.564  INFO [trace-a,c8df468b969a0917,a4c77f10471bb209,false] 2626 --- [io-10001-exec-1] c.n.l.DynamicServerListLoadBalancer      : Using serverListUpdater PollingServerListUpdater  
2020-01-10 12:04:52.588  INFO [trace-a,c8df468b969a0917,a4c77f10471bb209,false] 2626 --- [io-10001-exec-1] c.netflix.config.ChainedDynamicProperty  : Flipping property: trace-b.ribbon.ActiveConnectionsLimit to use NEXT property: niws.loadbalancer.availabilityFilteringRule.activeConnectionsLimit = 2147483647  
2020-01-10 12:04:52.590  INFO [trace-a,c8df468b969a0917,a4c77f10471bb209,false] 2626 --- [io-10001-exec-1] c.n.l.DynamicServerListLoadBalancer      : DynamicServerListLoadBalancer for client trace-b initialized: DynamicServerListLoadBalancer:{NFLoadBalancer:name=trace-b,current list of Servers=[172.18.232.69:10002],Load balancer stats=Zone stats: {defaultzone=[Zone:defaultzone;    Instance count:1;   Active connections count: 0;    Circuit breaker tripped count: 0;   Active connections per server: 0.0;]  
},Server stats: [[Server:172.18.232.69:10002;    Zone:defaultZone;   Total Requests:0;   Successive connection failure:0;    Total blackout seconds:0;   Last connection made:Thu Jan 01 08:00:00 CST 1970;  First connection made: Thu Jan 01 08:00:00 CST 1970;    Active Connections:0;   total failure count in last (1000) msecs:0; average resp time:0.0;  90 percentile resp time:0.0;    95 percentile resp time:0.0;    min resp time:0.0;  max resp time:0.0;  stddev resp time:0.0]
]}ServerList:org.springframework.cloud.netflix.ribbon.eureka.DomainExtractingServerList@128e37fa
2020-01-10 12:04:53.573  INFO [trace-a,,,] 2626 --- [erListUpdater-0] c.netflix.config.ChainedDynamicProperty  : Flipping property: trace-b.ribbon.ActiveConnectionsLimit to use NEXT property: niws.loadbalancer.availabilityFilteringRule.activeConnectionsLimit = 2147483647  
2020-01-10 12:04:54.397 DEBUG [trace-a,c8df468b969a0917,c8df468b969a0917,false] 2626 --- [io-10001-exec-1] o.s.web.servlet.DispatcherServlet        : Completed 200 OK  
2020-01-10 12:04:54.731 DEBUG [trace-a,4eb7430157ffd40f,4eb7430157ffd40f,false] 2626 --- [io-10001-exec-2] o.s.web.servlet.DispatcherServlet        : GET "/favicon.ico", parameters={}  
2020-01-10 12:04:54.749 DEBUG [trace-a,4eb7430157ffd40f,4eb7430157ffd40f,false] 2626 --- [io-10001-exec-2] o.s.web.servlet.DispatcherServlet        : Completed 200 OK  
2020-01-10 12:06:16.665  INFO [trace-a,,,] 2626 --- [trap-executor-0] c.n.d.s.r.aws.ConfigClusterResolver      : Resolving eureka endpoints via configuration  
2020-01-10 12:11:16.661  INFO [trace-a,,,] 2626 --- [trap-executor-0] c.n.d.s.r.aws.ConfigClusterResolver      : Resolving eureka endpoints via configuration  

/Users/chenyuan/Workspaces/Github/springcloud-learn/trace-b 日志

http://static.cyblogs.com/WX20200110-120711.png

2020-01-10 12:04:52.733 DEBUG [trace-b,c8df468b969a0917,a4c77f10471bb209,false] 2862 --- [io-10002-exec-1] o.s.web.servlet.DispatcherServlet        : GET "/trace-b", parameters={}  
2020-01-10 12:04:52.752  INFO [trace-b,c8df468b969a0917,a4c77f10471bb209,false] 2862 --- [io-10002-exec-1] com.allei.TraceBApplication              : call trace-b----->  
2020-01-10 12:04:53.062  INFO [trace-b,c8df468b969a0917,e9651abffa9bf1d2,false] 2862 --- [io-10002-exec-1] c.netflix.config.ChainedDynamicProperty  : Flipping property: trace-c.ribbon.ActiveConnectionsLimit to use NEXT property: niws.loadbalancer.availabilityFilteringRule.activeConnectionsLimit = 2147483647  
2020-01-10 12:04:53.108  INFO [trace-b,c8df468b969a0917,e9651abffa9bf1d2,false] 2862 --- [io-10002-exec-1] c.n.u.concurrent.ShutdownEnabledTimer    : Shutdown hook installed for: NFLoadBalancer-PingTimer-trace-c  
2020-01-10 12:04:53.108  INFO [trace-b,c8df468b969a0917,e9651abffa9bf1d2,false] 2862 --- [io-10002-exec-1] c.netflix.loadbalancer.BaseLoadBalancer  : Client: trace-c instantiated a LoadBalancer: DynamicServerListLoadBalancer:{NFLoadBalancer:name=trace-c,current list of Servers=[],Load balancer stats=Zone stats: {},Server stats: []}ServerList:null  
2020-01-10 12:04:53.116  INFO [trace-b,c8df468b969a0917,e9651abffa9bf1d2,false] 2862 --- [io-10002-exec-1] c.n.l.DynamicServerListLoadBalancer      : Using serverListUpdater PollingServerListUpdater  
2020-01-10 12:04:53.146  INFO [trace-b,c8df468b969a0917,e9651abffa9bf1d2,false] 2862 --- [io-10002-exec-1] c.netflix.config.ChainedDynamicProperty  : Flipping property: trace-c.ribbon.ActiveConnectionsLimit to use NEXT property: niws.loadbalancer.availabilityFilteringRule.activeConnectionsLimit = 2147483647  
2020-01-10 12:04:53.148  INFO [trace-b,c8df468b969a0917,e9651abffa9bf1d2,false] 2862 --- [io-10002-exec-1] c.n.l.DynamicServerListLoadBalancer      : DynamicServerListLoadBalancer for client trace-c initialized: DynamicServerListLoadBalancer:{NFLoadBalancer:name=trace-c,current list of Servers=[172.18.232.69:10003],Load balancer stats=Zone stats: {defaultzone=[Zone:defaultzone;    Instance count:1;   Active connections count: 0;    Circuit breaker tripped count: 0;   Active connections per server: 0.0;]  
},Server stats: [[Server:172.18.232.69:10003;    Zone:defaultZone;   Total Requests:0;   Successive connection failure:0;    Total blackout seconds:0;   Last connection made:Thu Jan 01 08:00:00 CST 1970;  First connection made: Thu Jan 01 08:00:00 CST 1970;    Active Connections:0;   total failure count in last (1000) msecs:0; average resp time:0.0;  90 percentile resp time:0.0;    95 percentile resp time:0.0;    min resp time:0.0;  max resp time:0.0;  stddev resp time:0.0]
]}ServerList:org.springframework.cloud.netflix.ribbon.eureka.DomainExtractingServerList@4e144ec0
2020-01-10 12:04:54.124  INFO [trace-b,,,] 2862 --- [erListUpdater-0] c.netflix.config.ChainedDynamicProperty  : Flipping property: trace-c.ribbon.ActiveConnectionsLimit to use NEXT property: niws.loadbalancer.availabilityFilteringRule.activeConnectionsLimit = 2147483647  
2020-01-10 12:04:54.374 DEBUG [trace-b,c8df468b969a0917,a4c77f10471bb209,false] 2862 --- [io-10002-exec-1] o.s.web.servlet.DispatcherServlet        : Completed 200 OK  
2020-01-10 12:06:57.545  INFO [trace-b,,,] 2862 --- [trap-executor-0] c.n.d.s.r.aws.ConfigClusterResolver      : Resolving eureka endpoints via configuration  
2020-01-10 12:11:57.541  INFO [trace-b,,,] 2862 --- [trap-executor-0] c.n.d.s.r.aws.ConfigClusterResolver      : Resolving eureka endpoints via configuration  

/Users/chenyuan/Workspaces/Github/springcloud-learn/trace-c 日志

http://static.cyblogs.com/WX20200110-120747.png

2020-01-10 12:04:53.306 DEBUG [trace-c,c8df468b969a0917,e9651abffa9bf1d2,false] 3211 --- [io-10003-exec-1] o.s.web.servlet.DispatcherServlet        : GET "/trace-c", parameters={}  
2020-01-10 12:04:53.320  INFO [trace-c,c8df468b969a0917,e9651abffa9bf1d2,false] 3211 --- [io-10003-exec-1] com.allei.TraceCApplication              : call trace-c----->  
2020-01-10 12:04:54.346 DEBUG [trace-c,c8df468b969a0917,e9651abffa9bf1d2,false] 3211 --- [io-10003-exec-1] o.s.web.servlet.DispatcherServlet        : Completed 200 OK  
2020-01-10 12:08:09.576  INFO [trace-c,,,] 3211 --- [trap-executor-0] c.n.d.s.r.aws.ConfigClusterResolver      : Resolving eureka endpoints via configuration  
2020-01-10 12:13:09.572  INFO [trace-c,,,] 3211 --- [trap-executor-0] c.n.d.s.r.aws.ConfigClusterResolver      : Resolving eureka endpoints via configuration  

参考地址

简栈文化服务订阅号