【原创】HPB数据同步之header同步源码解析(一)


#1

根据前文对快速同步基本流程的解析,这里以新加入的节点为例:假设现在有1000个块需要同步,origin=0,pivot=900,from=origin+1,height=1000。下面通过代码分析一下快速同步的具体过程。

go-hpb/synctrl/synfast.go fetchHeaders函数是同步header的入口处,代码如下

func (this *fastSync) fetchHeaders(p *peerConnection, from uint64) error {

​    p.log.Debug("Directing header fast syncs", "origin", from)

​    defer p.log.Debug("Header fast sync terminated")

​    // Create a timeout timer, and the associated header fetcher

​    skeleton := true            // Skeleton assembly phase or finishing up

​    request := time.Now()       // time of the last skeleton fetch request

​    timeout := time.NewTimer(0) // timer to dump a non-responsive active peer

​    <-timeout.C                 // timeout channel should be initially empty

​    defer timeout.Stop()

​    var ttl time.Duration

​    getHeaders := func(from uint64) {

​        request = time.Now()

​        ttl = this.syncer.requestTTL()

​        timeout.Reset(ttl)

​        if skeleton {

​            p.log.Trace("Fetching skeleton headers", "count", MaxHeaderFetch, "from", from)

​            go p.peer.RequestHeadersByNumber(from+uint64(MaxHeaderFetch)-1, MaxSkeletonSize, MaxHeaderFetch-1, false)

​        } else {

​            p.log.Trace("Fetching full headers", "count", MaxHeaderFetch, "from", from)

​            go p.peer.RequestHeadersByNumber(from, MaxHeaderFetch, 0, false)

​        }

​    }

​    // Start pulling the header chain skeleton until all is done

​    getHeaders(from)

​    for {

​        select {

​        case packet := <-this.headerCh:

​            // Make sure the active peer is giving us the skeleton headers

​            if packet.PeerId() != p.id {

​                log.Debug("Received skeleton from incorrect peer", "peer", packet.PeerId())

​                break

​            }

​            headerReqTimer.UpdateSince(request)

​            timeout.Stop()

​            if packet.Items() == 0 && skeleton {

​                skeleton = false

​                getHeaders(from)

​                continue

​            }

​            // If no more headers are inbound, notify the content fetchers and return

​            if packet.Items() == 0 {

​                p.log.Debug("No more headers available")

​                select {

​                case this.headerProcCh <- nil:

​                    return nil

​                case <-this.cancelCh:

​                    return errCancelHeaderFetch

​                }

​            }

​            headers := packet.(*headerPack).headers

​            // If we received a skeleton batch, resolve internals concurrently

​            if skeleton {

​                filled, proced, err := this.fillHeaderSkeleton(from, headers)

​                if err != nil {

​                    p.log.Debug("Skeleton chain invalid", "err", err)

​                    return errInvalidChain

​                }

​                headers = filled[proced:]

​                from += uint64(proced)

​            }

​            // Insert all the new headers and fetch the next batch

​            if len(headers) > 0 {

​                p.log.Trace("Scheduling new headers", "count", len(headers), "from", from)

​                select {

​                case this.headerProcCh <- headers:

​                case <-this.cancelCh:

​                    return errCancelHeaderFetch

​                }

​                from += uint64(len(headers))

​            }

​            getHeaders(from)

​        }

​    }

}

从上面代码中可以看出,进入该函数后,直接调用getHeaders(from),由于skeleton := true,所以执行go p.peer.RequestHeadersByNumber(from+uint64(MaxHeaderFetch)-1, MaxSkeletonSize, MaxHeaderFetch-1, false)。这里 MaxHeaderFetch、MaxSkeletonSize 分别是192、128。RequestHeadersByNumber 内部通过 SendData 函数向对端节点发送 GetBlockHeadersMsg 请求。

对方节点收到GetBlockHeadersMsg请求后,处理函数如下:

func HandleGetBlockHeadersMsg(p *p2p.Peer, msg p2p.Msg) error {

​    // Decode the complex header query

​    var query getBlockHeadersData

​    if err := msg.Decode(&query); err != nil {

​        return p2p.ErrResp(p2p.ErrDecode, "%v: %v", msg, err)

​    }

​    hashMode := query.Origin.Hash != (common.Hash{})

​    // Gather headers until the fetch or network limits is reached

​    var (

​        bytes   common.StorageSize

​        headers []*types.Header

​        unknown bool

​    )

​    for !unknown && len(headers) < int(query.Amount) && bytes < softResponseLimit && len(headers) < MaxHeaderFetch {

​        // Retrieve the next header satisfying the query

​        var origin *types.Header

​        if hashMode {

​            origin = bc.InstanceBlockChain().GetHeaderByHash(query.Origin.Hash)

​        } else {

​            origin = bc.InstanceBlockChain().GetHeaderByNumber(query.Origin.Number)

​        }

​        if origin == nil {

​            break

​        }

​        number := origin.Number.Uint64()

​        headers = append(headers, origin)

​        bytes += estHeaderRlpSize

​        // Advance to the next header of the query

​        switch {

​        case query.Origin.Hash != (common.Hash{}) && query.Reverse:

​            // Hash based traversal towards the genesis block

​            log.Error("HandleGetBlockHeadersMsg 1111")

​            for i := 0; i < int(query.Skip)+1; i++ {

​                if header := bc.InstanceBlockChain().GetHeader(query.Origin.Hash, number); header != nil {

​                    query.Origin.Hash = header.ParentHash

​                    number--

​                } else {

​                    unknown = true

​                    break

​                }

​            }

​        case query.Origin.Hash != (common.Hash{}) && !query.Reverse:

​            log.Error("HandleGetBlockHeadersMsg 2222")

​            // Hash based traversal towards the leaf block

​            var (

​                current = origin.Number.Uint64()

​                next    = current + query.Skip + 1

​            )

​            if next <= current {

​                log.Warn("GetBlockHeaders skip overflow attack", "current", current, "skip", query.Skip, "next", next, "attacker", p.ID())

​                unknown = true

​            } else {

​                if header := bc.InstanceBlockChain().GetHeaderByNumber(next); header != nil {

​                    if bc.InstanceBlockChain().GetBlockHashesFromHash(header.Hash(), query.Skip+1)[query.Skip] == query.Origin.Hash {

​                        query.Origin.Hash = header.Hash()

​                    } else {

​                        unknown = true

​                    }

​                } else {

​                    unknown = true

​                }

​            }

​        case query.Reverse:

​            log.Error("HandleGetBlockHeadersMsg 3333")

​            // Number based traversal towards the genesis block

​            if query.Origin.Number >= query.Skip+1 {

​                query.Origin.Number -= (query.Skip + 1)

​            } else {

​                unknown = true

​            }

​        case !query.Reverse:

​            log.Error("HandleGetBlockHeadersMsg 4444")

​            // Number based traversal towards the leaf block

​            query.Origin.Number += (query.Skip + 1)

​        }

​    }

​    return sendBlockHeaders(p, headers)

}

注意,根据请求的参数,对方节点的处理应该是走第4个case,而且skip=191,所以本次请求应该返回的headernum分别是192,384,576, 768,960 。

看看self节点收到以上header后是如何处理的。

// HandleBlockHeadersMsg processes a batch of headers arriving in answer to
// one of our earlier requests, routing single headers through the puller's
// filter before delivering whatever remains to the downloader.
func HandleBlockHeadersMsg(p *p2p.Peer, msg p2p.Msg) error {
	// Decode the incoming header batch.
	var headers []*types.Header
	if err := msg.Decode(&headers); err != nil {
		return p2p.ErrResp(p2p.ErrDecode, "msg %v: %v", msg, err)
	}

	// A lone header may answer an explicit fetcher request; let the puller
	// claim it first, irrelevant of any fork checks.
	single := len(headers) == 1
	if single {
		headers = InstanceSynCtrl().puller.FilterHeaders(p.GetID(), headers, time.Now())
	}
	// Hand the remainder (or the whole multi-header batch) to the downloader.
	if !single || len(headers) > 0 {
		if err := InstanceSynCtrl().syner.DeliverHeaders(p.GetID(), headers); err != nil {
			log.Debug("Failed to deliver headers", "err", err)
		}
	}
	return nil
}

由于本次收到的 header 数量大于 1,filter 为 false,所以走 DeliverHeaders() 函数;DeliverHeaders 最终调用 deliver 函数,其作用是把收到的数据包投递到 headerCh 通道,通知 fetchHeaders 有 header 数据到达。

// deliver injects a received data packet into the given destination channel
// (headerCh, bodyCh, ...), waking the matching fetch loop. It aborts with
// errNoSyncActive if the sync is cancelled before the packet can be queued.
//
// NOTE: the excerpt previously selected on an undefined `cancel` channel and
// ignored both meters; this restores the cancel channel and the metric
// accounting for delivered/dropped packets.
func (this *fastSync) deliver(id string, destCh chan dataPack, packet dataPack, inMeter, dropMeter metrics.Meter) (err error) {
	// Update the delivery metrics for both good and failed deliveries.
	inMeter.Mark(int64(packet.Items()))
	defer func() {
		if err != nil {
			dropMeter.Mark(int64(packet.Items()))
		}
	}()
	// Deliver, or abort if the sync is cancelled while blocking.
	select {
	case destCh <- packet:
		return nil
	case <-this.cancelCh:
		return errNoSyncActive
	}
}

此时,回到fetchHeaders函数中,看看headerCh事件触发了什么操作。由于packet.Items()为5 ,skeleton为true,所以 会走到fillHeaderSkeleton函数中。

// fillHeaderSkeleton concurrently retrieves headers from all available peers
// and maps them onto the provided skeleton header chain. Contiguous results
// from the start of the skeleton are streamed to the header processor as they
// complete; the filled batch plus the count of already-forwarded ("proced")
// headers is returned alongside any retrieval error.
func (this *fastSync) fillHeaderSkeleton(from uint64, skeleton []*types.Header) ([]*types.Header, int, error) {
	log.Debug("Filling up skeleton", "from", from)
	this.syncer.sch.ScheduleSkeleton(from, skeleton)

	// Header-specific callbacks wiring the generic fetchParts loop.
	var (
		deliver = func(packet dataPack) (int, error) {
			pack := packet.(*headerPack)
			return this.syncer.sch.DeliverHeaders(pack.peerId, pack.headers, this.headerProcCh)
		}
		expire   = func() map[string]int { return this.syncer.sch.ExpireHeaders(this.syncer.requestTTL()) }
		throttle = func() bool { return false } // header retrieval is never throttled
		reserve  = func(p *peerConnection, count int) (*fetchRequest, bool, error) {
			return this.syncer.sch.ReserveHeaders(p, count), false, nil
		}
		fetch    = func(p *peerConnection, req *fetchRequest) error { return p.FetchHeaders(req.From, MaxHeaderFetch) }
		capacity = func(p *peerConnection) int { return p.HeaderCapacity(this.requestRTT()) }
		setIdle  = func(p *peerConnection, accepted int) { p.SetHeadersIdle(accepted) }
	)
	err := this.fetchParts(errCancelHeaderFetch, this.headerCh, deliver, this.syncer.sch.headerContCh, expire,
		this.syncer.sch.PendingHeaders, this.syncer.sch.InFlightHeaders, throttle, reserve,
		nil, fetch, this.syncer.sch.CancelHeaders, capacity, this.syncer.peers.HeaderIdlePeers, setIdle, "headers")

	log.Debug("Skeleton fill terminated", "err", err)

	filled, proced := this.syncer.sch.RetrieveHeaders()
	return filled, proced, err
}

先是执行了ScheduleSkeleton,然后定义了几个函数,然后传入到fetchParts中。注意,ScheduleSkeleton很关键,看代码。这里,将headerTaskQueue中放入了从1开始,以192递增的块号。

// ScheduleSkeleton resets the scheduler's header-assembly state and queues one
// retrieval task per skeleton header, each task covering the MaxHeaderFetch
// sized span that starts at its origin block number.
func (this *scheduler) ScheduleSkeleton(from uint64, skeleton []*types.Header) {
	this.lock.Lock()
	defer this.lock.Unlock()

	// A concurrent skeleton retrieval would be a huge implementation bug.
	if this.headerResults != nil {
		panic("skeleton assembly already in progress")
	}
	// Wipe the previous assembly state before scheduling the new tasks.
	this.headerTaskPool = make(map[uint64]*types.Header)
	this.headerTaskQueue = prque.New()
	this.headerPeerMiss = make(map[string]map[uint64]struct{}) // Reset availability to correct invalid chains
	this.headerResults = make([]*types.Header, len(skeleton)*MaxHeaderFetch)
	this.headerProced = 0
	this.headerOffset = from
	this.headerContCh = make(chan bool, 1)

	// Lower block numbers get higher priority (negated index).
	for idx, header := range skeleton {
		origin := from + uint64(idx*MaxHeaderFetch)
		this.headerTaskPool[origin] = header
		this.headerTaskQueue.Push(origin, -float32(origin))
	}
}

下面看看fetchParts函数

// fetchParts is the generic retrieval loop shared by header, body and receipt
// downloads. It repeatedly reserves tasks for idle peers, dispatches fetch
// requests, and feeds delivered packets back through the supplied callbacks,
// until the producer signals completion on wakeCh or the sync is cancelled.
//
// NOTE(review): this excerpt had trimmed the termination and stall checks,
// leaving `finished`/`progressed`/... assigned but never read (a Go compile
// error) and no path ever returning nil; they are restored here to match the
// upstream downloader structure.
func (this *fastSync) fetchParts(errCancel error, deliveryCh chan dataPack, deliver func(dataPack) (int, error), wakeCh chan bool,
	expire func() map[string]int, pending func() int, inFlight func() bool, throttle func() bool, reserve func(*peerConnection, int) (*fetchRequest, bool, error),
	fetchHook func([]*types.Header), fetch func(*peerConnection, *fetchRequest) error, cancel func(*fetchRequest), capacity func(*peerConnection) int,
	idle func() ([]*peerConnection, int), setIdle func(*peerConnection, int), kind string) error {
	// Periodic tick guarantees progress checks even without deliveries.
	ticker := time.NewTicker(100 * time.Millisecond)
	defer ticker.Stop()

	update := make(chan struct{}, 1)

	finished := false // set once the producer signals no further tasks
	for {
		select {
		case <-this.cancelCh:
			return errCancel

		case packet := <-deliveryCh:
			if peer := this.syncer.peers.Peer(packet.PeerId()); peer != nil {
				// Deliver the received chunk of data and check chain validity
				accepted, err := deliver(packet)
				if err == errInvalidChain {
					return err
				}
				// Anything but a stale delivery returns the peer to the idle pool.
				if err != errStaleDelivery {
					setIdle(peer, accepted)
				}
				// Issue a log to the user to see what's going on
				switch {
				case err == nil && packet.Items() == 0:
					peer.log.Trace("Requested data not delivered", "type", kind)
				case err == nil:
					peer.log.Trace("Delivered new batch of data", "type", kind, "count", packet.Stats())
				default:
					peer.log.Trace("Failed to deliver retrieved data", "type", kind, "err", err)
				}
			}
			// Blocks assembled, try to update the progress
			select {
			case update <- struct{}{}:
			default:
			}

		case cont := <-wakeCh:
			// The header fetcher sent a continuation flag, check if it's done
			if !cont {
				finished = true
			}
			// Headers arrive, try to update the progress
			select {
			case update <- struct{}{}:
			default:
			}

		case <-ticker.C:
			// Sanity check update the progress
			select {
			case update <- struct{}{}:
			default:
			}

		case <-update:
			// If there's nothing more to fetch, wait for completion or new tasks.
			if pending() == 0 {
				if !inFlight() && finished {
					log.Debug("Data fetching completed", "type", kind)
					return nil
				}
				break
			}
			// Send a download request to all idle peers, until throttled.
			progressed, throttled, running := false, false, inFlight()
			idles, total := idle()

			for _, peer := range idles {
				// Short circuit if throttling activated.
				if throttle() {
					throttled = true
					break
				}
				request, progress, err := reserve(peer, capacity(peer))
				if err != nil {
					return err
				}
				if progress {
					progressed = true
				}
				if request == nil {
					continue
				}
				if fetchHook != nil {
					fetchHook(request.Headers)
				}
				// Fetch failures are non-fatal for the loop; surface them
				// instead of silently swallowing the error.
				if err := fetch(peer, request); err != nil {
					log.Debug("Fetch request failed", "type", kind, "peer", peer.id, "err", err)
				}
				running = true
			}
			// If all peers are idle, nothing was reserved and nothing is in
			// flight while tasks remain, the sync is stalled: abort.
			if !progressed && !throttled && !running && len(idles) == total && pending() > 0 {
				return errPeersUnavailable
			}
		}
	}
}

该函数的重点在于对 update 事件的处理。idle() 函数获取空闲的连接节点,并逐个遍历。来看看 reserve(即 fillHeaderSkeleton 中定义的 reserve 闭包,内部调用 scheduler 的 ReserveHeaders)返回的 request 是什么。

// ReserveHeaders hands the given peer the next skeleton-fill task it has not
// previously failed on, registering it in the pending pool. Returns nil when
// the peer already has an outstanding request or no suitable task remains.
func (this *scheduler) ReserveHeaders(p *peerConnection, count int) *fetchRequest {
	this.lock.Lock()
	defer this.lock.Unlock()

	// One outstanding header request per peer at a time.
	if _, busy := this.headerPendPool[p.id]; busy {
		return nil
	}
	// Pop tasks until one is found that this peer has not failed before.
	var (
		target  uint64
		retries []uint64
	)
	for target == 0 && !this.headerTaskQueue.Empty() {
		item, _ := this.headerTaskQueue.Pop()
		origin := item.(uint64)
		if misses := this.headerPeerMiss[p.id]; misses != nil {
			if _, failed := misses[origin]; failed {
				retries = append(retries, origin)
				continue
			}
		}
		target = origin
	}
	// Requeue the skipped batches so other peers can pick them up.
	for _, origin := range retries {
		this.headerTaskQueue.Push(origin, -float32(origin))
	}
	// Nothing assignable to this peer.
	if target == 0 {
		return nil
	}
	// Assemble, register and return the download request.
	req := &fetchRequest{
		Peer: p,
		From: target,
		Time: time.Now(),
	}
	this.headerPendPool[p.id] = req
	return req
}

headerTaskQueue在这里起作用了,因此这里取出来的是1。所以request.from为1。

继续看,fetchHook 传入的为 nil,不用管。接下来是 fetch(peer, request),fetch 调用的是 p.FetchHeaders(req.From, MaxHeaderFetch),而 FetchHeaders 内部执行 go p.peer.RequestHeadersByNumber(from, count, 0, false),代码如下

// FetchHeaders asynchronously requests `count` contiguous headers starting at
// block number `from` (skip 0, ascending). It always returns nil immediately;
// the response arrives later through the peer's header delivery path.
func (p *peerConnection) FetchHeaders(from uint64, count int) error {
	// Record the dispatch time so delivery latency/timeouts can be measured.
	p.headerStarted = time.Now()
	// Issue the header retrieval request (absolute, upwards, without gaps)
	go p.peer.RequestHeadersByNumber(from, count, 0, false)

	return nil
}

至此,from=1,count=192,skip=0,所以此时的请求将返回从1到192号的块。

下次请求则from=193,count=192,skip=0,所以返回从193到384号的块,多次请求,即可将1000个块全部同步下来。

至此,header同步完成。header的处理下文介绍。

本文结束。