自定义Colly的LogDebugger设置

  1. 默认输出到项目的colly_log.csv文件
  2. 增加标题
  3. 可设置要输出的事件类型
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
// main crawls nine httpbin pages (page=1 through page=9) with a collector
// whose debugger records only request and response events.
func main() {
	// Output is deliberately left unset so the debugger chooses its own
	// default destination; uncomment the line below to log to stdout instead.
	debugger := &debug.LogDebugger{
		// Output: os.Stdout,
		EventFilter: debug.EventFilter{
			Request:  true,
			Response: true,
		},
	}
	c := colly.NewCollector(colly.Debugger(debugger))

	for i := 1; i < 10; i++ {
		url := fmt.Sprintf("https://httpbin.org/get?page=%d", i)
		// Report failed visits instead of silently dropping the error, and
		// keep going so one bad page does not stop the remaining requests.
		if err := c.Visit(url); err != nil {
			fmt.Printf("visit %s: %v\n", url, err)
		}
	}

	// NOTE(review): the collector is not configured as asynchronous here, so
	// Wait presumably returns immediately - confirm against the colly docs.
	c.Wait()
}
1
2
3
LogID,CollectorID,RequestID,EventType,EventValues,CostTime
logid_000001,collectorid_1,requestid_1,request,map["url":"https://httpbin.org/get?page=9"],520.6µs
logid_000017,collectorid_1,requestid_1,response,map["status":"OK" "url":"https://httpbin.org/get?page=9"],819.0048ms
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
package debug

import (
	"io"
	"log"
	"os"
	"strconv"
	"sync"
	"sync/atomic"
	"time"
)

// EventFilter selects which event types LogDebugger writes to its output.
// Each field enables the event type of the matching name. The zero value
// (every field false) is treated as "no filter": all events are logged.
type EventFilter struct {
	// Request enables events whose type is "request".
	Request bool
	// Response enables events whose type is "response".
	Response bool
	// ResponseHeaders enables events whose type is "responseHeaders".
	ResponseHeaders bool
	// Scraped enables events whose type is "scraped".
	Scraped bool
}

// LogDebugger is a simple debugger that writes one CSV-style row per event
// through a standard library logger. A header row is written once, before the
// first event. EventFilter selects which event types are written; leaving it
// at its zero value logs every event. The unexported fields hold the logger
// built by Init, the running row counter, and the start time that elapsed
// times are measured from.
type LogDebugger struct {
	// Output is the log destination; anything that implements the io.Writer
	// interface can be used. Leave it nil to have Init append to the file
	// "colly_log.csv", falling back to STDERR if that file cannot be opened.
	Output io.Writer
	// Prefix appears at the beginning of each generated log line
	Prefix string
	// Flag defines the logging properties; Init passes it to log.New unchanged.
	Flag          int
	logger        *log.Logger
	counter       int32
	start         time.Time
	EventFilter   EventFilter
	headerPrinted bool       // reports whether the header row has already been written
	mu            sync.Mutex // protects the headerPrinted field
}

// Init prepares the debugger for use: it resets the event counter, records
// the start time that elapsed times are measured from, resolves the output
// destination and builds the underlying logger. It always returns nil.
//
// When Output is nil, rows are appended to the file "colly_log.csv" (created
// if it does not exist). If that file cannot be opened, a warning is logged
// and output falls back to os.Stderr.
func (l *LogDebugger) Init() error {
	// Event bumps the counter with sync/atomic, so reset it the same way.
	atomic.StoreInt32(&l.counter, 0)
	l.start = time.Now()

	if l.Output == nil {
		file, err := os.OpenFile("colly_log.csv", os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0666)
		if err != nil {
			// Best effort: keep debugging usable, but say why the file is
			// not being written instead of switching destinations silently.
			log.Printf("colly debug: cannot open colly_log.csv, logging to stderr instead: %v", err)
			l.Output = os.Stderr
		} else {
			l.Output = file
			// The file is opened in append mode. If it already has content,
			// mark the header as written so a new run does not insert a
			// second header row in the middle of the CSV data.
			if info, statErr := file.Stat(); statErr == nil && info.Size() > 0 {
				l.mu.Lock()
				l.headerPrinted = true
				l.mu.Unlock()
			}
			// NOTE(review): the file handle is never closed. Closing it
			// needs a way to remember that the debugger owns the file (for
			// example a dedicated field plus a Close method).
		}
	}

	l.logger = log.New(l.Output, l.Prefix, l.Flag)
	return nil
}

// Event records e as one CSV-style row on the debugger's logger.
//
// The header row is written once, on the first call, whether or not that
// first event passes the filter. An event is logged when EventFilter is the
// zero value, or when the flag matching its type ("request", "response",
// "responseHeaders", "scraped") is set. With a non-zero filter, any other
// event type is dropped.
func (l *LogDebugger) Event(e *Event) {
	// The mutex guards headerPrinted so the header is emitted only once, even
	// if Event is called concurrently.
	l.mu.Lock()
	if !l.headerPrinted {
		l.logger.Println("LogID,CollectorID,RequestID,EventType,EventValues,CostTime")
		l.headerPrinted = true
	}
	l.mu.Unlock()

	// An unset filter means "log everything"; otherwise consult the flag that
	// corresponds to this event's type.
	filter := l.EventFilter
	enabled := filter == (EventFilter{})
	if !enabled {
		switch e.Type {
		case "request":
			enabled = filter.Request
		case "response":
			enabled = filter.Response
		case "responseHeaders":
			enabled = filter.ResponseHeaders
		case "scraped":
			enabled = filter.Scraped
		}
	}
	if !enabled {
		return
	}

	// The counter is bumped only for rows that are actually written, so log
	// IDs stay contiguous. The last column is the time elapsed since start.
	seq := atomic.AddInt32(&l.counter, 1)
	reqID := strconv.FormatUint(uint64(e.RequestID), 10)
	l.logger.Printf("logid_%06d,collectorid_%d,requestid_%s,%s,%q,%s\n", seq, e.CollectorID, reqID, e.Type, e.Values, time.Since(l.start))
}