dmitri.shuralyov.com/go/generated/...

update implementation for 2020 spec changes dmitri.shuralyov.com/go/generated#1

Merged — dmitshur opened this change 5 months ago
Commit Message
FileFile
@@ -0,0 +1,33 @@
1
Parent:     b1254a4 (remove comment about API being undecided)
2
Author:     Dmitri Shuralyov <dmitri@shuralyov.com>
3
AuthorDate: Mon Jun 21 00:40:49 2021 -0400
4
Commit:     Dmitri Shuralyov <dmitri@shuralyov.com>
5
CommitDate: Mon Dec 27 18:22:25 2021 -0500
6

7
update implementation for 2020 spec changes
8

9
With the new specification, it's possible to stop reading a file early
10
even when the generated comment is absent, since the generated comment
11
may no longer appear anywhere in the file. Begin parsing /* */ comment
12
blocks, which may span multiple lines, to be able to detect
13
when we run into the first non-comment, non-blank text in the file.
14

15
It was mentioned in https://go.dev/issue/41196#issuecomment-686607452:
16

17
> [...] the primary focus for these comments is Go code
18
> and more generally Go package sources.
19
> Go package sources today can be Go, assembly, C, C++, Objective C,
20
> SWIG, and Fortran.
21
> The comment being discussed here, like the //go:build comment,
22
> handles all of these except Fortran, which seems fine.
23

24
The initial scope of this package was to support parsing Go source only,
25
but it happens to work well with many other source types that are found
26
in Go packages. Replace mentions of "a Go source file" in documentation
27
and code with "a source file", to make it apply more generally. If this
28
will mean we need to parse comments differently depending on the source
29
type in the future, we can revisit this decision. Let's see how it goes.
30

31
Fixes issue 1.
32

33
Reviewed-On: https://dmitri.shuralyov.com/go/generated/...$changes/1
fuzz_test.go
FileFile
@@ -0,0 +1,42 @@
1
//go:build go1.18
2
// +build go1.18
3

4
package generated_test
5

6
import (
7
	"regexp"
8
	"strings"
9
	"testing"
10

11
	"dmitri.shuralyov.com/go/generated"
12
)
13

14
func FuzzParse(f *testing.F) {
15
	f.Add(`// stuff
16

17
// Code generated by tool; DO NOT EDIT.
18
// yes really
19
/*
20
still so
21
even
22
after
23
this
24
*/
25
// stuff
26

27
// +build !dev
28

29
// Package comment.
30
package p
31
`)
32
	r := regexp.MustCompile(`(^|\n)// Code generated .* DO NOT EDIT\.(\n|$)`)
33
	f.Fuzz(func(t *testing.T, src string) {
34
		has, err := generated.Parse(strings.NewReader(src))
35
		if err != nil {
36
			t.Fatalf("Parse failed to parse the source file %q: %v", src, err)
37
		}
38
		if has && !r.MatchString(src) {
39
			t.Errorf("Parse reported positively yet can't find match in %q", src)
40
		}
41
	})
42
}
generated.go
FileFile
@@ -1,65 +1,119 @@
1
// Package generated provides a function that parses a Go file and reports
1
// Package generated provides a function that parses a source file and reports
22
// whether it contains a "// Code generated … DO NOT EDIT." line comment.
33
//
4
// It implements the specification at https://golang.org/s/generatedcode.
4
// It implements the specification at https://go.dev/s/generatedcode.
55
//
66
// The first priority is correctness (no false negatives, no false positives).
7
// It must return accurate results even if the input Go source code is not gofmted.
7
// It must return accurate results even if the input source code is formatted
8
// unconventionally.
89
//
910
// The second priority is performance. The current version uses bufio.Reader and
1011
// ReadBytes. Performance can be optimized further by using lower level I/O
11
// primitives and allocating less. That can be explored later. A lot of the time
12
// is spent on reading the entire file without being able to stop early,
13
// since the specification allows the comment to appear anywhere in the file.
12
// primitives and allocating less. That can be explored later.
1413
package generated
1514

1615
import (
1716
	"bufio"
1817
	"bytes"
1918
	"io"
2019
	"os"
2120
)
2221

23
// Parse parses the source code of a single Go source file
24
// provided via src, and reports whether the file contains
25
// a "// Code generated ... DO NOT EDIT." line comment
26
// matching the specification at https://golang.org/s/generatedcode:
22
// Parse parses a source file provided via src, and reports whether
23
// the file contains a "// Code generated ... DO NOT EDIT." line comment
24
// matching the specification at https://go.dev/s/generatedcode:
2725
//
28
// 	Generated files are marked by a line of text that matches
29
// 	the regular expression, in Go syntax:
26
// 	To convey to humans and machine tools that code is generated,
27
// 	generated source should have a line that matches the following
28
// 	regular expression (in Go syntax):
3029
//
3130
// 		^// Code generated .* DO NOT EDIT\.$
3231
//
33
// 	The .* means the tool can put whatever folderol it wants in there,
34
// 	but the comment must be a single line and must start with Code generated
35
// 	and end with DO NOT EDIT., with a period.
36
//
37
// 	The text may appear anywhere in the file.
32
// 	This line must appear before the first non-comment, non-blank
33
// 	text in the file.
3834
func Parse(src io.Reader) (hasGeneratedComment bool, err error) {
3935
	br := bufio.NewReader(src)
36
	// Use inBlock to track whether we're inside a multi-line
37
	// /* */ comment block across calls to containsNonComment.
38
	var inBlock bool
4039
	for {
4140
		s, err := br.ReadBytes('\n')
4241
		if err == io.EOF {
43
			return containsComment(s), nil
42
			return containsGenComment(s), nil
4443
		} else if err != nil {
4544
			return false, err
4645
		}
4746
		if len(s) >= 2 && s[len(s)-2] == '\r' {
4847
			s = s[:len(s)-2] // Trim "\r\n".
4948
		} else {
5049
			s = s[:len(s)-1] // Trim "\n".
5150
		}
52
		if containsComment(s) {
51
		if containsGenComment(s) {
5352
			return true, nil
53
		} else if containsNonComment(s, &inBlock) {
54
			return false, nil
55
		}
56
	}
57
}
58

59
// containsNonComment reports whether a line of source code s (without newline)
60
// contains something other than a line comment, block comment, or white space.
61
func containsNonComment(s []byte, inBlock *bool) bool {
62
	type state int
63
	const (
64
		normal state = iota
65
		normalSlash
66
		block
67
		blockStar
68
	)
69
	var p state // Parser state.
70
	if *inBlock {
71
		p = block
72
	}
73
	for _, c := range s {
74
		switch p {
75
		case normal:
76
			switch c {
77
			case ' ', '\t': // White space, ignore.
78
			case '/':
79
				p = normalSlash
80
			default: // Non-comment found.
81
				return true // Return early and don't bother updating *inBlock since it won't matter.
82
			}
83
		case normalSlash:
84
			switch c {
85
			case '/': // Start of inline comment, "//". Ignore the rest of the line.
86
				*inBlock = false
87
				return false
88
			case '*': // Start of comment block, "/*".
89
				p = block
90
			default: // Non-comment found.
91
				return true // Return early and don't bother updating *inBlock since it won't matter.
92
			}
93
		case block:
94
			switch c {
95
			case '*':
96
				p = blockStar
97
			}
98
		case blockStar:
99
			switch c {
100
			case '/': // End of comment block, "*/".
101
				p = normal
102
			case '*': // Another '*', stay in blockStar.
103
			default:
104
				p = block
105
			}
54106
		}
55107
	}
108
	*inBlock = p >= block
109
	return p == normalSlash
56110
}
57111

58
// containsComment reports whether a line of Go source code s (without newline character)
112
// containsGenComment reports whether a line of source code s (without newline)
59113
// contains the generated comment.
60
func containsComment(s []byte) bool {
114
func containsGenComment(s []byte) bool {
61115
	return len(s) >= len(prefix)+len(suffix) &&
62116
		bytes.HasPrefix(s, prefix) &&
63117
		bytes.HasSuffix(s, suffix)
64118
}
65119

generated_test.go
FileFile
@@ -24,18 +24,22 @@ func TestParseFile(t *testing.T) {
2424
		{"positive.6.src", true},
2525
		{"positive.7.src", true},
2626
		{"positive.8.src", true},
2727
		{"positive.9.src", true},
2828
		{"positive.10.src", true},
29
		{"positive.11.src", true},
30
		{"positive.12.src", true},
3129

3230
		// Negative matches.
3331
		{"negative.0.src", false},
3432
		{"negative.1.src", false},
3533
		{"negative.2.src", false},
3634
		{"negative.3.src", false},
35
		{"negative.4.src", false},
36
		{"negative.5.src", false},
37
		{"../generated.go", false},
38
		{"../generated_test.go", false},
39
		{"../fuzz_test.go", false},
40
		{"../LICENSE", false},
3741
	}
3842
	for _, tc := range tests {
3943
		tc := tc
4044
		t.Run(tc.name, func(t *testing.T) {
4145
			hasGeneratedComment, err := generated.ParseFile(filepath.Join("testdata", tc.name))
testdata/negative.3.src
FileFile
@@ -57,6 +57,5 @@ func (s service) List(ctx context.Context, repo issues.RepoSpec, opt issues.Issu
5757

5858
	return is, nil
5959
}
6060

6161
// Doesn't match because there's no generated comment.
62
// But we still need to read the entire file to be sure.
testdata/negative.4.src
FileFile
@@ -0,0 +1,8 @@
1
package p
2

3
/*
4
It can no longer be anywhere in the file.
5
Even at the end, without a final newline.
6
*/
7

8
// Code generated by tool; DO NOT EDIT.
testdata/positive.12.src → testdata/negative.5.src
No modification.
testdata/positive.11.src
FileFile
@@ -1,8 +0,0 @@
1
package p
2

3
/*
4
It can be anywhere in the file.
5
Even at the end, without a final newline.
6
*/
7

8
// Code generated by tool; DO NOT EDIT.