dmitri.shuralyov.com/go/generated/...

update implementation for 2020 spec changes dmitri.shuralyov.com/go/generated#1

Mergeddmitshur opened this change 5 months ago
Patch Set 1
dmitshur committed 7 months ago commit 3cfc9a16f9e21bac6a05cc430757ddcd65448d8f
Commit Message
FileFile
@@ -0,0 +1,9 @@
1
Parent:     b1254a4 (remove comment about API being undecided)
2
Author:     Dmitri Shuralyov <dmitri@shuralyov.com>
3
AuthorDate: Mon Jun 21 00:40:49 2021 -0400
4
Commit:     Dmitri Shuralyov <dmitri@shuralyov.com>
5
CommitDate: Sun Aug 1 13:54:44 2021 -0400
6

7
update implementation for 2020 spec changes
dominikh commented 1 month ago (Patch Set 1)

Should capitalize sentence :P

Write Preview Markdown
dmitshur commented 1 month ago (Patch Set 1)

I'm following the Go style for commit messages, which chooses to do:

A rule of thumb is that it should be written so to complete the sentence "This change modifies Go to _____." That means it does not start with a capital letter, is not a complete sentence, and actually summarizes the result of the change.

(From https://go.dev/doc/contribute#first_line and emphasis mine.)

Write Preview Markdown
8

9
Fixes issue 1.
fuzz_test.go
FileFile
@@ -0,0 +1,42 @@
1
//go:build gofuzzbeta
2
// +build gofuzzbeta
dmitshur commented 1 month ago (Patch Set 1)

Go fuzzing support is out of beta and no longer uses this build constraint. It should be replaced with go1.18 by now.

Done in PS 3.

Write Preview Markdown
3

4
package generated_test
5

6
import (
7
	"regexp"
8
	"strings"
9
	"testing"
10

11
	"dmitri.shuralyov.com/go/generated"
12
)
13

14
func FuzzParse(f *testing.F) {
15
	f.Add(`// stuff
16

17
// Code generated by tool; DO NOT EDIT.
18
// yes really
19
/*
20
still so
21
even
22
after
23
this
24
*/
25
// stuff
26

27
// +build !dev
28

29
// Package comment.
30
package p
31
`)
32
	r := regexp.MustCompile(`(^|\n)// Code generated .* DO NOT EDIT\.(\n|$)`)
33
	f.Fuzz(func(t *testing.T, src string) {
34
		has, err := generated.Parse(strings.NewReader(src))
35
		if err != nil {
36
			t.Fatalf("Parse failed to parse the source file %q: %v", src, err)
37
		}
38
		if has && !r.MatchString(src) {
39
			t.Errorf("Parse reported positively yet can't find match in %q", src)
40
		}
41
	})
42
}
generated.go
FileFile
@@ -1,65 +1,119 @@
1
// Package generated provides a function that parses a Go file and reports
1
// Package generated provides a function that parses a source file and reports
22
// whether it contains a "// Code generated … DO NOT EDIT." line comment.
33
//
44
// It implements the specification at https://golang.org/s/generatedcode.
dmitshur commented 1 month ago (Patch Set 1)

Shortened 'golang.org' to 'go.dev' here, and on line 24.

Write Preview Markdown
55
//
66
// The first priority is correctness (no false negatives, no false positives).
7
// It must return accurate results even if the input Go source code is not gofmted.
7
// It must return accurate results even if the input source code is formatted
8
// unconventionally.
89
//
910
// The second priority is performance. The current version uses bufio.Reader and
1011
// ReadBytes. Performance can be optimized further by using lower level I/O
11
// primitives and allocating less. That can be explored later. A lot of the time
12
// is spent on reading the entire file without being able to stop early,
13
// since the specification allows the comment to appear anywhere in the file.
12
// primitives and allocating less. That can be explored later.
1413
package generated
1514

1615
import (
1716
	"bufio"
1817
	"bytes"
1918
	"io"
2019
	"os"
2120
)
2221

23
// Parse parses the source code of a single Go source file
24
// provided via src, and reports whether the file contains
25
// a "// Code generated ... DO NOT EDIT." line comment
22
// Parse parses a source file provided via src, and reports whether
23
// the file contains a "// Code generated ... DO NOT EDIT." line comment
2624
// matching the specification at https://golang.org/s/generatedcode:
2725
//
28
// 	Generated files are marked by a line of text that matches
29
// 	the regular expression, in Go syntax:
26
// 	To convey to humans and machine tools that code is generated,
27
// 	generated source should have a line that matches the following
28
// 	regular expression (in Go syntax):
3029
//
3130
// 		^// Code generated .* DO NOT EDIT\.$
3231
//
33
// 	The .* means the tool can put whatever folderol it wants in there,
34
// 	but the comment must be a single line and must start with Code generated
35
// 	and end with DO NOT EDIT., with a period.
36
//
37
// 	The text may appear anywhere in the file.
32
// 	This line must appear before the first non-comment, non-blank
33
// 	text in the file.
3834
func Parse(src io.Reader) (hasGeneratedComment bool, err error) {
3935
	br := bufio.NewReader(src)
36
	var inBlock bool // Whether we're inside a multi-line /* */ comment block.
4037
	for {
4138
		s, err := br.ReadBytes('\n')
4239
		if err == io.EOF {
43
			return containsComment(s), nil
40
			return containsGenComment(s), nil
4441
		} else if err != nil {
4542
			return false, err
4643
		}
4744
		if len(s) >= 2 && s[len(s)-2] == '\r' {
4845
			s = s[:len(s)-2] // Trim "\r\n".
4946
		} else {
5047
			s = s[:len(s)-1] // Trim "\n".
5148
		}
52
		if containsComment(s) {
49
		if containsGenComment(s) {
5350
			return true, nil
51
		} else if containsNonComment(s, &inBlock) {
52
			return false, nil
53
		}
54
	}
55
}
56

57
// containsNonComment reports whether a line of source code s (without newline)
58
// contains something other than a line comment, block comment, or white space.
59
// *inBlock, given at the start of the line and updated at the end of the line,
dominikh commented 1 month ago (Patch Set 1)

given at the start of the line and updated at the end of the line

I know what you mean, but I find the wording confusing. To be honest I'd probably not use a pointer and instead return the new state of inBlock.

Write Preview Markdown
dmitshur commented 1 month ago (Patch Set 1) · edited

I replaced the confusing description here with a better comment for var inBlock bool in Parse.

When writing this code originally, I started with returning the new inBlock value. It got awkward because containsNonComment already returns a bool, so it started needing named result parameters, but inBlock couldn't be reused, and another name becomes confusing. Similarly, dealing with two results in the caller made it clunky.

I also explored making containsNonComment a method, so it could just update its field, but that added more verbosity compared to just making this one bool a pointer. So, I don't like using a pointer either, but not using it didn't end up working out better.

Ultimately, there's not much point for Parse to break input into lines via ReadLines('\n') and process them separately. By now it makes more sense to just do everything in one pass, reading byte-by-byte, while keeping track of /* */ and // style comments, until the first non-comment byte. When that optimization happens, the pointer will go away.
Write Preview Markdown
60
// represents whether we're inside a multi-line /* */ comment block.
61
func containsNonComment(s []byte, inBlock *bool) bool {
62
	type state int
63
	const (
64
		normal state = iota
65
		normalSlash
66
		block
67
		blockStar
68
	)
69
	var p state // Parser state.
70
	if *inBlock {
71
		p = block
72
	}
73
	for _, c := range s {
74
		switch p {
75
		case normal:
76
			switch c {
77
			case ' ', '\t': // White space, ignore.
78
			case '/':
79
				p = normalSlash
80
			default: // Non-comment found.
81
				return true // Return early and don't bother updating *inBlock since it won't matter.
82
			}
83
		case normalSlash:
84
			switch c {
85
			case '/': // Start of inline comment, "//". Ignore the rest of the line.
86
				*inBlock = false
87
				return false
88
			case '*': // Start of comment block, "/*".
89
				p = block
90
			default: // Non-comment found.
91
				return true // Return early and don't bother updating *inBlock since it won't matter.
92
			}
93
		case block:
94
			switch c {
95
			case '*':
96
				p = blockStar
97
			}
98
		case blockStar:
99
			switch c {
100
			case '/': // End of comment block, "*/".
101
				p = normal
102
			case '*': // Another '*', stay in blockStar.
103
			default:
104
				p = block
105
			}
54106
		}
55107
	}
108
	*inBlock = p >= block
109
	return p == normalSlash
56110
}
57111

58
// containsComment reports whether a line of Go source code s (without newline character)
112
// containsGenComment reports whether a line of source code s (without newline)
59113
// contains the generated comment.
60
func containsComment(s []byte) bool {
114
func containsGenComment(s []byte) bool {
61115
	return len(s) >= len(prefix)+len(suffix) &&
62116
		bytes.HasPrefix(s, prefix) &&
63117
		bytes.HasSuffix(s, suffix)
64118
}
65119

generated_test.go
FileFile
@@ -24,18 +24,22 @@ func TestParseFile(t *testing.T) {
2424
		{"positive.6.src", true},
2525
		{"positive.7.src", true},
2626
		{"positive.8.src", true},
2727
		{"positive.9.src", true},
2828
		{"positive.10.src", true},
29
		{"positive.11.src", true},
30
		{"positive.12.src", true},
3129

3230
		// Negative matches.
3331
		{"negative.0.src", false},
3432
		{"negative.1.src", false},
3533
		{"negative.2.src", false},
3634
		{"negative.3.src", false},
35
		{"negative.4.src", false},
36
		{"negative.5.src", false},
37
		{"../generated.go", false},
38
		{"../generated_test.go", false},
39
		{"../fuzz_test.go", false},
40
		{"../LICENSE", false},
3741
	}
3842
	for _, tc := range tests {
3943
		tc := tc
4044
		t.Run(tc.name, func(t *testing.T) {
4145
			hasGeneratedComment, err := generated.ParseFile(filepath.Join("testdata", tc.name))
testdata/negative.3.src
FileFile
@@ -57,6 +57,5 @@ func (s service) List(ctx context.Context, repo issues.RepoSpec, opt issues.Issu
5757

5858
	return is, nil
5959
}
6060

6161
// Doesn't match because there's no generated comment.
62
// But we still need to read the entire file to be sure.
testdata/negative.4.src
FileFile
@@ -0,0 +1,8 @@
1
package p
2

3
/*
4
It can no longer be anywhere in the file.
5
Even at the end, without a final newline.
6
*/
7

8
// Code generated by tool; DO NOT EDIT.
testdata/positive.12.src → testdata/negative.5.src
No modification.
testdata/positive.11.src
FileFile
@@ -1,8 +0,0 @@
1
package p
2

3
/*
4
It can be anywhere in the file.
5
Even at the end, without a final newline.
6
*/
7

8
// Code generated by tool; DO NOT EDIT.