dmitri.shuralyov.com/go/generated/...

update implementation for 2020 spec changes

With the new specification, it's possible to stop reading a file early
even when the generated comment is absent, since the generated comment
may no longer appear anywhere in the file. Begin parsing /* */ comment
blocks, which may span multiple lines, to be able to detect
when we run into the first non-comment, non-blank text in the file.

It was mentioned in https://go.dev/issue/41196#issuecomment-686607452:

> [...] the primary focus for these comments is Go code
> and more generally Go package sources.
> Go package sources today can be Go, assembly, C, C++, Objective C,
> SWIG, and Fortran.
> The comment being discussed here, like the //go:build comment,
> handles all of these except Fortran, which seems fine.

The initial scope of this package was to support parsing Go source only,
but it happens to work well with many other source types that are found
in Go packages. Replace mentions of "a Go source file" in documentation
and code with "a source file", to make it apply more generally. If this
will mean we need to parse comments differently depending on the source
type in the future, we can revisit this decision. Let's see how it goes.

Fixes issue 1.

Reviewed-On: https://dmitri.shuralyov.com/go/generated/...$changes/1
dmitshur committed 1 year ago commit c5b6cf572ec544b54bfbb13fb97e57907e0aec5b
fuzz_test.go
@@ -0,0 +1,42 @@
//go:build go1.18
// +build go1.18

package generated_test

import (
	"regexp"
	"strings"
	"testing"

	"dmitri.shuralyov.com/go/generated"
)

// FuzzParse fuzzes generated.Parse and cross-checks every positive result
// against a regular expression oracle derived from the specification.
//
// Parse must never return an error for an in-memory reader, and whenever it
// reports a generated comment, the oracle must be able to find a matching
// line in the same input.
func FuzzParse(f *testing.F) {
	f.Add(`// stuff

// Code generated by tool; DO NOT EDIT.
// yes really
/*
still so
even
after
this
*/
// stuff

// +build !dev

// Package comment.
package p
`)
	// Seed a CRLF-terminated input so the "\r\n" trimming path is covered.
	f.Add("// Code generated by tool; DO NOT EDIT.\r\npackage p\r\n")

	// Parse trims a trailing "\r\n" as well as "\n" before matching a line,
	// so the oracle must accept both line endings after the final period.
	// Requiring a bare "\n" would flag valid CRLF inputs as false positives.
	r := regexp.MustCompile(`(^|\n)// Code generated .* DO NOT EDIT\.(\r?\n|$)`)
	f.Fuzz(func(t *testing.T, src string) {
		has, err := generated.Parse(strings.NewReader(src))
		if err != nil {
			t.Fatalf("Parse failed to parse the source file %q: %v", src, err)
		}
		if has && !r.MatchString(src) {
			t.Errorf("Parse reported positively yet can't find match in %q", src)
		}
	})
}
generated.go
@@ -1,65 +1,119 @@
// Package generated provides a function that parses a Go file and reports
// Package generated provides a function that parses a source file and reports
// whether it contains a "// Code generated … DO NOT EDIT." line comment.
//
// It implements the specification at https://golang.org/s/generatedcode.
// It implements the specification at https://go.dev/s/generatedcode.
//
// The first priority is correctness (no false negatives, no false positives).
// It must return accurate results even if the input Go source code is not gofmted.
// It must return accurate results even if the input source code is formatted
// unconventionally.
//
// The second priority is performance. The current version uses bufio.Reader and
// ReadBytes. Performance can be optimized further by using lower level I/O
// primitives and allocating less. That can be explored later. A lot of the time
// is spent on reading the entire file without being able to stop early,
// since the specification allows the comment to appear anywhere in the file.
// primitives and allocating less. That can be explored later.
package generated

import (
	"bufio"
	"bytes"
	"io"
	"os"
)

// Parse parses the source code of a single Go source file
// provided via src, and reports whether the file contains
// a "// Code generated ... DO NOT EDIT." line comment
// matching the specification at https://golang.org/s/generatedcode:
// Parse parses a source file provided via src, and reports whether
// the file contains a "// Code generated ... DO NOT EDIT." line comment
// matching the specification at https://go.dev/s/generatedcode:
//
// 	Generated files are marked by a line of text that matches
// 	the regular expression, in Go syntax:
// 	To convey to humans and machine tools that code is generated,
// 	generated source should have a line that matches the following
// 	regular expression (in Go syntax):
//
// 		^// Code generated .* DO NOT EDIT\.$
//
// 	The .* means the tool can put whatever folderol it wants in there,
// 	but the comment must be a single line and must start with Code generated
// 	and end with DO NOT EDIT., with a period.
//
// 	The text may appear anywhere in the file.
// 	This line must appear before the first non-comment, non-blank
// 	text in the file.
func Parse(src io.Reader) (hasGeneratedComment bool, err error) {
	br := bufio.NewReader(src)
	// Use inBlock to track whether we're inside a multi-line
	// /* */ comment block across calls to containsNonComment.
	var inBlock bool
	for {
		// Read one line at a time, including its trailing '\n' when present.
		s, err := br.ReadBytes('\n')
		if err == io.EOF {
			// Last (possibly empty) line with no trailing newline: nothing
			// follows it, so the result depends only on this line.
			return containsComment(s), nil
			return containsGenComment(s), nil
		} else if err != nil {
			return false, err
		}
		if len(s) >= 2 && s[len(s)-2] == '\r' {
			s = s[:len(s)-2] // Trim "\r\n".
		} else {
			s = s[:len(s)-1] // Trim "\n".
		}
		// The generated-comment check runs first and does not consult inBlock.
		if containsComment(s) {
		if containsGenComment(s) {
			return true, nil
		} else if containsNonComment(s, &inBlock) {
			// Reached the first non-comment, non-blank text, so stop
			// reading early instead of scanning the rest of the input.
			return false, nil
		}
	}
}

// containsNonComment scans a single line s (newline already removed) and
// reports whether it holds anything besides spaces, tabs, "//" line comments,
// and "/* */" block comments.
//
// *inBlock says whether the line starts inside a block comment. When the
// whole line is scanned, or a "//" comment is reached, *inBlock is updated
// to say whether the next line starts inside a block comment. When
// non-comment text is found mid-line, the function returns true immediately
// and leaves *inBlock untouched, since the caller stops reading at that point.
func containsNonComment(s []byte, inBlock *bool) bool {
	const (
		outside      = iota // Not inside any comment.
		outsideSlash        // Not inside a comment; previous byte was '/'.
		inside              // Inside a /* */ block.
		insideStar          // Inside a /* */ block; previous byte was '*'.
	)
	st := outside
	if *inBlock {
		st = inside
	}
	for i := 0; i < len(s); i++ {
		ch := s[i]
		switch {
		case st == outside && (ch == ' ' || ch == '\t'):
			// Blank, keep scanning.
		case st == outside && ch == '/':
			st = outsideSlash
		case st == outside:
			// Anything else outside a comment is non-comment text.
			return true
		case st == outsideSlash && ch == '/':
			// "//" swallows the remainder of the line.
			*inBlock = false
			return false
		case st == outsideSlash && ch == '*':
			// "/*" opens a block comment.
			st = inside
		case st == outsideSlash:
			// A lone '/' followed by something else is non-comment text.
			return true
		case ch == '*':
			// Only reachable inside a block: a '*' may begin the closing "*/".
			st = insideStar
		case st == insideStar && ch == '/':
			// "*/" closes the block comment.
			st = outside
		default:
			// Any other byte inside a block keeps us in the block.
			st = inside
		}
	}
	*inBlock = st == inside || st == insideStar
	// A '/' left dangling at the end of the line is non-comment text.
	return st == outsideSlash
}

// containsComment reports whether a line of Go source code s (without newline character)
// containsGenComment reports whether a line of source code s (without newline)
// contains the generated comment.
//
// The whole line must match: s has to start with the package-level prefix
// and end with the package-level suffix. The length check ensures the two
// cannot overlap within a line that is too short to hold both.
func containsComment(s []byte) bool {
func containsGenComment(s []byte) bool {
	return len(s) >= len(prefix)+len(suffix) &&
		bytes.HasPrefix(s, prefix) &&
		bytes.HasSuffix(s, suffix)
}

generated_test.go
@@ -24,18 +24,22 @@ func TestParseFile(t *testing.T) {
		{"positive.6.src", true},
		{"positive.7.src", true},
		{"positive.8.src", true},
		{"positive.9.src", true},
		{"positive.10.src", true},
		{"positive.11.src", true},
		{"positive.12.src", true},

		// Negative matches.
		{"negative.0.src", false},
		{"negative.1.src", false},
		{"negative.2.src", false},
		{"negative.3.src", false},
		{"negative.4.src", false},
		{"negative.5.src", false},
		{"../generated.go", false},
		{"../generated_test.go", false},
		{"../fuzz_test.go", false},
		{"../LICENSE", false},
	}
	for _, tc := range tests {
		tc := tc
		t.Run(tc.name, func(t *testing.T) {
			hasGeneratedComment, err := generated.ParseFile(filepath.Join("testdata", tc.name))
testdata/negative.3.src
@@ -57,6 +57,5 @@ func (s service) List(ctx context.Context, repo issues.RepoSpec, opt issues.Issu

	return is, nil
}

// Doesn't match because there's no generated comment.
// But we still need to read the entire file to be sure.
testdata/negative.4.src
@@ -0,0 +1,8 @@
package p

/*
It can no longer be anywhere in the file.
Even at the end, without a final newline.
*/

// Code generated by tool; DO NOT EDIT.
\ No newline at end of file
testdata/positive.12.src → testdata/negative.5.src
No modification.
testdata/positive.11.src
@@ -1,8 +0,0 @@
package p

/*
It can be anywhere in the file.
Even at the end, without a final newline.
*/

// Code generated by tool; DO NOT EDIT.
\ No newline at end of file