dmitri.shuralyov.com/go/generated

update for specification change

The specification proposal at golang.org/issue/13560 has changed.
This updates the parser implementation for the latest spec
as described at https://golang.org/s/generatedcode:

	Generated files are marked by a line of text that matches
	the regular expression, in Go syntax:

		^// Code generated .* DO NOT EDIT\.$

	The .* means the tool can put whatever folderol it wants in there,
	but the comment must be a single line and must start with Code generated
	and end with DO NOT EDIT., with a period.

	The text may appear anywhere in the file.

The new implementation for this spec is simpler. It uses bufio.Reader
and ReadBytes. The performance can be optimized further by using lower
level IO primitives and allocating less, but the first priority is
getting a correct parser out for the latest spec. It can be optimized
later as needed.

The main cause of inefficiency is having to read the entire file,
without being able to stop early, because the new specification
allows the comment to appear anywhere in the file.

GitHub-Pull-Request: https://github.com/shurcooL/go/pull/29
dmitshur committed 7 years ago commit 6b8a8a102051edeb74fe6596b8cd05d2a312f0f3
Showing partial commit. Full Commit
Collapse all
generated.go
@@ -1,81 +1,76 @@
// Package generated provides a function that parses a Go file and reports
// whether it contains a "// Code generated … DO NOT EDIT." line comment.
//
// It's intended to stay up to date with the specification proposal in
// https://golang.org/issues/13560.
// It implements the specification at https://golang.org/s/generatedcode.
//
// The first priority is correctness (no false negatives, no false positives).
// It must return accurate results even if the input Go source code is not gofmted.
// The second priority is performance. An earlier version was implemented
// via go/parser; the current version uses bufio.Reader and ReadBytes.
// Performance can be optimized further by using lower level I/O
// primitives and allocating less. That can be explored later. A lot of the time
// is spent on reading the entire file without being able to stop early,
// since the specification allows the comment to appear anywhere in the file.
//
// The exact API is undecided and can change. The current API style is somewhat
// based on go/parser, but that may not be the best approach.
package generated

import (
	"go/parser"
	"go/token"
	"strings"
	"bufio"
	"bytes"
	"io"
	"os"
)

// ParseFile parses the source code of a single Go source file
// specified by filename, and reports whether the file contains
// a "// Code generated ... DO NOT EDIT." line comment
// matching the specification proposal in
// https://golang.org/issues/13560#issuecomment-277804473:
//
// 	The text must appear as the first line of a properly formatted Go // comment,
// 	and that comment must appear before but not be attached to the package clause
// 	and before any /* */ comment. This is similar to the rules for build tags.
// matching the specification at https://golang.org/s/generatedcode:
//
// 	The comment line must match the case-sensitive regular expression (in Go syntax):
// 	Generated files are marked by a line of text that matches
// 	the regular expression, in Go syntax:
//
// 		^// Code generated .* DO NOT EDIT\.$
//
// 	The .* means the tool can put whatever folderol it wants in there,
// 	but the comment must be a single line and must start with Code generated
// 	and end with DO NOT EDIT., with a period.
//
// If the source couldn't be read, the error indicates the specific
// failure. If the source was read but syntax errors were found,
// the result is estimated on a best effort basis from a partial AST.
// 	The text may appear anywhere in the file.
//
// TODO: Decide on best policy of what to do in case of syntax errors
// being encountered during parsing.
// If the source couldn't be read, the error indicates the specific
// failure.
func ParseFile(filename string) (hasGeneratedComment bool, err error) {
	fset := token.NewFileSet()
	f, err := parser.ParseFile(fset, filename, nil, parser.PackageClauseOnly|parser.ParseComments)
	if f == nil { // Can only happen when err != nil.
	f, err := os.Open(filename)
	if err != nil {
		return false, err
	}
Outer:
	for _, cg := range f.Comments {
		if cg == f.Doc {
			// If we've reached the package comment, don't look any further,
			// because the generated comment must be before that.
			break
	defer f.Close()
	br := bufio.NewReader(f)
	for {
		s, err := br.ReadBytes('\n')
		if err == io.EOF {
			return containsComment(s), nil
		} else if err != nil {
			return false, err
		}
		// Check if this comment group is a match.
		// The text must appear as the first line of a properly formatted line comment (//-style).
		if len(cg.List[0].Text) >= smallestMatchingComment &&
			strings.HasPrefix(cg.List[0].Text, "// Code generated ") &&
			strings.HasSuffix(cg.List[0].Text, " DO NOT EDIT.") &&
			fset.Position(cg.List[0].Pos()).Column == 1 {

		s = s[:len(s)-1] // Trim newline.
		if containsComment(s) {
			return true, nil
		}
		// Ensure none of the comments in this comment group are general comments (/*-style).
		for _, c := range cg.List {
			if strings.HasPrefix(c.Text, "/*") {
				// If we've reached a general comment (/*-style), don't look any further,
				// because the generated comment must be before that.
				break Outer
			}
		}
	}
	return false, nil
}

// smallestMatchingComment is the length in bytes of the shortest text that
// starts with "// Code generated " and ends with " DO NOT EDIT." without
// the two parts overlapping.
const smallestMatchingComment = len("// Code generated  DO NOT EDIT.")
// containsComment reports whether s is a generated-code marker line.
// s is one line of Go source code with its newline character already removed.
// It matches when it begins with prefix and ends with suffix, with the two
// not overlapping.
func containsComment(s []byte) bool {
	// Too short to hold both the prefix and the suffix side by side.
	if len(s) < len(prefix)+len(suffix) {
		return false
	}
	if !bytes.HasPrefix(s, prefix) {
		return false
	}
	return bytes.HasSuffix(s, suffix)
}

// prefix and suffix are the fixed leading and trailing bytes that
// containsComment requires of a matching line.
var (
	prefix = []byte("// Code generated ")
	suffix = []byte(" DO NOT EDIT.")
)
generated_test.go
@@ -16,21 +16,24 @@ func TestParseFile(t *testing.T) {
		{"positive.0.src", true},
		{"positive.1.src", true},
		{"positive.2.src", true},
		{"positive.3.src", true},
		{"positive.4.src", true},
		{"positive.5.src", true},
		{"positive.6.src", true},
		{"positive.7.src", true},
		{"positive.8.src", true},
		{"positive.9.src", true},
		{"positive.10.src", true},
		{"positive.11.src", true},
		{"positive.12.src", true},

		// Negative matches.
		{"negative.0.src", false},
		{"negative.1.src", false},
		{"negative.2.src", false},
		{"negative.3.src", false},
		{"negative.4.src", false},
		{"negative.5.src", false},
		{"negative.6.src", false},
		{"negative.7.src", false},
		{"negative.8.src", false},
	}
	for _, tc := range tests {
		tc := tc
		t.Run(tc.name, func(t *testing.T) {
			hasGeneratedComment, err := generated.ParseFile(filepath.Join("testdata", tc.name))
@@ -41,5 +44,19 @@ func TestParseFile(t *testing.T) {
				t.Errorf("got hasGeneratedComment %v, want %v", got, want)
			}
		})
	}
}

// TestParseFileError checks that ParseFile returns a non-nil error
// when given the path testdata/doesnotexist.
func TestParseFileError(t *testing.T) {
	missing := filepath.Join("testdata", "doesnotexist")
	if _, err := generated.ParseFile(missing); err == nil {
		t.Fatal("got nil error, want non-nil")
	}
}

// BenchmarkParseFile measures ParseFile on two fixtures per iteration:
// positive.6.src and negative.3.src from the testdata directory.
//
// The fixture paths are built once, outside the timed loop, and any error
// from ParseFile aborts the benchmark so that a missing or unreadable
// fixture cannot silently turn this into a measurement of the error path.
func BenchmarkParseFile(b *testing.B) {
	files := []string{
		filepath.Join("testdata", "positive.6.src"),
		filepath.Join("testdata", "negative.3.src"),
	}
	for i := 0; i < b.N; i++ {
		for _, name := range files {
			if _, err := generated.ParseFile(name); err != nil {
				b.Fatal(err)
			}
		}
	}
}