From de798fe814bb37c1fd9d5127d3cc525a7e99df32 Mon Sep 17 00:00:00 2001 From: tommaso-moro Date: Fri, 26 Jun 2026 11:00:07 +0100 Subject: [PATCH] Add fields param to search_code and get_file_contents Add an optional `fields` array parameter to the `search_code` and `get_file_contents` tools so callers can request only the fields they need, reducing tool response size and context usage. - search_code: filters each result item to the selected fields while preserving the total_count / incomplete_results wrapper. - get_file_contents: filters each directory entry when listing a directory; ignored for single-file responses. Adds shared filterFields / filterEachField helpers and per-tool field enums, plus unit tests and regenerated toolsnaps and docs. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- README.md | 2 + .../__toolsnaps__/get_file_contents.snap | 18 ++++++ pkg/github/__toolsnaps__/search_code.snap | 14 +++++ pkg/github/minimal_types.go | 53 ++++++++++++++++ pkg/github/repositories.go | 23 ++++++- pkg/github/repositories_test.go | 63 +++++++++++++++++++ pkg/github/search.go | 27 +++++++- pkg/github/search_test.go | 57 +++++++++++++++++ 8 files changed, 255 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 614404c0a..e32a3011b 100644 --- a/README.md +++ b/README.md @@ -1255,6 +1255,7 @@ The following sets of tools are available: - **get_file_contents** - Get file or directory contents - **Required OAuth Scopes**: `repo` + - `fields`: Subset of fields to return for each entry when the path is a directory. If omitted, all fields are returned. Ignored when the path is a single file. Use this to reduce response size when listing directories and you only need specific fields, e.g. just 'name' and 'type'. (string[], optional) - `owner`: Repository owner (username or organization) (string, required) - `path`: Path to file/directory (string, optional) - `ref`: Accepts optional git refs such as `refs/tags/{tag}`, `refs/heads/{branch}` or `refs/pull/{pr_number}/head` (string, optional) @@ -1329,6 +1330,7 @@ The following sets of tools are available: - **search_code** - Search code - **Required OAuth Scopes**: `repo` + - `fields`: Subset of fields to return for each code search result. If omitted, all fields are returned. Use this to reduce response size when you only need specific fields; omitting 'repository' and 'text_matches' in particular drops the largest per-result data. (string[], optional) - `order`: Sort order for results (string, optional) - `page`: Page number for pagination (min 1) (number, optional) - `perPage`: Results per page for pagination (min 1, max 100) (number, optional) diff --git a/pkg/github/__toolsnaps__/get_file_contents.snap b/pkg/github/__toolsnaps__/get_file_contents.snap index 94b7aeeda..5e021db5e 100644 --- a/pkg/github/__toolsnaps__/get_file_contents.snap +++ b/pkg/github/__toolsnaps__/get_file_contents.snap @@ -6,6 +6,24 @@ "description": "Get the contents of a file or directory from a GitHub repository", "inputSchema": { "properties": { + "fields": { + "description": "Subset of fields to return for each entry when the path is a directory. If omitted, all fields are returned. Ignored when the path is a single file. Use this to reduce response size when listing directories and you only need specific fields, e.g. just 'name' and 'type'.", + "items": { + "enum": [ + "type", + "name", + "path", + "size", + "sha", + "url", + "git_url", + "html_url", + "download_url" + ], + "type": "string" + }, + "type": "array" + }, "owner": { "description": "Repository owner (username or organization)", "type": "string" diff --git a/pkg/github/__toolsnaps__/search_code.snap b/pkg/github/__toolsnaps__/search_code.snap index 79cbbf04e..c4a8b11be 100644 --- a/pkg/github/__toolsnaps__/search_code.snap +++ b/pkg/github/__toolsnaps__/search_code.snap @@ -6,6 +6,20 @@ "description": "Fast and precise code search across ALL GitHub repositories using GitHub's native search engine. Best for finding exact symbols, functions, classes, or specific code patterns.", "inputSchema": { "properties": { + "fields": { + "description": "Subset of fields to return for each code search result. If omitted, all fields are returned. Use this to reduce response size when you only need specific fields; omitting 'repository' and 'text_matches' in particular drops the largest per-result data.", + "items": { + "enum": [ + "name", + "path", + "sha", + "repository", + "text_matches" + ], + "type": "string" + }, + "type": "array" + }, "order": { "description": "Sort order for results", "enum": [ diff --git a/pkg/github/minimal_types.go b/pkg/github/minimal_types.go index 256bdcb91..467fcda2c 100644 --- a/pkg/github/minimal_types.go +++ b/pkg/github/minimal_types.go @@ -1,6 +1,8 @@ package github import ( + "bytes" + "encoding/json" "fmt" "net/url" "strconv" @@ -12,6 +14,57 @@ import ( "github.com/github/github-mcp-server/pkg/sanitize" ) +// codeSearchItemFieldEnum lists the selectable fields for search_code result +// items, matching the JSON field names of MinimalCodeResult. The repository and +// text_matches fields are the heaviest, so omitting them is the main lever for +// shrinking large result sets. +var codeSearchItemFieldEnum = []any{"name", "path", "sha", "repository", "text_matches"} + +// fileContentFieldEnum lists the selectable fields for get_file_contents +// directory listings, matching the JSON field names of +// github.RepositoryContent that appear for directory entries. Only applied when +// the requested path is a directory; ignored for single files. +var fileContentFieldEnum = []any{"type", "name", "path", "size", "sha", "url", "git_url", "html_url", "download_url"} + +// filterFields marshals v to a JSON object and returns a map containing only the +// requested fields. Fields that are unknown or absent from the JSON (for example +// empty values dropped via omitempty) are skipped. +func filterFields(v any, fields []string) (map[string]any, error) { + data, err := json.Marshal(v) + if err != nil { + return nil, err + } + + decoder := json.NewDecoder(bytes.NewReader(data)) + decoder.UseNumber() // preserve integer precision for fields such as IDs + var object map[string]any + if err := decoder.Decode(&object); err != nil { + return nil, err + } + + picked := make(map[string]any, len(fields)) + for _, field := range fields { + if value, ok := object[field]; ok { + picked[field] = value + } + } + return picked, nil +} + +// filterEachField applies filterFields to every item, returning a slice in which +// each element contains only the requested fields. +func filterEachField[T any](items []T, fields []string) ([]map[string]any, error) { + filtered := make([]map[string]any, 0, len(items)) + for _, item := range items { + picked, err := filterFields(item, fields) + if err != nil { + return nil, err + } + filtered = append(filtered, picked) + } + return filtered, nil +} + // MinimalUser is the output type for user and organization search results. type MinimalUser struct { Login string `json:"login"` diff --git a/pkg/github/repositories.go b/pkg/github/repositories.go index 949a18008..8526b888b 100644 --- a/pkg/github/repositories.go +++ b/pkg/github/repositories.go @@ -728,6 +728,14 @@ func GetFileContents(t translations.TranslationHelperFunc) inventory.ServerTool Type: "string", Description: "Accepts optional commit SHA. If specified, it will be used instead of ref", }, + "fields": { + Type: "array", + Description: "Subset of fields to return for each entry when the path is a directory. If omitted, all fields are returned. Ignored when the path is a single file. Use this to reduce response size when listing directories and you only need specific fields, e.g. just 'name' and 'type'.", + Items: &jsonschema.Schema{ + Type: "string", + Enum: fileContentFieldEnum, + }, + }, }, Required: []string{"owner", "repo"}, }, @@ -760,6 +768,11 @@ func GetFileContents(t translations.TranslationHelperFunc) inventory.ServerTool return utils.NewToolResultError(err.Error()), nil, nil } + fields, err := OptionalStringArrayParam(args, "fields") + if err != nil { + return utils.NewToolResultError(err.Error()), nil, nil + } + client, err := deps.GetClient(ctx) if err != nil { return utils.NewToolResultError("failed to get GitHub client"), nil, nil @@ -883,7 +896,15 @@ func GetFileContents(t translations.TranslationHelperFunc) inventory.ServerTool return attachIFC(utils.NewToolResultResource(fmt.Sprintf("successfully downloaded binary file (SHA: %s)%s", fileSHA, successNote), result)), nil, nil } else if dirContent != nil { // file content or file SHA is nil which means it's a directory - r, err := json.Marshal(dirContent) + var payload any = dirContent + if len(fields) > 0 { + filtered, err := filterEachField(dirContent, fields) + if err != nil { + return utils.NewToolResultErrorFromErr("failed to filter directory contents", err), nil, nil + } + payload = filtered + } + r, err := json.Marshal(payload) if err != nil { return utils.NewToolResultError("failed to marshal response"), nil, nil } diff --git a/pkg/github/repositories_test.go b/pkg/github/repositories_test.go index e5531cc55..5f93a9682 100644 --- a/pkg/github/repositories_test.go +++ b/pkg/github/repositories_test.go @@ -480,6 +480,69 @@ func Test_GetFileContents(t *testing.T) { } } +func Test_GetFileContents_DirectoryFieldFiltering(t *testing.T) { + mockDirContent := []*github.RepositoryContent{ + { + Type: github.Ptr("file"), + Name: github.Ptr("README.md"), + Path: github.Ptr("README.md"), + SHA: github.Ptr("abc123"), + Size: github.Ptr(42), + URL: github.Ptr("https://api.github.com/repos/owner/repo/contents/README.md"), + HTMLURL: github.Ptr("https://github.com/owner/repo/blob/main/README.md"), + DownloadURL: github.Ptr("https://raw.githubusercontent.com/owner/repo/main/README.md"), + }, + { + Type: github.Ptr("dir"), + Name: github.Ptr("src"), + Path: github.Ptr("src"), + SHA: github.Ptr("def456"), + HTMLURL: github.Ptr("https://github.com/owner/repo/tree/main/src"), + }, + } + + serverTool := GetFileContents(translations.NullTranslationHelper) + client := mustNewGHClient(t, MockHTTPClientWithHandlers(map[string]http.HandlerFunc{ + GetReposByOwnerByRepo: mockResponse(t, http.StatusOK, "{\"name\": \"repo\", \"default_branch\": \"main\"}"), + GetReposGitRefByOwnerByRepoByRef: mockResponse(t, http.StatusOK, "{\"ref\": \"refs/heads/main\", \"object\": {\"sha\": \"\"}}"), + GetReposContentsByOwnerByRepoByPath: expectQueryParams(t, map[string]string{}).andThen( + mockResponse(t, http.StatusOK, mockDirContent), + ), + GetRawReposContentsByOwnerByRepoByPath: expectQueryParams(t, map[string]string{"branch": "main"}).andThen( + mockResponse(t, http.StatusNotFound, nil), + ), + })) + deps := BaseDeps{Client: client} + handler := serverTool.Handler(deps) + + request := createMCPRequest(map[string]any{ + "owner": "owner", + "repo": "repo", + "path": "src/", + "fields": []any{"name", "type"}, + }) + + result, err := handler(ContextWithDeps(context.Background(), deps), &request) + require.NoError(t, err) + require.False(t, result.IsError) + + textContent := getTextResult(t, result) + + // Each directory entry is reduced to the requested fields only; heavier + // fields such as html_url and download_url are dropped. + var returned []map[string]any + require.NoError(t, json.Unmarshal([]byte(textContent.Text), &returned)) + require.Len(t, returned, len(mockDirContent)) + for _, entry := range returned { + require.Len(t, entry, 2) + assert.Contains(t, entry, "name") + assert.Contains(t, entry, "type") + } + + assert.NotContains(t, textContent.Text, "html_url") + assert.NotContains(t, textContent.Text, "download_url") +} + func Test_GetFileContents_IFC_InsidersMode(t *testing.T) { t.Parallel() diff --git a/pkg/github/search.go b/pkg/github/search.go index 23ccbd838..7ffade420 100644 --- a/pkg/github/search.go +++ b/pkg/github/search.go @@ -209,6 +209,14 @@ func SearchCode(t translations.TranslationHelperFunc) inventory.ServerTool { Description: "Sort order for results", Enum: []any{"asc", "desc"}, }, + "fields": { + Type: "array", + Description: "Subset of fields to return for each code search result. If omitted, all fields are returned. Use this to reduce response size when you only need specific fields; omitting 'repository' and 'text_matches' in particular drops the largest per-result data.", + Items: &jsonschema.Schema{ + Type: "string", + Enum: codeSearchItemFieldEnum, + }, + }, }, Required: []string{"query"}, } @@ -239,6 +247,10 @@ func SearchCode(t translations.TranslationHelperFunc) inventory.ServerTool { if err != nil { return utils.NewToolResultError(err.Error()), nil, nil } + fields, err := OptionalStringArrayParam(args, "fields") + if err != nil { + return utils.NewToolResultError(err.Error()), nil, nil + } pagination, err := OptionalPaginationParams(args) if err != nil { return utils.NewToolResultError(err.Error()), nil, nil @@ -297,7 +309,20 @@ func SearchCode(t translations.TranslationHelperFunc) inventory.ServerTool { Items: minimalItems, } - r, err := json.Marshal(minimalResult) + var payload any = minimalResult + if len(fields) > 0 { + filteredItems, err := filterEachField(minimalItems, fields) + if err != nil { + return utils.NewToolResultErrorFromErr("failed to filter code search results", err), nil, nil + } + payload = map[string]any{ + "total_count": minimalResult.TotalCount, + "incomplete_results": minimalResult.IncompleteResults, + "items": filteredItems, + } + } + + r, err := json.Marshal(payload) if err != nil { return utils.NewToolResultErrorFromErr("failed to marshal response", err), nil, nil } diff --git a/pkg/github/search_test.go b/pkg/github/search_test.go index 5ebf60842..f0eeb882b 100644 --- a/pkg/github/search_test.go +++ b/pkg/github/search_test.go @@ -509,6 +509,63 @@ func Test_SearchCode(t *testing.T) { } } +func Test_SearchCode_FieldFiltering(t *testing.T) { + mockSearchResult := &github.CodeSearchResult{ + Total: github.Ptr(1), + IncompleteResults: github.Ptr(false), + CodeResults: []*github.CodeResult{ + { + Name: github.Ptr("file1.go"), + Path: github.Ptr("path/to/file1.go"), + SHA: github.Ptr("abc123def456"), + Repository: &github.Repository{ + Name: github.Ptr("repo"), + FullName: github.Ptr("owner/repo"), + }, + TextMatches: []*github.TextMatch{ + {Fragment: github.Ptr("func main() {}")}, + }, + }, + }, + } + + serverTool := SearchCode(translations.NullTranslationHelper) + client := mustNewGHClient(t, MockHTTPClientWithHandlers(map[string]http.HandlerFunc{ + GetSearchCode: mockResponse(t, http.StatusOK, mockSearchResult), + })) + deps := BaseDeps{Client: client} + handler := serverTool.Handler(deps) + + request := createMCPRequest(map[string]any{ + "query": "fmt.Println language:go", + "fields": []any{"name", "path"}, + }) + + result, err := handler(ContextWithDeps(context.Background(), deps), &request) + require.NoError(t, err) + require.False(t, result.IsError) + + textContent := getTextResult(t, result) + + // The wrapper metadata is preserved while each item is reduced to the + // requested fields only; the heavier repository and text_matches data is + // dropped. + var returned struct { + TotalCount int `json:"total_count"` + IncompleteResults bool `json:"incomplete_results"` + Items []map[string]any `json:"items"` + } + require.NoError(t, json.Unmarshal([]byte(textContent.Text), &returned)) + assert.Equal(t, 1, returned.TotalCount) + require.Len(t, returned.Items, 1) + require.Len(t, returned.Items[0], 2) + assert.Contains(t, returned.Items[0], "name") + assert.Contains(t, returned.Items[0], "path") + + assert.NotContains(t, textContent.Text, "repository") + assert.NotContains(t, textContent.Text, "text_matches") +} + func Test_SearchUsers(t *testing.T) { // Verify tool definition once serverTool := SearchUsers(translations.NullTranslationHelper)