Skip to content
2 changes: 1 addition & 1 deletion pkg/github/__toolsnaps__/get_file_contents.snap
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
"type": "string"
},
"symbol": {
"description": "Optional: extract a specific symbol (function, class, type, etc.) from the file. For supported languages, returns only the symbol's source code instead of the entire file. If the symbol is not found, returns a list of available symbols.",
"description": "Optional: extract a specific symbol (function, class, type, etc.) from the file. For supported languages, returns only the symbol's source code instead of the entire file. For methods, use receiver prefix format: (*TypeName).MethodName — bare method names also work when unambiguous. If the symbol is not found, returns available symbols with suggestions.",
"type": "string"
}
},
Expand Down
167 changes: 167 additions & 0 deletions pkg/github/diff_integration.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,167 @@
package github

import (
"fmt"
"strings"
)

// applySemanticDiffToUnifiedPatch takes a unified diff patch for a single file
// and attempts to produce a semantic diff. It reconstructs the base and head
// content from the patch hunks, then runs them through SemanticDiff.
// Returns the original patch unchanged if the file type doesn't benefit from
// semantic diffing or if reconstruction fails.
func applySemanticDiffToUnifiedPatch(filename, patch string) string {
if patch == "" {
return patch
}

format := DetectDiffFormat(filename)
if format == DiffFormatUnified {
// Not a structured data or code file — keep the original patch
return patch
}

base, head, ok := reconstructFromPatch(patch)
if !ok {
return patch
}

result := SemanticDiff(filename, base, head)
if result.Format == DiffFormatFallback {
return patch
}

return result.Diff
}

// reconstructFromPatch extracts the base and head file content from a unified
// diff patch. Returns the reconstructed contents and true if successful.
// This only works well for complete file diffs — partial context diffs will
// produce incomplete content, which is fine for semantic comparison of
// structured data where the full structure is usually in the diff.
func reconstructFromPatch(patch string) (base, head []byte, ok bool) {
lines := strings.Split(patch, "\n")

var baseLines, headLines []string
inHunk := false

for _, line := range lines {
if strings.HasPrefix(line, "@@") {
inHunk = true
continue
}
if !inHunk {
continue
}

switch {
case strings.HasPrefix(line, "-"):
baseLines = append(baseLines, line[1:])
case strings.HasPrefix(line, "+"):
headLines = append(headLines, line[1:])
case strings.HasPrefix(line, " "):
baseLines = append(baseLines, line[1:])
headLines = append(headLines, line[1:])
case line == "":
// Could be end of patch or an empty context line
baseLines = append(baseLines, "")
headLines = append(headLines, "")
}
}

if len(baseLines) == 0 && len(headLines) == 0 {
return nil, nil, false
}

return []byte(strings.Join(baseLines, "\n")),
[]byte(strings.Join(headLines, "\n")),
true
}

// processMultiFileDiff splits a multi-file unified diff into per-file sections
// and applies semantic diffing to each file where applicable. Returns a
// combined result with structural diffs for supported formats and original
// patches for unsupported ones.
func processMultiFileDiff(rawDiff string) string {
sections := splitDiffByFile(rawDiff)
if len(sections) == 0 {
return rawDiff
}

var result strings.Builder
for i, section := range sections {
if i > 0 {
result.WriteString("\n")
}

semanticPatch := applySemanticDiffToUnifiedPatch(section.filename, section.patch)
if semanticPatch != section.patch {

Copilot AI Feb 9, 2026

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When a per-file patch is converted to a semantic diff, the output drops the original per-file diff header (e.g., diff --git, index, new file mode, rename from/to, deleted file mode). This loses important metadata (notably renames and mode changes) and can make the result misleading. Preserve the original header information even when emitting a semantic/structural diff (or explicitly include any rename/mode metadata alongside the semantic diff).

Suggested change
if semanticPatch != section.patch {
if semanticPatch != section.patch {
// Preserve the original per-file header (including metadata such as renames/mode changes)
result.WriteString(section.header)
if section.header != "" {
result.WriteString("\n")
}
Copilot uses AI. Check for mistakes.
result.WriteString(fmt.Sprintf("--- %s (semantic diff) ---\n", section.filename))
result.WriteString(semanticPatch)
} else {
result.WriteString(section.header)
if section.patch != "" {
result.WriteString("\n")
result.WriteString(section.patch)
}
}
}

return result.String()
}

type diffSection struct {
filename string
header string
patch string
}

// splitDiffByFile splits a raw multi-file unified diff into per-file sections.
func splitDiffByFile(rawDiff string) []diffSection {
lines := strings.Split(rawDiff, "\n")
var sections []diffSection
var current *diffSection

for _, line := range lines {
if strings.HasPrefix(line, "diff --git ") {
if current != nil {
sections = append(sections, *current)
}
// Extract filename from "diff --git a/path b/path"
parts := strings.SplitN(line, " b/", 2)
filename := ""
if len(parts) == 2 {
filename = parts[1]
}
current = &diffSection{
filename: filename,
header: line,
}
continue
}

if current == nil {
continue
}

if strings.HasPrefix(line, "---") || strings.HasPrefix(line, "+++") ||
strings.HasPrefix(line, "index ") || strings.HasPrefix(line, "new file") ||
strings.HasPrefix(line, "deleted file") || strings.HasPrefix(line, "old mode") ||
strings.HasPrefix(line, "new mode") || strings.HasPrefix(line, "similarity") ||
strings.HasPrefix(line, "rename ") || strings.HasPrefix(line, "Binary") {
current.header += "\n" + line
} else {
if current.patch != "" {
current.patch += "\n" + line
} else {
current.patch = line
}
}
}

if current != nil {
sections = append(sections, *current)
}

return sections
}
142 changes: 142 additions & 0 deletions pkg/github/diff_integration_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
package github

import (
"testing"

"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)

func TestApplySemanticDiffToUnifiedPatch(t *testing.T) {
t.Run("JSON file gets semantic diff", func(t *testing.T) {
patch := `@@ -1,3 +1,3 @@
{
- "name": "old"
+ "name": "new"
}`
result := applySemanticDiffToUnifiedPatch("config.json", patch)
assert.NotEqual(t, patch, result)
assert.Contains(t, result, `name: "old" → "new"`)
})

t.Run("Go file gets structural diff", func(t *testing.T) {
patch := `@@ -1,3 +1,4 @@
func hello() {
+ fmt.Println("world")
}`
result := applySemanticDiffToUnifiedPatch("main.go", patch)
assert.NotEqual(t, patch, result)
assert.Contains(t, result, "function_declaration")
})

t.Run("Markdown file keeps original patch", func(t *testing.T) {
patch := `@@ -1,3 +1,3 @@
# Title
-old text
+new text`
result := applySemanticDiffToUnifiedPatch("README.md", patch)
assert.Equal(t, patch, result)
})

t.Run("empty patch returns empty", func(t *testing.T) {
result := applySemanticDiffToUnifiedPatch("config.json", "")
assert.Equal(t, "", result)
})

t.Run("YAML file gets semantic diff", func(t *testing.T) {
patch := `@@ -1,2 +1,2 @@
-name: old
+name: new`
result := applySemanticDiffToUnifiedPatch("config.yaml", patch)
assert.NotEqual(t, patch, result)
assert.Contains(t, result, `name: "old" → "new"`)
})
}

func TestReconstructFromPatch(t *testing.T) {
t.Run("simple patch", func(t *testing.T) {
patch := `@@ -1,3 +1,3 @@
{
- "name": "old"
+ "name": "new"
}`
base, head, ok := reconstructFromPatch(patch)
require.True(t, ok)
assert.Contains(t, string(base), `"name": "old"`)
assert.Contains(t, string(head), `"name": "new"`)
})

t.Run("addition only", func(t *testing.T) {
patch := `@@ -0,0 +1,3 @@
+{
+ "new": true
+}`
base, head, ok := reconstructFromPatch(patch)
require.True(t, ok)
assert.Empty(t, string(base))
assert.Contains(t, string(head), `"new": true`)
})

t.Run("empty patch", func(t *testing.T) {
_, _, ok := reconstructFromPatch("")
assert.False(t, ok)
})
}

func TestSplitDiffByFile(t *testing.T) {
rawDiff := `diff --git a/config.json b/config.json
index abc..def 100644
--- a/config.json
+++ b/config.json
@@ -1,3 +1,3 @@
{
- "name": "old"
+ "name": "new"
}
diff --git a/main.go b/main.go
index abc..def 100644
--- a/main.go
+++ b/main.go
@@ -1,3 +1,4 @@
func hello() {
+ fmt.Println("world")
}`

sections := splitDiffByFile(rawDiff)
require.Len(t, sections, 2)
assert.Equal(t, "config.json", sections[0].filename)
assert.Equal(t, "main.go", sections[1].filename)
assert.Contains(t, sections[0].patch, `"name": "old"`)
assert.Contains(t, sections[1].patch, `fmt.Println`)
}

func TestProcessMultiFileDiff(t *testing.T) {
rawDiff := `diff --git a/config.json b/config.json
index abc..def 100644
--- a/config.json
+++ b/config.json
@@ -1,3 +1,3 @@
{
- "name": "old"
+ "name": "new"
}
diff --git a/README.md b/README.md
index abc..def 100644
--- a/README.md
+++ b/README.md
@@ -1,3 +1,3 @@
# Title
-old text
+new text`

result := processMultiFileDiff(rawDiff)

// JSON file should get semantic diff
assert.Contains(t, result, "semantic diff")
assert.Contains(t, result, `name: "old" → "new"`)

// Markdown should keep original patch format
assert.Contains(t, result, "README.md")
assert.Contains(t, result, "-old text")
assert.Contains(t, result, "+new text")
}
6 changes: 6 additions & 0 deletions pkg/github/minimal_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ type MinimalCommitFile struct {
Additions int `json:"additions,omitempty"`
Deletions int `json:"deletions,omitempty"`
Changes int `json:"changes,omitempty"`
Patch string `json:"patch,omitempty"`
}

// MinimalCommit is the trimmed output type for commit objects.
Expand Down Expand Up @@ -236,12 +237,17 @@ func convertToMinimalCommit(commit *github.RepositoryCommit, includeDiffs bool)
if len(commit.Files) > 0 {
minimalCommit.Files = make([]MinimalCommitFile, 0, len(commit.Files))
for _, file := range commit.Files {
patch := file.GetPatch()
if patch != "" {
patch = applySemanticDiffToUnifiedPatch(file.GetFilename(), patch)
}
Comment on lines 239 to +243

Copilot AI Feb 9, 2026

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

MinimalCommitFile now includes a patch field and convertToMinimalCommit applies semantic/structural diffing to it. There doesn't appear to be test coverage asserting that get_commit returns the new patch field and that semantic diffing is applied (or that include_diff=false still omits diffs). Adding a focused test will help prevent accidental regressions in commit output.

Copilot generated this review using guidance from repository custom instructions.
minimalFile := MinimalCommitFile{
Filename: file.GetFilename(),
Status: file.GetStatus(),
Additions: file.GetAdditions(),
Deletions: file.GetDeletions(),
Changes: file.GetChanges(),
Patch: patch,
}
minimalCommit.Files = append(minimalCommit.Files, minimalFile)
}
Expand Down
12 changes: 10 additions & 2 deletions pkg/github/pullrequests.go
Original file line number Diff line number Diff line change
Expand Up @@ -220,8 +220,8 @@ func GetPullRequestDiff(ctx context.Context, client *github.Client, owner, repo

defer func() { _ = resp.Body.Close() }()

// Return the raw response
return utils.NewToolResultText(string(raw)), nil
// Return the raw response, with semantic diffs applied where beneficial
return utils.NewToolResultText(processMultiFileDiff(string(raw))), nil
Comment on lines +223 to +224

Copilot AI Feb 9, 2026

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This changes get_diff output by post-processing the raw multi-file diff, but existing pull request tests only cover a Markdown diff (which is expected to remain unchanged). Add a test case that includes at least one supported semantic/structural file type (e.g., JSON or Go) and assert that the returned diff is transformed accordingly, so regressions in semantic application/splitting are caught.

This issue also appears on line 296 of the same file.

Copilot generated this review using guidance from repository custom instructions.
}

func GetPullRequestStatus(ctx context.Context, client *github.Client, owner, repo string, pullNumber int) (*mcp.CallToolResult, error) {
Expand Down Expand Up @@ -293,6 +293,14 @@ func GetPullRequestFiles(ctx context.Context, client *github.Client, owner, repo
return ghErrors.NewGitHubAPIStatusErrorResponse(ctx, "failed to get pull request files", resp, body), nil
}

// Apply semantic diffs to file patches where beneficial
for _, file := range files {
if file.Patch != nil && file.Filename != nil {
semanticPatch := applySemanticDiffToUnifiedPatch(file.GetFilename(), file.GetPatch())
file.Patch = &semanticPatch
}
}

r, err := json.Marshal(files)
if err != nil {
return nil, fmt.Errorf("failed to marshal response: %w", err)
Expand Down
2 changes: 1 addition & 1 deletion pkg/github/repositories.go
Original file line number Diff line number Diff line change
Expand Up @@ -654,7 +654,7 @@ func GetFileContents(t translations.TranslationHelperFunc) inventory.ServerTool
},
"symbol": {
Type: "string",
Description: "Optional: extract a specific symbol (function, class, type, etc.) from the file. For supported languages, returns only the symbol's source code instead of the entire file. If the symbol is not found, returns a list of available symbols.",
Description: "Optional: extract a specific symbol (function, class, type, etc.) from the file. For supported languages, returns only the symbol's source code instead of the entire file. For methods, use receiver prefix format: (*TypeName).MethodName — bare method names also work when unambiguous. If the symbol is not found, returns available symbols with suggestions.",
},
},
Required: []string{"owner", "repo"},
Expand Down
Loading