@@ -232,13 +232,14 @@ def _process_diff_item(diff_item: Any, repo: Repo, include_unsupported: bool) ->
232232
233233
234234def _get_diff_no_comments (
235- repo : Repo , commit_id : str , include_unsupported : bool = True
235+ repo : Repo , commit_id : str , max_diff_size : int , include_unsupported : bool = True
236236) -> str | None :
237237 """Generate diff with comments stripped.
238238
239239 Args:
240240 repo: Git repository object
241241 commit_id: Commit SHA to process
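242+ max_diff_size: Maximum diff size in characters. Applied heuristically: a commit whose changed-line count exceeds this value is skipped (returns None).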
242243 include_unsupported: If True, include original diff for unsupported files.
243244 If False, skip unsupported files entirely.
244245 """
@@ -247,6 +248,10 @@ def _get_diff_no_comments(
247248 if not commit .parents :
248249 return None
249250
251+ # Note: Safe fast-path heuristic, checked before the patch is built: the commit's changed-line count is compared against max_diff_size (a character limit).
252+ if commit .stats .total ["lines" ] > max_diff_size :
253+ return None
254+
250255 diffs = commit .parents [0 ].diff (commit , create_patch = True )
251256 file_diffs : list [str ] = []
252257
@@ -261,15 +266,17 @@ def _get_diff_no_comments(
261266 return None
262267
263268
264- def _process_batch (args : tuple [str , list [str ], bool ]) -> dict [str , str ]:
269+ def _process_batch (args : tuple [str , list [str ], int , bool ]) -> dict [str , str ]:
265270 """Process a batch of commits."""
266- repo_path , commit_ids , include_unsupported = args
271+ repo_path , commit_ids , max_diff_size , include_unsupported = args
267272 results : dict [str , str ] = {}
268273
269274 try :
270275 with Repo (repo_path ) as repository :
271276 for commit_id in commit_ids :
272- diff = _get_diff_no_comments (repository , commit_id , include_unsupported )
277+ diff = _get_diff_no_comments (
278+ repository , commit_id , max_diff_size , include_unsupported
279+ )
273280 if diff is not None :
274281 results [commit_id ] = diff
275282 except Exception as e :
@@ -313,6 +320,6 @@ def strip_diff_comments(
313320 ),
314321 batch_fn = _process_batch ,
315322 apply_fn = _apply_diff ,
316- batch_extra_args = (include_unsupported , ),
323+ batch_extra_args = (MAX_DIFF_SIZE , include_unsupported ),
317324 desc = "Stripping comments" ,
318325 )
0 commit comments