diff options
author | Yuan Fu <casouri@gmail.com> | 2025-03-18 17:26:26 -0700 |
---|---|---|
committer | Yuan Fu <casouri@gmail.com> | 2025-03-19 11:59:42 -0700 |
commit | 289e1bdae28d7dbb27ae8f323b6f50f5c430a961 (patch) | |
tree | eca2b73e0fba0d5c58e832fa80d08ce5525d1ae6 | |
parent | 72f6cf8e77f3eac8fe2e5fc6ad29c2348314b84b (diff) | |
download | emacs-scratch/ts-linecol.tar.gz |
-rw-r--r-- | src/buffer.c | 21 | ||||
-rw-r--r-- | src/buffer.h | 30 | ||||
-rw-r--r-- | src/casefiddle.c | 13 | ||||
-rw-r--r-- | src/editfns.c | 44 | ||||
-rw-r--r-- | src/insdel.c | 55 | ||||
-rw-r--r-- | src/treesit.c | 433 | ||||
-rw-r--r-- | src/treesit.h | 23 | ||||
-rw-r--r-- | test/src/treesit-tests.el | 45 |
8 files changed, 617 insertions, 47 deletions
diff --git a/src/buffer.c b/src/buffer.c index a408b79..e1f138d 100644 --- a/src/buffer.c +++ b/src/buffer.c @@ -641,6 +641,13 @@ even if it is dead. The return value is never nil. */) bset_width_table (b, Qnil); b->prevent_redisplay_optimizations_p = 1; +#ifdef HAVE_TREE_SITTER + b->ts_linecol_cache.pos = 1; + b->ts_linecol_cache.byte_pos = 1; + b->ts_linecol_cache.line = 1; + b->ts_linecol_cache.col = 0; +#endif + /* An ordinary buffer normally doesn't need markers to handle BEGV and ZV. */ bset_pt_marker (b, Qnil); @@ -867,6 +874,13 @@ Interactively, CLONE and INHIBIT-BUFFER-HOOKS are nil. */) b->bidi_paragraph_cache = 0; bset_width_table (b, Qnil); +#ifdef HAVE_TREE_SITTER + b->ts_linecol_cache.pos = 1; + b->ts_linecol_cache.byte_pos = 1; + b->ts_linecol_cache.line = 1; + b->ts_linecol_cache.col = 0; +#endif + name = Fcopy_sequence (name); set_string_intervals (name, NULL); bset_name (b, name); @@ -2618,6 +2632,11 @@ results, see Info node `(elisp)Swapping Text'. */) bset_point_before_scroll (current_buffer, Qnil); bset_point_before_scroll (other_buffer, Qnil); +#ifdef HAVE_TREE_SITTER + swapfield_ (ts_parser_list, Lisp_Object); + swapfield (ts_linecol_cache, struct ts_linecol); +#endif + modiff_incr (¤t_buffer->text->modiff, 1); modiff_incr (&other_buffer->text->modiff, 1); modiff_incr (¤t_buffer->text->chars_modiff, 1); @@ -5019,8 +5038,6 @@ DEFUN ("overlay-tree", Foverlay_tree, Soverlay_tree, 0, 1, 0, } #endif - - /* Initialize the buffer routines. */ void syms_of_buffer (void) diff --git a/src/buffer.h b/src/buffer.h index 5c0a6ab..9479b14 100644 --- a/src/buffer.h +++ b/src/buffer.h @@ -220,6 +220,23 @@ extern ptrdiff_t advance_to_char_boundary (ptrdiff_t byte_pos); /* Define the actual buffer data structures. */ +/* This data structure stores the cache of a position and its line and + column number. The column number is counted in bytes. The line + number and column number don't consider narrowing. */ +struct ts_linecol +{ + /* A position in the buffer. */ + ptrdiff_t pos; + /* The byte position of POS. */ + ptrdiff_t byte_pos; + /* The line number of this position. */ + ptrdiff_t line; + /* The column number (in bytes) of this position. Unlike Emacs' + convention, this is 0-based (because tree-sitter column is + 0-based). Simply the byte offset from BOL (or BOB). */ + ptrdiff_t col; +}; + /* This data structure describes the actual text contents of a buffer. It is shared between indirect buffers and their base buffer. */ @@ -700,6 +717,19 @@ struct buffer /* The interval tree containing this buffer's overlays. */ struct itree_tree *overlays; + /* Right now only tree-sitter makes use of this, so I don't want + non-tree-sitter build to pay for it. If something else can make + use of this, we can remove the gate. */ +#ifdef HAVE_TREE_SITTER + /* Cache of line and column number of a position. The position cached + is usually near point. Tree-sitter uses this cache to calculate + line and column of the beginning and end of buffer edits. This + cache is refreshed in buffer edit functions, so it's always + up-to-date. Usually, the newly calculated position and line/column + are saved to this field. Initialized to position 1. */ + struct ts_linecol ts_linecol_cache; +#endif + /* Changes in the buffer are recorded here for undo, and t means don't record anything. This information belongs to the base buffer of an indirect buffer. But we can't store it in the diff --git a/src/casefiddle.c b/src/casefiddle.c index faeb16f..dba4834 100644 --- a/src/casefiddle.c +++ b/src/casefiddle.c @@ -543,6 +543,12 @@ casify_region (enum case_action flag, Lisp_Object b, Lisp_Object e) #ifdef HAVE_TREE_SITTER ptrdiff_t start_byte = CHAR_TO_BYTE (start); ptrdiff_t old_end_byte = CHAR_TO_BYTE (end); + struct ts_linecol start_linecol + = treesit_linecol_maybe (start, start_byte, + current_buffer->ts_linecol_cache); + struct ts_linecol old_end_linecol + = treesit_linecol_maybe (end, old_end_byte, + current_buffer->ts_linecol_cache); #endif ptrdiff_t orig_end = end; @@ -565,8 +571,11 @@ casify_region (enum case_action flag, Lisp_Object b, Lisp_Object e) update_compositions (start, end, CHECK_ALL); } #ifdef HAVE_TREE_SITTER - treesit_record_change (start_byte, old_end_byte, - CHAR_TO_BYTE (orig_end + added)); + ptrdiff_t new_end = orig_end + added; + ptrdiff_t new_end_byte = CHAR_TO_BYTE (new_end); + + treesit_record_change (start_byte, old_end_byte, new_end_byte, + start_linecol, old_end_linecol, new_end); #endif return orig_end + added; diff --git a/src/editfns.c b/src/editfns.c index f4e9a02..5d5f4b3 100644 --- a/src/editfns.c +++ b/src/editfns.c @@ -2234,6 +2234,19 @@ Both characters must have the same length of multi-byte form. */) = !NILP (BVAR (current_buffer, enable_multibyte_characters)); int fromc, toc; +#ifdef HAVE_TREE_SITTER + ptrdiff_t start_char = fix_position (start); + ptrdiff_t old_end_char = fix_position (end); + ptrdiff_t start_byte = CHAR_TO_BYTE (start_char); + ptrdiff_t old_end_byte = CHAR_TO_BYTE (old_end_char); + struct ts_linecol start_linecol + = treesit_linecol_maybe (start_char, start_byte, + current_buffer->ts_linecol_cache); + struct ts_linecol old_end_linecol + = treesit_linecol_maybe (old_end_char, old_end_byte, + current_buffer->ts_linecol_cache); +#endif + restart: validate_region (&start, &end); @@ -2334,7 +2347,8 @@ Both characters must have the same length of multi-byte form. */) if (changed > 0) { #ifdef HAVE_TREE_SITTER - treesit_record_change (changed, last_changed, last_changed); + treesit_record_change (start_byte, old_end_byte, old_end_byte, + start_linecol, old_end_linecol, old_end_char); #endif signal_after_change (changed, last_changed - changed, last_changed - changed); @@ -2521,6 +2535,14 @@ It returns the number of characters changed. */) } else { +#ifdef HAVE_TREE_SITTER + struct ts_linecol start_linecol + = treesit_linecol_maybe (pos, pos_byte, + current_buffer->ts_linecol_cache); + struct ts_linecol old_end_linecol + = treesit_linecol_maybe (pos + 1, pos_byte + len, + start_linecol); +#endif record_change (pos, 1); while (str_len-- > 0) *p++ = *str++; @@ -2528,12 +2550,13 @@ It returns the number of characters changed. */) update_compositions (pos, pos + 1, CHECK_BORDER); #ifdef HAVE_TREE_SITTER - /* In the previous branch, replace_range() notifies + /* In the previous branch, replace_range() notifies changes to tree-sitter, but in this branch, we modified buffer content manually, so we need to notify tree-sitter manually. */ treesit_record_change (pos_byte, pos_byte + len, - pos_byte + len); + pos_byte + len, start_linecol, + old_end_linecol, pos + 1); #endif } characters_changed++; @@ -4481,6 +4504,15 @@ ring. */) start1_byte = CHAR_TO_BYTE (start1); end2_byte = CHAR_TO_BYTE (end2); +#ifdef HAVE_TREE_SITTER + struct ts_linecol start_linecol + = treesit_linecol_maybe (start1, start1_byte, + current_buffer->ts_linecol_cache); + struct ts_linecol old_end_linecol + = treesit_linecol_maybe (end2, end2_byte, + current_buffer->ts_linecol_cache); +#endif + /* Make sure the gap won't interfere, by moving it out of the text we will operate on. */ if (start1 < gap && gap < end2) @@ -4620,10 +4652,8 @@ ring. */) } #ifdef HAVE_TREE_SITTER - /* I don't think it's common to transpose two far-apart regions, so - amalgamating the edit into one should be fine. This is what the - signal_after_change below does, too. */ - treesit_record_change (start1_byte, end2_byte, end2_byte); + treesit_record_change (start1_byte, end2_byte, end2_byte, + start_linecol, old_end_linecol, end2); #endif signal_after_change (start1, end2 - start1, end2 - start1); diff --git a/src/insdel.c b/src/insdel.c index 9b77072..dd5be1d 100644 --- a/src/insdel.c +++ b/src/insdel.c @@ -891,6 +891,11 @@ insert_1_both (const char *string, if (NILP (BVAR (current_buffer, enable_multibyte_characters))) nchars = nbytes; +#ifdef HAVE_TREE_SITTER + struct ts_linecol start_linecol + = treesit_linecol_maybe (PT, PT_BYTE, current_buffer->ts_linecol_cache); +#endif + if (prepare) /* Do this before moving and increasing the gap, because the before-change hooks might move the gap @@ -945,7 +950,9 @@ insert_1_both (const char *string, #ifdef HAVE_TREE_SITTER eassert (nbytes >= 0); eassert (PT_BYTE >= 0); - treesit_record_change (PT_BYTE, PT_BYTE, PT_BYTE + nbytes); + + treesit_record_change (PT_BYTE, PT_BYTE, PT_BYTE + nbytes, + start_linecol, start_linecol, PT + nchars); #endif adjust_point (nchars, nbytes); @@ -1017,6 +1024,11 @@ insert_from_string_1 (Lisp_Object string, ptrdiff_t pos, ptrdiff_t pos_byte, = count_size_as_multibyte (SDATA (string) + pos_byte, nbytes); +#ifdef HAVE_TREE_SITTER + struct ts_linecol start_linecol + = treesit_linecol_maybe (PT, PT_BYTE, current_buffer->ts_linecol_cache); +#endif + /* Do this before moving and increasing the gap, because the before-change hooks might move the gap or make it smaller. */ @@ -1081,7 +1093,9 @@ insert_from_string_1 (Lisp_Object string, ptrdiff_t pos, ptrdiff_t pos_byte, #ifdef HAVE_TREE_SITTER eassert (nbytes >= 0); eassert (PT_BYTE >= 0); - treesit_record_change (PT_BYTE, PT_BYTE, PT_BYTE + nbytes); + + treesit_record_change (PT_BYTE, PT_BYTE, PT_BYTE + nbytes, + start_linecol, start_linecol, PT + nchars); #endif adjust_point (nchars, outgoing_nbytes); @@ -1094,7 +1108,8 @@ insert_from_string_1 (Lisp_Object string, ptrdiff_t pos, ptrdiff_t pos_byte, GPT_ADDR (if not text_at_gap_tail). Contrary to insert_from_gap, this does not invalidate any cache, nor update any markers, nor record any buffer modification information - of any sort, with the single exception of notifying tree-sitter. */ + of any sort, with the single exception of notifying tree-sitter and + updating tree-sitter linecol cache. */ void insert_from_gap_1 (ptrdiff_t nchars, ptrdiff_t nbytes, bool text_at_gap_tail) { @@ -1103,6 +1118,8 @@ insert_from_gap_1 (ptrdiff_t nchars, ptrdiff_t nbytes, bool text_at_gap_tail) #ifdef HAVE_TREE_SITTER ptrdiff_t ins_bytepos = GPT_BYTE; + struct ts_linecol start_linecol + = treesit_linecol_maybe (GPT, GPT_BYTE, current_buffer->ts_linecol_cache); #endif GAP_SIZE -= nbytes; @@ -1123,7 +1140,9 @@ insert_from_gap_1 (ptrdiff_t nchars, ptrdiff_t nbytes, bool text_at_gap_tail) #ifdef HAVE_TREE_SITTER eassert (nbytes >= 0); eassert (ins_bytepos >= 0); - treesit_record_change (ins_bytepos, ins_bytepos, ins_bytepos + nbytes); + + treesit_record_change (ins_bytepos, ins_bytepos, ins_bytepos + nbytes, + start_linecol, start_linecol, ins_bytepos + nbytes); #endif } @@ -1186,6 +1205,8 @@ insert_from_buffer (struct buffer *buf, #ifdef HAVE_TREE_SITTER ptrdiff_t obyte = PT_BYTE; + struct ts_linecol start_linecol + = treesit_linecol_maybe (opoint, obyte, current_buffer->ts_linecol_cache); #endif insert_from_buffer_1 (buf, charpos, nchars, inherit); @@ -1196,7 +1217,9 @@ insert_from_buffer (struct buffer *buf, eassert (PT_BYTE >= BEG_BYTE); eassert (obyte >= BEG_BYTE); eassert (PT_BYTE >= obyte); - treesit_record_change (obyte, obyte, PT_BYTE); + + treesit_record_change (obyte, obyte, PT_BYTE, + start_linecol, start_linecol, PT); #endif } @@ -1481,6 +1504,14 @@ replace_range (ptrdiff_t from, ptrdiff_t to, Lisp_Object new, if (nbytes_del <= 0 && inschars == 0) return; +#ifdef HAVE_TREE_SITTER + struct ts_linecol start_linecol + = treesit_linecol_maybe (from, from_byte, current_buffer->ts_linecol_cache); + struct ts_linecol old_end_linecol + = treesit_linecol_maybe (to, to_byte, current_buffer->ts_linecol_cache); +#endif + + ptrdiff_t insbeg_bytes, insend_bytes; ptrdiff_t insbytes; unsigned char *insbeg_ptr; @@ -1623,7 +1654,9 @@ replace_range (ptrdiff_t from, ptrdiff_t to, Lisp_Object new, eassert (to_byte >= from_byte); eassert (outgoing_insbytes >= 0); eassert (from_byte >= 0); - treesit_record_change (from_byte, to_byte, from_byte + outgoing_insbytes); + + treesit_record_change (from_byte, to_byte, from_byte + outgoing_insbytes, + start_linecol, old_end_linecol, from + inschars); #endif /* Relocate point as if it were a marker. */ @@ -1948,6 +1981,13 @@ del_range_2 (ptrdiff_t from, ptrdiff_t from_byte, nchars_del = to - from; nbytes_del = to_byte - from_byte; +#ifdef HAVE_TREE_SITTER + struct ts_linecol start_linecol + = treesit_linecol_maybe (from, from_byte, current_buffer->ts_linecol_cache); + struct ts_linecol old_end_linecol + = treesit_linecol_maybe (to, to_byte, current_buffer->ts_linecol_cache); +#endif + /* Make sure the gap is somewhere in or next to what we are deleting. */ if (from > GPT) gap_right (from, from_byte); @@ -2007,7 +2047,8 @@ del_range_2 (ptrdiff_t from, ptrdiff_t from_byte, #ifdef HAVE_TREE_SITTER eassert (from_byte <= to_byte); eassert (from_byte >= 0); - treesit_record_change (from_byte, to_byte, from_byte); + treesit_record_change (from_byte, to_byte, from_byte, + start_linecol, old_end_linecol, from); #endif return deletion; diff --git a/src/treesit.c b/src/treesit.c index b097939..583a01b 100644 --- a/src/treesit.c +++ b/src/treesit.c @@ -864,6 +864,176 @@ loaded or the file name couldn't be determined, return nil. */) } +/*** Linecol functions */ + +#define TREE_SITTER_DEBUG_LINECOL true + +static void +treesit_print_linecol (struct ts_linecol linecol) +{ + printf ("{ line=%ld col=%ld pos=%ld byte_pos=%ld }\n", linecol.line, linecol.col, linecol.pos, linecol.byte_pos); +} + +static void +treesit_validate_linecol (const char *name, struct ts_linecol linecol) +{ + eassert (linecol.pos <= Z); + + if (linecol.pos > Z) + { + printf ("OUT OF RANGE\n"); + treesit_print_linecol (linecol); + printf ("Z: %ld\n", Z); + } + + ptrdiff_t true_line_count = count_lines (BEG, linecol.byte_pos) + 1; + eassert (true_line_count == linecol.line); + + if (true_line_count != linecol.line) + { + printf ("MISMATCH\n"); + printf ("%s: ", name); + treesit_print_linecol (linecol); + printf ("true: %ld\n", true_line_count); + } +} + +/* Calculate and return the line and column number of BYTE_POS by + scanning newlines from CACHE. CACHE must be valid. */ +static struct ts_linecol +treesit_linecol_of_pos (ptrdiff_t target_pos, ptrdiff_t target_byte_pos, + struct ts_linecol cache) +{ + eassert (target_pos == target_byte_pos); + + if (TREE_SITTER_DEBUG_LINECOL) + { + /* eassert (true_line_count == cache.line); */ + treesit_validate_linecol ("cache", cache); + } + + /* When we finished searching for newlines between CACHE and + TARGET_POS, BYTE_POS_2 is at TARGET_POS, and BYTE_POS_1 is at the + previous newline. If TARGET_POS happends to be on a newline, + BYTE_POS_1 will be on that position. BYTE_POS_1 is used for + calculating the column. (If CACHE and TARGET_POS are in the same + line, BYTE_POS_1 is unset and we don't use it.) */ + ptrdiff_t byte_pos_1 = 0; + ptrdiff_t pos_2 = 0; + ptrdiff_t byte_pos_2 = 0; + /* Number of lines between CACHE and TARGET_POS. */ + ptrdiff_t line_delta = 0; + + if (target_byte_pos == cache.byte_pos) + return cache; + + /* Search forward. */ + if (cache.byte_pos < target_byte_pos) + { + pos_2 = cache.pos; + byte_pos_2 = cache.byte_pos; + while (byte_pos_2 < target_byte_pos) + { + ptrdiff_t counted = 0; + pos_2 = find_newline (pos_2, byte_pos_2, target_pos, target_byte_pos, + 1, &counted, &byte_pos_2, false); + + if (counted > 0) byte_pos_1 = byte_pos_2; + line_delta += counted; + /* printf ("byte_pos_2=%ld counted=%ld line_delta=%ld\n", byte_pos_2, counted, line_delta); */ + } + eassert (byte_pos_2 == target_byte_pos); + /* At this point, byte_pos_2 is at target_pos, and byte_pos_1 is + at the previous newline if we went across any. */ + + struct ts_linecol target_linecol; + target_linecol.pos = target_pos; + target_linecol.byte_pos = target_byte_pos; + target_linecol.line = cache.line + line_delta; + /* If we moved across any newline, use the previous newline to + calculate the column; if we stayed at the same line, use the + cached column to calculate the new column. */ + target_linecol.col = line_delta > 0 + ? target_byte_pos - byte_pos_1 + : target_byte_pos - cache.byte_pos + cache.col; + + if (TREE_SITTER_DEBUG_LINECOL) + { + /* eassert (true_line_count == target_linecol.line); */ + treesit_validate_linecol ("target", target_linecol); + } + + return target_linecol; + } + + /* Search backward. */ + printf ("BACK\n"); + pos_2 = cache.pos + 1; + /* The "+1" Cancels out with the "-1" in the first iteration. */ + byte_pos_2 = cache.byte_pos + 1; + while (byte_pos_2 > target_byte_pos) + { + ptrdiff_t counted = 0; + /* pos_2 - 1 won't underflow because of the loop condition. */ + pos_2 = find_newline (pos_2 - 1, byte_pos_2 - 1, + target_pos, target_byte_pos, + -1, &counted, &byte_pos_2, false); + line_delta += counted; + } + eassert (byte_pos_2 == target_byte_pos); + /* At this point, pos_2 is at target_pos. */ + + struct ts_linecol target_linecol; + target_linecol.pos = target_pos; + target_linecol.byte_pos = target_byte_pos; + target_linecol.line = cache.line + line_delta; + eassert (cache.line + line_delta > 0); + + /* Calculate the column. */ + if (line_delta == 0) + { + target_linecol.col = cache.col - (cache.byte_pos - target_byte_pos); + } + else + { + /* We need to find the previous newline in order to calculate the + column. Use POS_2 instead of POS_2 - 1, this way, if POS_2 is + at BOL, we stay in the same place. */ + pos_2 = find_newline (pos_2, byte_pos_2, BEG, BEG_BYTE, -1, + NULL, &byte_pos_2, false); + target_linecol.col = target_byte_pos - byte_pos_2; + } + + if (TREE_SITTER_DEBUG_LINECOL) + { + treesit_validate_linecol ("target", target_linecol); + } + + return target_linecol; +} + +/* Return a TSPoint given POS and VISIBLE_BEG. VISIBLE_BEG must be + before POS. */ +static TSPoint +treesit_make_ts_point (struct ts_linecol visible_beg, + struct ts_linecol pos) +{ + TSPoint point; + if (visible_beg.line == pos.line) + { + point.row = 0; + point.column = pos.col - visible_beg.col; + eassert (point.column >= 0); + } + else + { + point.row = pos.line - visible_beg.line; + eassert (point.row > 0); + point.column = pos.col; + } + return point; +} + /*** Parsing functions */ static void @@ -879,28 +1049,34 @@ treesit_check_parser (Lisp_Object obj) larger than UINT32_MAX. */ static inline void treesit_tree_edit_1 (TSTree *tree, ptrdiff_t start_byte, - ptrdiff_t old_end_byte, ptrdiff_t new_end_byte) + ptrdiff_t old_end_byte, ptrdiff_t new_end_byte, + TSPoint start_point, TSPoint old_end_point, + TSPoint new_end_point) { eassert (start_byte >= 0); eassert (start_byte <= old_end_byte); eassert (start_byte <= new_end_byte); - TSPoint dummy_point = {0, 0}; eassert (start_byte <= UINT32_MAX); eassert (old_end_byte <= UINT32_MAX); eassert (new_end_byte <= UINT32_MAX); TSInputEdit edit = {(uint32_t) start_byte, (uint32_t) old_end_byte, (uint32_t) new_end_byte, - dummy_point, dummy_point, dummy_point}; + start_point, old_end_point, new_end_point}; ts_tree_edit (tree, &edit); } -/* Update each parser's tree after the user made an edit. This - function does not parse the buffer and only updates the tree, so it - should be very fast. */ -void -treesit_record_change (ptrdiff_t start_byte, ptrdiff_t old_end_byte, - ptrdiff_t new_end_byte) +/* Update each parser's tree after the user made an edit. This function + does not parse the buffer and only updates the tree, so it should be + very fast. If the caller knows there's no parser in the current + buffer, they can pass empty linecol for + START/OLD_END/NEW_END_linecol. */ +static void +treesit_record_change_1 (ptrdiff_t start_byte, ptrdiff_t old_end_byte, + ptrdiff_t new_end_byte, + struct ts_linecol start_linecol, + struct ts_linecol old_end_linecol, + struct ts_linecol new_end_linecol) { struct buffer *base_buffer = current_buffer; if (current_buffer->base_buffer) @@ -920,12 +1096,15 @@ treesit_record_change (ptrdiff_t start_byte, ptrdiff_t old_end_byte, { eassert (start_byte <= old_end_byte); eassert (start_byte <= new_end_byte); - /* Think the recorded change as a delete followed by an - insert, and think of them as moving unchanged text back - and forth. After all, the whole point of updating the - tree is to update the position of unchanged text. */ - ptrdiff_t visible_beg = XTS_PARSER (lisp_parser)->visible_beg; - ptrdiff_t visible_end = XTS_PARSER (lisp_parser)->visible_end; + /* Before sending the edit to tree-sitter, we need to first + clip the beg/end to visible_beg and visible_end of the + parser. A tip for understanding the code below: think the + recorded change as a delete followed by an insert, and + think of them as moving unchanged text back and forth. + After all, the whole point of updating the tree is to + update the position of unchanged text. */ + const ptrdiff_t visible_beg = XTS_PARSER (lisp_parser)->visible_beg; + const ptrdiff_t visible_end = XTS_PARSER (lisp_parser)->visible_end; eassert (visible_beg >= 0); eassert (visible_beg <= visible_end); @@ -949,9 +1128,9 @@ treesit_record_change (ptrdiff_t start_byte, ptrdiff_t old_end_byte, eassert (start_offset <= old_end_offset); eassert (start_offset <= new_end_offset); - treesit_tree_edit_1 (tree, start_offset, old_end_offset, - new_end_offset); - XTS_PARSER (lisp_parser)->need_reparse = true; + /* We have the correct offset for start/end now, but don't + update the tree yet, because we still need to calculate the + TSPoint, which needs the updated visible_beg linecol. */ /* VISIBLE_BEG/END records tree-sitter's range of view in the buffer. We need to adjust them when tree-sitter's @@ -966,19 +1145,99 @@ treesit_record_change (ptrdiff_t start_byte, ptrdiff_t old_end_byte, visi_beg_delta = (old_end_byte < visible_beg ? new_end_byte - old_end_byte : 0); - XTS_PARSER (lisp_parser)->visible_beg = visible_beg + visi_beg_delta; - XTS_PARSER (lisp_parser)->visible_end = (visible_end - + visi_beg_delta - + (new_end_offset - - old_end_offset)); + ptrdiff_t old_visi_beg = visible_beg; + struct ts_linecol old_visi_beg_linecol + = XTS_PARSER (lisp_parser)->visi_beg_linecol; + /* struct ts_linecol old_visi_end_linecol */ + /* = XTS_PARSER (lisp_parser)->visi_end_linecol; */ + + const ptrdiff_t new_visible_beg = visible_beg + visi_beg_delta; + const ptrdiff_t new_visible_end + = (visible_end + visi_beg_delta + + (new_end_offset - old_end_offset)); + + XTS_PARSER (lisp_parser)->visible_beg = new_visible_beg; + XTS_PARSER (lisp_parser)->visible_end = new_visible_end; + XTS_PARSER (lisp_parser)->visi_beg_linecol + = treesit_linecol_of_pos (BYTE_TO_CHAR (new_visible_beg), + new_visible_beg, + old_visi_beg <= start_byte + ? old_visi_beg_linecol + : start_linecol); + /* FIXME: computing visi_end_linecol from new_end_linecol + could be expensive, we need a more efficient way to compute + it. */ + XTS_PARSER (lisp_parser)->visi_end_linecol + = treesit_linecol_of_pos (BYTE_TO_CHAR (new_visible_end), + new_visible_end, + new_end_linecol); eassert (XTS_PARSER (lisp_parser)->visible_beg >= 0); eassert (XTS_PARSER (lisp_parser)->visible_beg - <= XTS_PARSER (lisp_parser)->visible_end); + <= XTS_PARSER (lisp_parser)->visible_end); + + /* Now, calculate TSPoints and finally update the tree. */ + struct ts_linecol new_begv_linecol + = XTS_PARSER (lisp_parser)->visi_beg_linecol; + TSPoint old_end_point = treesit_make_ts_point (old_visi_beg_linecol, + old_end_linecol); + TSPoint start_point = treesit_make_ts_point (new_begv_linecol, + start_linecol); + TSPoint new_end_point = treesit_make_ts_point (new_begv_linecol, + new_end_linecol); + + treesit_tree_edit_1 (tree, start_offset, old_end_offset, + new_end_offset, start_point, old_end_point, + new_end_point); + XTS_PARSER (lisp_parser)->need_reparse = true; } } } +/* Return the linecol of POS, calculated from CACHE. But if there's no + parser in the current buffer, skip calculation and return an empty + linecol instead. */ +struct ts_linecol +treesit_linecol_maybe (ptrdiff_t pos, ptrdiff_t pos_byte, + struct ts_linecol cache) +{ + if (NILP (BVAR (current_buffer, ts_parser_list))) + return TREESIT_EMPTY_LINECOL; + + return treesit_linecol_of_pos (pos, pos_byte, cache); +} + +/* Update each parser's tree after the user made an edit. This function + does not parse the buffer and only updates the tree, so it should be + very fast. + + This is a wrapper over treesit_record_change that does a bit more + boilerplate work: it (optionally) calculates linecol for new_end, + pass all the positions into treesit_record_change_1 which does the + real work, and finally (optionally) sets buffer's linecol cache to + new_end's linecol. + + If NEW_END is next to NEW_END_BYTE in the arglist, caller might + accidentally swap them, so I placed NEW_END at the end of the + arglist. */ +void +treesit_record_change (ptrdiff_t start_byte, ptrdiff_t old_end_byte, + ptrdiff_t new_end_byte, + struct ts_linecol start_linecol, + struct ts_linecol old_end_linecol, + ptrdiff_t new_end) +{ + struct ts_linecol new_end_linecol + = treesit_linecol_maybe (new_end, new_end_byte, start_linecol); + + treesit_record_change_1 (start_byte, old_end_byte, new_end_byte, + start_linecol, old_end_linecol, new_end_linecol); + + treesit_print_linecol (new_end_linecol); + if (new_end_linecol.pos != 0) + current_buffer->ts_linecol_cache = new_end_linecol; +} + static TSRange *treesit_make_ts_ranges (Lisp_Object, Lisp_Object, uint32_t *); @@ -1046,6 +1305,7 @@ treesit_sync_visible_region (Lisp_Object parser) ptrdiff_t visible_beg = XTS_PARSER (parser)->visible_beg; ptrdiff_t visible_end = XTS_PARSER (parser)->visible_end; + eassert (0 <= visible_beg); eassert (visible_beg <= visible_end); @@ -1066,39 +1326,72 @@ treesit_sync_visible_region (Lisp_Object parser) from ________|xxxx|__ to |xxxx|__________ */ + struct ts_linecol buffer_linecol_cache = buffer->ts_linecol_cache; + struct ts_linecol visi_beg_linecol = XTS_PARSER (parser)->visi_beg_linecol; + struct ts_linecol visi_end_linecol = XTS_PARSER (parser)->visi_end_linecol; + struct ts_linecol buffer_begv_linecol + = treesit_linecol_of_pos (BUF_BEGV (buffer), BUF_BEGV_BYTE (buffer), + visi_beg_linecol); + struct ts_linecol buffer_zv_linecol + = treesit_linecol_of_pos (BUF_ZV (buffer), BUF_ZV_BYTE (buffer), + buffer_linecol_cache); + eassert (visi_beg_linecol.byte_pos == visible_beg); + /* 1. Make sure visible_beg <= BUF_BEGV_BYTE. */ if (visible_beg > BUF_BEGV_BYTE (buffer)) { + TSPoint new_end = treesit_make_ts_point (buffer_begv_linecol, + visi_beg_linecol); /* Tree-sitter sees: insert at the beginning. */ - treesit_tree_edit_1 (tree, 0, 0, visible_beg - BUF_BEGV_BYTE (buffer)); + treesit_tree_edit_1 (tree, 0, 0, visible_beg - BUF_BEGV_BYTE (buffer), + TREESIT_TS_POINT_1_0, TREESIT_TS_POINT_1_0, + new_end); visible_beg = BUF_BEGV_BYTE (buffer); + visi_beg_linecol = buffer_begv_linecol; eassert (visible_beg <= visible_end); } /* 2. Make sure visible_end = BUF_ZV_BYTE. */ if (visible_end < BUF_ZV_BYTE (buffer)) { + TSPoint start = treesit_make_ts_point (visi_beg_linecol, + visi_end_linecol); + TSPoint new_end = treesit_make_ts_point (visi_beg_linecol, + buffer_zv_linecol); /* Tree-sitter sees: insert at the end. */ treesit_tree_edit_1 (tree, visible_end - visible_beg, visible_end - visible_beg, - BUF_ZV_BYTE (buffer) - visible_beg); + BUF_ZV_BYTE (buffer) - visible_beg, + start, start, new_end); visible_end = BUF_ZV_BYTE (buffer); + visi_end_linecol = buffer_zv_linecol; eassert (visible_beg <= visible_end); } else if (visible_end > BUF_ZV_BYTE (buffer)) { + TSPoint start = treesit_make_ts_point (visi_beg_linecol, + buffer_zv_linecol); + TSPoint old_end = treesit_make_ts_point (visi_beg_linecol, + visi_end_linecol); /* Tree-sitter sees: delete at the end. */ treesit_tree_edit_1 (tree, BUF_ZV_BYTE (buffer) - visible_beg, visible_end - visible_beg, - BUF_ZV_BYTE (buffer) - visible_beg); + BUF_ZV_BYTE (buffer) - visible_beg, + start, old_end, start); visible_end = BUF_ZV_BYTE (buffer); + visi_end_linecol = buffer_zv_linecol; eassert (visible_beg <= visible_end); } /* 3. Make sure visible_beg = BUF_BEGV_BYTE. */ if (visible_beg < BUF_BEGV_BYTE (buffer)) { + TSPoint old_end = treesit_make_ts_point (visi_beg_linecol, + buffer_begv_linecol); /* Tree-sitter sees: delete at the beginning. */ - treesit_tree_edit_1 (tree, 0, BUF_BEGV_BYTE (buffer) - visible_beg, 0); + treesit_tree_edit_1 (tree, 0, BUF_BEGV_BYTE (buffer) - visible_beg, 0, + TREESIT_TS_POINT_1_0, old_end, + TREESIT_TS_POINT_1_0); visible_beg = BUF_BEGV_BYTE (buffer); + visi_beg_linecol = buffer_begv_linecol; eassert (visible_beg <= visible_end); } eassert (0 <= visible_beg); @@ -1108,6 +1401,8 @@ treesit_sync_visible_region (Lisp_Object parser) XTS_PARSER (parser)->visible_beg = visible_beg; XTS_PARSER (parser)->visible_end = visible_end; + XTS_PARSER (parser)->visi_beg_linecol = visi_beg_linecol; + XTS_PARSER (parser)->visi_end_linecol = visi_end_linecol; /* Fix ranges so that the ranges stays with in visible_end. Here we try to do minimal work so that the ranges is minimally correct and @@ -1381,6 +1676,19 @@ make_treesit_parser (Lisp_Object buffer, TSParser *parser, lisp_parser->need_to_gc_buffer = false; lisp_parser->within_reparse = false; eassert (lisp_parser->visible_beg <= lisp_parser->visible_end); + + struct buffer *old_buf = current_buffer; + set_buffer_internal (XBUFFER (buffer)); + + /* treesit_linecol_of_pos doesn't signal, so no need to + unwind-protect. */ + lisp_parser->visi_beg_linecol = treesit_linecol_of_pos (BEGV, BEGV_BYTE, + TREESIT_BOB_LINECOL); + lisp_parser->visi_end_linecol + = treesit_linecol_of_pos (ZV, ZV_BYTE, lisp_parser->visi_beg_linecol); + + set_buffer_internal (old_buf); + return make_lisp_ptr (lisp_parser, Lisp_Vectorlike); } @@ -4376,6 +4684,66 @@ nodes in the subtree, including NODE. */) } } +DEFUN ("treesit--linecol-at", Ftreesit__linecol_at, + Streesit__linecol_at, 1, 1, 0, + doc: /* Test buffer-local linecol cache. + +Calculate the line and column at POS using the buffer-local cache, +return the line and column in the form of + + (LINE . COL) + +This is used for testing and debugging only. */) + (Lisp_Object pos) +{ + CHECK_NUMBER (pos); + struct ts_linecol pos_linecol + = treesit_linecol_of_pos (XFIXNUM (pos), CHAR_TO_BYTE (XFIXNUM (pos)), + current_buffer->ts_linecol_cache); + return Fcons (make_fixnum (pos_linecol.line), make_fixnum (pos_linecol.col)); +} + +DEFUN ("treesit--linecol-cache-set", Ftreesit__linecol_cache_set, + Streesit__linecol_cache_set, 4, 4, 0, + doc: /* Set the linecol cache for the current buffer. + +This is used for testing and debugging only. */) + (Lisp_Object line, Lisp_Object col, Lisp_Object pos, Lisp_Object bytepos) +{ + CHECK_FIXNUM (line); + CHECK_FIXNUM (col); + CHECK_FIXNUM (pos); + CHECK_FIXNUM (bytepos); + + current_buffer->ts_linecol_cache.line = XFIXNUM (line); + current_buffer->ts_linecol_cache.col = XFIXNUM (col); + current_buffer->ts_linecol_cache.pos = XFIXNUM (pos); + current_buffer->ts_linecol_cache.byte_pos = XFIXNUM (bytepos); + + return Qnil; +} + +DEFUN ("treesit--linecol-cache", Ftreesit__linecol_cache, + Streesit__linecol_cache, 0, 0, 0, + doc: /* Return the buffer-local linecol cache for debugging. + +Return a plist (:line LINE :col COL :pos POS :bytepos BYTEPOS). This is +used for testing and debugging only. */) + (void) +{ + struct ts_linecol cache = current_buffer->ts_linecol_cache; + + Lisp_Object plist = (list4 (QCpos, make_fixnum (cache.pos), + QCbytepos, make_fixnum (cache.byte_pos))); + plist = Fcons (make_fixnum (cache.col), plist); + plist = Fcons (QCcol, plist); + plist = Fcons (make_fixnum (cache.line), plist); + plist = Fcons (QCline, plist); + + return plist; +} + + #endif /* HAVE_TREE_SITTER */ DEFUN ("treesit-available-p", Ftreesit_available_p, @@ -4418,6 +4786,11 @@ syms_of_treesit (void) DEFSYM (QCequal, ":equal"); DEFSYM (QCmatch, ":match"); DEFSYM (QCpred, ":pred"); + DEFSYM (QCline, ":line"); + DEFSYM (QCcol, ":col"); + DEFSYM (QCpos, ":pos"); + DEFSYM (QCbytepos, ":bytepos"); + DEFSYM (Qnot_found, "not-found"); DEFSYM (Qsymbol_error, "symbol-error"); @@ -4649,6 +5022,10 @@ applies to LANGUAGE-A will be redirected to LANGUAGE-B instead. */); defsubr (&Streesit_induce_sparse_tree); defsubr (&Streesit_node_match_p); defsubr (&Streesit_subtree_stat); + + defsubr (&Streesit__linecol_at); + defsubr (&Streesit__linecol_cache); + defsubr (&Streesit__linecol_cache_set); #endif /* HAVE_TREE_SITTER */ defsubr (&Streesit_available_p); #ifdef WINDOWSNT diff --git a/src/treesit.h b/src/treesit.h index 0d4635f..b6a3f3a 100644 --- a/src/treesit.h +++ b/src/treesit.h @@ -26,6 +26,7 @@ along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>. */ #include <tree_sitter/api.h> #include "lisp.h" +#include "buffer.h" INLINE_HEADER_BEGIN @@ -97,6 +98,14 @@ struct Lisp_TS_Parser (ref:visible-beg-null) in treesit.c for more explanation. */ ptrdiff_t visible_beg; ptrdiff_t visible_end; + /* Caches the line and column number of VISIBLE_BEG. It's always + valid and matches VISIBLE_BEG (because it's updated at each buffer + edit). (It has to be, because in treesit_record_change, we need to + calculate the line/col offset of old_end_linecol, the exact reason + why is left as an exercise to the reader.) */ + struct ts_linecol visi_beg_linecol; + /* Similar to VISI_BEG_LINECOL but caches VISIBLE_END. */ + struct ts_linecol visi_end_linecol; /* This counter is incremented every time a change is made to the buffer in treesit_record_change. The node retrieved from this parser inherits this timestamp. This way we can make sure the node is @@ -222,7 +231,19 @@ CHECK_TS_COMPILED_QUERY (Lisp_Object query) INLINE_HEADER_END -extern void treesit_record_change (ptrdiff_t, ptrdiff_t, ptrdiff_t); +/* A linecol_cache that points to BOB, this is always valid. */ +static const struct ts_linecol TREESIT_BOB_LINECOL = { 1, 1, 1, 0 }; +/* An uninitialized linecol. */ +static const struct ts_linecol TREESIT_EMPTY_LINECOL = { 0, 0, 0, 0 }; +static const TSPoint TREESIT_TS_POINT_1_0 = { 1, 0 }; + +extern struct ts_linecol linecol_offset (struct ts_linecol, + struct ts_linecol); +extern struct ts_linecol treesit_linecol_maybe (ptrdiff_t, ptrdiff_t, + struct ts_linecol); +extern void treesit_record_change (ptrdiff_t, ptrdiff_t, ptrdiff_t, + struct ts_linecol, struct ts_linecol, + ptrdiff_t); extern Lisp_Object make_treesit_parser (Lisp_Object, TSParser *, TSTree *, Lisp_Object, Lisp_Object); extern Lisp_Object make_treesit_node (Lisp_Object, TSNode); diff --git a/test/src/treesit-tests.el b/test/src/treesit-tests.el index caacb74..11d5e1b 100644 --- a/test/src/treesit-tests.el +++ b/test/src/treesit-tests.el @@ -224,6 +224,51 @@ (kill-buffer base) (kill-buffer indirect)))) +;;; Linecol + +(ert-deftest treesit-linecol-basic () + "Tests for basic lincol synchronization." + (with-temp-buffer + (should (equal (treesit--linecol-cache) + '(:line 1 :col 0 :pos 1 :bytepos 1))) + (should (equal (treesit--linecol-at (point)) + '(1 . 0))) + (insert "\n") + ;; Buffer content: a single newline. + (should (equal (treesit--linecol-at (point)) + '(2 . 0))) + + (treesit--linecol-cache-set 2 0 2 2) + (should (equal (treesit--linecol-cache) + '(:line 2 :col 0 :pos 2 :bytepos 2))) + + (goto-char (point-min)) + (should (equal (treesit--linecol-at (point)) + '(1 . 0))) + + (insert "0123456789") + ;; Buffer content: ten chars followed by a newline. + (treesit--linecol-cache-set 1 0 1 1) + (should (equal (treesit--linecol-at (point)) + '(1 . 10))) + + (goto-char (point-max)) + (should (equal (treesit--linecol-at (point)) + '(2 . 0))) + + (treesit--linecol-cache-set 1 5 6 6) + (should (equal (treesit--linecol-at (point)) + '(2 . 0))) + + (treesit--linecol-cache-set 2 0 12 12) + ;; Position 6 is in the middle of the first line. + (should (equal (treesit--linecol-at 6) + '(1 . 5))) + ;; Position 11 is at the end of the line. + (should (equal (treesit--linecol-at 11) + '(1 . 10))) + )) + ;;; Tree traversal (ert-deftest treesit-search-subtree () |