summaryrefslogtreecommitdiff
diff options
authorYuan Fu <casouri@gmail.com>2025-03-18 17:26:26 -0700
committerYuan Fu <casouri@gmail.com>2025-03-19 11:59:42 -0700
commit289e1bdae28d7dbb27ae8f323b6f50f5c430a961 (patch)
treeeca2b73e0fba0d5c58e832fa80d08ce5525d1ae6
parent72f6cf8e77f3eac8fe2e5fc6ad29c2348314b84b (diff)
downloademacs-scratch/ts-linecol.tar.gz
-rw-r--r--src/buffer.c21
-rw-r--r--src/buffer.h30
-rw-r--r--src/casefiddle.c13
-rw-r--r--src/editfns.c44
-rw-r--r--src/insdel.c55
-rw-r--r--src/treesit.c433
-rw-r--r--src/treesit.h23
-rw-r--r--test/src/treesit-tests.el45
8 files changed, 617 insertions, 47 deletions
diff --git a/src/buffer.c b/src/buffer.c
index a408b79..e1f138d 100644
--- a/src/buffer.c
+++ b/src/buffer.c
@@ -641,6 +641,13 @@ even if it is dead. The return value is never nil. */)
bset_width_table (b, Qnil);
b->prevent_redisplay_optimizations_p = 1;
+#ifdef HAVE_TREE_SITTER
+ b->ts_linecol_cache.pos = 1;
+ b->ts_linecol_cache.byte_pos = 1;
+ b->ts_linecol_cache.line = 1;
+ b->ts_linecol_cache.col = 0;
+#endif
+
/* An ordinary buffer normally doesn't need markers
to handle BEGV and ZV. */
bset_pt_marker (b, Qnil);
@@ -867,6 +874,13 @@ Interactively, CLONE and INHIBIT-BUFFER-HOOKS are nil. */)
b->bidi_paragraph_cache = 0;
bset_width_table (b, Qnil);
+#ifdef HAVE_TREE_SITTER
+ b->ts_linecol_cache.pos = 1;
+ b->ts_linecol_cache.byte_pos = 1;
+ b->ts_linecol_cache.line = 1;
+ b->ts_linecol_cache.col = 0;
+#endif
+
name = Fcopy_sequence (name);
set_string_intervals (name, NULL);
bset_name (b, name);
@@ -2618,6 +2632,11 @@ results, see Info node `(elisp)Swapping Text'. */)
bset_point_before_scroll (current_buffer, Qnil);
bset_point_before_scroll (other_buffer, Qnil);
+#ifdef HAVE_TREE_SITTER
+ swapfield_ (ts_parser_list, Lisp_Object);
+ swapfield (ts_linecol_cache, struct ts_linecol);
+#endif
+
modiff_incr (&current_buffer->text->modiff, 1);
modiff_incr (&other_buffer->text->modiff, 1);
modiff_incr (&current_buffer->text->chars_modiff, 1);
@@ -5019,8 +5038,6 @@ DEFUN ("overlay-tree", Foverlay_tree, Soverlay_tree, 0, 1, 0,
}
#endif
-
-
/* Initialize the buffer routines. */
void
syms_of_buffer (void)
diff --git a/src/buffer.h b/src/buffer.h
index 5c0a6ab..9479b14 100644
--- a/src/buffer.h
+++ b/src/buffer.h
@@ -220,6 +220,23 @@ extern ptrdiff_t advance_to_char_boundary (ptrdiff_t byte_pos);
/* Define the actual buffer data structures. */
+/* This data structure stores the cache of a position and its line and
+ column number. The column number is counted in bytes. The line
+ number and column number don't consider narrowing. */
+struct ts_linecol
+{
+ /* A position in the buffer. */
+ ptrdiff_t pos;
+ /* The byte position of POS. */
+ ptrdiff_t byte_pos;
+ /* The line number of this position. */
+ ptrdiff_t line;
+ /* The column number (in bytes) of this position. Unlike Emacs'
+ convention, this is 0-based (because tree-sitter column is
+ 0-based). Simply the byte offset from BOL (or BOB). */
+ ptrdiff_t col;
+};
+
/* This data structure describes the actual text contents of a buffer.
It is shared between indirect buffers and their base buffer. */
@@ -700,6 +717,19 @@ struct buffer
/* The interval tree containing this buffer's overlays. */
struct itree_tree *overlays;
+ /* Right now only tree-sitter makes use of this, so I don't want
+ non-tree-sitter build to pay for it. If something else can make
+ use of this, we can remove the gate. */
+#ifdef HAVE_TREE_SITTER
+ /* Cache of line and column number of a position. The position cached
+ is usually near point. Tree-sitter uses this cache to calculate
+ line and column of the beginning and end of buffer edits. This
+ cache is refreshed in buffer edit functions, so it's always
+ up-to-date. Usually, the newly calculated position and line/column
+ are saved to this field. Initialized to position 1. */
+ struct ts_linecol ts_linecol_cache;
+#endif
+
/* Changes in the buffer are recorded here for undo, and t means
don't record anything. This information belongs to the base
buffer of an indirect buffer. But we can't store it in the
diff --git a/src/casefiddle.c b/src/casefiddle.c
index faeb16f..dba4834 100644
--- a/src/casefiddle.c
+++ b/src/casefiddle.c
@@ -543,6 +543,12 @@ casify_region (enum case_action flag, Lisp_Object b, Lisp_Object e)
#ifdef HAVE_TREE_SITTER
ptrdiff_t start_byte = CHAR_TO_BYTE (start);
ptrdiff_t old_end_byte = CHAR_TO_BYTE (end);
+ struct ts_linecol start_linecol
+ = treesit_linecol_maybe (start, start_byte,
+ current_buffer->ts_linecol_cache);
+ struct ts_linecol old_end_linecol
+ = treesit_linecol_maybe (end, old_end_byte,
+ current_buffer->ts_linecol_cache);
#endif
ptrdiff_t orig_end = end;
@@ -565,8 +571,11 @@ casify_region (enum case_action flag, Lisp_Object b, Lisp_Object e)
update_compositions (start, end, CHECK_ALL);
}
#ifdef HAVE_TREE_SITTER
- treesit_record_change (start_byte, old_end_byte,
- CHAR_TO_BYTE (orig_end + added));
+ ptrdiff_t new_end = orig_end + added;
+ ptrdiff_t new_end_byte = CHAR_TO_BYTE (new_end);
+
+ treesit_record_change (start_byte, old_end_byte, new_end_byte,
+ start_linecol, old_end_linecol, new_end);
#endif
return orig_end + added;
diff --git a/src/editfns.c b/src/editfns.c
index f4e9a02..5d5f4b3 100644
--- a/src/editfns.c
+++ b/src/editfns.c
@@ -2234,6 +2234,19 @@ Both characters must have the same length of multi-byte form. */)
= !NILP (BVAR (current_buffer, enable_multibyte_characters));
int fromc, toc;
+#ifdef HAVE_TREE_SITTER
+ ptrdiff_t start_char = fix_position (start);
+ ptrdiff_t old_end_char = fix_position (end);
+ ptrdiff_t start_byte = CHAR_TO_BYTE (start_char);
+ ptrdiff_t old_end_byte = CHAR_TO_BYTE (old_end_char);
+ struct ts_linecol start_linecol
+ = treesit_linecol_maybe (start_char, start_byte,
+ current_buffer->ts_linecol_cache);
+ struct ts_linecol old_end_linecol
+ = treesit_linecol_maybe (old_end_char, old_end_byte,
+ current_buffer->ts_linecol_cache);
+#endif
+
restart:
validate_region (&start, &end);
@@ -2334,7 +2347,8 @@ Both characters must have the same length of multi-byte form. */)
if (changed > 0)
{
#ifdef HAVE_TREE_SITTER
- treesit_record_change (changed, last_changed, last_changed);
+ treesit_record_change (start_byte, old_end_byte, old_end_byte,
+ start_linecol, old_end_linecol, old_end_char);
#endif
signal_after_change (changed,
last_changed - changed, last_changed - changed);
@@ -2521,6 +2535,14 @@ It returns the number of characters changed. */)
}
else
{
+#ifdef HAVE_TREE_SITTER
+ struct ts_linecol start_linecol
+ = treesit_linecol_maybe (pos, pos_byte,
+ current_buffer->ts_linecol_cache);
+ struct ts_linecol old_end_linecol
+ = treesit_linecol_maybe (pos + 1, pos_byte + len,
+ start_linecol);
+#endif
record_change (pos, 1);
while (str_len-- > 0)
*p++ = *str++;
@@ -2528,12 +2550,13 @@ It returns the number of characters changed. */)
update_compositions (pos, pos + 1, CHECK_BORDER);
#ifdef HAVE_TREE_SITTER
- /* In the previous branch, replace_range() notifies
+ /* In the previous branch, replace_range() notifies
changes to tree-sitter, but in this branch, we
modified buffer content manually, so we need to
notify tree-sitter manually. */
treesit_record_change (pos_byte, pos_byte + len,
- pos_byte + len);
+ pos_byte + len, start_linecol,
+ old_end_linecol, pos + 1);
#endif
}
characters_changed++;
@@ -4481,6 +4504,15 @@ ring. */)
start1_byte = CHAR_TO_BYTE (start1);
end2_byte = CHAR_TO_BYTE (end2);
+#ifdef HAVE_TREE_SITTER
+ struct ts_linecol start_linecol
+ = treesit_linecol_maybe (start1, start1_byte,
+ current_buffer->ts_linecol_cache);
+ struct ts_linecol old_end_linecol
+ = treesit_linecol_maybe (end2, end2_byte,
+ current_buffer->ts_linecol_cache);
+#endif
+
/* Make sure the gap won't interfere, by moving it out of the text
we will operate on. */
if (start1 < gap && gap < end2)
@@ -4620,10 +4652,8 @@ ring. */)
}
#ifdef HAVE_TREE_SITTER
- /* I don't think it's common to transpose two far-apart regions, so
- amalgamating the edit into one should be fine. This is what the
- signal_after_change below does, too. */
- treesit_record_change (start1_byte, end2_byte, end2_byte);
+ treesit_record_change (start1_byte, end2_byte, end2_byte,
+ start_linecol, old_end_linecol, end2);
#endif
signal_after_change (start1, end2 - start1, end2 - start1);
diff --git a/src/insdel.c b/src/insdel.c
index 9b77072..dd5be1d 100644
--- a/src/insdel.c
+++ b/src/insdel.c
@@ -891,6 +891,11 @@ insert_1_both (const char *string,
if (NILP (BVAR (current_buffer, enable_multibyte_characters)))
nchars = nbytes;
+#ifdef HAVE_TREE_SITTER
+ struct ts_linecol start_linecol
+ = treesit_linecol_maybe (PT, PT_BYTE, current_buffer->ts_linecol_cache);
+#endif
+
if (prepare)
/* Do this before moving and increasing the gap,
because the before-change hooks might move the gap
@@ -945,7 +950,9 @@ insert_1_both (const char *string,
#ifdef HAVE_TREE_SITTER
eassert (nbytes >= 0);
eassert (PT_BYTE >= 0);
- treesit_record_change (PT_BYTE, PT_BYTE, PT_BYTE + nbytes);
+
+ treesit_record_change (PT_BYTE, PT_BYTE, PT_BYTE + nbytes,
+ start_linecol, start_linecol, PT + nchars);
#endif
adjust_point (nchars, nbytes);
@@ -1017,6 +1024,11 @@ insert_from_string_1 (Lisp_Object string, ptrdiff_t pos, ptrdiff_t pos_byte,
= count_size_as_multibyte (SDATA (string) + pos_byte,
nbytes);
+#ifdef HAVE_TREE_SITTER
+ struct ts_linecol start_linecol
+ = treesit_linecol_maybe (PT, PT_BYTE, current_buffer->ts_linecol_cache);
+#endif
+
/* Do this before moving and increasing the gap,
because the before-change hooks might move the gap
or make it smaller. */
@@ -1081,7 +1093,9 @@ insert_from_string_1 (Lisp_Object string, ptrdiff_t pos, ptrdiff_t pos_byte,
#ifdef HAVE_TREE_SITTER
eassert (nbytes >= 0);
eassert (PT_BYTE >= 0);
- treesit_record_change (PT_BYTE, PT_BYTE, PT_BYTE + nbytes);
+
+ treesit_record_change (PT_BYTE, PT_BYTE, PT_BYTE + nbytes,
+ start_linecol, start_linecol, PT + nchars);
#endif
adjust_point (nchars, outgoing_nbytes);
@@ -1094,7 +1108,8 @@ insert_from_string_1 (Lisp_Object string, ptrdiff_t pos, ptrdiff_t pos_byte,
GPT_ADDR (if not text_at_gap_tail).
Contrary to insert_from_gap, this does not invalidate any cache,
nor update any markers, nor record any buffer modification information
- of any sort, with the single exception of notifying tree-sitter. */
+ of any sort, with the single exception of notifying tree-sitter and
+ updating tree-sitter linecol cache. */
void
insert_from_gap_1 (ptrdiff_t nchars, ptrdiff_t nbytes, bool text_at_gap_tail)
{
@@ -1103,6 +1118,8 @@ insert_from_gap_1 (ptrdiff_t nchars, ptrdiff_t nbytes, bool text_at_gap_tail)
#ifdef HAVE_TREE_SITTER
ptrdiff_t ins_bytepos = GPT_BYTE;
+ struct ts_linecol start_linecol
+ = treesit_linecol_maybe (GPT, GPT_BYTE, current_buffer->ts_linecol_cache);
#endif
GAP_SIZE -= nbytes;
@@ -1123,7 +1140,9 @@ insert_from_gap_1 (ptrdiff_t nchars, ptrdiff_t nbytes, bool text_at_gap_tail)
#ifdef HAVE_TREE_SITTER
eassert (nbytes >= 0);
eassert (ins_bytepos >= 0);
- treesit_record_change (ins_bytepos, ins_bytepos, ins_bytepos + nbytes);
+
+ treesit_record_change (ins_bytepos, ins_bytepos, ins_bytepos + nbytes,
+ start_linecol, start_linecol, ins_bytepos + nbytes);
#endif
}
@@ -1186,6 +1205,8 @@ insert_from_buffer (struct buffer *buf,
#ifdef HAVE_TREE_SITTER
ptrdiff_t obyte = PT_BYTE;
+ struct ts_linecol start_linecol
+ = treesit_linecol_maybe (opoint, obyte, current_buffer->ts_linecol_cache);
#endif
insert_from_buffer_1 (buf, charpos, nchars, inherit);
@@ -1196,7 +1217,9 @@ insert_from_buffer (struct buffer *buf,
eassert (PT_BYTE >= BEG_BYTE);
eassert (obyte >= BEG_BYTE);
eassert (PT_BYTE >= obyte);
- treesit_record_change (obyte, obyte, PT_BYTE);
+
+ treesit_record_change (obyte, obyte, PT_BYTE,
+ start_linecol, start_linecol, PT);
#endif
}
@@ -1481,6 +1504,14 @@ replace_range (ptrdiff_t from, ptrdiff_t to, Lisp_Object new,
if (nbytes_del <= 0 && inschars == 0)
return;
+#ifdef HAVE_TREE_SITTER
+ struct ts_linecol start_linecol
+ = treesit_linecol_maybe (from, from_byte, current_buffer->ts_linecol_cache);
+ struct ts_linecol old_end_linecol
+ = treesit_linecol_maybe (to, to_byte, current_buffer->ts_linecol_cache);
+#endif
+
+
ptrdiff_t insbeg_bytes, insend_bytes;
ptrdiff_t insbytes;
unsigned char *insbeg_ptr;
@@ -1623,7 +1654,9 @@ replace_range (ptrdiff_t from, ptrdiff_t to, Lisp_Object new,
eassert (to_byte >= from_byte);
eassert (outgoing_insbytes >= 0);
eassert (from_byte >= 0);
- treesit_record_change (from_byte, to_byte, from_byte + outgoing_insbytes);
+
+ treesit_record_change (from_byte, to_byte, from_byte + outgoing_insbytes,
+ start_linecol, old_end_linecol, from + inschars);
#endif
/* Relocate point as if it were a marker. */
@@ -1948,6 +1981,13 @@ del_range_2 (ptrdiff_t from, ptrdiff_t from_byte,
nchars_del = to - from;
nbytes_del = to_byte - from_byte;
+#ifdef HAVE_TREE_SITTER
+ struct ts_linecol start_linecol
+ = treesit_linecol_maybe (from, from_byte, current_buffer->ts_linecol_cache);
+ struct ts_linecol old_end_linecol
+ = treesit_linecol_maybe (to, to_byte, current_buffer->ts_linecol_cache);
+#endif
+
/* Make sure the gap is somewhere in or next to what we are deleting. */
if (from > GPT)
gap_right (from, from_byte);
@@ -2007,7 +2047,8 @@ del_range_2 (ptrdiff_t from, ptrdiff_t from_byte,
#ifdef HAVE_TREE_SITTER
eassert (from_byte <= to_byte);
eassert (from_byte >= 0);
- treesit_record_change (from_byte, to_byte, from_byte);
+ treesit_record_change (from_byte, to_byte, from_byte,
+ start_linecol, old_end_linecol, from);
#endif
return deletion;
diff --git a/src/treesit.c b/src/treesit.c
index b097939..583a01b 100644
--- a/src/treesit.c
+++ b/src/treesit.c
@@ -864,6 +864,176 @@ loaded or the file name couldn't be determined, return nil. */)
}
+/*** Linecol functions */
+
+#define TREE_SITTER_DEBUG_LINECOL true
+
+static void
+treesit_print_linecol (struct ts_linecol linecol)
+{
+ printf ("{ line=%ld col=%ld pos=%ld byte_pos=%ld }\n", linecol.line, linecol.col, linecol.pos, linecol.byte_pos);
+}
+
+static void
+treesit_validate_linecol (const char *name, struct ts_linecol linecol)
+{
+ eassert (linecol.pos <= Z);
+
+ if (linecol.pos > Z)
+ {
+ printf ("OUT OF RANGE\n");
+ treesit_print_linecol (linecol);
+ printf ("Z: %ld\n", Z);
+ }
+
+ ptrdiff_t true_line_count = count_lines (BEG, linecol.byte_pos) + 1;
+ eassert (true_line_count == linecol.line);
+
+ if (true_line_count != linecol.line)
+ {
+ printf ("MISMATCH\n");
+ printf ("%s: ", name);
+ treesit_print_linecol (linecol);
+ printf ("true: %ld\n", true_line_count);
+ }
+}
+
+/* Calculate and return the line and column number of BYTE_POS by
+ scanning newlines from CACHE. CACHE must be valid. */
+static struct ts_linecol
+treesit_linecol_of_pos (ptrdiff_t target_pos, ptrdiff_t target_byte_pos,
+ struct ts_linecol cache)
+{
+ eassert (target_pos == target_byte_pos);
+
+ if (TREE_SITTER_DEBUG_LINECOL)
+ {
+ /* eassert (true_line_count == cache.line); */
+ treesit_validate_linecol ("cache", cache);
+ }
+
+ /* When we finished searching for newlines between CACHE and
+ TARGET_POS, BYTE_POS_2 is at TARGET_POS, and BYTE_POS_1 is at the
+ previous newline. If TARGET_POS happends to be on a newline,
+ BYTE_POS_1 will be on that position. BYTE_POS_1 is used for
+ calculating the column. (If CACHE and TARGET_POS are in the same
+ line, BYTE_POS_1 is unset and we don't use it.) */
+ ptrdiff_t byte_pos_1 = 0;
+ ptrdiff_t pos_2 = 0;
+ ptrdiff_t byte_pos_2 = 0;
+ /* Number of lines between CACHE and TARGET_POS. */
+ ptrdiff_t line_delta = 0;
+
+ if (target_byte_pos == cache.byte_pos)
+ return cache;
+
+ /* Search forward. */
+ if (cache.byte_pos < target_byte_pos)
+ {
+ pos_2 = cache.pos;
+ byte_pos_2 = cache.byte_pos;
+ while (byte_pos_2 < target_byte_pos)
+ {
+ ptrdiff_t counted = 0;
+ pos_2 = find_newline (pos_2, byte_pos_2, target_pos, target_byte_pos,
+ 1, &counted, &byte_pos_2, false);
+
+ if (counted > 0) byte_pos_1 = byte_pos_2;
+ line_delta += counted;
+ /* printf ("byte_pos_2=%ld counted=%ld line_delta=%ld\n", byte_pos_2, counted, line_delta); */
+ }
+ eassert (byte_pos_2 == target_byte_pos);
+ /* At this point, byte_pos_2 is at target_pos, and byte_pos_1 is
+ at the previous newline if we went across any. */
+
+ struct ts_linecol target_linecol;
+ target_linecol.pos = target_pos;
+ target_linecol.byte_pos = target_byte_pos;
+ target_linecol.line = cache.line + line_delta;
+ /* If we moved across any newline, use the previous newline to
+ calculate the column; if we stayed at the same line, use the
+ cached column to calculate the new column. */
+ target_linecol.col = line_delta > 0
+ ? target_byte_pos - byte_pos_1
+ : target_byte_pos - cache.byte_pos + cache.col;
+
+ if (TREE_SITTER_DEBUG_LINECOL)
+ {
+ /* eassert (true_line_count == target_linecol.line); */
+ treesit_validate_linecol ("target", target_linecol);
+ }
+
+ return target_linecol;
+ }
+
+ /* Search backward. */
+ printf ("BACK\n");
+ pos_2 = cache.pos + 1;
+ /* The "+1" Cancels out with the "-1" in the first iteration. */
+ byte_pos_2 = cache.byte_pos + 1;
+ while (byte_pos_2 > target_byte_pos)
+ {
+ ptrdiff_t counted = 0;
+ /* pos_2 - 1 won't underflow because of the loop condition. */
+ pos_2 = find_newline (pos_2 - 1, byte_pos_2 - 1,
+ target_pos, target_byte_pos,
+ -1, &counted, &byte_pos_2, false);
+ line_delta += counted;
+ }
+ eassert (byte_pos_2 == target_byte_pos);
+ /* At this point, pos_2 is at target_pos. */
+
+ struct ts_linecol target_linecol;
+ target_linecol.pos = target_pos;
+ target_linecol.byte_pos = target_byte_pos;
+ target_linecol.line = cache.line + line_delta;
+ eassert (cache.line + line_delta > 0);
+
+ /* Calculate the column. */
+ if (line_delta == 0)
+ {
+ target_linecol.col = cache.col - (cache.byte_pos - target_byte_pos);
+ }
+ else
+ {
+ /* We need to find the previous newline in order to calculate the
+ column. Use POS_2 instead of POS_2 - 1, this way, if POS_2 is
+ at BOL, we stay in the same place. */
+ pos_2 = find_newline (pos_2, byte_pos_2, BEG, BEG_BYTE, -1,
+ NULL, &byte_pos_2, false);
+ target_linecol.col = target_byte_pos - byte_pos_2;
+ }
+
+ if (TREE_SITTER_DEBUG_LINECOL)
+ {
+ treesit_validate_linecol ("target", target_linecol);
+ }
+
+ return target_linecol;
+}
+
+/* Return a TSPoint given POS and VISIBLE_BEG. VISIBLE_BEG must be
+ before POS. */
+static TSPoint
+treesit_make_ts_point (struct ts_linecol visible_beg,
+ struct ts_linecol pos)
+{
+ TSPoint point;
+ if (visible_beg.line == pos.line)
+ {
+ point.row = 0;
+ point.column = pos.col - visible_beg.col;
+ eassert (point.column >= 0);
+ }
+ else
+ {
+ point.row = pos.line - visible_beg.line;
+ eassert (point.row > 0);
+ point.column = pos.col;
+ }
+ return point;
+}
+
/*** Parsing functions */
static void
@@ -879,28 +1049,34 @@ treesit_check_parser (Lisp_Object obj)
larger than UINT32_MAX. */
static inline void
treesit_tree_edit_1 (TSTree *tree, ptrdiff_t start_byte,
- ptrdiff_t old_end_byte, ptrdiff_t new_end_byte)
+ ptrdiff_t old_end_byte, ptrdiff_t new_end_byte,
+ TSPoint start_point, TSPoint old_end_point,
+ TSPoint new_end_point)
{
eassert (start_byte >= 0);
eassert (start_byte <= old_end_byte);
eassert (start_byte <= new_end_byte);
- TSPoint dummy_point = {0, 0};
eassert (start_byte <= UINT32_MAX);
eassert (old_end_byte <= UINT32_MAX);
eassert (new_end_byte <= UINT32_MAX);
TSInputEdit edit = {(uint32_t) start_byte,
(uint32_t) old_end_byte,
(uint32_t) new_end_byte,
- dummy_point, dummy_point, dummy_point};
+ start_point, old_end_point, new_end_point};
ts_tree_edit (tree, &edit);
}
-/* Update each parser's tree after the user made an edit. This
- function does not parse the buffer and only updates the tree, so it
- should be very fast. */
-void
-treesit_record_change (ptrdiff_t start_byte, ptrdiff_t old_end_byte,
- ptrdiff_t new_end_byte)
+/* Update each parser's tree after the user made an edit. This function
+ does not parse the buffer and only updates the tree, so it should be
+ very fast. If the caller knows there's no parser in the current
+ buffer, they can pass empty linecol for
+ START/OLD_END/NEW_END_linecol. */
+static void
+treesit_record_change_1 (ptrdiff_t start_byte, ptrdiff_t old_end_byte,
+ ptrdiff_t new_end_byte,
+ struct ts_linecol start_linecol,
+ struct ts_linecol old_end_linecol,
+ struct ts_linecol new_end_linecol)
{
struct buffer *base_buffer = current_buffer;
if (current_buffer->base_buffer)
@@ -920,12 +1096,15 @@ treesit_record_change (ptrdiff_t start_byte, ptrdiff_t old_end_byte,
{
eassert (start_byte <= old_end_byte);
eassert (start_byte <= new_end_byte);
- /* Think the recorded change as a delete followed by an
- insert, and think of them as moving unchanged text back
- and forth. After all, the whole point of updating the
- tree is to update the position of unchanged text. */
- ptrdiff_t visible_beg = XTS_PARSER (lisp_parser)->visible_beg;
- ptrdiff_t visible_end = XTS_PARSER (lisp_parser)->visible_end;
+ /* Before sending the edit to tree-sitter, we need to first
+ clip the beg/end to visible_beg and visible_end of the
+ parser. A tip for understanding the code below: think the
+ recorded change as a delete followed by an insert, and
+ think of them as moving unchanged text back and forth.
+ After all, the whole point of updating the tree is to
+ update the position of unchanged text. */
+ const ptrdiff_t visible_beg = XTS_PARSER (lisp_parser)->visible_beg;
+ const ptrdiff_t visible_end = XTS_PARSER (lisp_parser)->visible_end;
eassert (visible_beg >= 0);
eassert (visible_beg <= visible_end);
@@ -949,9 +1128,9 @@ treesit_record_change (ptrdiff_t start_byte, ptrdiff_t old_end_byte,
eassert (start_offset <= old_end_offset);
eassert (start_offset <= new_end_offset);
- treesit_tree_edit_1 (tree, start_offset, old_end_offset,
- new_end_offset);
- XTS_PARSER (lisp_parser)->need_reparse = true;
+ /* We have the correct offset for start/end now, but don't
+ update the tree yet, because we still need to calculate the
+ TSPoint, which needs the updated visible_beg linecol. */
/* VISIBLE_BEG/END records tree-sitter's range of view in
the buffer. We need to adjust them when tree-sitter's
@@ -966,19 +1145,99 @@ treesit_record_change (ptrdiff_t start_byte, ptrdiff_t old_end_byte,
visi_beg_delta = (old_end_byte < visible_beg
? new_end_byte - old_end_byte : 0);
- XTS_PARSER (lisp_parser)->visible_beg = visible_beg + visi_beg_delta;
- XTS_PARSER (lisp_parser)->visible_end = (visible_end
- + visi_beg_delta
- + (new_end_offset
- - old_end_offset));
+ ptrdiff_t old_visi_beg = visible_beg;
+ struct ts_linecol old_visi_beg_linecol
+ = XTS_PARSER (lisp_parser)->visi_beg_linecol;
+ /* struct ts_linecol old_visi_end_linecol */
+ /* = XTS_PARSER (lisp_parser)->visi_end_linecol; */
+
+ const ptrdiff_t new_visible_beg = visible_beg + visi_beg_delta;
+ const ptrdiff_t new_visible_end
+ = (visible_end + visi_beg_delta
+ + (new_end_offset - old_end_offset));
+
+ XTS_PARSER (lisp_parser)->visible_beg = new_visible_beg;
+ XTS_PARSER (lisp_parser)->visible_end = new_visible_end;
+ XTS_PARSER (lisp_parser)->visi_beg_linecol
+ = treesit_linecol_of_pos (BYTE_TO_CHAR (new_visible_beg),
+ new_visible_beg,
+ old_visi_beg <= start_byte
+ ? old_visi_beg_linecol
+ : start_linecol);
+ /* FIXME: computing visi_end_linecol from new_end_linecol
+ could be expensive, we need a more efficient way to compute
+ it. */
+ XTS_PARSER (lisp_parser)->visi_end_linecol
+ = treesit_linecol_of_pos (BYTE_TO_CHAR (new_visible_end),
+ new_visible_end,
+ new_end_linecol);
eassert (XTS_PARSER (lisp_parser)->visible_beg >= 0);
eassert (XTS_PARSER (lisp_parser)->visible_beg
- <= XTS_PARSER (lisp_parser)->visible_end);
+ <= XTS_PARSER (lisp_parser)->visible_end);
+
+ /* Now, calculate TSPoints and finally update the tree. */
+ struct ts_linecol new_begv_linecol
+ = XTS_PARSER (lisp_parser)->visi_beg_linecol;
+ TSPoint old_end_point = treesit_make_ts_point (old_visi_beg_linecol,
+ old_end_linecol);
+ TSPoint start_point = treesit_make_ts_point (new_begv_linecol,
+ start_linecol);
+ TSPoint new_end_point = treesit_make_ts_point (new_begv_linecol,
+ new_end_linecol);
+
+ treesit_tree_edit_1 (tree, start_offset, old_end_offset,
+ new_end_offset, start_point, old_end_point,
+ new_end_point);
+ XTS_PARSER (lisp_parser)->need_reparse = true;
}
}
}
+/* Return the linecol of POS, calculated from CACHE. But if there's no
+ parser in the current buffer, skip calculation and return an empty
+ linecol instead. */
+struct ts_linecol
+treesit_linecol_maybe (ptrdiff_t pos, ptrdiff_t pos_byte,
+ struct ts_linecol cache)
+{
+ if (NILP (BVAR (current_buffer, ts_parser_list)))
+ return TREESIT_EMPTY_LINECOL;
+
+ return treesit_linecol_of_pos (pos, pos_byte, cache);
+}
+
+/* Update each parser's tree after the user made an edit. This function
+ does not parse the buffer and only updates the tree, so it should be
+ very fast.
+
+ This is a wrapper over treesit_record_change that does a bit more
+ boilerplate work: it (optionally) calculates linecol for new_end,
+ pass all the positions into treesit_record_change_1 which does the
+ real work, and finally (optionally) sets buffer's linecol cache to
+ new_end's linecol.
+
+ If NEW_END is next to NEW_END_BYTE in the arglist, caller might
+ accidentally swap them, so I placed NEW_END at the end of the
+ arglist. */
+void
+treesit_record_change (ptrdiff_t start_byte, ptrdiff_t old_end_byte,
+ ptrdiff_t new_end_byte,
+ struct ts_linecol start_linecol,
+ struct ts_linecol old_end_linecol,
+ ptrdiff_t new_end)
+{
+ struct ts_linecol new_end_linecol
+ = treesit_linecol_maybe (new_end, new_end_byte, start_linecol);
+
+ treesit_record_change_1 (start_byte, old_end_byte, new_end_byte,
+ start_linecol, old_end_linecol, new_end_linecol);
+
+ treesit_print_linecol (new_end_linecol);
+ if (new_end_linecol.pos != 0)
+ current_buffer->ts_linecol_cache = new_end_linecol;
+}
+
static TSRange *treesit_make_ts_ranges (Lisp_Object, Lisp_Object,
uint32_t *);
@@ -1046,6 +1305,7 @@ treesit_sync_visible_region (Lisp_Object parser)
ptrdiff_t visible_beg = XTS_PARSER (parser)->visible_beg;
ptrdiff_t visible_end = XTS_PARSER (parser)->visible_end;
+
eassert (0 <= visible_beg);
eassert (visible_beg <= visible_end);
@@ -1066,39 +1326,72 @@ treesit_sync_visible_region (Lisp_Object parser)
from ________|xxxx|__
to |xxxx|__________ */
+ struct ts_linecol buffer_linecol_cache = buffer->ts_linecol_cache;
+ struct ts_linecol visi_beg_linecol = XTS_PARSER (parser)->visi_beg_linecol;
+ struct ts_linecol visi_end_linecol = XTS_PARSER (parser)->visi_end_linecol;
+ struct ts_linecol buffer_begv_linecol
+ = treesit_linecol_of_pos (BUF_BEGV (buffer), BUF_BEGV_BYTE (buffer),
+ visi_beg_linecol);
+ struct ts_linecol buffer_zv_linecol
+ = treesit_linecol_of_pos (BUF_ZV (buffer), BUF_ZV_BYTE (buffer),
+ buffer_linecol_cache);
+ eassert (visi_beg_linecol.byte_pos == visible_beg);
+
/* 1. Make sure visible_beg <= BUF_BEGV_BYTE. */
if (visible_beg > BUF_BEGV_BYTE (buffer))
{
+ TSPoint new_end = treesit_make_ts_point (buffer_begv_linecol,
+ visi_beg_linecol);
/* Tree-sitter sees: insert at the beginning. */
- treesit_tree_edit_1 (tree, 0, 0, visible_beg - BUF_BEGV_BYTE (buffer));
+ treesit_tree_edit_1 (tree, 0, 0, visible_beg - BUF_BEGV_BYTE (buffer),
+ TREESIT_TS_POINT_1_0, TREESIT_TS_POINT_1_0,
+ new_end);
visible_beg = BUF_BEGV_BYTE (buffer);
+ visi_beg_linecol = buffer_begv_linecol;
eassert (visible_beg <= visible_end);
}
/* 2. Make sure visible_end = BUF_ZV_BYTE. */
if (visible_end < BUF_ZV_BYTE (buffer))
{
+ TSPoint start = treesit_make_ts_point (visi_beg_linecol,
+ visi_end_linecol);
+ TSPoint new_end = treesit_make_ts_point (visi_beg_linecol,
+ buffer_zv_linecol);
/* Tree-sitter sees: insert at the end. */
treesit_tree_edit_1 (tree, visible_end - visible_beg,
visible_end - visible_beg,
- BUF_ZV_BYTE (buffer) - visible_beg);
+ BUF_ZV_BYTE (buffer) - visible_beg,
+ start, start, new_end);
visible_end = BUF_ZV_BYTE (buffer);
+ visi_end_linecol = buffer_zv_linecol;
eassert (visible_beg <= visible_end);
}
else if (visible_end > BUF_ZV_BYTE (buffer))
{
+ TSPoint start = treesit_make_ts_point (visi_beg_linecol,
+ buffer_zv_linecol);
+ TSPoint old_end = treesit_make_ts_point (visi_beg_linecol,
+ visi_end_linecol);
/* Tree-sitter sees: delete at the end. */
treesit_tree_edit_1 (tree, BUF_ZV_BYTE (buffer) - visible_beg,
visible_end - visible_beg,
- BUF_ZV_BYTE (buffer) - visible_beg);
+ BUF_ZV_BYTE (buffer) - visible_beg,
+ start, old_end, start);
visible_end = BUF_ZV_BYTE (buffer);
+ visi_end_linecol = buffer_zv_linecol;
eassert (visible_beg <= visible_end);
}
/* 3. Make sure visible_beg = BUF_BEGV_BYTE. */
if (visible_beg < BUF_BEGV_BYTE (buffer))
{
+ TSPoint old_end = treesit_make_ts_point (visi_beg_linecol,
+ buffer_begv_linecol);
/* Tree-sitter sees: delete at the beginning. */
- treesit_tree_edit_1 (tree, 0, BUF_BEGV_BYTE (buffer) - visible_beg, 0);
+ treesit_tree_edit_1 (tree, 0, BUF_BEGV_BYTE (buffer) - visible_beg, 0,
+ TREESIT_TS_POINT_1_0, old_end,
+ TREESIT_TS_POINT_1_0);
visible_beg = BUF_BEGV_BYTE (buffer);
+ visi_beg_linecol = buffer_begv_linecol;
eassert (visible_beg <= visible_end);
}
eassert (0 <= visible_beg);
@@ -1108,6 +1401,8 @@ treesit_sync_visible_region (Lisp_Object parser)
XTS_PARSER (parser)->visible_beg = visible_beg;
XTS_PARSER (parser)->visible_end = visible_end;
+ XTS_PARSER (parser)->visi_beg_linecol = visi_beg_linecol;
+ XTS_PARSER (parser)->visi_end_linecol = visi_end_linecol;
/* Fix ranges so that the ranges stays with in visible_end. Here we
try to do minimal work so that the ranges is minimally correct and
@@ -1381,6 +1676,19 @@ make_treesit_parser (Lisp_Object buffer, TSParser *parser,
lisp_parser->need_to_gc_buffer = false;
lisp_parser->within_reparse = false;
eassert (lisp_parser->visible_beg <= lisp_parser->visible_end);
+
+ struct buffer *old_buf = current_buffer;
+ set_buffer_internal (XBUFFER (buffer));
+
+ /* treesit_linecol_of_pos doesn't signal, so no need to
+ unwind-protect. */
+ lisp_parser->visi_beg_linecol = treesit_linecol_of_pos (BEGV, BEGV_BYTE,
+ TREESIT_BOB_LINECOL);
+ lisp_parser->visi_end_linecol
+ = treesit_linecol_of_pos (ZV, ZV_BYTE, lisp_parser->visi_beg_linecol);
+
+ set_buffer_internal (old_buf);
+
return make_lisp_ptr (lisp_parser, Lisp_Vectorlike);
}
@@ -4376,6 +4684,66 @@ nodes in the subtree, including NODE. */)
}
}
+DEFUN ("treesit--linecol-at", Ftreesit__linecol_at,
+ Streesit__linecol_at, 1, 1, 0,
+ doc: /* Test buffer-local linecol cache.
+
+Calculate the line and column at POS using the buffer-local cache,
+return the line and column in the form of
+
+ (LINE . COL)
+
+This is used for testing and debugging only. */)
+ (Lisp_Object pos)
+{
+ CHECK_NUMBER (pos);
+ struct ts_linecol pos_linecol
+ = treesit_linecol_of_pos (XFIXNUM (pos), CHAR_TO_BYTE (XFIXNUM (pos)),
+ current_buffer->ts_linecol_cache);
+ return Fcons (make_fixnum (pos_linecol.line), make_fixnum (pos_linecol.col));
+}
+
+DEFUN ("treesit--linecol-cache-set", Ftreesit__linecol_cache_set,
+ Streesit__linecol_cache_set, 4, 4, 0,
+ doc: /* Set the linecol cache for the current buffer.
+
+This is used for testing and debugging only. */)
+ (Lisp_Object line, Lisp_Object col, Lisp_Object pos, Lisp_Object bytepos)
+{
+ CHECK_FIXNUM (line);
+ CHECK_FIXNUM (col);
+ CHECK_FIXNUM (pos);
+ CHECK_FIXNUM (bytepos);
+
+ current_buffer->ts_linecol_cache.line = XFIXNUM (line);
+ current_buffer->ts_linecol_cache.col = XFIXNUM (col);
+ current_buffer->ts_linecol_cache.pos = XFIXNUM (pos);
+ current_buffer->ts_linecol_cache.byte_pos = XFIXNUM (bytepos);
+
+ return Qnil;
+}
+
+DEFUN ("treesit--linecol-cache", Ftreesit__linecol_cache,
+ Streesit__linecol_cache, 0, 0, 0,
+ doc: /* Return the buffer-local linecol cache for debugging.
+
+Return a plist (:line LINE :col COL :pos POS :bytepos BYTEPOS). This is
+used for testing and debugging only. */)
+ (void)
+{
+ struct ts_linecol cache = current_buffer->ts_linecol_cache;
+
+ Lisp_Object plist = (list4 (QCpos, make_fixnum (cache.pos),
+ QCbytepos, make_fixnum (cache.byte_pos)));
+ plist = Fcons (make_fixnum (cache.col), plist);
+ plist = Fcons (QCcol, plist);
+ plist = Fcons (make_fixnum (cache.line), plist);
+ plist = Fcons (QCline, plist);
+
+ return plist;
+}
+
+
#endif /* HAVE_TREE_SITTER */
DEFUN ("treesit-available-p", Ftreesit_available_p,
@@ -4418,6 +4786,11 @@ syms_of_treesit (void)
DEFSYM (QCequal, ":equal");
DEFSYM (QCmatch, ":match");
DEFSYM (QCpred, ":pred");
+ DEFSYM (QCline, ":line");
+ DEFSYM (QCcol, ":col");
+ DEFSYM (QCpos, ":pos");
+ DEFSYM (QCbytepos, ":bytepos");
+
DEFSYM (Qnot_found, "not-found");
DEFSYM (Qsymbol_error, "symbol-error");
@@ -4649,6 +5022,10 @@ applies to LANGUAGE-A will be redirected to LANGUAGE-B instead. */);
defsubr (&Streesit_induce_sparse_tree);
defsubr (&Streesit_node_match_p);
defsubr (&Streesit_subtree_stat);
+
+ defsubr (&Streesit__linecol_at);
+ defsubr (&Streesit__linecol_cache);
+ defsubr (&Streesit__linecol_cache_set);
#endif /* HAVE_TREE_SITTER */
defsubr (&Streesit_available_p);
#ifdef WINDOWSNT
diff --git a/src/treesit.h b/src/treesit.h
index 0d4635f..b6a3f3a 100644
--- a/src/treesit.h
+++ b/src/treesit.h
@@ -26,6 +26,7 @@ along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>. */
#include <tree_sitter/api.h>
#include "lisp.h"
+#include "buffer.h"
INLINE_HEADER_BEGIN
@@ -97,6 +98,14 @@ struct Lisp_TS_Parser
(ref:visible-beg-null) in treesit.c for more explanation. */
ptrdiff_t visible_beg;
ptrdiff_t visible_end;
+ /* Caches the line and column number of VISIBLE_BEG. It's always
+ valid and matches VISIBLE_BEG (because it's updated at each buffer
+ edit). (It has to be, because in treesit_record_change, we need to
+ calculate the line/col offset of old_end_linecol, the exact reason
+ why is left as an exercise to the reader.) */
+ struct ts_linecol visi_beg_linecol;
+ /* Similar to VISI_BEG_LINECOL but caches VISIBLE_END. */
+ struct ts_linecol visi_end_linecol;
/* This counter is incremented every time a change is made to the
buffer in treesit_record_change. The node retrieved from this parser
inherits this timestamp. This way we can make sure the node is
@@ -222,7 +231,19 @@ CHECK_TS_COMPILED_QUERY (Lisp_Object query)
INLINE_HEADER_END
-extern void treesit_record_change (ptrdiff_t, ptrdiff_t, ptrdiff_t);
+/* A linecol_cache that points to BOB, this is always valid. */
+static const struct ts_linecol TREESIT_BOB_LINECOL = { 1, 1, 1, 0 };
+/* An uninitialized linecol. */
+static const struct ts_linecol TREESIT_EMPTY_LINECOL = { 0, 0, 0, 0 };
+static const TSPoint TREESIT_TS_POINT_1_0 = { 1, 0 };
+
+extern struct ts_linecol linecol_offset (struct ts_linecol,
+ struct ts_linecol);
+extern struct ts_linecol treesit_linecol_maybe (ptrdiff_t, ptrdiff_t,
+ struct ts_linecol);
+extern void treesit_record_change (ptrdiff_t, ptrdiff_t, ptrdiff_t,
+ struct ts_linecol, struct ts_linecol,
+ ptrdiff_t);
extern Lisp_Object make_treesit_parser (Lisp_Object, TSParser *, TSTree *,
Lisp_Object, Lisp_Object);
extern Lisp_Object make_treesit_node (Lisp_Object, TSNode);
diff --git a/test/src/treesit-tests.el b/test/src/treesit-tests.el
index caacb74..11d5e1b 100644
--- a/test/src/treesit-tests.el
+++ b/test/src/treesit-tests.el
@@ -224,6 +224,51 @@
(kill-buffer base)
(kill-buffer indirect))))
+;;; Linecol
+
+(ert-deftest treesit-linecol-basic ()
+ "Tests for basic lincol synchronization."
+ (with-temp-buffer
+ (should (equal (treesit--linecol-cache)
+ '(:line 1 :col 0 :pos 1 :bytepos 1)))
+ (should (equal (treesit--linecol-at (point))
+ '(1 . 0)))
+ (insert "\n")
+ ;; Buffer content: a single newline.
+ (should (equal (treesit--linecol-at (point))
+ '(2 . 0)))
+
+ (treesit--linecol-cache-set 2 0 2 2)
+ (should (equal (treesit--linecol-cache)
+ '(:line 2 :col 0 :pos 2 :bytepos 2)))
+
+ (goto-char (point-min))
+ (should (equal (treesit--linecol-at (point))
+ '(1 . 0)))
+
+ (insert "0123456789")
+ ;; Buffer content: ten chars followed by a newline.
+ (treesit--linecol-cache-set 1 0 1 1)
+ (should (equal (treesit--linecol-at (point))
+ '(1 . 10)))
+
+ (goto-char (point-max))
+ (should (equal (treesit--linecol-at (point))
+ '(2 . 0)))
+
+ (treesit--linecol-cache-set 1 5 6 6)
+ (should (equal (treesit--linecol-at (point))
+ '(2 . 0)))
+
+ (treesit--linecol-cache-set 2 0 12 12)
+ ;; Position 6 is in the middle of the first line.
+ (should (equal (treesit--linecol-at 6)
+ '(1 . 5)))
+ ;; Position 11 is at the end of the line.
+ (should (equal (treesit--linecol-at 11)
+ '(1 . 10)))
+ ))
+
;;; Tree traversal
(ert-deftest treesit-search-subtree ()