Skip to content

Commit c1eecbc

Browse files
committed
Fix deverbosification of strings
- Make --disablecompression work
- Support strings containing "⸿" or "¶"
- Don't quote strings containing "\r", "\n" or "”"
- Don't quote strings only containing multiple extended characters
- Properly escape strings containing "´"
- Count ISO-8859-1 extended characters correctly
1 parent a1e03af commit c1eecbc

File tree

5 files changed

+29
-31
lines changed

5 files changed

+29
-31
lines changed

‎charcoal.py‎

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
UnicodeLookup, ReverseLookup, UnicodeCommands, InCodepage, sOperator,
2121
rCommand
2222
)
23-
from compression import Decompressed, Compressed
23+
from compression import Decompressed, Escaped
2424
from wolfram import *
2525
from extras import *
2626
from enum import Enum
@@ -5017,9 +5017,7 @@ def print_xxd(data):
50175017
del raw_file_output
50185018
del file_output
50195019
if argv.disablecompression:
5020-
StringifierProcessor[CT.String][0] = lambda result: [re.sub(
5021-
"\n", "¶", rCommand.sub(r"´\1", result[0])
5022-
)]
5020+
StringifierProcessor[CT.String][0] = lambda r: [("s", Escaped(r[0])), ("!", "s")]
50235021
if argv.verbose or argv.deverbosify:
50245022
code = ParseExpression(
50255023
code, grammars=VerboseGrammars, processor=StringifierProcessor,
@@ -5057,7 +5055,7 @@ def charcoal_length(character):
50575055
return 5
50585056
length = 0
50595057
for character in code:
5060-
if InCodepage(character) or ord(character) < 256:
5058+
if InCodepage(character):
50615059
length += 1
50625060
else:
50635061
length += charcoal_length(

‎codepage.py‎

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -83,8 +83,8 @@ def InCodepage(character):
8383
UnicodeCommands = "ABCDEFGHIJKLMNOPQRSTUVWXYZ\
8484
⁰¹²³⁴⁵⁶⁷⁸⁹αβγδεζηθικλμνξπρσςτυφχψω⟦⟧⦃⦄«»⁺⁻×÷∕﹪∧∨¬⁼‹›&|~←↑→↓↖↗↘↙\
8585
↶↷⟲¿‽‖·¤¦“”⎚…§⎇↥↧⌊⌈±⊞⊟➙⧴″‴?⪫⪪⌕℅◧◨⮌≡№≔≕▷▶✂⊙⬤✳�≦≧ⅈⅉ;ΣΠ⊕⊖⊗⊘⭆↨⍘₂↔Φ"
86-
UnicodeCommandRegex = "A-Z⁰¹²³⁴-⁹α-ξπ-ω⟦⟧⦃⦄«»⁺⁻×÷∕﹪∧∨¬⁼‹›&|~←-↓↖-↙\
87-
↶↷⟲¿‽‖·¤¦“”⎚…§⎇↥↧⌊⌈±⊞⊟➙⧴″‴?⪫⪪⌕℅◧◨⮌≡№≔≕▷▶✂⊙⬤✳�≦≧ⅈⅉ;ΣΠ⊕⊖⊗⊘⭆↨⍘₂↔Φ"
86+
UnicodeCommandRegex = "A-Z⸿¶⁰¹²³⁴-⁹α-ξπ-ω⟦⟧⦃⦄«»⁺⁻×÷∕﹪∧∨¬⁼‹›&|~←-↓↖-↙\
87+
↶↷⟲¿‽‖´·¤¦“”⎚…§⎇↥↧⌊⌈±⊞⊟➙⧴″‴?⪫⪪⌕℅◧◨⮌≡№≔≕▷▶✂⊙⬤✳�≦≧ⅈⅉ;ΣΠ⊕⊖⊗⊘⭆↨⍘₂↔Φ"
8888
sCommand = "[%s]" % UnicodeCommandRegex
8989
sOperator = """\
9090
[SN‽¬I‽V⊟➙⧴″‴↧↥⌊⌈℅⮌⁺⁻×÷∕﹪⁼‹›&|~…X§?⪫⪪⌕◧◨№⎇E∧∨▷≕✂⊙⬤ⅈⅉ;ΣΠ⊕⊖⊗⊘⭆↨⍘₂↔Φ]\

‎compression.py‎

Lines changed: 22 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,20 @@
3030
LZMA_ENCODING = 125
3131

3232

33-
def Compressed(string, escape=False):
33+
def Escaped(string):
34+
"""
35+
Escaped(string) -> str
36+
Returns a Charcoal escaped literal of the given string.
37+
38+
"""
39+
if not string:
40+
return "””"
41+
string = rCommand.sub("´\\1", string)
42+
if string[0] in "+X*|-\\/<>^KLTVY7¬":
43+
string = "´" + string
44+
return re.sub("\r", "⸿", re.sub("\n", "¶", string))
45+
46+
def Compressed(string):
3447
"""
3548
Compressed(string) -> str
3649
Returns the shortest Charcoal compressed literal of the given string.
@@ -39,42 +52,29 @@ def Compressed(string, escape=False):
3952
if not string:
4053
return "””"
4154
if not all(
42-
character == "¶" or character == "⸿" or
55+
character == "\n" or character == "\r" or
4356
character >= " " and character <= "~"
4457
for character in string
4558
):
46-
if not escape:
47-
return string
48-
if len(re.findall("[^ -~¶⸿]", string)) > 3:
49-
return "”" + Codepage[RAW_ENCODING] + string + "”"
50-
return (
51-
"´" * (string[0] in "+X*|-\\/<>^KLTVY7¬") +
52-
rCommand.sub("´\\1", string)
53-
)
54-
original_string, string = string, re.sub(
55-
"¶", "\n", re.sub("⸿", "\r", string)
56-
)
59+
if "\n" in string or "\r" in string or "”" in string:
60+
return Escaped(string)
61+
if len(rCommand.findall(string)) < 3:
62+
return Escaped(string)
63+
return "”" + Codepage[RAW_ENCODING] + string + "”"
5764
compressed_charset = CompressCharset(string)
5865
compressed_rle = CompressRLE(string)
5966
compressed_brotli = CompressBrotli(string)
6067
compressed_lzma = CompressLZMA(string)
6168
compressed_permuted = CompressPermutations(string)
6269
compressed = CompressString(string)
63-
string_length = len(original_string) - 2
70+
string_length = len(string) - 2
6471
minimum_length = min(
6572
len(compressed_charset), len(compressed_rle), len(compressed_brotli),
6673
len(compressed_lzma), len(compressed_permuted), len(compressed),
6774
string_length
6875
)
6976
if string_length == minimum_length:
70-
if not escape:
71-
return original_string
72-
if len(re.findall("[^ -~¶⸿]", original_string)) > 3:
73-
return "”" + Codepage[RAW_ENCODING] + original_string + "”"
74-
return (
75-
"´" * (original_string[0] in "+X*|-\\/<>^KLTVY7¬") +
76-
rCommand.sub("´\\1", original_string)
77-
)
77+
return Escaped(string)
7878
if len(compressed) == minimum_length:
7979
return "“" + compressed + "”"
8080
if len(compressed_permuted) == minimum_length:

‎stringifierprocessor.py‎

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
from charcoaltoken import CharcoalToken as CT
22
from unicodegrammars import UnicodeGrammars
33
from compression import Compressed
4-
from codepage import rCommand
54
import re
65

76
def string(s):
@@ -14,7 +13,7 @@ def string(s):
1413
if s == " !\"#$%&'()*+,-./0123456789:;<=>?@\
1514
ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~":
1615
return [("v", "γ")]
17-
s = Compressed(s.replace("\n", "¶").replace("\r", "⸿"), True)
16+
s = Compressed(s)
1817
if s[0] != "“" and s[0] != "”":
1918
return [("s", s), ("!", "s")]
2019
return [("c", s)]

‎test.py‎

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1797,6 +1797,7 @@ def test_dictionary_verbose(self):
17971797
def test_compression(self):
17981798
self.assertEqual(Run("”y≔⟦³¦²¦¹⟧β▷sβ”"), "≔⟦³¦²¦¹⟧β▷sβ")
17991799
self.assertEqual(Run("”y≔⟦³¦²¦¹⟧β▷sβ"), "≔⟦³¦²¦¹⟧β▷sβ")
1800+
self.assertEqual(Run("Print('´⸿¶')", verbose=True), "´⸿¶")
18001801
self.assertEqual(Run("\
18011802
Print('zzyzyzyzyzyzyzyzzzzzzzzyzyz')", verbose=True), "\
18021803
zzyzyzyzyzyzyzyzzzzzzzzyzyz")

0 commit comments

Comments
 (0)