~duangle/scopes#2: 
[PATCH] string-interpolation.sc

# HG changeset patch
# User Westerbly Snaydley <westerbly@gmail.com>
# Date 1597276697 18000
#      Wed Aug 12 18:58:17 2020 -0500
# Node ID f0f2584a4dc0137865abec0f1e4540e09d893e55
# Parent  8b3c98d57d7b4111d1db9a8878ff935f299ab705
added UTF-8 pattern support for the interpolation markers

diff -r 8b3c98d57d7b -r f0f2584a4dc0 lib/scopes/string-interpolation.sc
--- a/lib/scopes/string-interpolation.sc	Wed Aug 12 02:44:54 2020 -0500
+++ b/lib/scopes/string-interpolation.sc	Wed Aug 12 18:58:17 2020 -0500
@@ -10,31 +10,42 @@
     expressions inside them.
 
 spice escape-pattern (str)
-    # because patterns are expected to be very small (1-3 characters),
-    # it will be easier and probably even faster to just prepend a \ to every char.
+    import UTF-8
+    using import itertools
+
+    str as:= string
     let new-pattern =
         # TODO: support UTF-8
-        fold (result = "") for c in (str as string)
-            local c = c
-            switch c # characters in "[]\\^$.|?*+(){}"
-            pass 91:i8
-            pass 93:i8
-            pass 92:i8
-            pass 94:i8
-            pass 36:i8
-            pass 46:i8
-            pass 124:i8
-            pass 63:i8
-            pass 42:i8
-            pass 43:i8
-            pass 40:i8
-            pass 41:i8
-            pass 123:i8
-            pass 125:i8
-            do
-                .. result "\\" (string &c 1:usize)
-            default
-                .. result (string &c 1:usize)
+        ->> str UTF-8.decoder
+            retain
+                inline (c)
+                    switch c
+                    pass (UTF-8.char "[")
+                    pass (UTF-8.char "]")
+                    pass (UTF-8.char "\\")
+                    pass (UTF-8.char "^")
+                    pass (UTF-8.char "$")
+                    pass (UTF-8.char ".")
+                    pass (UTF-8.char "|")
+                    pass (UTF-8.char "?")
+                    pass (UTF-8.char "*")
+                    pass (UTF-8.char "+")
+                    pass (UTF-8.char "(")
+                    pass (UTF-8.char ")")
+                    pass (UTF-8.char "{")
+                    pass (UTF-8.char "}")
+                    do
+                        UTF-8.char "\\"
+                    default
+                        -1
+                map
+                    inline (code-point)
+                        code-point
+            flatten
+            filter
+                (x) -> (x != -1)
+            UTF-8.encoder
+            string.collector 256
     `[new-pattern]
 
 run-stage;
diff -r 8b3c98d57d7b -r f0f2584a4dc0 testing/test_string_interpolation.sc
--- a/testing/test_string_interpolation.sc	Wed Aug 12 02:44:54 2020 -0500
+++ b/testing/test_string_interpolation.sc	Wed Aug 12 18:58:17 2020 -0500
@@ -34,3 +34,12 @@
     ==
         fAlt"this is not a very smart interpolation marker abravarargs...cadabra"
         "this is not a very smart interpolation marker 1 2 3 4"
+
+let prefix:f🤔 = (gen-interpolation-macro "🤔" "🤔")
+run-stage;
+let encoding = "UTF-8"
+test
+    ==
+        f🤔"we also support 🤔encoding🤔 markers!"
+        "we also support UTF-8 markers!"
+print f🤔"we also support 🤔encoding🤔 markers!"
# HG changeset patch
# User Westerbly Snaydley <westerbly@gmail.com>
# Date 1597869862 18000
#      Wed Aug 19 15:44:22 2020 -0500
# Node ID 7e6b1b843bcdda5f328a7752edca6230ddab8d8b
# Parent  f0f2584a4dc0137865abec0f1e4540e09d893e55
small adjustments to string-interpolation and its test

diff -r f0f2584a4dc0 -r 7e6b1b843bcd lib/scopes/string-interpolation.sc
--- a/lib/scopes/string-interpolation.sc	Wed Aug 12 18:58:17 2020 -0500
+++ b/lib/scopes/string-interpolation.sc	Wed Aug 19 15:44:22 2020 -0500
@@ -15,7 +15,6 @@
 
     str as:= string
     let new-pattern =
-        # TODO: support UTF-8
         ->> str UTF-8.decoder
             retain
                 inline (c)
diff -r f0f2584a4dc0 -r 7e6b1b843bcd testing/test_string_interpolation.sc
--- a/testing/test_string_interpolation.sc	Wed Aug 12 18:58:17 2020 -0500
+++ b/testing/test_string_interpolation.sc	Wed Aug 19 15:44:22 2020 -0500
@@ -42,4 +42,3 @@
     ==
         f🤔"we also support 🤔encoding🤔 markers!"
         "we also support UTF-8 markers!"
-print f🤔"we also support 🤔encoding🤔 markers!"
# HG changeset patch
# User Westerbly Snaydley <westerbly@gmail.com>
# Date 1597928486 18000
#      Thu Aug 20 08:01:26 2020 -0500
# Node ID 0e745ccc1a1662fa106474ad9af8b84dfb81e34f
# Parent  7e6b1b843bcdda5f328a7752edca6230ddab8d8b
added a check for illegal UTF-8 bytes in the regexp escape

diff -r 7e6b1b843bcd -r 0e745ccc1a16 lib/scopes/string-interpolation.sc
--- a/lib/scopes/string-interpolation.sc	Wed Aug 19 15:44:22 2020 -0500
+++ b/lib/scopes/string-interpolation.sc	Thu Aug 20 08:01:26 2020 -0500
@@ -39,6 +39,8 @@
                         -1
                 map
                     inline (code-point)
+                        if (code-point < 0)
+                            error "illegal byte in UTF-8 stream"
                         code-point
             flatten
             filter
# HG changeset patch
# User Westerbly Snaydley <westerbly@gmail.com>
# Date 1597218294 18000
#      Wed Aug 12 02:44:54 2020 -0500
# Node ID 8b3c98d57d7b4111d1db9a8878ff935f299ab705
# Parent  7173c4a4d444208b80a284be9f52fbff43cb1307
First version of string-interpolation.sc for evaluation

diff -r 7173c4a4d444 -r 8b3c98d57d7b lib/scopes/string-interpolation.sc
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/lib/scopes/string-interpolation.sc	Wed Aug 12 02:44:54 2020 -0500
@@ -0,0 +1,90 @@
+#
+    The Scopes Compiler Infrastructure
+    This file is distributed under the MIT License.
+    See LICENSE.md for details.
+
+""""string-interpolation
+    ====
+
+    Implements specially formatted strings that can reference variables and
+    expressions inside them.
+
+spice escape-pattern (str)
+    # because patterns are expected to be very small (1-3 characters),
+    # it will be easier and probably even faster to just prepend a \ to every char.
+    let new-pattern =
+        # TODO: support UTF-8
+        fold (result = "") for c in (str as string)
+            local c = c
+            switch c # characters in "[]\\^$.|?*+(){}"
+            pass 91:i8
+            pass 93:i8
+            pass 92:i8
+            pass 94:i8
+            pass 36:i8
+            pass 46:i8
+            pass 124:i8
+            pass 63:i8
+            pass 42:i8
+            pass 43:i8
+            pass 40:i8
+            pass 41:i8
+            pass 123:i8
+            pass 125:i8
+            do
+                .. result "\\" (string &c 1:usize)
+            default
+                .. result (string &c 1:usize)
+    `[new-pattern]
+
+run-stage;
+
+inline gen-interpolation-macro (begin-pattern end-pattern)
+    if ((begin-pattern == "") or (end-pattern == ""))
+        error "interpolation markers must be at least one character long"
+    fn any->string (values...)
+        inline convert (value)
+            static-if ((typeof value) == string)
+                value
+            else
+                tostring value
+
+        let first rest = (va-split 1 values...)
+        va-lfold (convert (first))
+            inline (__ v result)
+                .. result " " (convert v)
+            (rest)
+    let pattern =
+        ..
+            escape-pattern begin-pattern
+            ".*?"
+            escape-pattern end-pattern
+    sugar interpolate (str)
+        str as:= string
+        let chunks =
+            loop (str chunks = str '())
+                let match? start end = ('match? pattern str)
+                if (not match?)
+                    break ('reverse (cons str chunks))
+                let lhs = (lslice str start)
+                let code =
+                    slice str
+                        start + (countof begin-pattern)
+                        end - (countof end-pattern)
+                let parsed-code = (sc_parse_from_string code)
+                _
+                    rslice str end
+                    cons
+                        cons (qq [any->string]) (parsed-code as list)
+                        cons lhs chunks
+
+        if ((countof chunks) > 1)
+            cons (qq [..]) chunks
+        else
+            cons (qq [embed]) chunks
+
+
+do
+    let gen-interpolation-macro
+    let prefix:f = (gen-interpolation-macro "{" "}")
+    locals;
diff -r 7173c4a4d444 -r 8b3c98d57d7b testing/test_string_interpolation.sc
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/testing/test_string_interpolation.sc	Wed Aug 12 02:44:54 2020 -0500
@@ -0,0 +1,36 @@
+using import string-interpolation
+using import testing
+
+let ABC = 123
+let CDE = 345
+let str = "banana"
+test
+    ==
+        f"ABC is {ABC}, CDE is {CDE}, and the sum is {(+ ABC CDE)}. This other string is {str}"
+        "ABC is 123, CDE is 345, and the sum is 468. This other string is banana"
+
+test
+    ==
+        f"there is nothing to interpolate here, so the string should remain unchanged."
+        "there is nothing to interpolate here, so the string should remain unchanged."
+
+test
+    ==
+        f"{ an expression start token without a corresponding end is ignored"
+        "{ an expression start token without a corresponding end is ignored"
+
+test-compiler-error
+    f"however { if there's an end token at some point, the first start token is considered. {}"
+
+let varargs... = 1 2 3 4
+test
+    ==
+        f"some ... {varargs...} for you!"
+        "some ... 1 2 3 4 for you!"
+
+let prefix:fAlt = (gen-interpolation-macro "abra" "cadabra")
+run-stage;
+test
+    ==
+        fAlt"this is not a very smart interpolation marker abravarargs...cadabra"
+        "this is not a very smart interpolation marker 1 2 3 4"
Status
REPORTED
Submitter
~radgerayden
Assigned to
No-one
Submitted
4 years ago
Updated
4 years ago
Labels
No labels applied.