\version "2.15.39"
           
%#(define strg "asdäöüព្រះជាម្ចាស់")
#(define strg "ä")
%#(define strg "asd")
 
#(define (utf-8-string->wide-char-list str)

   (define (helper counter erg str)
      "Convert a UTF-8 byte string into an list with integer representing the UNICODE character codes"
      (let
       ((mult 1)
        (sum 0))
     
        (let ((numeric (char->integer (string-ref str counter))))
         (if (< numeric #x80)   ;; #x80 -> 128
          (begin ; 7-Bit-ASCII stand alone character
           (if (not (equal? mult 1)) 
            (begin
             (ly:warning "utf-8-string->wide-char-list: UTF-8-string out of sequence!")
             (set! mult 1) (set! sum 0)))
           (set! erg (cons numeric erg)))
          (if (< numeric #xc0)   ;; #xc0 -> 192
           (begin ; 10. ..... = UTF-8 expansion byte
            (set! sum (+ sum (* mult (- numeric #x80))))
            (set! mult (* 64 mult)))
           (if (< numeric #xe0)
            (begin ; 110. .... = UTF-8 start of two byte sequence
             (if (not (equal? mult 64))
              (ly:warning "utf-8-string->wide-char-list: UTF-8-string out of sequence!")
              (begin
               (set! sum (+ sum (* mult (- numeric #xc0))))
               (set! erg (cons sum erg))))
             (set! mult 1) (set! sum 0))
            (if (< numeric #xf0)
             (begin ; 1110 .... = UTF-8 start of three byte sequence
              (if (not (equal? mult 4096))
               (ly:warning "utf-8-string->wide-char-list: UTF-8-string out of sequence!")
               (begin
                (set! sum (+ sum (* mult (- numeric #xe0))))
                (set! erg (cons sum erg))))
              (set! mult 1) (set! sum 0))
             (if (< numeric #xf8)
              (begin ; 1111 0... = UTF-8 start of four byte sequence
               (if (not (equal? mult 262144))
                (ly:warning "utf-8-string->wide-char-list: UTF-8-string out of sequence!")
                (begin
                 (set! sum (+ sum (* mult (- numeric #xf0))))
                 (set! erg (cons sum erg))))
               (set! mult 1) (set! sum 0))
              (begin ; This would be the header of a UTF-8 encoding of an UNICODE character
                     ; with more than 21 bits - this does not exist!
               (ly:warning "utf-8-string->wide-char-list: UTF-8-string out of sequence!")
               (set! mult 1) (set! sum 0))))))))
     (if (= counter 0)
       erg
       (helper (- counter 1) erg str))))

   (helper (- (string-length str) 1) '() str))

#(define unicode-diacritics ; These are the UNICODE ranges of the diacritical symbols, which
                            ; should not be insulated form their predestinating glyph.
                            ; Look at the code charts at www.unicode.org for more information.
                            ; As long as this table is entered manually there is a high risk of errors.
 '(                         ; This list (ascending order) tends to be incomplete
   ; Combining Diacritical Marks
   ( #x0300 . #x036f )
   ; Cyrillic
   ( #x0483 . #x0489 )
   ; Hebrew
   ( #x0591 . #x05bd )
   ( #x05bf . #x05bf )
   ( #x05c1 . #x05c2 )
   ( #x05c4 . #x05c5 )
   ( #x05c7 . #x05c7 )
   ; Arabic
   ( #x0610 . #x061a )
   ( #x064b . #x065f )
   ( #x0670 . #x0670 )
   ( #x06d6 . #x06dc )
   ( #x06df . #x06e4 )
   ( #x06ea . #x06ed )
   ; Syriac
   ( #x0711 . #x0711 )
   ( #x0730 . #x074a )
   ; Thaana
   ( #x07a6 . #x07b0 )
   ; NKo
   ( #x07eb . #x07f3 )
   ; Samaritan
   ( #x0816 . #x0823 )
   ( #x0825 . #x0827 )
   ( #x0829 . #x082d )
   ; Mandaic
   ( #x0859 . #x085b )
   ; Arabic Extended-A
   ( #x08e4 . #x08fe )
   ; Devanagari
   ( #x0900 . #x0903 )
   ( #x093a . #x093c )
   ( #x0934 . #x094f )
   ( #x0951 . #x0957 )
   ( #x0962 . #x0963 )
   ; Bengali
   ( #x0981 . #x0983 )
   ( #x09bc . #x09bc )
   ( #x09be . #x09cd )
   ( #x09d7 . #x09d7 )
   ( #x09e2 . #x09e3 )
   ; Gurmukhi
   ( #x0a01 . #x0a03 )
   ( #x0a3c . #x0a3c )
   ( #x0a3e . #x0a42 )
   ( #x0a47 . #x0a48 )
   ( #x0a4b . #x0a4d )
   ( #x0a51 . #x0a51 )
   ( #x0a70 . #x0a71 )
   ( #x0a75 . #x0a75 )
   ; Gujarati
   ( #x0a81 . #x0a83 )
   ( #x0abc . #x0abc )
   ( #x0abe . #x0acd )
   ( #x0ae2 . #x0ae4 )
   ; Oriya
   ( #x0b01 . #x0b03 )
   ( #x0b3c . #x0b3c )
   ( #x0b3e . #x0b56 )
   ( #x0b62 . #x0b64 )
   ; Tamil
   ( #x0b82 . #x0b82 )
   ( #x0bb4 . #x0bcd )
   ( #x0bd7 . #x0bd7 )
   ; Telugu
   ( #x0c01 . #x0c03 )
   ( #x0c3e . #x0c56 )
   ( #x0c62 . #x0c63 )
   ; Kannada
   ( #x0c82 . #x0c83 )
   ( #x0cbc . #x0cd6 )
   ( #x0ce2 . #x0ce3 )
   ; Malayalam
   ( #x0d02 . #x0d03 )
   ( #x0d3e . #x0d4d )
   ( #x0d57 . #x0d57 )
   ( #x0d62 . #x0d63 )
   ; Sinhala
   ( #x0d82 . #x0d83 )
   ( #x0dca . #x0df3 )
   ; Thai
   ( #x0e31 . #x0e31 )
   ( #x0e34 . #x0e3a )
   ( #x0e47 . #x0e4e )
   ; Lao
   ( #x0eb1 . #x0eb1 )
   ( #x0eb4 . #x0ebc )
   ( #x0ec8 . #x0ecd )
   ; Tibetan
   ( #x0f18 . #x0f19 )
   ( #x0f35 . #x0f35 )
   ( #x0f37 . #x0f37 )
   ( #x0f39 . #x0f39 )
   ( #x0f3e . #x0fef )
   ( #x0f71 . #x0f84 )
   ( #x0f86 . #x0f87 )
   ( #x0f8d . #x0fbc )
   ; Myanmar
   ( #x102b . #x1039 )
   ( #x103a . #x103e )
   ( #x1056 . #x1059 )
   ( #x105e . #x1060 )
   ( #x1062 . #x1064 )
   ( #x1067 . #x106d )
   ( #x1071 . #x1074 )
   ( #x1082 . #x108d )
   ( #x108f . #x108f )
   ( #x109a . #x109d )
   ; Tagalog
   ( #x1712 . #x1714 )
   ; Hanunoo
   ( #x1732 . #x1734 )
   ; Buhid
   ( #x1752 . #x1753 )
   ; Tagbanwa
   ( #x1772 . #x1773 )
   ; Khmer
   ( #x17b6 . #x17d1 )
   ( #x17d3 . #x17d3 )
   ( #x17dd . #x17dd )
   ; Limbu
   ( #x1920 . #x193b )
   ; New Tai Lue
   ( #x19b0 . #x19c0 )
   ( #x19c8 . #x19c9 )
   ; Buginese
   ( #x1a17 . #x1a1b )
   ; Tai Tahm
   ( #x1a55 . #x1a7f )
   ; Balinese
   ( #x1b00 . #x1b04 )
   ( #x1b34 . #x1b44 )
   ( #x1b6b . #x1b73 )
   ; Sundanese
   ( #x1b80 . #x1b82 )
   ( #x1ba1 . #x1baa )
   ( #x1bac . #x1bad )
   ; Batak
   ( #x1be6 . #x1bf3 )
   ; Lepcha
   ( #x1c24 . #x1c37 )
   ; Vedic Extensions
   ( #x1cd0 . #x1ce8 )
   ( #x1ced . #x1ced )
   ( #x1cf2 . #x1cf4 )
   ; Combining Diacritical Marks Supplement
   ( #x1dc0 . #x1dff )
   ; Combining Diacritical Marks for Symbols
   ( #x20d0 . #x20ff )
   ; Coptic
   ( #x2cef . #x2cf1 )
   ; Cyrillic Extended-A
   ( #x2d40 . #x2dff )
   ; Hiragana
   ( #x3099 . #x309a )
   ; Cyrillic Extended-B
   ( #xa66f . #xa67d )
   ( #xa69f . #xa69f )
   ; Syloti Nagri
   ( #xa802 . #xa802 )
   ( #xa806 . #xa806 )
   ( #xa80b . #xa80b )
   ( #xa823 . #xa827 )
   ; Saurashtra
   ( #xa880 . #xa881 )
   ( #xa8b4 . #xa8c4 )
   ; Devanagari Extended
   ( #xa8e0 . #xa8f1 )
   ; Kayah Li
   ( #xa926 . #xa92d )
   ; Rejang
   ( #xa947 . #xa953 )
   ; Javanese
   ( #xa980 . #xa983 )
   ( #xa9b3 . #xa9c0 )
   ; Cham
   ( #xaa29 . #xaa36 )
   ( #xaa43 . #xaa43 )
   ( #xaa4c . #xaa4d )
   ; Myanmar Extended-A
   ( #xaa7b . #xaa7b )
   ; Tai Viet
   ( #xaab0 . #xaab0 )
   ( #xaab2 . #xaab4 )
   ( #xaabe . #xaabf )
   ( #xaac1 . #xaac1 )
   ; Meetei Mayek Extensions
   ( #xaaeb . #xaaef )
   ( #xaaf5 . #xaaf5 )
   ; Meetei Mayek
   ( #xabe3 . #xabed )
   ; Alphabetic Presentation Forms
   ( #xfb1e . #xfb1e )
   ; Combining Half Marks
   ( #xfe20 . #xfe26 )
   ; Kharoshthi
   ( #x10a01 . #x10a0f )
   ( #x10a38 . #x10a3a )
   ; Brahmi
   ( #x11000 . #x11002 )
   ( #x11038 . #x11046 )
   ; Kaithi
   ( #x11080 . #x11082 )
   ( #x110b0 . #x110ba )
   ; Chakma
   ( #x11100 . #x11102 )
   ( #x11127 . #x11132 )
   ( #x11134 . #x11134 )
   ; Sharada
   ( #x11180 . #x11182 )
   ( #x111b3 . #x111c0 )
   ; Takri
   ( #x116ab . #x116b7 )
   ; Miao
   ( #x16f51 . #x16f7e )
   ; Musical Symbols
   ( #x1d165 . #x1d168 )
   ( #x1d16d . #x1d172 )
   ( #x1d17b . #x1d182 )
   ( #x1d185 . #x1d18b )
   ( #x1d1aa . #x1d1ad )
   ; Ancient Greek Musical Notation
   ( #x1d242 . #x1d244 )
   ;;; I hope, I did not make any mistake while typing this table
 ))


#(define (int-in-regions num reglist)
  (if (not (pair? reglist)) #f
   (if (not (pair? (car reglist))) #f
    (if (< num (caar reglist)) #f
     (if (<= num (cdar reglist)) #t
      (int-in-regions num (cdr reglist)))))))

#(define (wide-char-diacritic? codepoint)
 ; does this UNICODE codepoint refer to a diacrity modifyer?
  (int-in-regions codepoint unicode-diacritics))


#(define (wide-char-list->utf-8-glyphlist wcl)
 ; convert a list of UNICODE codepoint integers
 ; into a list of one character UTF-8 strings,
 ; but do not separate the combining diacritical modifyers
 ; (append them to the character strings)
  (let ((erg '()) (add-to-current #f))
   (for-each (lambda (u)
     (let ((us (if (eq? u 0) "\0" (ly:wide-char->utf-8 u))))
      (if add-to-current
       (set-car! erg (string-append us (car erg)))
       (set! erg (cons us erg)))
      (set! add-to-current (wide-char-diacritic? u)))) 
    (reverse wcl))
   erg))

#(define (wide-char-list->utf-8 wcl)
 ; Helper function to define utf-8 strings with a few special characters in it
  (let ((erg ""))
   (for-each (lambda (x)
     (if (string? x)
      (set! erg (string-append erg x))
      (if (integer? x)
       (if (eq? x 0)
        (set! erg (string-append erg "\0"))
        (set! erg (string-append erg (ly:wide-char->utf-8 x))))
       (if (list? x)
        (set! erg (string-append erg (wide-char-list->utf-8 x)))))))
    wcl)
   erg))

#(define (string->string-list strg)
  (wide-char-list->utf-8-glyphlist (utf-8-string->wide-char-list strg)))
  
#(define (string-list->string lst)
  (wide-char-list->utf-8 lst))
  
% testing
       
#(let* ((l1 (utf-8-string->wide-char-list strg))
        (l2 (wide-char-list->utf-8-glyphlist l1))
        (new-strg (wide-char-list->utf-8 l1))
        (strg? (string? new-strg)))
  (newline)
  (newline)(display "\"The string\"__")(display strg)
  (newline)(display "\"List of integers, \n  representing the UNICODE character codes\"__")(display l1)
  (newline)(display "\"List of one character UTF-8 strings\"__")(display l2)
  (newline)(display "\"Back to string\"__")(display new-strg)
  (newline)(display "\"string?\"__")(display strg?))
  
#(let* ((strg-lst (string->string-list strg))
        (new-strg (string-list->string strg-lst))
        (strg? (string? new-strg)))
     (newline)
     (newline)(display "\"The string\"__")(display strg)
     (newline)(display "\"The string-list\"__")(display strg-lst)
     (newline)(display "\"Back to string\"__")(display new-strg)
     (newline)(display "\"string?\"__")(display strg?)
     (newline))
     
\markup \strg

