function string unicode_array_to_utf8(long uni%(), long mode_bits)
!
!==================================================================================================
! title  : unicode_array_to_utf8.fun
! purpose: convert an array of Unicode code points into a string of UTF-8 octets
! params : uni%()    - uni%(0) holds the number of code points to convert;
!                      uni%(1) thru uni%(uni%(0)) hold the code points themselves
!          mode_bits - when bit value 4 is set, every discarded code point is reported via PRINT
!                      (loop index followed by the offending value)
! returns: the UTF-8 octets of every accepted code point, concatenated in array order.
!          Values which may not be encoded (negative, surrogates D800-DFFF, anything above
!          10FFFF) are skipped and contribute nothing to the result.
! history:
! ver who when     what
! --- --- -------- --------------------------------------------------------------------------------
! 100 NSR 20170315 1. original effort (unicode_to_utf8.fun)
! 107 NSR 20200107 1. derived from unicode_to_utf8.fun which was inside:
!                     MYSQL_IMPORT_HELPER_BASIC_TEMPLATE_107_PART3.BAS
!                  2. added some debugging
!==================================================================================================
! UTF-8 encoding
! 1. RFC-2279: http://www.faqs.org/rfcs/rfc2279.html
! 2. RFC-3629: https://tools.ietf.org/html/rfc3629 (limits UTF-8 to 4 octets; some code points in
!    the 21-bit address space are not being used (notice the 'z' on line 4))
!
! UCS-4 range (hex)     UTF-8 octet sequence (binary)                           Data Bits
! -------------------   -----------------------------                           ---------
! 0000,0000-0000,007F   0xxxxxxx                                                 7 bits
! 0000,0080-0000,07FF   110xxxxx 10xxxxxx                                       11 bits
! 0000,0800-0000,FFFF   1110xxxx 10xxxxxx 10xxxxxx                              16 bits
! 0001,0000-001F,FFFF   11110zXX 10xxxxxx 10xxxxxx 10xxxxxx                     21 bits (RFC limit)
! 0020,0000-03FF,FFFF   111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx            26 bits (invalid)
! 0400,0000-7FFF,FFFF   1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx   31 bits (invalid)
!
! note: RFC-3629 also prohibits encoding the UTF-16 surrogate values 0000,D800-0000,DFFF so that
!       sub-range of line 3 is treated like any other invalid value (discarded)
!==================================================================================================
    option type=explicit                                        !
    !
    declare string  out$, temp$                                 !
    declare long    cp%, temp%, i%, k%, bytes%, bits%           !
    !
    !-----------------------------------------------------------------------
    !   main
    !-----------------------------------------------------------------------
    out$ = ""                                                   !
    k% = uni%(0)                                                ! data length is stored here
    for i% = 1 to k%                                            ! scan the array
        cp% = uni%(i%)                                          ! grab one code point (working copy)
        select cp%                                              ! how many octets are required?
            case 0 to x"0007F"                                  !
                bytes% = 1                                      !
            case x"00080" to x"007FF"                           !
                bytes% = 2                                      !
            case x"00800" to x"0D7FF", x"0E000" to x"0FFFF"     ! D800-DFFF (surrogates) excluded
                bytes% = 3                                      !
            case x"10000" to x"10FFFF"                          !
                bytes% = 4                                      !
            case else                                           ! negative, surrogate, or > 10FFFF
                if (mode_bits and 4%) = 4 then                  ! debug reporting requested?
                    print "-d-UNICODE_ARRAY_TO_utf8: ";i%;cp%   !
                end if                                          !
                goto next_code_point                            ! throw away anything else
        end select                                              !
        !
        !   octets are generated last-to-first: each continuation octet takes the
        !   lowest six bits, then the value is shifted right; whatever remains
        !   goes into the lead octet along with the length marker
        !
        temp$ = ""                                              ! zap
        temp% = bytes%                                          ! octets still to generate
        while temp% > 0                                         !
            if temp% = 1 then                                   ! lead octet (generated last)
                select bytes%                                   !
                    case 1                                      !
                        bits% = cp%                             ! no encoding required
                    case 2                                      !
                        bits% = b"11000000" or cp%              !
                    case 3                                      !
                        bits% = b"11100000" or cp%              !
                    case 4                                      !
                        bits% = b"11110000" or cp%              !
                end select                                      !
            else                                                ! continuation octet
                bits% = b"10000000" or (cp% and x"3f")          ! only use the lowest 6-bits
                cp% = cp% / 64                                  ! shift data by six bits
            end if                                              !
            temp$ = chr$(bits%) + temp$                         ! prepend (building right-to-left)
            temp% = temp% - 1                                   !
        next                                                    !
        out$ = out$ + temp$                                     !
        !
        next_code_point:                                        ! landing point for discarded values
    next i%                                                     !
    unicode_array_to_utf8 = out$                                ! pass string back
end function                                                    ! adios
!
Back to OpenVMS
Back to OpenVMS Demo Index
Back to Home
Neil Rieck
Kitchener - Waterloo - Cambridge, Ontario, Canada.