Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ This repository provides packages to use the

The Haskell data structures are generated programmatically from the UCD files.
The latest Unicode version supported by these libraries is
[`16.0.0`](https://www.unicode.org/versions/Unicode16.0.0/).
[`17.0.0`](https://www.unicode.org/versions/Unicode17.0.0/).

### `unicode-data`

Expand Down
20 changes: 20 additions & 0 deletions experimental/icu/cbits/icu.c
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,26 @@ bool __hs_u_hasBinaryProperty(UChar32 c, UProperty which) {
return u_hasBinaryProperty(c, which);
}

/* Wrapper around ICU's u_islower(), imported by the Haskell FFI binding.
 * Returns true when u_islower() yields a non-zero result for code point c. */
bool __hs_u_islower(UChar32 c) {
    return u_islower(c) != 0;
}

/* Wrapper around ICU's u_isupper(), imported by the Haskell FFI binding.
 * Returns true when u_isupper() yields a non-zero result for code point c. */
bool __hs_u_isupper(UChar32 c) {
    return u_isupper(c) != 0;
}

/* Wrapper around ICU's u_tolower(), imported by the Haskell FFI binding.
 * Returns whatever code point u_tolower() maps c to. */
UChar32 __hs_u_tolower(UChar32 c) {
    const UChar32 lowered = u_tolower(c);
    return lowered;
}

/* Wrapper around ICU's u_toupper(), imported by the Haskell FFI binding.
 * Returns whatever code point u_toupper() maps c to. */
UChar32 __hs_u_toupper(UChar32 c) {
    const UChar32 uppered = u_toupper(c);
    return uppered;
}

/* Wrapper around ICU's u_istitle(), imported by the Haskell FFI binding.
 * The result of u_istitle() is widened to UChar32 and returned as is.
 *
 * NOTE(review): the sibling predicate wrappers (__hs_u_islower,
 * __hs_u_isupper) return bool, whereas this one returns UChar32.
 * Presumably bool was intended -- confirm. If the return type is changed,
 * the prototype in icu.h and the Haskell foreign import must be updated in
 * the same change so that all three agree. */
UChar32 __hs_u_istitle(UChar32 c) {
    const UChar32 flag = u_istitle(c);
    return flag;
}

/*******************************************************************************
* Names
******************************************************************************/
Expand Down
10 changes: 10 additions & 0 deletions experimental/icu/cbits/icu.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,16 @@ void __hs_u_getUnicodeVersion(UVersionInfo versionArray);

/* Wrapper for ICU's u_hasBinaryProperty(): queries property `which` for
 * code point `c`. */
bool __hs_u_hasBinaryProperty(UChar32 c, UProperty which);

/* Wrapper for ICU's u_islower(). */
bool __hs_u_islower(UChar32 c);

/* Wrapper for ICU's u_isupper(). */
bool __hs_u_isupper(UChar32 c);

/* Wrapper for ICU's u_tolower(): returns the mapped code point. */
UChar32 __hs_u_tolower(UChar32 c);

/* Wrapper for ICU's u_toupper(): returns the mapped code point. */
UChar32 __hs_u_toupper(UChar32 c);

/* Wrapper for ICU's u_istitle().
 * NOTE(review): declared as returning UChar32, unlike the bool-returning
 * predicate wrappers above. Presumably bool was intended -- confirm, and
 * keep this prototype, the definition in icu.c and the Haskell foreign
 * import in agreement. */
UChar32 __hs_u_istitle(UChar32 c);

/*******************************************************************************
* Names
******************************************************************************/
Expand Down
68 changes: 57 additions & 11 deletions experimental/icu/lib/ICU/Char.chs
Original file line number Diff line number Diff line change
Expand Up @@ -15,18 +15,27 @@ module ICU.Char
, UGeneralCategory(..)
, toGeneralCategory
, charType
, UProperty(..)
, hasBinaryProperty
, isNoncharacter
, isLowerCase
, isUpperCase
, isLower
, isUpper
, isTitle
, toLowerCase
, toUpperCase
) where

#include <unicode/uchar.h>

import Data.Char (ord)
import Data.Char (chr, ord)
import qualified Data.Char as Char
import Data.Int (Int8)
import Data.Version (Version, makeVersion)
import Data.Word (Word32)
import Foreign (Ptr)
import Foreign.C (CInt)
import Foreign.C (CInt(..))
import Foreign.Marshal.Array (allocaArray, peekArray)
import System.IO.Unsafe (unsafePerformIO)

Expand Down Expand Up @@ -137,19 +146,56 @@ toGeneralCategory = \case
FinalPunctuation -> Char.FinalQuote

{#enum define UProperty {
UCHAR_NONCHARACTER_CODE_POINT as NoncharacterCodePoint
UCHAR_NONCHARACTER_CODE_POINT as NoncharacterCodePoint,
UCHAR_LOWERCASE as LowerCase,
UCHAR_UPPERCASE as UpperCase
}
deriving (Bounded, Eq, Ord, Show) #}

foreign import ccall safe "icu.h __hs_u_hasBinaryProperty" u_hasBinaryProperty
:: UChar32 -> Int -> Bool
:: UChar32 -> CInt -> Bool

-- hasBinaryProperty :: UChar32 -> Int -> Bool
-- hasBinaryProperty = {#call pure u_hasBinaryProperty as __hs_u_hasBinaryProperty#}
-- {#fun pure u_hasBinaryProperty as hasBinaryProperty
-- {`UChar32', `Int'} -> `Bool' #}
-- | Check whether a character has the given binary Unicode property.
--
-- The character is passed to the @u_hasBinaryProperty@ foreign import as its
-- code point, and the property as the numeric value of its 'UProperty'
-- constructor (via 'fromEnum').
hasBinaryProperty :: Char -> UProperty -> Bool
hasBinaryProperty c prop = u_hasBinaryProperty codePoint propertyId
  where
    codePoint = fromIntegral (ord c)
    propertyId = fromIntegral (fromEnum prop)

isNoncharacter :: Char -> Bool
isNoncharacter c = u_hasBinaryProperty
(fromIntegral (ord c))
(fromEnum NoncharacterCodePoint)
isNoncharacter = (`hasBinaryProperty` NoncharacterCodePoint)

-- | 'True' when the character has the 'LowerCase' binary property
-- (@UCHAR_LOWERCASE@), as reported by 'hasBinaryProperty'.
isLowerCase :: Char -> Bool
isLowerCase c = hasBinaryProperty c LowerCase

-- | 'True' when the character has the 'UpperCase' binary property
-- (@UCHAR_UPPERCASE@), as reported by 'hasBinaryProperty'.
isUpperCase :: Char -> Bool
isUpperCase c = hasBinaryProperty c UpperCase

-- | Raw binding to the @__hs_u_islower@ C wrapper around ICU's @u_islower@.
foreign import ccall safe "icu.h __hs_u_islower" u_islower
    :: UChar32 -> Bool

-- | Ask ICU's @u_islower@ about a character, passing its code point.
--
-- NOTE(review): this is a separate ICU query from 'isLowerCase', which tests
-- the @UCHAR_LOWERCASE@ binary property; the two presumably differ for some
-- characters -- confirm against the ICU documentation.
isLower :: Char -> Bool
isLower c = u_islower (fromIntegral (ord c))

-- | Raw binding to the @__hs_u_isupper@ C wrapper around ICU's @u_isupper@.
foreign import ccall safe "icu.h __hs_u_isupper" u_isupper
    :: UChar32 -> Bool

-- | Ask ICU's @u_isupper@ about a character, passing its code point.
--
-- NOTE(review): this is a separate ICU query from 'isUpperCase', which tests
-- the @UCHAR_UPPERCASE@ binary property; the two presumably differ for some
-- characters -- confirm against the ICU documentation.
isUpper :: Char -> Bool
isUpper c = u_isupper (fromIntegral (ord c))

-- | Raw binding to the @__hs_u_istitle@ C wrapper around ICU's @u_istitle@.
--
-- The wrapper is defined in @icu.c@ and declared in @icu.h@ with a @UChar32@
-- return type (not @bool@), so it is imported at that type here: a foreign
-- import's result type has to match the C function's actual return type.
-- If the C wrapper is ever changed to return @bool@, switch this import back
-- to @UChar32 -> Bool@ in the same change.
foreign import ccall safe "icu.h __hs_u_istitle" u_istitle
    :: UChar32 -> UChar32

-- | Ask ICU's @u_istitle@ about a character, passing its code point.
--
-- Any non-zero result from the wrapper is treated as 'True'.
isTitle :: Char -> Bool
isTitle c = u_istitle (fromIntegral (ord c)) /= 0

-- | Raw binding to the @__hs_u_tolower@ C wrapper around ICU's @u_tolower@.
foreign import ccall safe "icu.h __hs_u_tolower" u_tolower
    :: UChar32 -> UChar32

-- | Map a character through ICU's @u_tolower@.
--
-- The result is converted back with 'chr', which is partial; this assumes
-- ICU always returns a valid code point -- TODO confirm.
toLowerCase :: Char -> Char
toLowerCase c = chr (fromIntegral lowered)
  where
    lowered = u_tolower (fromIntegral (ord c))

-- | Raw binding to the @__hs_u_toupper@ C wrapper around ICU's @u_toupper@.
foreign import ccall safe "icu.h __hs_u_toupper" u_toupper
    :: UChar32 -> UChar32

-- | Map a character through ICU's @u_toupper@.
--
-- The result is converted back with 'chr', which is partial; this assumes
-- ICU always returns a valid code point -- TODO confirm.
toUpperCase :: Char -> Char
toUpperCase c = chr (fromIntegral uppered)
  where
    uppered = u_toupper (fromIntegral (ord c))
6 changes: 3 additions & 3 deletions experimental/unicode-data-text/unicode-data-text.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ library
build-depends:
base >= 4.7 && < 4.23,
text >= 1.2.4 && < 2.2,
unicode-data >= 0.7 && < 0.8
unicode-data >= 0.8 && < 0.9

test-suite test
import: default-extensions, compile-options
Expand All @@ -82,7 +82,7 @@ test-suite test
base >= 4.7 && < 4.23,
hspec >= 2.0 && < 2.12,
text >= 1.2.4 && < 2.2,
unicode-data >= 0.7 && < 0.8,
unicode-data >= 0.8 && < 0.9,
unicode-data-text
build-tool-depends:
hspec-discover:hspec-discover >= 2.0 && < 2.12
Expand All @@ -98,7 +98,7 @@ benchmark bench
tasty-bench >= 0.2.5 && < 0.5,
tasty >= 1.4.1 && < 1.6,
text >= 1.2.4 && < 2.2,
unicode-data >= 0.7 && < 0.8,
unicode-data >= 0.8 && < 0.9,
unicode-data-text
-- [NOTE] Recommendation of tasty-bench to reduce garbage collection noisiness
ghc-options: -O2 -fdicts-strict -rtsopts -with-rtsopts=-A32m
Expand Down
46 changes: 24 additions & 22 deletions ucd.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
# we used to generate them earlier are exactly the same as the ones we are
# downloading. To ensure that, verification of the checksum is necessary.

VERSION=16.0.0
VERSION=17.0.0

# When downloading fresh new version comment this out
VERIFY_CHECKSUM=y
Expand All @@ -16,29 +16,31 @@ UCD_URL="https://www.unicode.org/Public/$VERSION/ucd"
# $ find data/$VERSION/ -type f -print0 | xargs -0 sha256sum
# Format: filename:checksum
UCD_FILES="\
Blocks.txt:f3907b395d410f1b97342292ca6bc83dd12eb4b205f2a0c48efdef99e517d7b0 \
CaseFolding.txt:6f1f9c588eb4a5c718d9e8f93b782685e5c7fec872cf05e8e6878053599e09bb \
DerivedCoreProperties.txt:39d35161f2954497f69e08bdb9e701493f476a3d30222de20028feda36c1dabd \
DerivedNormalizationProps.txt:4d4c03892dea9146d674b686e495df2d55a28d071ac474041d73518f887abddc \
NameAliases.txt:9953f0fcebf5ea8091c5c581e4df0e43f20d2533c84ccca7987a9bb819a896a8 \
PropertyValueAliases.txt:440fd3e5460b9bfe31da67b6f923992e1989d31fe2ed91e091c4b8f8e2620bf9 \
PropList.txt:53d614508e2a0b2305a8aa21cd60d993de9326cdf65993660dfcce4503548583 \
Scripts.txt:9e88f0a677df47311106340be8ede2ecdacd9c1c931831218d2be6d5508e0039 \
ScriptExtensions.txt:049117ce26b9769fe2749b06eef51a50a89faef4a97764dd2d81daa715980700 \
SpecialCasing.txt:8d5de354eef79f2395a54c9c7dcebbaf3d30fc962d0f85611ea97aa973a0c451 \
UnicodeData.txt:ff58e5823bd095166564a006e47d111130813dcf8bf234ef79fa51a870edb48f \
extracted/DerivedCombiningClass.txt:52064d588c98c623b2373905e6a449eb520f900113954bcd212e94ef0810b471 \
extracted/DerivedName.txt:0cc1469faa0c5518572ef93f4f457f93aa8a160ce320aad3793d85f4b435fd24 \
extracted/DerivedNumericValues.txt:00b43cc5c9b86a834f82389c4537f103e652821387daa556f0bd220f6c23007e"

# Security files (https://www.unicode.org/Public/security/$VERSION/$file)
SECURITY_URL="https://www.unicode.org/Public/security/$VERSION"
Blocks.txt:c0edefaf1a19771e830a82735472716af6bf3c3975f6c2a23ffbe2580fbbcb15\
CaseFolding.txt:ff8d8fefbf123574205085d6714c36149eb946d717a0c585c27f0f4ef58c4183\
DerivedCoreProperties.txt:24c7fed1195c482faaefd5c1e7eb821c5ee1fb6de07ecdbaa64b56a99da22c08\
DerivedNormalizationProps.txt:71fd6a206a2c0cdd41feb6b7f656aa31091db45e9cedc926985d718397f9e488\
NameAliases.txt:793f6f1e4d15fd90f05ae66460191dc4d75d1fea90136a25f30dd6a4cb950eac\
PropertyValueAliases.txt:64e9a5f76f7a1e8b5a47d6a1f9a26522a251208f5276bdfa1559dac7cf2e827a\
PropList.txt:130dcddcaadaf071008bdfce1e7743e04fdfbc910886f017d9f9ac931d8c64dd\
Scripts.txt:9f5e50d3abaee7d6ce09480f325c706f485ae3240912527e651954d2d6b035bf\
ScriptExtensions.txt:ec2107e58825a1586acee8e0911ce18260394ac8b87e535ca325f1ccbeb06bc6\
SpecialCasing.txt:efc25faf19de21b92c1194c111c932e03d2a5eaf18194e33f1156e96de4c9588\
UnicodeData.txt:2e1efc1dcb59c575eedf5ccae60f95229f706ee6d031835247d843c11d96470c\
extracted/DerivedCombiningClass.txt:191463abfbd202703c6fd6776a92a23ac44ec65e0476a7f95aa91ca492cef29b\
extracted/DerivedName.txt:019758bbe6c756c40fca6d505187ea660c5e195533e2ff2c841963a212c9d369\
extracted/DerivedNumericValues.txt:139b976bdc288be01c80f018523da769cf2845109b5a7f0f8a432db64bfedcfa"

# Security files:
# - < 17.0.0: https://www.unicode.org/Public/security/$VERSION/$file
# - ≥ 17.0.0: https://www.unicode.org/Public/$VERSION/security/$file
SECURITY_URL="https://www.unicode.org/Public/$VERSION/security"
# Format: filename:checksum
SECURITY_FILES="\
IdentifierStatus.txt:c6108ca140e054b55a5b0378e7ebed8b1ef0e846251f6195361bc9af8ffc61b1 \
IdentifierType.txt:c7e57f71176fb3035e0c85e4d9f30b08374588b2bd16e729efbc7e49c7c9438f \
confusables.txt:95bd0aad6dced5ebc63436f459c06ab21a8d107cd842fb57f5c3a1e91bca8611 \
intentional.txt:6827f1f7694f747aa93e374619b4bf81ffb18e2feb0b9c982c427f7eec2266c1"
IdentifierStatus.txt:617228a16da13850bf8af28b6cd08f5e9b6595d2eb60404fe6eee2c85b4e4a35\
IdentifierType.txt:924ac63faa97ed73420d6ac48d08279d90968c7da0502ab701e08bfbb9683c22\
confusables.txt:091c7f82fc39ef208faf8f94d29c244de99254675e09de163160c810d13ef22a\
intentional.txt:33738217c15c1a0df0b7a2cc0a0b50b27ebdca119ca11253440ec0102f05626b"

# Download the files

Expand Down
4 changes: 4 additions & 0 deletions unicode-data-names/Changelog.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
# Changelog

## 0.6.0 (September 2025)

- Updated to [Unicode 17.0.0](https://www.unicode.org/versions/Unicode17.0.0/).

## 0.5.0 (September 2025)

- Updated to [Unicode 16.0.0](https://www.unicode.org/versions/Unicode16.0.0/).
Expand Down
2 changes: 1 addition & 1 deletion unicode-data-names/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ There are 3 APIs:
The Haskell data structures are generated programmatically from the
Unicode character database (UCD) files. The latest Unicode version
supported by this library is
[`16.0.0`](https://www.unicode.org/versions/Unicode16.0.0/).
[`17.0.0`](https://www.unicode.org/versions/Unicode17.0.0/).

Please see the
[Haddock documentation](https://hackage.haskell.org/package/unicode-data-names)
Expand Down
4 changes: 2 additions & 2 deletions unicode-data-names/lib/Unicode/Internal/Char/Names/Version.hs
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@ module Unicode.Internal.Char.Names.Version (unicodeVersion) where
import Data.Version (Version, makeVersion)

-- | Version of the Unicode standard used by this package:
-- [16.0.0](https://www.unicode.org/versions/Unicode16.0.0/).
-- [17.0.0](https://www.unicode.org/versions/Unicode17.0.0/).
--
-- @since 0.3.0
unicodeVersion :: Version
unicodeVersion = makeVersion [16,0,0]
unicodeVersion = makeVersion [17,0,0]
Loading