Skip to content

Commit 378d493

Browse files
Feature/csv to dhall with schema (#2260)
* feat(csv-to-dhall): Change dhallFromCsv function to accept a Dhall type * feat(csv-to-dhall): Functions to resolve schema and throw exceptions * feat(csv-to-dhall): Adapt schema functionality to main csv-to-dhall executable * feat(csv-to-dhall): Adapt Tasty tests accordingly to use dhall schema * Update dhall-csv/src/Dhall/CsvToDhall.hs Consistency with other parsing errors Co-authored-by: Gabriel Gonzalez <Gabriel439@gmail.com> * Update dhall-csv/src/Dhall/CsvToDhall.hs better styling Co-authored-by: Gabriel Gonzalez <Gabriel439@gmail.com> * Update dhall-csv/src/Dhall/CsvToDhall.hs Co-authored-by: Gabriel Gonzalez <Gabriel439@gmail.com> * feat(csv-to-dhall): fix a type mismatch * Update dhall-csv/src/Dhall/CsvToDhall.hs Co-authored-by: Gabriel Gonzalez <Gabriel439@gmail.com> * feat(csv-to-dhall): used decodeUtf8' instead of decodeUtf8 * fix(csv-to-dhall): modified behavior when reading doubles return NaN * Update dhall-csv/src/Dhall/CsvToDhall.hs Co-authored-by: Gabriel Gonzalez <Gabriel439@gmail.com> * fix: do not allow trailing text in numbers fields Co-authored-by: Gabriel Gonzalez <Gabriel439@gmail.com>
1 parent 4278839 commit 378d493

File tree

8 files changed

+307
-38
lines changed

8 files changed

+307
-38
lines changed

dhall-csv/csv-to-dhall/Main.hs

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,14 +7,15 @@ module Main where
77
import Control.Applicative (optional, (<|>))
88
import Control.Exception (SomeException)
99
import Data.Version (showVersion)
10-
import Dhall.CsvToDhall (dhallFromCsv)
10+
import Dhall.CsvToDhall
1111
import Dhall.Pretty (CharacterSet (..))
1212
import Data.Text (Text)
1313
import Options.Applicative (Parser, ParserInfo)
1414

1515
import qualified Control.Exception
1616
import qualified Data.Text.IO as Text.IO
1717
import qualified Dhall.Csv.Util
18+
import qualified Dhall.Core
1819
import qualified Dhall.Util
1920
import qualified GHC.IO.Encoding
2021
import qualified Options.Applicative as Options
@@ -32,6 +33,7 @@ parserInfo = Options.info
3233
data Options
3334
= Default
3435
{ schema :: Maybe Text
36+
, conversion :: Conversion
3537
, file :: Maybe FilePath
3638
, output :: Maybe FilePath
3739
, ascii :: Bool
@@ -53,6 +55,7 @@ parseOptions =
5355
typeCommand
5456
<|> ( Default
5557
<$> optional parseSchema
58+
<*> parseConversion
5659
<*> optional parseFile
5760
<*> optional parseOutput
5861
<*> parseASCII
@@ -144,6 +147,13 @@ main = do
144147
Left err -> fail err
145148
Right csv -> pure csv
146149

150+
let toSchema schema = do
151+
finalSchema <- case schema of
152+
Just text -> resolveSchemaExpr text
153+
Nothing -> fail "Please specify a schema. Type inference has not been implemented"
154+
155+
typeCheckSchemaExpr id finalSchema
156+
147157
case options of
148158
Version ->
149159
putStrLn (showVersion Meta.version)
@@ -154,7 +164,9 @@ main = do
154164

155165
csv <- toCsv (not noHeader) file
156166

157-
let expression = dhallFromCsv csv
167+
finalSchema <- toSchema schema
168+
169+
expression <- Dhall.Core.throws $ dhallFromCsv conversion finalSchema csv
158170

159171
Dhall.Util.renderExpression characterSet plain output expression
160172

dhall-csv/dhall-csv.cabal

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,8 +37,11 @@ Library
3737
bytestring < 0.12,
3838
cassava >= 0.5.0.0 && < 0.6 ,
3939
containers >= 0.5.9 && < 0.7 ,
40+
either ,
41+
exceptions >= 0.8.3 && < 0.11,
4042
dhall >= 1.39.0 && < 1.40,
4143
filepath < 1.5 ,
44+
optparse-applicative ,
4245
prettyprinter >= 1.5.1 && < 1.8 ,
4346
text >= 0.11.1.0 && < 1.3 ,
4447
unordered-containers < 0.3 ,

dhall-csv/src/Dhall/CsvToDhall.hs

Lines changed: 249 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,27 +1,254 @@
1-
module Dhall.CsvToDhall (dhallFromCsv) where
1+
{-# LANGUAGE OverloadedStrings #-}
2+
{-# LANGUAGE RecordWildCards #-}
23

3-
import Data.Bifunctor (bimap)
4-
import Data.Text.Encoding (decodeUtf8)
5-
import Data.Void (Void)
6-
import Dhall.Core (Expr, RecordField)
7-
import Dhall.Src (Src)
4+
module Dhall.CsvToDhall (
5+
-- * CSV to Dhall
6+
dhallFromCsv
7+
, parseConversion
8+
, defaultConversion
9+
, resolveSchemaExpr
10+
, typeCheckSchemaExpr
11+
, Conversion(..)
12+
13+
-- * Exceptions
14+
, CompileError(..)
15+
) where
16+
17+
import Control.Applicative ((<|>))
18+
import Control.Exception (Exception, throwIO)
19+
import Control.Monad.Catch (MonadCatch, throwM)
20+
import Data.Either (lefts, rights)
21+
import Data.Either.Combinators (mapRight)
22+
import Data.Foldable (toList)
23+
import Data.List ((\\))
24+
import Data.Text (Text)
25+
import Data.Text.Encoding (decodeUtf8, decodeUtf8', encodeUtf8)
26+
import Data.Text.Encoding.Error (UnicodeException)
27+
import Data.Text.Read (decimal, double)
28+
import Data.Void (Void)
29+
import Dhall.Core (Expr)
30+
import Dhall.Src (Src)
31+
import Options.Applicative (Parser)
832

933
import qualified Data.Csv
10-
import qualified Data.HashMap.Strict as HashMap
11-
import qualified Data.Sequence as Sequence
12-
import qualified Dhall.Core as Core
13-
import qualified Dhall.Map
34+
import qualified Data.HashMap.Strict as HashMap
35+
import qualified Data.Sequence as Sequence
36+
import qualified Dhall.Core as Core
37+
import qualified Dhall.Import
38+
import qualified Dhall.Map as Map
39+
import qualified Dhall.Parser
40+
import qualified Dhall.TypeCheck as TypeCheck
41+
import qualified Options.Applicative as O
42+
43+
-- ----------
44+
-- Conversion
45+
-- ----------
46+
47+
-- | JSON-to-dhall translation options
48+
data Conversion = Conversion
49+
{ strictRecs :: Bool
50+
, unions :: UnionConv
51+
} deriving Show
52+
53+
data UnionConv = UFirst | UNone | UStrict deriving (Show, Read, Eq)
54+
55+
-- | Default conversion options
56+
defaultConversion :: Conversion
57+
defaultConversion = Conversion
58+
{ strictRecs = False
59+
, unions = UFirst
60+
}
61+
62+
-- ---------------
63+
-- Command options
64+
-- ---------------
65+
66+
-- | Standard parser for options related to the conversion method
67+
parseConversion :: Parser Conversion
68+
parseConversion = Conversion <$> parseStrict
69+
<*> parseUnion
70+
where
71+
parseStrict =
72+
O.flag' True
73+
( O.long "records-strict"
74+
<> O.help "Fail if any CSV fields are missing from the expected Dhall type"
75+
)
76+
<|> O.flag' False
77+
( O.long "records-loose"
78+
<> O.help "Tolerate CSV fields not present within the expected Dhall type"
79+
)
80+
<|> pure True
81+
82+
83+
-- | Parser for command options related to treating union types
84+
parseUnion :: Parser UnionConv
85+
parseUnion =
86+
uFirst
87+
<|> uNone
88+
<|> uStrict
89+
<|> pure UFirst -- defaulting to UFirst
90+
where
91+
uFirst = O.flag' UFirst
92+
( O.long "unions-first"
93+
<> O.help "The first value with the matching type (successfully parsed all the way down the tree) is accepted, even if not the only possible match. (DEFAULT)"
94+
)
95+
uNone = O.flag' UNone
96+
( O.long "unions-none"
97+
<> O.help "Unions not allowed"
98+
)
99+
uStrict = O.flag' UStrict
100+
( O.long "unions-strict"
101+
<> O.help "Error if more than one union values match the type (and parse successfully)"
102+
)
103+
104+
type ExprX = Expr Src Void
105+
106+
-- | Parse schema code and resolve imports
107+
resolveSchemaExpr :: Text -- ^ type code (schema)
108+
-> IO ExprX
109+
resolveSchemaExpr code = do
110+
parsedExpression <-
111+
case Dhall.Parser.exprFromText "\n\ESC[1;31m(schema)\ESC[0m" code of
112+
Left err -> throwIO err
113+
Right parsedExpression -> return parsedExpression
114+
Dhall.Import.load parsedExpression
115+
116+
typeCheckSchemaExpr :: (Exception e, MonadCatch m)
117+
=> (CompileError -> e) -> ExprX -> m ExprX
118+
typeCheckSchemaExpr compileException expr =
119+
case TypeCheck.typeOf expr of -- check if the expression has type
120+
Left err -> throwM . compileException $ TypeError err
121+
Right t -> case t of -- check if the expression has type Type
122+
Core.Const Core.Type -> return expr
123+
_ -> throwM . compileException $ BadDhallType t expr
14124

15-
dhallFromCsv :: [Data.Csv.NamedRecord] -> Expr Src Void
16-
dhallFromCsv csv = Core.ListLit mType $ Sequence.fromList $ map convertRecord csv
125+
126+
dhallFromCsv :: Conversion -> ExprX -> [Data.Csv.NamedRecord] -> Either CompileError ExprX
127+
dhallFromCsv Conversion{..} typeExpr = listConvert (Core.normalize typeExpr)
17128
where
18-
mType :: Maybe (Expr Src Void)
19-
mType = case csv of
20-
[] -> Just $ Core.App Core.List $ Core.Record $ Dhall.Map.fromList []
21-
_ -> Nothing
22-
convertRecord :: Data.Csv.NamedRecord -> Expr Src Void
23-
convertRecord recordCsv = Core.RecordLit dhallMap
24-
where
25-
dhallMap = Dhall.Map.fromList $ map (bimap decodeUtf8 convertField) $ HashMap.toList recordCsv
26-
convertField :: Data.Csv.Field -> RecordField Src Void
27-
convertField = Core.makeRecordField . Core.TextLit . (Core.Chunks []) . decodeUtf8
129+
listConvert :: ExprX -> [Data.Csv.NamedRecord] -> Either CompileError ExprX
130+
listConvert (Core.App Core.List recordType@(Core.Record _)) [] = return $ Core.ListLit (Just recordType) Sequence.empty
131+
listConvert (Core.App Core.List recordType) [] = Left $ Unsupported recordType
132+
listConvert (Core.App Core.List recordType) csv = do
133+
a <- traverse (recordConvert recordType) csv
134+
return $ Core.ListLit Nothing $ Sequence.fromList a
135+
listConvert e _ = Left $ Unsupported e
136+
137+
recordConvert :: ExprX -> Data.Csv.NamedRecord -> Either CompileError ExprX
138+
recordConvert (Core.Record record) csvRecord
139+
| badKeys <- lefts (map decodeUtf8' (HashMap.keys csvRecord))
140+
, not (null badKeys)
141+
= Left $ UnicodeError (head badKeys) -- Only report first key that failed to be decoded
142+
| extraKeys <- (map decodeUtf8 $ HashMap.keys csvRecord) \\ Map.keys record
143+
, strictRecs && not (null extraKeys)
144+
= Left $ UnhandledKeys extraKeys
145+
| otherwise
146+
= do
147+
let f k v = fieldConvert k (Core.recordFieldValue v) (HashMap.lookup (encodeUtf8 k) csvRecord)
148+
a <- Map.traverseWithKey (\k v -> mapRight Core.makeRecordField (f k v)) record
149+
return $ Core.RecordLit a
150+
recordConvert e _ = Left $ Unsupported e
151+
152+
fieldConvert :: Text -> ExprX -> Maybe Data.Csv.Field -> Either CompileError ExprX
153+
-- Unions
154+
fieldConvert recordKey t@(Core.Union tm) maybeField = do
155+
let f unionKey Nothing =
156+
case maybeField of
157+
Nothing -> Left $ MissingKey recordKey
158+
Just field ->
159+
case decodeUtf8' field of
160+
Left err -> Left $ UnicodeError err
161+
Right _field ->
162+
if _field == unionKey
163+
then Right $ Core.Field t $ Core.makeFieldSelection unionKey
164+
else Left $ Mismatch t field recordKey
165+
f unionKey (Just _type) = do
166+
expression <- fieldConvert recordKey _type maybeField
167+
return (Core.App (Core.Field t $ Core.makeFieldSelection unionKey) expression)
168+
169+
case (unions, rights (toList (Map.mapWithKey f tm)), maybeField) of
170+
(UNone , _ , _ ) -> Left $ ContainsUnion t
171+
(UStrict, xs@(_:_:_), Just field) -> Left $ UndecidableUnion t field xs
172+
(UStrict, xs@(_:_:_), Nothing ) -> Left $ UndecidableMissingUnion t xs
173+
(_ , [] , Just field) -> Left $ Mismatch t field recordKey
174+
(_ , [] , Nothing ) -> Left $ MissingKey recordKey
175+
(UFirst , x:_ , _ ) -> Right $ x
176+
(UStrict, [x] , _ ) -> Right $ x
177+
178+
-- Missing Optionals
179+
fieldConvert _ (Core.App Core.Optional t) Nothing = return $ Core.App Core.None t
180+
181+
-- Missing fields
182+
fieldConvert key _ Nothing = Left $ MissingKey key
183+
184+
-- Bools
185+
fieldConvert key Core.Bool (Just field) =
186+
case field of
187+
"true" -> Right (Core.BoolLit True)
188+
"false" -> Right (Core.BoolLit False)
189+
_ -> Left $ Mismatch Core.Bool field key
190+
191+
-- Naturals
192+
fieldConvert key Core.Natural (Just field) =
193+
case decodeUtf8' field of
194+
Left err -> Left $ UnicodeError err
195+
Right _field ->
196+
case decimal _field of
197+
Right (v, "") -> Right $ Core.NaturalLit v -- What to do when there is more text left to read?
198+
_ -> Left $ Mismatch Core.Natural field key
199+
200+
-- Integers
201+
fieldConvert key Core.Integer (Just field) =
202+
case decodeUtf8' field of
203+
Left err -> Left $ UnicodeError err
204+
Right _field ->
205+
case decimal _field of
206+
Right (v, "") -> Right $ Core.IntegerLit v -- What to do when there is more text left to read?
207+
_ -> Left $ Mismatch Core.Integer field key
208+
209+
-- Doubles
210+
fieldConvert _ Core.Double (Just field) =
211+
case decodeUtf8' field of
212+
Left err -> Left $ UnicodeError err
213+
Right _field ->
214+
case double _field of
215+
Right (v, "") -> Right $ Core.DoubleLit $ Core.DhallDouble v
216+
_ -> Right $ Core.DoubleLit $ Core.DhallDouble (read "NaN")
217+
218+
-- Text
219+
fieldConvert _ Core.Text (Just field) =
220+
case decodeUtf8' field of
221+
Left err -> Left $ UnicodeError err
222+
Right _field -> return $ Core.TextLit $ Core.Chunks [] $ _field
223+
224+
-- Optionals
225+
fieldConvert key (Core.App Core.Optional t) maybeField = do
226+
expression <- fieldConvert key t maybeField
227+
return $ Core.Some expression
228+
229+
fieldConvert _ t _ = Left $ Unsupported t
230+
231+
data CompileError
232+
= Unsupported ExprX
233+
| TypeError (TypeCheck.TypeError Src Void)
234+
| BadDhallType
235+
ExprX -- Expression type
236+
ExprX -- Whole expression
237+
| MissingKey Text
238+
| UnhandledKeys [Text] -- Keys in CSV but not in schema
239+
| Mismatch
240+
ExprX -- Expected Dhall Type
241+
Data.Csv.Field -- Actual field
242+
Text -- Record key
243+
| ContainsUnion ExprX
244+
| UndecidableUnion
245+
ExprX -- Expected Type
246+
Data.Csv.Field -- CSV Field
247+
[ExprX] -- Multiple conversions
248+
| UndecidableMissingUnion
249+
ExprX -- Expected Type
250+
[ExprX] -- Multiple Conversions
251+
| UnicodeError UnicodeException
252+
deriving Show
253+
254+
instance Exception CompileError

0 commit comments

Comments
 (0)