I'm using FParsec to parse an input that describes its own format. For example, consider this input:
int,str,int:4,'hello',3
The first part of the input (before the colon) describes the format of the second part of the input. In this case, the format is int, str, int, which means that the actual data consists of three comma-separated values of the given types, so the result should be 4, "hello", 3.
What is the best way to parse something like this with FParsec?
I've pasted my best effort below, but I'm not happy with it. Is there a better way to do this that is cleaner, less stateful, and less reliant on the parse monad? I think this depends on smarter management of UserState, but I don't know how to do it. Thanks.
open FParsec
type State = { Formats : string[]; Index : int32 }
with static member Default = { Formats = [||]; Index = 0 }
type Value =
| Integer of int
| String of string
let parseFormat : Parser<_, State> =
parse {
let! formats =
sepBy
(pstring "int" <|> pstring "str")
(skipString ",")
|>> Array.ofList
do! updateUserState (fun state -> { state with Formats = formats })
}
let parseValue format =
match format with
| "int" -> pint32 |>> Integer
| "str" ->
between
(skipString "'")
(skipString "'")
(manySatisfy (fun c -> c <> '\''))
|>> String
| _ -> failwith "Unexpected"
let parseValueByState =
parse {
let! state = getUserState
let format = state.Formats.[state.Index]
do! setUserState { state with Index = state.Index + 1}
return! parseValue format
}
let parseData =
sepBy
parseValueByState
(skipString ",")
let parse =
parseFormat
>>. skipString ":"
>>. parseData
[<EntryPoint>]
let main argv =
let result = runParserOnString parse State.Default "" "int,str,int:4,'hello',3"
printfn "%A" result
0