New World .datasheet file format
-
- Posts: 5
- Joined: Tue Jul 20, 2021 11:10 am
Re: Datasheet Header
del
Last edited by badmp3 on Fri Aug 20, 2021 11:53 am, edited 1 time in total.
-
- Posts: 1
- Joined: Wed Aug 11, 2021 1:43 am
Re: New World .datasheet file format
For completeness sake here's a parser written in Haskell. other than bytestring, attoparsec, and attoparsec-binary it doesn't depend on anything. Also included a small usage example where I turn the parsed data into a CSV file.
Code: Select all
module DatasheetParser
( parseDatasheet
, Datasheet (..)
, Table (..)
, Row (..)
, Cell (..)
, Column (columnName)
, ColumnType
) where
import qualified Data.ByteString as BS
import qualified Data.Attoparsec.ByteString as A
import qualified Data.Attoparsec.Binary as B
import Data.Word
import Control.Monad (forM)
import GHC.Float (castWord32ToFloat)
getString :: BS.ByteString -> Int -> BS.ByteString
getString dat offset = BS.takeWhile (/= 0) $ BS.drop (offset) dat
data DatasheetHeader
= DatasheetHeader { headerRevision :: Word32
, headerUniqueIdOffset :: Word32
, headerTypeOffset :: Word32
, headerRowNumber :: Word32
, headerPlainTextLen :: Word32
, headerPlainTextOffset :: Word32
, headerSig :: Word32
, headerColumnCount :: Word32
, headerRowCount :: Word32
} deriving (Show)
data DatasheetHeaderStrings
= DatasheetHeaderStrings { headerUniqueId :: BS.ByteString
, headerType :: BS.ByteString
} deriving (Show)
data ColumnType = TString
| TFloat
| TBool
deriving (Show)
data Column
= Column { columnName :: BS.ByteString
, columnType :: ColumnType
}
deriving (Show)
data Cell = CString BS.ByteString
| CFloat Float
| CBool Bool
deriving (Show)
newtype Row = Row [Cell]
deriving (Show)
newtype Table = Table [Row]
deriving (Show)
data Datasheet = Datasheet DatasheetHeader DatasheetHeaderStrings [Column] Table
deriving (Show)
datasheetHeaderParser :: A.Parser DatasheetHeader
datasheetHeaderParser = do
revision <- B.anyWord32le
_ <- A.take 4
uniqueIdOffset <- B.anyWord32le
_ <- A.take 4
typeOffset <- B.anyWord32le
rowNumber <- B.anyWord32le
plainTextLength <- B.anyWord32le
_ <- A.take 28
plainTextOffset <- B.anyWord32le >>= return . (+ 60)
hSig <- B.anyWord32be
_ <- A.take 4
columnCount <- B.anyWord32le
rowCount <- B.anyWord32le
_ <- A.take 16
return
$ DatasheetHeader
revision
uniqueIdOffset
typeOffset
rowNumber
plainTextLength
plainTextOffset
hSig
columnCount
rowCount
datasheetHeaderStrings :: DatasheetHeader -> BS.ByteString -> DatasheetHeaderStrings
datasheetHeaderStrings h d
= DatasheetHeaderStrings
(getString d (fromIntegral $ headerUniqueIdOffset h))
(getString d (fromIntegral $ headerTypeOffset h))
parseColumn :: BS.ByteString -> A.Parser Column
parseColumn s = do
_ <- A.take 4
columnNameOffset <- B.anyWord32le
columnType <- B.anyWord32le
return
$ Column
(getString s (fromIntegral columnNameOffset))
(case fromIntegral columnType of
1 -> TString
2 -> TFloat
3 -> TBool
_ -> error $ show columnType)
parseCell :: BS.ByteString -> ColumnType -> A.Parser Cell
parseCell s t = do
so <- B.anyWord32le
value <- B.anyWord32le
return (case t of
TString -> CString (getString s $ fromIntegral value)
TFloat -> CFloat $ castWord32ToFloat value
TBool -> CBool (if (fromIntegral value) == 0 then False else True))
datasheetParser :: BS.ByteString -> A.Parser Datasheet
datasheetParser d = do
header <- datasheetHeaderParser
let plen = fromIntegral $ headerPlainTextLen header
strings = BS.drop (BS.length d - plen) d
headerStrings = datasheetHeaderStrings header strings
columns <- forM [1..fromIntegral $ headerColumnCount header] (const $ parseColumn strings)
rows <- forM [1..fromIntegral $ headerRowCount header] (const (forM columns ((parseCell strings) . columnType) >>= return . Row))
return
$ Datasheet
header
headerStrings
columns
(Table rows)
parseDatasheet :: BS.ByteString -> Maybe Datasheet
parseDatasheet d = case A.parse (datasheetParser d) d of
A.Done _ x -> Just x
_ -> Nothing
Code: Select all
module Main (main) where
import DatasheetParser
import qualified Data.ByteString as BS
import qualified Data.ByteString.Lazy as BSL
import qualified Data.Csv as C
import Data.String (fromString)
fromCell :: Cell -> BS.ByteString
fromCell c = case c of
CString s -> s
CFloat f -> fromString $ show f
CBool b -> fromString $ show b
sheetToCsv :: Datasheet -> BS.ByteString
sheetToCsv (Datasheet _ _ columns (Table rows))
= let
colnames = map (columnName) columns
r = map (\(Row cells) -> map fromCell cells) rows
in BSL.toStrict $ C.encode ([colnames] ++ r)
main :: IO ()
main = do
f <- BS.readFile "sample/javelindata_crafting.datasheet"
let sheet = parseDatasheet f
case sheet of
Just s -> BS.writeFile "out.csv" (sheetToCsv s)
Nothing -> print "oops"