New World .datasheet file format

Extraction and unpacking of game archives and compression, encryption, obfuscation, decoding of unknown files
badmp3
Posts: 5
Joined: Tue Jul 20, 2021 11:10 am

Re: Datasheet Header

Post by badmp3 »

del
Last edited by badmp3 on Fri Aug 20, 2021 11:53 am, edited 1 time in total.
Pako
Posts: 1
Joined: Wed Aug 11, 2021 1:43 am

Re: New World .datasheet file format

Post by Pako »

For completeness' sake, here's a parser written in Haskell. Other than bytestring, attoparsec, and attoparsec-binary, it doesn't depend on anything. I've also included a small usage example that turns the parsed data into a CSV file (the example additionally uses cassava for Data.Csv).

Code: Select all

-- | Parser for @.datasheet@ files.
--
-- The entry point is 'parseDatasheet', which turns the raw bytes of a file
-- into a 'Datasheet' (header, header strings, column descriptions and a
-- 'Table' of 'Row's). 'Column' is exported with only its 'columnName'
-- selector, and 'ColumnType' is exported without its constructors.
module DatasheetParser
       ( parseDatasheet
       , Datasheet (..)
       , Table (..)
       , Row (..)
       , Cell (..)
       , Column (columnName)
       , ColumnType
       ) where

import qualified Data.ByteString as BS
import qualified Data.Attoparsec.ByteString as A
import qualified Data.Attoparsec.Binary as B
import Data.Word
import Control.Monad (forM)
import GHC.Float (castWord32ToFloat)

-- | Extract the zero-terminated byte string that begins @offset@ bytes into
-- @dat@. The terminating zero byte is not part of the result; when no zero
-- byte follows, everything up to the end of @dat@ is returned.
getString :: BS.ByteString -> Int -> BS.ByteString
getString dat offset = fst (BS.break (== 0) suffix)
  where
    -- Everything from the requested offset onwards.
    suffix = BS.drop offset dat

-- | Numeric fields of the file header, in the order 'datasheetHeaderParser'
-- reads them.
data DatasheetHeader = DatasheetHeader
    { headerRevision :: Word32
      -- ^ First word of the file.
    , headerUniqueIdOffset :: Word32
      -- ^ Offset of the unique-id string inside the string table.
    , headerTypeOffset :: Word32
      -- ^ Offset of the type string inside the string table.
    , headerRowNumber :: Word32
      -- ^ Stored as read; not otherwise used by this module.
    , headerPlainTextLen :: Word32
      -- ^ Length in bytes of the string table, which 'datasheetParser'
      -- takes from the end of the input.
    , headerPlainTextOffset :: Word32
      -- ^ Value read from the file plus 60.
    , headerSig :: Word32
      -- ^ Signature word; the only field read big-endian.
    , headerColumnCount :: Word32
      -- ^ Number of column descriptions following the header.
    , headerRowCount :: Word32
      -- ^ Number of rows following the column descriptions.
    }
    deriving Show

-- | The two header strings, resolved from the string table through the
-- offsets stored in 'DatasheetHeader'.
data DatasheetHeaderStrings = DatasheetHeaderStrings
    { headerUniqueId :: BS.ByteString
      -- ^ String found at 'headerUniqueIdOffset'.
    , headerType :: BS.ByteString
      -- ^ String found at 'headerTypeOffset'.
    }
    deriving Show

-- | Kind of value stored in every cell of a column. 'parseColumn' maps the
-- on-disk tags 1, 2 and 3 to these constructors, in this order.
data ColumnType
    = TString
    | TFloat
    | TBool
    deriving Show

-- | Description of one table column.
data Column = Column
    { columnName :: BS.ByteString
      -- ^ Column name, looked up in the string table.
    , columnType :: ColumnType
      -- ^ How the cells of this column are decoded.
    }
    deriving Show

-- | A single decoded table value; one constructor per 'ColumnType'.
data Cell
    = CString BS.ByteString
    | CFloat Float
    | CBool Bool
    deriving Show

-- | One table row: its cells, in column order.
newtype Row = Row [Cell]
    deriving Show
-- | All rows of a datasheet, in file order.
newtype Table = Table [Row]
    deriving Show

-- | A fully parsed datasheet: the numeric header, the header strings, the
-- column descriptions and the table contents.
data Datasheet
    = Datasheet DatasheetHeader DatasheetHeaderStrings [Column] Table
    deriving Show

-- | Parse the fixed-size header at the start of a datasheet (92 bytes in
-- total).
--
-- Fields are consumed in file order. Every stored value is read as a
-- little-endian 32-bit word except the signature, which is read big-endian.
-- Byte ranges that are consumed but not kept are stepped over with @skip@.
datasheetHeaderParser :: A.Parser DatasheetHeader
datasheetHeaderParser = do
    rev     <- B.anyWord32le
    skip 4
    idOff   <- B.anyWord32le
    skip 4
    tyOff   <- B.anyWord32le
    rowNum  <- B.anyWord32le
    textLen <- B.anyWord32le
    skip 28
    -- The stored offset is recorded shifted by a constant 60.
    -- NOTE(review): this field ends at byte 60 of the file, so the value is
    -- presumably relative to that position - confirm against the format.
    textOff <- (+ 60) <$> B.anyWord32le
    sig     <- B.anyWord32be
    skip 4
    nCols   <- B.anyWord32le
    nRows   <- B.anyWord32le
    skip 16
    return $ DatasheetHeader
        { headerRevision        = rev
        , headerUniqueIdOffset  = idOff
        , headerTypeOffset      = tyOff
        , headerRowNumber       = rowNum
        , headerPlainTextLen    = textLen
        , headerPlainTextOffset = textOff
        , headerSig             = sig
        , headerColumnCount     = nCols
        , headerRowCount        = nRows
        }
  where
    -- Consume and discard exactly @n@ bytes.
    skip :: Int -> A.Parser ()
    skip n = A.take n >> return ()

-- | Resolve the unique-id and type strings of a header.
--
-- @h@ supplies the two offsets and @d@ is the string table they index into;
-- each string is read with 'getString'.
datasheetHeaderStrings :: DatasheetHeader -> BS.ByteString -> DatasheetHeaderStrings
datasheetHeaderStrings h d =
    DatasheetHeaderStrings
        { headerUniqueId = stringAt (headerUniqueIdOffset h)
        , headerType     = stringAt (headerTypeOffset h)
        }
  where
    -- Look up the zero-terminated string at a 32-bit table offset.
    stringAt :: Word32 -> BS.ByteString
    stringAt = getString d . fromIntegral

-- | Parse one 12-byte column description.
--
-- The first four bytes are skipped; they are followed by the offset of the
-- column name inside the string table @s@ and by a numeric type tag
-- (1 = string, 2 = float, 3 = bool), both little-endian 32-bit words.
--
-- An unrecognised type tag makes the parser fail. This is deliberate: calling
-- 'error' here would leave a lazy thunk inside the 'Column', so parsing would
-- appear to succeed and the program would only crash later, when a consumer
-- forced the column type.
parseColumn :: BS.ByteString -> A.Parser Column
parseColumn s = do
    _          <- A.take 4
    nameOffset <- B.anyWord32le
    typeTag    <- B.anyWord32le
    ty         <- case typeTag of
        1 -> return TString
        2 -> return TFloat
        3 -> return TBool
        _ -> fail ("unknown column type " ++ show typeTag)
    return $ Column (getString s (fromIntegral nameOffset)) ty

-- | Parse one 8-byte cell of a column with type @t@.
--
-- A cell is two little-endian 32-bit words. The first is consumed and
-- ignored; the second is interpreted according to the column type:
--
-- * 'TString': offset of a zero-terminated string inside the string table @s@
-- * 'TFloat': the bit pattern of a 32-bit float
-- * 'TBool': zero is 'False', anything else is 'True'
parseCell :: BS.ByteString -> ColumnType -> A.Parser Cell
parseCell s t = do
    _     <- B.anyWord32le
    value <- B.anyWord32le
    return $ case t of
        TString -> CString (getString s (fromIntegral value))
        TFloat  -> CFloat (castWord32ToFloat value)
        TBool   -> CBool (value /= 0)

-- | Parse a complete datasheet.
--
-- @d@ must be the same bytes the parser is run on: the string table is not
-- consumed from the parser input but sliced off the end of @d@, using the
-- plain-text length stored in the header. The header is followed by
-- 'headerColumnCount' column descriptions and then 'headerRowCount' rows,
-- each holding one cell per column.
--
-- The parser fails when the header claims a string table larger than the
-- whole input; otherwise the negative drop count would silently make the
-- entire file act as the string table.
datasheetParser :: BS.ByteString -> A.Parser Datasheet
datasheetParser d = do
    header <- datasheetHeaderParser
    let textLen = fromIntegral (headerPlainTextLen header) :: Int
    if textLen > BS.length d
        then fail "plain-text length exceeds input size"
        else return ()
    let strings       = BS.drop (BS.length d - textLen) d
        headerStrings = datasheetHeaderStrings header strings
    -- The ranges only serve to repeat the parsers the stated number of times.
    columns <- forM [1 .. headerColumnCount header] $ \_ ->
        parseColumn strings
    rows    <- forM [1 .. headerRowCount header] $ \_ ->
        Row <$> forM columns (parseCell strings . columnType)
    return $ Datasheet header headerStrings columns (Table rows)

-- | Parse the raw contents of a datasheet file.
--
-- Returns 'Just' the datasheet when the parser finishes on the given input,
-- and 'Nothing' when it fails or would need more input than was supplied.
-- Any input left over after the parser finishes is ignored.
parseDatasheet :: BS.ByteString -> Maybe Datasheet
parseDatasheet d = A.maybeResult (A.parse (datasheetParser d) d)


Code: Select all

module Main (main) where

import DatasheetParser
import qualified Data.ByteString as BS
import qualified Data.ByteString.Lazy as BSL
import qualified Data.Csv as C
import Data.String (fromString)

-- | Render a cell as the bytes written to the CSV file: strings are passed
-- through unchanged, floats and booleans use their 'show' representation.
fromCell :: Cell -> BS.ByteString
fromCell (CString s) = s
fromCell (CFloat f)  = fromString (show f)
fromCell (CBool b)   = fromString (show b)

-- | Encode a datasheet as CSV: one line with the column names, followed by
-- one line per table row. The header and header strings are not emitted.
sheetToCsv :: Datasheet -> BS.ByteString
sheetToCsv (Datasheet _ _ columns (Table rows)) =
    BSL.toStrict (C.encode (headerLine : bodyLines))
  where
    headerLine = map columnName columns
    bodyLines  = [ map fromCell cells | Row cells <- rows ]

-- | Convert the sample datasheet to @out.csv@.
--
-- A parse failure is raised as an 'IOError', so the message goes to stderr
-- and the process exits with a nonzero status, instead of printing a quoted
-- string to stdout and exiting successfully.
main :: IO ()
main = do
    contents <- BS.readFile "sample/javelindata_crafting.datasheet"
    case parseDatasheet contents of
        Just sheet -> BS.writeFile "out.csv" (sheetToCsv sheet)
        Nothing    -> ioError (userError "failed to parse datasheet")

badmp3
Posts: 5
Joined: Tue Jul 20, 2021 11:10 am

Re: New World .datasheet file format

Post by badmp3 »

fixed