got it working B)
This commit is contained in:
parent
8c38dbe69e
commit
8f4ed393d2
12
CHANGELOG.md
12
CHANGELOG.md
@ -7,5 +7,15 @@ and this project adheres to the
|
||||
[Haskell Package Versioning Policy](https://pvp.haskell.org/).
|
||||
|
||||
## Unreleased
|
||||
### Added
|
||||
- filters probably ??? maybe searching
|
||||
|
||||
### Changed
|
||||
- change how the exe works
|
||||
|
||||
## 0.1.0.0 - 2022-08-20
|
||||
### Added
|
||||
- support for both normal and void tag
|
||||
- attribute parsing
|
||||
- removal of invalid tag names
|
||||
|
||||
## 0.1.0.0 - YYYY-MM-DD
|
||||
|
@ -1 +1,6 @@
|
||||
# boba
|
||||
|
||||
pop pop thingies
|
||||
in other words an absurdly simple :sparkles: **_html_** :sparkles: parser
|
||||
|
||||
the program currently takes std, processes it, outputs it to stdout as a haskell type, and then as a string
|
||||
|
@ -1,6 +1,12 @@
|
||||
module Main where
|
||||
|
||||
import Lib
|
||||
import Parsing
|
||||
import Types
|
||||
|
||||
main :: IO ()
|
||||
main = someFunc
|
||||
main = do
|
||||
content <- fmap (concat . lines) getContents
|
||||
print (stringToHTML content)
|
||||
putStrLn ""
|
||||
print (htmlToString $ stringToHTML content)
|
||||
|
20
boba.cabal
20
boba.cabal
@ -6,12 +6,12 @@ cabal-version: 1.12
|
||||
|
||||
name: boba
|
||||
version: 0.1.0.0
|
||||
description: Please see the README on GitHub at <https://github.com/githubuser/boba#readme>
|
||||
homepage: https://github.com/githubuser/boba#readme
|
||||
bug-reports: https://github.com/githubuser/boba/issues
|
||||
author: Author name here
|
||||
maintainer: example@example.com
|
||||
copyright: 2022 Author name here
|
||||
description: Please see the README on GitLab at <https://gitlab.com/yaemiku/boba#readme>
|
||||
homepage: https://gitlab.com/yaemiku/boba#readme
|
||||
bug-reports: https://gitlab.com/yaemiku/boba/issues
|
||||
author: Mikołaj Kubiczek
|
||||
maintainer: me@yaemiku.dev
|
||||
copyright: 2022 yaemiku
|
||||
license: BSD3
|
||||
license-file: LICENSE
|
||||
build-type: Simple
|
||||
@ -19,13 +19,11 @@ extra-source-files:
|
||||
README.md
|
||||
CHANGELOG.md
|
||||
|
||||
source-repository head
|
||||
type: git
|
||||
location: https://github.com/githubuser/boba
|
||||
|
||||
library
|
||||
exposed-modules:
|
||||
Lib
|
||||
Parsing
|
||||
Types
|
||||
other-modules:
|
||||
Paths_boba
|
||||
hs-source-dirs:
|
||||
@ -34,7 +32,7 @@ library
|
||||
base >=4.7 && <5
|
||||
default-language: Haskell2010
|
||||
|
||||
executable boba-exe
|
||||
executable boba
|
||||
main-is: Main.hs
|
||||
other-modules:
|
||||
Paths_boba
|
||||
|
49
package.yaml
49
package.yaml
@ -1,14 +1,16 @@
|
||||
name: boba
|
||||
version: 0.1.0.0
|
||||
github: "githubuser/boba"
|
||||
license: BSD3
|
||||
author: "Author name here"
|
||||
maintainer: "example@example.com"
|
||||
copyright: "2022 Author name here"
|
||||
name: boba
|
||||
version: 0.1.0.0
|
||||
license: BSD3
|
||||
author: "Mikołaj Kubiczek"
|
||||
maintainer: "me@yaemiku.dev"
|
||||
copyright: "2022 yaemiku"
|
||||
description: Please see the README on GitLab at <https://gitlab.com/yaemiku/boba#readme>
|
||||
homepage: https://gitlab.com/yaemiku/boba#readme
|
||||
bug-reports: https://gitlab.com/yaemiku/boba/issues
|
||||
|
||||
extra-source-files:
|
||||
- README.md
|
||||
- CHANGELOG.md
|
||||
- README.md
|
||||
- CHANGELOG.md
|
||||
|
||||
# Metadata used when publishing your package
|
||||
# synopsis: Short description of your package
|
||||
@ -17,32 +19,31 @@ extra-source-files:
|
||||
# To avoid duplicated efforts in documentation and dealing with the
|
||||
# complications of embedding Haddock markup inside cabal files, it is
|
||||
# common to point users to the README.md file.
|
||||
description: Please see the README on GitHub at <https://github.com/githubuser/boba#readme>
|
||||
|
||||
dependencies:
|
||||
- base >= 4.7 && < 5
|
||||
- base >= 4.7 && < 5
|
||||
|
||||
library:
|
||||
source-dirs: src
|
||||
|
||||
executables:
|
||||
boba-exe:
|
||||
main: Main.hs
|
||||
source-dirs: app
|
||||
boba:
|
||||
main: Main.hs
|
||||
source-dirs: app
|
||||
ghc-options:
|
||||
- -threaded
|
||||
- -rtsopts
|
||||
- -with-rtsopts=-N
|
||||
- -threaded
|
||||
- -rtsopts
|
||||
- -with-rtsopts=-N
|
||||
dependencies:
|
||||
- boba
|
||||
- boba
|
||||
|
||||
tests:
|
||||
boba-test:
|
||||
main: Spec.hs
|
||||
source-dirs: test
|
||||
main: Spec.hs
|
||||
source-dirs: test
|
||||
ghc-options:
|
||||
- -threaded
|
||||
- -rtsopts
|
||||
- -with-rtsopts=-N
|
||||
- -threaded
|
||||
- -rtsopts
|
||||
- -with-rtsopts=-N
|
||||
dependencies:
|
||||
- boba
|
||||
- boba
|
||||
|
19
src/Lib.hs
19
src/Lib.hs
@ -1,6 +1,15 @@
|
||||
module Lib
|
||||
( someFunc
|
||||
) where
|
||||
module Lib where
|
||||
|
||||
someFunc :: IO ()
|
||||
someFunc = putStrLn "someFunc"
|
||||
import Parsing
|
||||
import Types
|
||||
|
||||
htmlToString :: HTML -> String
|
||||
htmlToString (Text s) = s
|
||||
htmlToString (Node (Normal t) a c) = "<" ++ t ++ concatMap attrToString a ++ ">" ++ concatMap htmlToString c ++ "</" ++ t ++ ">"
|
||||
htmlToString (Node (Void t) a _) = "<" ++ t ++ concatMap attrToString a ++ ">"
|
||||
htmlToString (Node (Invalid t) _ _) = ""
|
||||
htmlToString (Root c) = concatMap htmlToString c
|
||||
|
||||
attrToString :: Attr -> String
|
||||
attrToString (K k) = ' ' : k
|
||||
attrToString (KV k v) = ' ' : k ++ "=\"" ++ v ++ "\""
|
||||
|
94
src/Parsing.hs
Normal file
94
src/Parsing.hs
Normal file
@ -0,0 +1,94 @@
|
||||
module Parsing where
|
||||
|
||||
import Data.Char (isSpace)
|
||||
import Data.Function (on)
|
||||
import Data.List (groupBy)
|
||||
import Types
|
||||
|
||||
stringToHTML :: String -> HTML
|
||||
stringToHTML s = let (_, _, a, _) = recurseHTMLString (stringToTags s, [], [], []) in reverseHTML $ Root a
|
||||
|
||||
recurseHTMLString :: ([ParsedTag], [ParsedTag], [HTML], [[HTML]]) -> ([ParsedTag], [ParsedTag], [HTML], [[HTML]])
|
||||
recurseHTMLString ([], _, a, b) = ([], [], a, b)
|
||||
recurseHTMLString (TextTag text : xs, ys, node, nodes) = recurseHTMLString (xs, ys, Text (trim text) : node, nodes)
|
||||
recurseHTMLString (ClosingTag t : xs, ys, node, nodes) =
|
||||
let z : zs = ys
|
||||
in recurseHTMLString (xs, zs, Node t (attrs z) node : head nodes, tail nodes)
|
||||
recurseHTMLString (x : xs, ys, node, nodes) = case tag x of
|
||||
Normal _ -> recurseHTMLString (xs, x : ys, [], node : nodes)
|
||||
Invalid _ -> recurseHTMLString (xs, ys, node, nodes)
|
||||
t -> recurseHTMLString (xs, ys, Node t (attrs x) [] : node, nodes)
|
||||
|
||||
reverseHTML :: HTML -> HTML
|
||||
reverseHTML (Root c) = Root (reverse $ map reverseHTML c)
|
||||
reverseHTML (Node t a c) = Node t a (reverse $ map reverseHTML c)
|
||||
reverseHTML node = node
|
||||
|
||||
stringToTags :: String -> [ParsedTag]
|
||||
stringToTags s =
|
||||
filter validTag $
|
||||
map
|
||||
(pairToTag . foldr tagFolding ("", 0))
|
||||
( groupBy tagGrouping $
|
||||
zip s $
|
||||
zipWith max <$> drop 1 <*> map (* 2) $
|
||||
map (\x -> if x == 0 then 0 else 1) $
|
||||
zipWith min (scanl normalTagScanning 0 s) (reverse $ scanl reverseTagScanning 0 $ reverse s)
|
||||
)
|
||||
|
||||
validTag :: ParsedTag -> Bool
|
||||
validTag (TextTag _) = True
|
||||
validTag t = case tag t of
|
||||
Invalid _ -> False
|
||||
_ -> True
|
||||
|
||||
pairToTag :: (String, Int) -> ParsedTag
|
||||
pairToTag (s, 0) = TextTag s
|
||||
pairToTag (s, _) =
|
||||
let (tag : attrs) = words $ contents s
|
||||
in if head tag == '/'
|
||||
then case getTag $ tail tag of
|
||||
Normal v -> ClosingTag $ Normal v
|
||||
t -> ClosingTag $ Invalid $ name t
|
||||
else OpeningTag (getTag tag) $ map extractAttrs attrs
|
||||
|
||||
extractAttrs :: String -> Attr
|
||||
extractAttrs s =
|
||||
if '=' `elem` s
|
||||
then let (k, v) = break (== '=') s in KV k $ if (||) <$> (== "'") <*> (== "\"") $ take 1 v then contents v else v
|
||||
else K s
|
||||
|
||||
tagScanning :: Int -> Char -> Int
|
||||
tagScanning 1 '\'' = 2
|
||||
tagScanning 2 '\'' = 1
|
||||
tagScanning 1 '"' = 3
|
||||
tagScanning 3 '"' = 1
|
||||
tagScanning 1 '`' = 4
|
||||
tagScanning 4 '`' = 1
|
||||
tagScanning n _ = n
|
||||
|
||||
normalTagScanning :: Int -> Char -> Int
|
||||
normalTagScanning 0 '<' = 1
|
||||
normalTagScanning 1 '>' = 0
|
||||
normalTagScanning a b = tagScanning a b
|
||||
|
||||
reverseTagScanning :: Int -> Char -> Int
|
||||
reverseTagScanning 0 '>' = 1
|
||||
reverseTagScanning 1 '<' = 0
|
||||
reverseTagScanning a b = tagScanning a b
|
||||
|
||||
tagFolding :: (Char, Int) -> (String, Int) -> (String, Int)
|
||||
tagFolding (c, i) (s, _) = (c : s, i)
|
||||
|
||||
tagGrouping :: (Char, Int) -> (Char, Int) -> Bool
|
||||
tagGrouping (_, a) (_, b) = (0 < a && a < b) || (a == 0 && a == b)
|
||||
|
||||
contents :: String -> String
|
||||
contents "" = ""
|
||||
contents [c] = ""
|
||||
contents s = init . tail $ s
|
||||
|
||||
trim :: String -> String
|
||||
trim = f . f
|
||||
where
|
||||
f = reverse . dropWhile isSpace
|
134
src/Types.hs
Normal file
134
src/Types.hs
Normal file
@ -0,0 +1,134 @@
|
||||
module Types where
|
||||
|
||||
import Data.Char (toLower)
|
||||
import Data.List (find)
|
||||
import qualified Data.Maybe
|
||||
|
||||
data HTML = Text String | Node {htmlType :: Tag, attributes :: [Attr], children :: [HTML]} | Root {ch :: [HTML]} deriving (Eq, Show, Read)
|
||||
|
||||
data ParsedTag = OpeningTag {tag :: Tag, attrs :: [Attr]} | ClosingTag {tag :: Tag} | TextTag {text :: String} deriving (Eq, Show, Read)
|
||||
|
||||
data Attr = K String | KV String String deriving (Eq, Show, Read)
|
||||
|
||||
data Tag = Normal {name :: String} | Void {name :: String} | Invalid {name :: String} deriving (Eq, Show, Read)
|
||||
|
||||
getTag :: String -> Tag
|
||||
getTag t = Data.Maybe.fromMaybe (Invalid $ map toLower t) (find ((== map toLower t) . name) tags)
|
||||
|
||||
tags :: [Tag]
|
||||
tags =
|
||||
[ Void "!doctype",
|
||||
Normal "a",
|
||||
Normal "abbr",
|
||||
Normal "address",
|
||||
Void "area",
|
||||
Normal "article",
|
||||
Normal "aside",
|
||||
Normal "audio",
|
||||
Normal "b",
|
||||
Void "base",
|
||||
Normal "bdi",
|
||||
Normal "bdo",
|
||||
Normal "blockquote",
|
||||
Normal "body",
|
||||
Void "br",
|
||||
Normal "button",
|
||||
Normal "canvas",
|
||||
Normal "caption",
|
||||
Normal "cite",
|
||||
Normal "code",
|
||||
Void "col",
|
||||
Normal "colgroup",
|
||||
Normal "data",
|
||||
Normal "datalist",
|
||||
Normal "dd",
|
||||
Normal "del",
|
||||
Normal "details",
|
||||
Normal "dfn",
|
||||
Normal "dialog",
|
||||
Normal "div",
|
||||
Normal "dl",
|
||||
Normal "dt",
|
||||
Normal "em",
|
||||
Void "embed",
|
||||
Normal "fieldset",
|
||||
Normal "figure",
|
||||
Normal "footer",
|
||||
Normal "form",
|
||||
Normal "h1",
|
||||
Normal "h2",
|
||||
Normal "h3",
|
||||
Normal "h4",
|
||||
Normal "h5",
|
||||
Normal "h6",
|
||||
Normal "head",
|
||||
Normal "header",
|
||||
Normal "hgroup",
|
||||
Void "hr",
|
||||
Normal "html",
|
||||
Normal "i",
|
||||
Normal "iframe",
|
||||
Void "img",
|
||||
Void "input",
|
||||
Normal "ins",
|
||||
Normal "kbd",
|
||||
Normal "keygen",
|
||||
Normal "label",
|
||||
Normal "legend",
|
||||
Normal "li",
|
||||
Void "link",
|
||||
Normal "main",
|
||||
Normal "map",
|
||||
Normal "mark",
|
||||
Normal "menu",
|
||||
Void "menuitem",
|
||||
Void "meta",
|
||||
Normal "meter",
|
||||
Normal "nav",
|
||||
Normal "noscript",
|
||||
Normal "object",
|
||||
Normal "ol",
|
||||
Normal "optgroup",
|
||||
Normal "option",
|
||||
Normal "output",
|
||||
Normal "p",
|
||||
Void "param",
|
||||
Normal "pre",
|
||||
Normal "progress",
|
||||
Normal "q",
|
||||
Normal "rb",
|
||||
Normal "rp",
|
||||
Normal "rt",
|
||||
Normal "rtc",
|
||||
Normal "ruby",
|
||||
Normal "s",
|
||||
Normal "samp",
|
||||
Normal "script",
|
||||
Normal "section",
|
||||
Normal "select",
|
||||
Normal "small",
|
||||
Void "source",
|
||||
Normal "span",
|
||||
Normal "strong",
|
||||
Normal "style",
|
||||
Normal "sub",
|
||||
Normal "summary",
|
||||
Normal "sup",
|
||||
Normal "table",
|
||||
Normal "tbody",
|
||||
Normal "td",
|
||||
Normal "template",
|
||||
Normal "textarea",
|
||||
Normal "tfoot",
|
||||
Normal "th",
|
||||
Normal "thead",
|
||||
Normal "time",
|
||||
Normal "title",
|
||||
Normal "tr",
|
||||
Void "track",
|
||||
Normal "u",
|
||||
Normal "ul",
|
||||
Normal "var",
|
||||
Normal "video",
|
||||
Void "wbr"
|
||||
]
|
@ -3,3 +3,5 @@ resolver:
|
||||
|
||||
packages:
|
||||
- .
|
||||
|
||||
system-ghc: true
|
||||
|
Loading…
Reference in New Issue
Block a user