got it working B)
This commit is contained in:
parent
8c38dbe69e
commit
8f4ed393d2
12
CHANGELOG.md
12
CHANGELOG.md
@ -7,5 +7,15 @@ and this project adheres to the
|
|||||||
[Haskell Package Versioning Policy](https://pvp.haskell.org/).
|
[Haskell Package Versioning Policy](https://pvp.haskell.org/).
|
||||||
|
|
||||||
## Unreleased
|
## Unreleased
|
||||||
|
### Added
|
||||||
|
- filters probably ??? maybe searching
|
||||||
|
|
||||||
|
### Changed
|
||||||
|
- change how the exe works
|
||||||
|
|
||||||
|
## 0.1.0.0 - 2022-08-20
|
||||||
|
### Added
|
||||||
|
- support for both normal and void tag
|
||||||
|
- attribute parsing
|
||||||
|
- removal of invalid tag names
|
||||||
|
|
||||||
## 0.1.0.0 - YYYY-MM-DD
|
|
||||||
|
@ -1 +1,6 @@
|
|||||||
# boba
|
# boba
|
||||||
|
|
||||||
|
pop pop thingies
|
||||||
|
in other words an absurdly simple :sparkles: **_html_** :sparkles: parser
|
||||||
|
|
||||||
|
the program currently takes std, processes it, outputs it to stdout as a haskell type, and then as a string
|
||||||
|
@ -1,6 +1,12 @@
|
|||||||
module Main where
|
module Main where
|
||||||
|
|
||||||
import Lib
|
import Lib
|
||||||
|
import Parsing
|
||||||
|
import Types
|
||||||
|
|
||||||
main :: IO ()
|
main :: IO ()
|
||||||
main = someFunc
|
main = do
|
||||||
|
content <- fmap (concat . lines) getContents
|
||||||
|
print (stringToHTML content)
|
||||||
|
putStrLn ""
|
||||||
|
print (htmlToString $ stringToHTML content)
|
||||||
|
20
boba.cabal
20
boba.cabal
@ -6,12 +6,12 @@ cabal-version: 1.12
|
|||||||
|
|
||||||
name: boba
|
name: boba
|
||||||
version: 0.1.0.0
|
version: 0.1.0.0
|
||||||
description: Please see the README on GitHub at <https://github.com/githubuser/boba#readme>
|
description: Please see the README on GitLab at <https://gitlab.com/yaemiku/boba#readme>
|
||||||
homepage: https://github.com/githubuser/boba#readme
|
homepage: https://gitlab.com/yaemiku/boba#readme
|
||||||
bug-reports: https://github.com/githubuser/boba/issues
|
bug-reports: https://gitlab.com/yaemiku/boba/issues
|
||||||
author: Author name here
|
author: Mikołaj Kubiczek
|
||||||
maintainer: example@example.com
|
maintainer: me@yaemiku.dev
|
||||||
copyright: 2022 Author name here
|
copyright: 2022 yaemiku
|
||||||
license: BSD3
|
license: BSD3
|
||||||
license-file: LICENSE
|
license-file: LICENSE
|
||||||
build-type: Simple
|
build-type: Simple
|
||||||
@ -19,13 +19,11 @@ extra-source-files:
|
|||||||
README.md
|
README.md
|
||||||
CHANGELOG.md
|
CHANGELOG.md
|
||||||
|
|
||||||
source-repository head
|
|
||||||
type: git
|
|
||||||
location: https://github.com/githubuser/boba
|
|
||||||
|
|
||||||
library
|
library
|
||||||
exposed-modules:
|
exposed-modules:
|
||||||
Lib
|
Lib
|
||||||
|
Parsing
|
||||||
|
Types
|
||||||
other-modules:
|
other-modules:
|
||||||
Paths_boba
|
Paths_boba
|
||||||
hs-source-dirs:
|
hs-source-dirs:
|
||||||
@ -34,7 +32,7 @@ library
|
|||||||
base >=4.7 && <5
|
base >=4.7 && <5
|
||||||
default-language: Haskell2010
|
default-language: Haskell2010
|
||||||
|
|
||||||
executable boba-exe
|
executable boba
|
||||||
main-is: Main.hs
|
main-is: Main.hs
|
||||||
other-modules:
|
other-modules:
|
||||||
Paths_boba
|
Paths_boba
|
||||||
|
13
package.yaml
13
package.yaml
@ -1,10 +1,12 @@
|
|||||||
name: boba
|
name: boba
|
||||||
version: 0.1.0.0
|
version: 0.1.0.0
|
||||||
github: "githubuser/boba"
|
|
||||||
license: BSD3
|
license: BSD3
|
||||||
author: "Author name here"
|
author: "Mikołaj Kubiczek"
|
||||||
maintainer: "example@example.com"
|
maintainer: "me@yaemiku.dev"
|
||||||
copyright: "2022 Author name here"
|
copyright: "2022 yaemiku"
|
||||||
|
description: Please see the README on GitLab at <https://gitlab.com/yaemiku/boba#readme>
|
||||||
|
homepage: https://gitlab.com/yaemiku/boba#readme
|
||||||
|
bug-reports: https://gitlab.com/yaemiku/boba/issues
|
||||||
|
|
||||||
extra-source-files:
|
extra-source-files:
|
||||||
- README.md
|
- README.md
|
||||||
@ -17,7 +19,6 @@ extra-source-files:
|
|||||||
# To avoid duplicated efforts in documentation and dealing with the
|
# To avoid duplicated efforts in documentation and dealing with the
|
||||||
# complications of embedding Haddock markup inside cabal files, it is
|
# complications of embedding Haddock markup inside cabal files, it is
|
||||||
# common to point users to the README.md file.
|
# common to point users to the README.md file.
|
||||||
description: Please see the README on GitHub at <https://github.com/githubuser/boba#readme>
|
|
||||||
|
|
||||||
dependencies:
|
dependencies:
|
||||||
- base >= 4.7 && < 5
|
- base >= 4.7 && < 5
|
||||||
@ -26,7 +27,7 @@ library:
|
|||||||
source-dirs: src
|
source-dirs: src
|
||||||
|
|
||||||
executables:
|
executables:
|
||||||
boba-exe:
|
boba:
|
||||||
main: Main.hs
|
main: Main.hs
|
||||||
source-dirs: app
|
source-dirs: app
|
||||||
ghc-options:
|
ghc-options:
|
||||||
|
19
src/Lib.hs
19
src/Lib.hs
@ -1,6 +1,15 @@
|
|||||||
module Lib
|
module Lib where
|
||||||
( someFunc
|
|
||||||
) where
|
|
||||||
|
|
||||||
someFunc :: IO ()
|
import Parsing
|
||||||
someFunc = putStrLn "someFunc"
|
import Types
|
||||||
|
|
||||||
|
htmlToString :: HTML -> String
|
||||||
|
htmlToString (Text s) = s
|
||||||
|
htmlToString (Node (Normal t) a c) = "<" ++ t ++ concatMap attrToString a ++ ">" ++ concatMap htmlToString c ++ "</" ++ t ++ ">"
|
||||||
|
htmlToString (Node (Void t) a _) = "<" ++ t ++ concatMap attrToString a ++ ">"
|
||||||
|
htmlToString (Node (Invalid t) _ _) = ""
|
||||||
|
htmlToString (Root c) = concatMap htmlToString c
|
||||||
|
|
||||||
|
attrToString :: Attr -> String
|
||||||
|
attrToString (K k) = ' ' : k
|
||||||
|
attrToString (KV k v) = ' ' : k ++ "=\"" ++ v ++ "\""
|
||||||
|
94
src/Parsing.hs
Normal file
94
src/Parsing.hs
Normal file
@ -0,0 +1,94 @@
|
|||||||
|
module Parsing where
|
||||||
|
|
||||||
|
import Data.Char (isSpace)
|
||||||
|
import Data.Function (on)
|
||||||
|
import Data.List (groupBy)
|
||||||
|
import Types
|
||||||
|
|
||||||
|
stringToHTML :: String -> HTML
|
||||||
|
stringToHTML s = let (_, _, a, _) = recurseHTMLString (stringToTags s, [], [], []) in reverseHTML $ Root a
|
||||||
|
|
||||||
|
recurseHTMLString :: ([ParsedTag], [ParsedTag], [HTML], [[HTML]]) -> ([ParsedTag], [ParsedTag], [HTML], [[HTML]])
|
||||||
|
recurseHTMLString ([], _, a, b) = ([], [], a, b)
|
||||||
|
recurseHTMLString (TextTag text : xs, ys, node, nodes) = recurseHTMLString (xs, ys, Text (trim text) : node, nodes)
|
||||||
|
recurseHTMLString (ClosingTag t : xs, ys, node, nodes) =
|
||||||
|
let z : zs = ys
|
||||||
|
in recurseHTMLString (xs, zs, Node t (attrs z) node : head nodes, tail nodes)
|
||||||
|
recurseHTMLString (x : xs, ys, node, nodes) = case tag x of
|
||||||
|
Normal _ -> recurseHTMLString (xs, x : ys, [], node : nodes)
|
||||||
|
Invalid _ -> recurseHTMLString (xs, ys, node, nodes)
|
||||||
|
t -> recurseHTMLString (xs, ys, Node t (attrs x) [] : node, nodes)
|
||||||
|
|
||||||
|
reverseHTML :: HTML -> HTML
|
||||||
|
reverseHTML (Root c) = Root (reverse $ map reverseHTML c)
|
||||||
|
reverseHTML (Node t a c) = Node t a (reverse $ map reverseHTML c)
|
||||||
|
reverseHTML node = node
|
||||||
|
|
||||||
|
stringToTags :: String -> [ParsedTag]
|
||||||
|
stringToTags s =
|
||||||
|
filter validTag $
|
||||||
|
map
|
||||||
|
(pairToTag . foldr tagFolding ("", 0))
|
||||||
|
( groupBy tagGrouping $
|
||||||
|
zip s $
|
||||||
|
zipWith max <$> drop 1 <*> map (* 2) $
|
||||||
|
map (\x -> if x == 0 then 0 else 1) $
|
||||||
|
zipWith min (scanl normalTagScanning 0 s) (reverse $ scanl reverseTagScanning 0 $ reverse s)
|
||||||
|
)
|
||||||
|
|
||||||
|
validTag :: ParsedTag -> Bool
|
||||||
|
validTag (TextTag _) = True
|
||||||
|
validTag t = case tag t of
|
||||||
|
Invalid _ -> False
|
||||||
|
_ -> True
|
||||||
|
|
||||||
|
pairToTag :: (String, Int) -> ParsedTag
|
||||||
|
pairToTag (s, 0) = TextTag s
|
||||||
|
pairToTag (s, _) =
|
||||||
|
let (tag : attrs) = words $ contents s
|
||||||
|
in if head tag == '/'
|
||||||
|
then case getTag $ tail tag of
|
||||||
|
Normal v -> ClosingTag $ Normal v
|
||||||
|
t -> ClosingTag $ Invalid $ name t
|
||||||
|
else OpeningTag (getTag tag) $ map extractAttrs attrs
|
||||||
|
|
||||||
|
extractAttrs :: String -> Attr
|
||||||
|
extractAttrs s =
|
||||||
|
if '=' `elem` s
|
||||||
|
then let (k, v) = break (== '=') s in KV k $ if (||) <$> (== "'") <*> (== "\"") $ take 1 v then contents v else v
|
||||||
|
else K s
|
||||||
|
|
||||||
|
tagScanning :: Int -> Char -> Int
|
||||||
|
tagScanning 1 '\'' = 2
|
||||||
|
tagScanning 2 '\'' = 1
|
||||||
|
tagScanning 1 '"' = 3
|
||||||
|
tagScanning 3 '"' = 1
|
||||||
|
tagScanning 1 '`' = 4
|
||||||
|
tagScanning 4 '`' = 1
|
||||||
|
tagScanning n _ = n
|
||||||
|
|
||||||
|
normalTagScanning :: Int -> Char -> Int
|
||||||
|
normalTagScanning 0 '<' = 1
|
||||||
|
normalTagScanning 1 '>' = 0
|
||||||
|
normalTagScanning a b = tagScanning a b
|
||||||
|
|
||||||
|
reverseTagScanning :: Int -> Char -> Int
|
||||||
|
reverseTagScanning 0 '>' = 1
|
||||||
|
reverseTagScanning 1 '<' = 0
|
||||||
|
reverseTagScanning a b = tagScanning a b
|
||||||
|
|
||||||
|
tagFolding :: (Char, Int) -> (String, Int) -> (String, Int)
|
||||||
|
tagFolding (c, i) (s, _) = (c : s, i)
|
||||||
|
|
||||||
|
tagGrouping :: (Char, Int) -> (Char, Int) -> Bool
|
||||||
|
tagGrouping (_, a) (_, b) = (0 < a && a < b) || (a == 0 && a == b)
|
||||||
|
|
||||||
|
contents :: String -> String
|
||||||
|
contents "" = ""
|
||||||
|
contents [c] = ""
|
||||||
|
contents s = init . tail $ s
|
||||||
|
|
||||||
|
trim :: String -> String
|
||||||
|
trim = f . f
|
||||||
|
where
|
||||||
|
f = reverse . dropWhile isSpace
|
134
src/Types.hs
Normal file
134
src/Types.hs
Normal file
@ -0,0 +1,134 @@
|
|||||||
|
module Types where
|
||||||
|
|
||||||
|
import Data.Char (toLower)
|
||||||
|
import Data.List (find)
|
||||||
|
import qualified Data.Maybe
|
||||||
|
|
||||||
|
data HTML = Text String | Node {htmlType :: Tag, attributes :: [Attr], children :: [HTML]} | Root {ch :: [HTML]} deriving (Eq, Show, Read)
|
||||||
|
|
||||||
|
data ParsedTag = OpeningTag {tag :: Tag, attrs :: [Attr]} | ClosingTag {tag :: Tag} | TextTag {text :: String} deriving (Eq, Show, Read)
|
||||||
|
|
||||||
|
data Attr = K String | KV String String deriving (Eq, Show, Read)
|
||||||
|
|
||||||
|
data Tag = Normal {name :: String} | Void {name :: String} | Invalid {name :: String} deriving (Eq, Show, Read)
|
||||||
|
|
||||||
|
getTag :: String -> Tag
|
||||||
|
getTag t = Data.Maybe.fromMaybe (Invalid $ map toLower t) (find ((== map toLower t) . name) tags)
|
||||||
|
|
||||||
|
tags :: [Tag]
|
||||||
|
tags =
|
||||||
|
[ Void "!doctype",
|
||||||
|
Normal "a",
|
||||||
|
Normal "abbr",
|
||||||
|
Normal "address",
|
||||||
|
Void "area",
|
||||||
|
Normal "article",
|
||||||
|
Normal "aside",
|
||||||
|
Normal "audio",
|
||||||
|
Normal "b",
|
||||||
|
Void "base",
|
||||||
|
Normal "bdi",
|
||||||
|
Normal "bdo",
|
||||||
|
Normal "blockquote",
|
||||||
|
Normal "body",
|
||||||
|
Void "br",
|
||||||
|
Normal "button",
|
||||||
|
Normal "canvas",
|
||||||
|
Normal "caption",
|
||||||
|
Normal "cite",
|
||||||
|
Normal "code",
|
||||||
|
Void "col",
|
||||||
|
Normal "colgroup",
|
||||||
|
Normal "data",
|
||||||
|
Normal "datalist",
|
||||||
|
Normal "dd",
|
||||||
|
Normal "del",
|
||||||
|
Normal "details",
|
||||||
|
Normal "dfn",
|
||||||
|
Normal "dialog",
|
||||||
|
Normal "div",
|
||||||
|
Normal "dl",
|
||||||
|
Normal "dt",
|
||||||
|
Normal "em",
|
||||||
|
Void "embed",
|
||||||
|
Normal "fieldset",
|
||||||
|
Normal "figure",
|
||||||
|
Normal "footer",
|
||||||
|
Normal "form",
|
||||||
|
Normal "h1",
|
||||||
|
Normal "h2",
|
||||||
|
Normal "h3",
|
||||||
|
Normal "h4",
|
||||||
|
Normal "h5",
|
||||||
|
Normal "h6",
|
||||||
|
Normal "head",
|
||||||
|
Normal "header",
|
||||||
|
Normal "hgroup",
|
||||||
|
Void "hr",
|
||||||
|
Normal "html",
|
||||||
|
Normal "i",
|
||||||
|
Normal "iframe",
|
||||||
|
Void "img",
|
||||||
|
Void "input",
|
||||||
|
Normal "ins",
|
||||||
|
Normal "kbd",
|
||||||
|
Normal "keygen",
|
||||||
|
Normal "label",
|
||||||
|
Normal "legend",
|
||||||
|
Normal "li",
|
||||||
|
Void "link",
|
||||||
|
Normal "main",
|
||||||
|
Normal "map",
|
||||||
|
Normal "mark",
|
||||||
|
Normal "menu",
|
||||||
|
Void "menuitem",
|
||||||
|
Void "meta",
|
||||||
|
Normal "meter",
|
||||||
|
Normal "nav",
|
||||||
|
Normal "noscript",
|
||||||
|
Normal "object",
|
||||||
|
Normal "ol",
|
||||||
|
Normal "optgroup",
|
||||||
|
Normal "option",
|
||||||
|
Normal "output",
|
||||||
|
Normal "p",
|
||||||
|
Void "param",
|
||||||
|
Normal "pre",
|
||||||
|
Normal "progress",
|
||||||
|
Normal "q",
|
||||||
|
Normal "rb",
|
||||||
|
Normal "rp",
|
||||||
|
Normal "rt",
|
||||||
|
Normal "rtc",
|
||||||
|
Normal "ruby",
|
||||||
|
Normal "s",
|
||||||
|
Normal "samp",
|
||||||
|
Normal "script",
|
||||||
|
Normal "section",
|
||||||
|
Normal "select",
|
||||||
|
Normal "small",
|
||||||
|
Void "source",
|
||||||
|
Normal "span",
|
||||||
|
Normal "strong",
|
||||||
|
Normal "style",
|
||||||
|
Normal "sub",
|
||||||
|
Normal "summary",
|
||||||
|
Normal "sup",
|
||||||
|
Normal "table",
|
||||||
|
Normal "tbody",
|
||||||
|
Normal "td",
|
||||||
|
Normal "template",
|
||||||
|
Normal "textarea",
|
||||||
|
Normal "tfoot",
|
||||||
|
Normal "th",
|
||||||
|
Normal "thead",
|
||||||
|
Normal "time",
|
||||||
|
Normal "title",
|
||||||
|
Normal "tr",
|
||||||
|
Void "track",
|
||||||
|
Normal "u",
|
||||||
|
Normal "ul",
|
||||||
|
Normal "var",
|
||||||
|
Normal "video",
|
||||||
|
Void "wbr"
|
||||||
|
]
|
@ -3,3 +3,5 @@ resolver:
|
|||||||
|
|
||||||
packages:
|
packages:
|
||||||
- .
|
- .
|
||||||
|
|
||||||
|
system-ghc: true
|
||||||
|
Loading…
Reference in New Issue
Block a user