got it working B)

This commit is contained in:
Nikola Kubiczek 2022-08-20 03:18:43 +02:00
parent 8c38dbe69e
commit 8f4ed393d2
Signed by: yaemiku
GPG Key ID: ADC039636B3E4AAB
9 changed files with 301 additions and 42 deletions

View File

@ -7,5 +7,15 @@ and this project adheres to the
[Haskell Package Versioning Policy](https://pvp.haskell.org/).
## Unreleased
### Added
- filters probably ??? maybe searching
### Changed
- change how the exe works
## 0.1.0.0 - 2022-08-20
### Added
- support for both normal and void tag
- attribute parsing
- removal of invalid tag names
## 0.1.0.0 - YYYY-MM-DD

View File

@ -1 +1,6 @@
# boba
pop pop thingies
in other words an absurdly simple :sparkles: **_html_** :sparkles: parser
the program currently takes std, processes it, outputs it to stdout as a haskell type, and then as a string

View File

@ -1,6 +1,12 @@
module Main where
import Lib
import Parsing
import Types
main :: IO ()
main = someFunc
main = do
content <- fmap (concat . lines) getContents
print (stringToHTML content)
putStrLn ""
print (htmlToString $ stringToHTML content)

View File

@ -6,12 +6,12 @@ cabal-version: 1.12
name: boba
version: 0.1.0.0
description: Please see the README on GitHub at <https://github.com/githubuser/boba#readme>
homepage: https://github.com/githubuser/boba#readme
bug-reports: https://github.com/githubuser/boba/issues
author: Author name here
maintainer: example@example.com
copyright: 2022 Author name here
description: Please see the README on GitLab at <https://gitlab.com/yaemiku/boba#readme>
homepage: https://gitlab.com/yaemiku/boba#readme
bug-reports: https://gitlab.com/yaemiku/boba/issues
author: Mikołaj Kubiczek
maintainer: me@yaemiku.dev
copyright: 2022 yaemiku
license: BSD3
license-file: LICENSE
build-type: Simple
@ -19,13 +19,11 @@ extra-source-files:
README.md
CHANGELOG.md
source-repository head
type: git
location: https://github.com/githubuser/boba
library
exposed-modules:
Lib
Parsing
Types
other-modules:
Paths_boba
hs-source-dirs:
@ -34,7 +32,7 @@ library
base >=4.7 && <5
default-language: Haskell2010
executable boba-exe
executable boba
main-is: Main.hs
other-modules:
Paths_boba

View File

@ -1,10 +1,12 @@
name: boba
version: 0.1.0.0
github: "githubuser/boba"
license: BSD3
author: "Author name here"
maintainer: "example@example.com"
copyright: "2022 Author name here"
author: "Mikołaj Kubiczek"
maintainer: "me@yaemiku.dev"
copyright: "2022 yaemiku"
description: Please see the README on GitLab at <https://gitlab.com/yaemiku/boba#readme>
homepage: https://gitlab.com/yaemiku/boba#readme
bug-reports: https://gitlab.com/yaemiku/boba/issues
extra-source-files:
- README.md
@ -17,7 +19,6 @@ extra-source-files:
# To avoid duplicated efforts in documentation and dealing with the
# complications of embedding Haddock markup inside cabal files, it is
# common to point users to the README.md file.
description: Please see the README on GitHub at <https://github.com/githubuser/boba#readme>
dependencies:
- base >= 4.7 && < 5
@ -26,7 +27,7 @@ library:
source-dirs: src
executables:
boba-exe:
boba:
main: Main.hs
source-dirs: app
ghc-options:

View File

@ -1,6 +1,15 @@
module Lib
( someFunc
) where
module Lib where
someFunc :: IO ()
someFunc = putStrLn "someFunc"
import Parsing
import Types
htmlToString :: HTML -> String
htmlToString (Text s) = s
htmlToString (Node (Normal t) a c) = "<" ++ t ++ concatMap attrToString a ++ ">" ++ concatMap htmlToString c ++ "</" ++ t ++ ">"
htmlToString (Node (Void t) a _) = "<" ++ t ++ concatMap attrToString a ++ ">"
htmlToString (Node (Invalid t) _ _) = ""
htmlToString (Root c) = concatMap htmlToString c
attrToString :: Attr -> String
attrToString (K k) = ' ' : k
attrToString (KV k v) = ' ' : k ++ "=\"" ++ v ++ "\""

94
src/Parsing.hs Normal file
View File

@ -0,0 +1,94 @@
module Parsing where
import Data.Char (isSpace)
import Data.Function (on)
import Data.List (groupBy)
import Types
stringToHTML :: String -> HTML
stringToHTML s = let (_, _, a, _) = recurseHTMLString (stringToTags s, [], [], []) in reverseHTML $ Root a
recurseHTMLString :: ([ParsedTag], [ParsedTag], [HTML], [[HTML]]) -> ([ParsedTag], [ParsedTag], [HTML], [[HTML]])
recurseHTMLString ([], _, a, b) = ([], [], a, b)
recurseHTMLString (TextTag text : xs, ys, node, nodes) = recurseHTMLString (xs, ys, Text (trim text) : node, nodes)
recurseHTMLString (ClosingTag t : xs, ys, node, nodes) =
let z : zs = ys
in recurseHTMLString (xs, zs, Node t (attrs z) node : head nodes, tail nodes)
recurseHTMLString (x : xs, ys, node, nodes) = case tag x of
Normal _ -> recurseHTMLString (xs, x : ys, [], node : nodes)
Invalid _ -> recurseHTMLString (xs, ys, node, nodes)
t -> recurseHTMLString (xs, ys, Node t (attrs x) [] : node, nodes)
reverseHTML :: HTML -> HTML
reverseHTML (Root c) = Root (reverse $ map reverseHTML c)
reverseHTML (Node t a c) = Node t a (reverse $ map reverseHTML c)
reverseHTML node = node
stringToTags :: String -> [ParsedTag]
stringToTags s =
filter validTag $
map
(pairToTag . foldr tagFolding ("", 0))
( groupBy tagGrouping $
zip s $
zipWith max <$> drop 1 <*> map (* 2) $
map (\x -> if x == 0 then 0 else 1) $
zipWith min (scanl normalTagScanning 0 s) (reverse $ scanl reverseTagScanning 0 $ reverse s)
)
validTag :: ParsedTag -> Bool
validTag (TextTag _) = True
validTag t = case tag t of
Invalid _ -> False
_ -> True
pairToTag :: (String, Int) -> ParsedTag
pairToTag (s, 0) = TextTag s
pairToTag (s, _) =
let (tag : attrs) = words $ contents s
in if head tag == '/'
then case getTag $ tail tag of
Normal v -> ClosingTag $ Normal v
t -> ClosingTag $ Invalid $ name t
else OpeningTag (getTag tag) $ map extractAttrs attrs
extractAttrs :: String -> Attr
extractAttrs s =
if '=' `elem` s
then let (k, v) = break (== '=') s in KV k $ if (||) <$> (== "'") <*> (== "\"") $ take 1 v then contents v else v
else K s
tagScanning :: Int -> Char -> Int
tagScanning 1 '\'' = 2
tagScanning 2 '\'' = 1
tagScanning 1 '"' = 3
tagScanning 3 '"' = 1
tagScanning 1 '`' = 4
tagScanning 4 '`' = 1
tagScanning n _ = n
normalTagScanning :: Int -> Char -> Int
normalTagScanning 0 '<' = 1
normalTagScanning 1 '>' = 0
normalTagScanning a b = tagScanning a b
reverseTagScanning :: Int -> Char -> Int
reverseTagScanning 0 '>' = 1
reverseTagScanning 1 '<' = 0
reverseTagScanning a b = tagScanning a b
tagFolding :: (Char, Int) -> (String, Int) -> (String, Int)
tagFolding (c, i) (s, _) = (c : s, i)
tagGrouping :: (Char, Int) -> (Char, Int) -> Bool
tagGrouping (_, a) (_, b) = (0 < a && a < b) || (a == 0 && a == b)
contents :: String -> String
contents "" = ""
contents [c] = ""
contents s = init . tail $ s
trim :: String -> String
trim = f . f
where
f = reverse . dropWhile isSpace

134
src/Types.hs Normal file
View File

@ -0,0 +1,134 @@
module Types where
import Data.Char (toLower)
import Data.List (find)
import qualified Data.Maybe
data HTML = Text String | Node {htmlType :: Tag, attributes :: [Attr], children :: [HTML]} | Root {ch :: [HTML]} deriving (Eq, Show, Read)
data ParsedTag = OpeningTag {tag :: Tag, attrs :: [Attr]} | ClosingTag {tag :: Tag} | TextTag {text :: String} deriving (Eq, Show, Read)
data Attr = K String | KV String String deriving (Eq, Show, Read)
data Tag = Normal {name :: String} | Void {name :: String} | Invalid {name :: String} deriving (Eq, Show, Read)
getTag :: String -> Tag
getTag t = Data.Maybe.fromMaybe (Invalid $ map toLower t) (find ((== map toLower t) . name) tags)
tags :: [Tag]
tags =
[ Void "!doctype",
Normal "a",
Normal "abbr",
Normal "address",
Void "area",
Normal "article",
Normal "aside",
Normal "audio",
Normal "b",
Void "base",
Normal "bdi",
Normal "bdo",
Normal "blockquote",
Normal "body",
Void "br",
Normal "button",
Normal "canvas",
Normal "caption",
Normal "cite",
Normal "code",
Void "col",
Normal "colgroup",
Normal "data",
Normal "datalist",
Normal "dd",
Normal "del",
Normal "details",
Normal "dfn",
Normal "dialog",
Normal "div",
Normal "dl",
Normal "dt",
Normal "em",
Void "embed",
Normal "fieldset",
Normal "figure",
Normal "footer",
Normal "form",
Normal "h1",
Normal "h2",
Normal "h3",
Normal "h4",
Normal "h5",
Normal "h6",
Normal "head",
Normal "header",
Normal "hgroup",
Void "hr",
Normal "html",
Normal "i",
Normal "iframe",
Void "img",
Void "input",
Normal "ins",
Normal "kbd",
Normal "keygen",
Normal "label",
Normal "legend",
Normal "li",
Void "link",
Normal "main",
Normal "map",
Normal "mark",
Normal "menu",
Void "menuitem",
Void "meta",
Normal "meter",
Normal "nav",
Normal "noscript",
Normal "object",
Normal "ol",
Normal "optgroup",
Normal "option",
Normal "output",
Normal "p",
Void "param",
Normal "pre",
Normal "progress",
Normal "q",
Normal "rb",
Normal "rp",
Normal "rt",
Normal "rtc",
Normal "ruby",
Normal "s",
Normal "samp",
Normal "script",
Normal "section",
Normal "select",
Normal "small",
Void "source",
Normal "span",
Normal "strong",
Normal "style",
Normal "sub",
Normal "summary",
Normal "sup",
Normal "table",
Normal "tbody",
Normal "td",
Normal "template",
Normal "textarea",
Normal "tfoot",
Normal "th",
Normal "thead",
Normal "time",
Normal "title",
Normal "tr",
Void "track",
Normal "u",
Normal "ul",
Normal "var",
Normal "video",
Void "wbr"
]

View File

@ -3,3 +3,5 @@ resolver:
packages:
- .
system-ghc: true