diff --git a/CHANGELOG.md b/CHANGELOG.md index 6a3e046..f7b829f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,5 +7,15 @@ and this project adheres to the [Haskell Package Versioning Policy](https://pvp.haskell.org/). ## Unreleased +### Added +- filters probably ??? maybe searching + +### Changed +- change how the exe works + +## 0.1.0.0 - 2022-08-20 +### Added +- support for both normal and void tag +- attribute parsing +- removal of invalid tag names -## 0.1.0.0 - YYYY-MM-DD diff --git a/README.md b/README.md index 6c6798a..809a099 100644 --- a/README.md +++ b/README.md @@ -1 +1,6 @@ # boba + +pop pop thingies +in other words an absurdly simple :sparkles: **_html_** :sparkles: parser + +the program currently takes std, processes it, outputs it to stdout as a haskell type, and then as a string diff --git a/app/Main.hs b/app/Main.hs index de1c1ab..e74382f 100644 --- a/app/Main.hs +++ b/app/Main.hs @@ -1,6 +1,12 @@ module Main where import Lib +import Parsing +import Types main :: IO () -main = someFunc +main = do + content <- fmap (concat . lines) getContents + print (stringToHTML content) + putStrLn "" + print (htmlToString $ stringToHTML content) diff --git a/boba.cabal b/boba.cabal index 3d71704..a904d63 100644 --- a/boba.cabal +++ b/boba.cabal @@ -6,12 +6,12 @@ cabal-version: 1.12 name: boba version: 0.1.0.0 -description: Please see the README on GitHub at -homepage: https://github.com/githubuser/boba#readme -bug-reports: https://github.com/githubuser/boba/issues -author: Author name here -maintainer: example@example.com -copyright: 2022 Author name here +description: Please see the README on GitLab at +homepage: https://gitlab.com/yaemiku/boba#readme +bug-reports: https://gitlab.com/yaemiku/boba/issues +author: Mikołaj Kubiczek +maintainer: me@yaemiku.dev +copyright: 2022 yaemiku license: BSD3 license-file: LICENSE build-type: Simple @@ -19,13 +19,11 @@ extra-source-files: README.md CHANGELOG.md -source-repository head - type: git - location: https://github.com/githubuser/boba - library exposed-modules: Lib + Parsing + Types other-modules: Paths_boba hs-source-dirs: @@ -34,7 +32,7 @@ library base >=4.7 && <5 default-language: Haskell2010 -executable boba-exe +executable boba main-is: Main.hs other-modules: Paths_boba diff --git a/package.yaml b/package.yaml index c9a5bd8..00ae25e 100644 --- a/package.yaml +++ b/package.yaml @@ -1,14 +1,16 @@ -name: boba -version: 0.1.0.0 -github: "githubuser/boba" -license: BSD3 -author: "Author name here" -maintainer: "example@example.com" -copyright: "2022 Author name here" +name: boba +version: 0.1.0.0 +license: BSD3 +author: "Mikołaj Kubiczek" +maintainer: "me@yaemiku.dev" +copyright: "2022 yaemiku" +description: Please see the README on GitLab at +homepage: https://gitlab.com/yaemiku/boba#readme +bug-reports: https://gitlab.com/yaemiku/boba/issues extra-source-files: -- README.md -- CHANGELOG.md + - README.md + - CHANGELOG.md # Metadata used when publishing your package # synopsis: Short description of your package @@ -17,32 +19,31 @@ extra-source-files: # To avoid duplicated efforts in documentation and dealing with the # complications of embedding Haddock markup inside cabal files, it is # common to point users to the README.md file. -description: Please see the README on GitHub at dependencies: -- base >= 4.7 && < 5 + - base >= 4.7 && < 5 library: source-dirs: src executables: - boba-exe: - main: Main.hs - source-dirs: app + boba: + main: Main.hs + source-dirs: app ghc-options: - - -threaded - - -rtsopts - - -with-rtsopts=-N + - -threaded + - -rtsopts + - -with-rtsopts=-N dependencies: - - boba + - boba tests: boba-test: - main: Spec.hs - source-dirs: test + main: Spec.hs + source-dirs: test ghc-options: - - -threaded - - -rtsopts - - -with-rtsopts=-N + - -threaded + - -rtsopts + - -with-rtsopts=-N dependencies: - - boba + - boba diff --git a/src/Lib.hs b/src/Lib.hs index d36ff27..ea5b792 100644 --- a/src/Lib.hs +++ b/src/Lib.hs @@ -1,6 +1,15 @@ -module Lib - ( someFunc - ) where +module Lib where -someFunc :: IO () -someFunc = putStrLn "someFunc" +import Parsing +import Types + +htmlToString :: HTML -> String +htmlToString (Text s) = s +htmlToString (Node (Normal t) a c) = "<" ++ t ++ concatMap attrToString a ++ ">" ++ concatMap htmlToString c ++ "" +htmlToString (Node (Void t) a _) = "<" ++ t ++ concatMap attrToString a ++ ">" +htmlToString (Node (Invalid t) _ _) = "" +htmlToString (Root c) = concatMap htmlToString c + +attrToString :: Attr -> String +attrToString (K k) = ' ' : k +attrToString (KV k v) = ' ' : k ++ "=\"" ++ v ++ "\"" diff --git a/src/Parsing.hs b/src/Parsing.hs new file mode 100644 index 0000000..683c705 --- /dev/null +++ b/src/Parsing.hs @@ -0,0 +1,94 @@ +module Parsing where + +import Data.Char (isSpace) +import Data.Function (on) +import Data.List (groupBy) +import Types + +stringToHTML :: String -> HTML +stringToHTML s = let (_, _, a, _) = recurseHTMLString (stringToTags s, [], [], []) in reverseHTML $ Root a + +recurseHTMLString :: ([ParsedTag], [ParsedTag], [HTML], [[HTML]]) -> ([ParsedTag], [ParsedTag], [HTML], [[HTML]]) +recurseHTMLString ([], _, a, b) = ([], [], a, b) +recurseHTMLString (TextTag text : xs, ys, node, nodes) = recurseHTMLString (xs, ys, Text (trim text) : node, nodes) +recurseHTMLString (ClosingTag t : xs, ys, node, nodes) = + let z : zs = ys + in recurseHTMLString (xs, zs, Node t (attrs z) node : head nodes, tail nodes) +recurseHTMLString (x : xs, ys, node, nodes) = case tag x of + Normal _ -> recurseHTMLString (xs, x : ys, [], node : nodes) + Invalid _ -> recurseHTMLString (xs, ys, node, nodes) + t -> recurseHTMLString (xs, ys, Node t (attrs x) [] : node, nodes) + +reverseHTML :: HTML -> HTML +reverseHTML (Root c) = Root (reverse $ map reverseHTML c) +reverseHTML (Node t a c) = Node t a (reverse $ map reverseHTML c) +reverseHTML node = node + +stringToTags :: String -> [ParsedTag] +stringToTags s = + filter validTag $ + map + (pairToTag . foldr tagFolding ("", 0)) + ( groupBy tagGrouping $ + zip s $ + zipWith max <$> drop 1 <*> map (* 2) $ + map (\x -> if x == 0 then 0 else 1) $ + zipWith min (scanl normalTagScanning 0 s) (reverse $ scanl reverseTagScanning 0 $ reverse s) + ) + +validTag :: ParsedTag -> Bool +validTag (TextTag _) = True +validTag t = case tag t of + Invalid _ -> False + _ -> True + +pairToTag :: (String, Int) -> ParsedTag +pairToTag (s, 0) = TextTag s +pairToTag (s, _) = + let (tag : attrs) = words $ contents s + in if head tag == '/' + then case getTag $ tail tag of + Normal v -> ClosingTag $ Normal v + t -> ClosingTag $ Invalid $ name t + else OpeningTag (getTag tag) $ map extractAttrs attrs + +extractAttrs :: String -> Attr +extractAttrs s = + if '=' `elem` s + then let (k, v) = break (== '=') s in KV k $ if (||) <$> (== "'") <*> (== "\"") $ take 1 v then contents v else v + else K s + +tagScanning :: Int -> Char -> Int +tagScanning 1 '\'' = 2 +tagScanning 2 '\'' = 1 +tagScanning 1 '"' = 3 +tagScanning 3 '"' = 1 +tagScanning 1 '`' = 4 +tagScanning 4 '`' = 1 +tagScanning n _ = n + +normalTagScanning :: Int -> Char -> Int +normalTagScanning 0 '<' = 1 +normalTagScanning 1 '>' = 0 +normalTagScanning a b = tagScanning a b + +reverseTagScanning :: Int -> Char -> Int +reverseTagScanning 0 '>' = 1 +reverseTagScanning 1 '<' = 0 +reverseTagScanning a b = tagScanning a b + +tagFolding :: (Char, Int) -> (String, Int) -> (String, Int) +tagFolding (c, i) (s, _) = (c : s, i) + +tagGrouping :: (Char, Int) -> (Char, Int) -> Bool +tagGrouping (_, a) (_, b) = (0 < a && a < b) || (a == 0 && a == b) + +contents :: String -> String +contents "" = "" +contents [c] = "" +contents s = init . tail $ s + +trim :: String -> String +trim = f . f + where + f = reverse . dropWhile isSpace \ No newline at end of file diff --git a/src/Types.hs b/src/Types.hs new file mode 100644 index 0000000..efc3e19 --- /dev/null +++ b/src/Types.hs @@ -0,0 +1,134 @@ +module Types where + +import Data.Char (toLower) +import Data.List (find) +import qualified Data.Maybe + +data HTML = Text String | Node {htmlType :: Tag, attributes :: [Attr], children :: [HTML]} | Root {ch :: [HTML]} deriving (Eq, Show, Read) + +data ParsedTag = OpeningTag {tag :: Tag, attrs :: [Attr]} | ClosingTag {tag :: Tag} | TextTag {text :: String} deriving (Eq, Show, Read) + +data Attr = K String | KV String String deriving (Eq, Show, Read) + +data Tag = Normal {name :: String} | Void {name :: String} | Invalid {name :: String} deriving (Eq, Show, Read) + +getTag :: String -> Tag +getTag t = Data.Maybe.fromMaybe (Invalid $ map toLower t) (find ((== map toLower t) . name) tags) + +tags :: [Tag] +tags = + [ Void "!doctype", + Normal "a", + Normal "abbr", + Normal "address", + Void "area", + Normal "article", + Normal "aside", + Normal "audio", + Normal "b", + Void "base", + Normal "bdi", + Normal "bdo", + Normal "blockquote", + Normal "body", + Void "br", + Normal "button", + Normal "canvas", + Normal "caption", + Normal "cite", + Normal "code", + Void "col", + Normal "colgroup", + Normal "data", + Normal "datalist", + Normal "dd", + Normal "del", + Normal "details", + Normal "dfn", + Normal "dialog", + Normal "div", + Normal "dl", + Normal "dt", + Normal "em", + Void "embed", + Normal "fieldset", + Normal "figure", + Normal "footer", + Normal "form", + Normal "h1", + Normal "h2", + Normal "h3", + Normal "h4", + Normal "h5", + Normal "h6", + Normal "head", + Normal "header", + Normal "hgroup", + Void "hr", + Normal "html", + Normal "i", + Normal "iframe", + Void "img", + Void "input", + Normal "ins", + Normal "kbd", + Normal "keygen", + Normal "label", + Normal "legend", + Normal "li", + Void "link", + Normal "main", + Normal "map", + Normal "mark", + Normal "menu", + Void "menuitem", + Void "meta", + Normal "meter", + Normal "nav", + Normal "noscript", + Normal "object", + Normal "ol", + Normal "optgroup", + Normal "option", + Normal "output", + Normal "p", + Void "param", + Normal "pre", + Normal "progress", + Normal "q", + Normal "rb", + Normal "rp", + Normal "rt", + Normal "rtc", + Normal "ruby", + Normal "s", + Normal "samp", + Normal "script", + Normal "section", + Normal "select", + Normal "small", + Void "source", + Normal "span", + Normal "strong", + Normal "style", + Normal "sub", + Normal "summary", + Normal "sup", + Normal "table", + Normal "tbody", + Normal "td", + Normal "template", + Normal "textarea", + Normal "tfoot", + Normal "th", + Normal "thead", + Normal "time", + Normal "title", + Normal "tr", + Void "track", + Normal "u", + Normal "ul", + Normal "var", + Normal "video", + Void "wbr" + ] \ No newline at end of file diff --git a/stack.yaml b/stack.yaml index a03c39e..c69f502 100644 --- a/stack.yaml +++ b/stack.yaml @@ -3,3 +3,5 @@ resolver: packages: - . + +system-ghc: true