got it working B)

This commit is contained in:
Nikola Kubiczek 2022-08-20 03:18:43 +02:00
parent 8c38dbe69e
commit 8f4ed393d2
Signed by: yaemiku
GPG Key ID: ADC039636B3E4AAB
9 changed files with 301 additions and 42 deletions

View File

@ -7,5 +7,15 @@ and this project adheres to the
[Haskell Package Versioning Policy](https://pvp.haskell.org/). [Haskell Package Versioning Policy](https://pvp.haskell.org/).
## Unreleased ## Unreleased
### Added
- filters probably ??? maybe searching
### Changed
- change how the exe works
## 0.1.0.0 - 2022-08-20
### Added
- support for both normal and void tag
- attribute parsing
- removal of invalid tag names
## 0.1.0.0 - YYYY-MM-DD

View File

@ -1 +1,6 @@
# boba # boba
pop pop thingies
in other words an absurdly simple :sparkles: **_html_** :sparkles: parser
the program currently takes std, processes it, outputs it to stdout as a haskell type, and then as a string

View File

@ -1,6 +1,12 @@
module Main where module Main where
import Lib import Lib
import Parsing
import Types
main :: IO () main :: IO ()
main = someFunc main = do
content <- fmap (concat . lines) getContents
print (stringToHTML content)
putStrLn ""
print (htmlToString $ stringToHTML content)

View File

@ -6,12 +6,12 @@ cabal-version: 1.12
name: boba name: boba
version: 0.1.0.0 version: 0.1.0.0
description: Please see the README on GitHub at <https://github.com/githubuser/boba#readme> description: Please see the README on GitLab at <https://gitlab.com/yaemiku/boba#readme>
homepage: https://github.com/githubuser/boba#readme homepage: https://gitlab.com/yaemiku/boba#readme
bug-reports: https://github.com/githubuser/boba/issues bug-reports: https://gitlab.com/yaemiku/boba/issues
author: Author name here author: Mikołaj Kubiczek
maintainer: example@example.com maintainer: me@yaemiku.dev
copyright: 2022 Author name here copyright: 2022 yaemiku
license: BSD3 license: BSD3
license-file: LICENSE license-file: LICENSE
build-type: Simple build-type: Simple
@ -19,13 +19,11 @@ extra-source-files:
README.md README.md
CHANGELOG.md CHANGELOG.md
source-repository head
type: git
location: https://github.com/githubuser/boba
library library
exposed-modules: exposed-modules:
Lib Lib
Parsing
Types
other-modules: other-modules:
Paths_boba Paths_boba
hs-source-dirs: hs-source-dirs:
@ -34,7 +32,7 @@ library
base >=4.7 && <5 base >=4.7 && <5
default-language: Haskell2010 default-language: Haskell2010
executable boba-exe executable boba
main-is: Main.hs main-is: Main.hs
other-modules: other-modules:
Paths_boba Paths_boba

View File

@ -1,14 +1,16 @@
name: boba name: boba
version: 0.1.0.0 version: 0.1.0.0
github: "githubuser/boba" license: BSD3
license: BSD3 author: "Mikołaj Kubiczek"
author: "Author name here" maintainer: "me@yaemiku.dev"
maintainer: "example@example.com" copyright: "2022 yaemiku"
copyright: "2022 Author name here" description: Please see the README on GitLab at <https://gitlab.com/yaemiku/boba#readme>
homepage: https://gitlab.com/yaemiku/boba#readme
bug-reports: https://gitlab.com/yaemiku/boba/issues
extra-source-files: extra-source-files:
- README.md - README.md
- CHANGELOG.md - CHANGELOG.md
# Metadata used when publishing your package # Metadata used when publishing your package
# synopsis: Short description of your package # synopsis: Short description of your package
@ -17,32 +19,31 @@ extra-source-files:
# To avoid duplicated efforts in documentation and dealing with the # To avoid duplicated efforts in documentation and dealing with the
# complications of embedding Haddock markup inside cabal files, it is # complications of embedding Haddock markup inside cabal files, it is
# common to point users to the README.md file. # common to point users to the README.md file.
description: Please see the README on GitHub at <https://github.com/githubuser/boba#readme>
dependencies: dependencies:
- base >= 4.7 && < 5 - base >= 4.7 && < 5
library: library:
source-dirs: src source-dirs: src
executables: executables:
boba-exe: boba:
main: Main.hs main: Main.hs
source-dirs: app source-dirs: app
ghc-options: ghc-options:
- -threaded - -threaded
- -rtsopts - -rtsopts
- -with-rtsopts=-N - -with-rtsopts=-N
dependencies: dependencies:
- boba - boba
tests: tests:
boba-test: boba-test:
main: Spec.hs main: Spec.hs
source-dirs: test source-dirs: test
ghc-options: ghc-options:
- -threaded - -threaded
- -rtsopts - -rtsopts
- -with-rtsopts=-N - -with-rtsopts=-N
dependencies: dependencies:
- boba - boba

View File

@ -1,6 +1,15 @@
module Lib module Lib where
( someFunc
) where
someFunc :: IO () import Parsing
someFunc = putStrLn "someFunc" import Types
htmlToString :: HTML -> String
htmlToString (Text s) = s
htmlToString (Node (Normal t) a c) = "<" ++ t ++ concatMap attrToString a ++ ">" ++ concatMap htmlToString c ++ "</" ++ t ++ ">"
htmlToString (Node (Void t) a _) = "<" ++ t ++ concatMap attrToString a ++ ">"
htmlToString (Node (Invalid t) _ _) = ""
htmlToString (Root c) = concatMap htmlToString c
attrToString :: Attr -> String
attrToString (K k) = ' ' : k
attrToString (KV k v) = ' ' : k ++ "=\"" ++ v ++ "\""

94
src/Parsing.hs Normal file
View File

@ -0,0 +1,94 @@
module Parsing where
import Data.Char (isSpace)
import Data.Function (on)
import Data.List (groupBy)
import Types
stringToHTML :: String -> HTML
stringToHTML s = let (_, _, a, _) = recurseHTMLString (stringToTags s, [], [], []) in reverseHTML $ Root a
recurseHTMLString :: ([ParsedTag], [ParsedTag], [HTML], [[HTML]]) -> ([ParsedTag], [ParsedTag], [HTML], [[HTML]])
recurseHTMLString ([], _, a, b) = ([], [], a, b)
recurseHTMLString (TextTag text : xs, ys, node, nodes) = recurseHTMLString (xs, ys, Text (trim text) : node, nodes)
recurseHTMLString (ClosingTag t : xs, ys, node, nodes) =
let z : zs = ys
in recurseHTMLString (xs, zs, Node t (attrs z) node : head nodes, tail nodes)
recurseHTMLString (x : xs, ys, node, nodes) = case tag x of
Normal _ -> recurseHTMLString (xs, x : ys, [], node : nodes)
Invalid _ -> recurseHTMLString (xs, ys, node, nodes)
t -> recurseHTMLString (xs, ys, Node t (attrs x) [] : node, nodes)
reverseHTML :: HTML -> HTML
reverseHTML (Root c) = Root (reverse $ map reverseHTML c)
reverseHTML (Node t a c) = Node t a (reverse $ map reverseHTML c)
reverseHTML node = node
stringToTags :: String -> [ParsedTag]
stringToTags s =
filter validTag $
map
(pairToTag . foldr tagFolding ("", 0))
( groupBy tagGrouping $
zip s $
zipWith max <$> drop 1 <*> map (* 2) $
map (\x -> if x == 0 then 0 else 1) $
zipWith min (scanl normalTagScanning 0 s) (reverse $ scanl reverseTagScanning 0 $ reverse s)
)
validTag :: ParsedTag -> Bool
validTag (TextTag _) = True
validTag t = case tag t of
Invalid _ -> False
_ -> True
pairToTag :: (String, Int) -> ParsedTag
pairToTag (s, 0) = TextTag s
pairToTag (s, _) =
let (tag : attrs) = words $ contents s
in if head tag == '/'
then case getTag $ tail tag of
Normal v -> ClosingTag $ Normal v
t -> ClosingTag $ Invalid $ name t
else OpeningTag (getTag tag) $ map extractAttrs attrs
extractAttrs :: String -> Attr
extractAttrs s =
if '=' `elem` s
then let (k, v) = break (== '=') s in KV k $ if (||) <$> (== "'") <*> (== "\"") $ take 1 v then contents v else v
else K s
tagScanning :: Int -> Char -> Int
tagScanning 1 '\'' = 2
tagScanning 2 '\'' = 1
tagScanning 1 '"' = 3
tagScanning 3 '"' = 1
tagScanning 1 '`' = 4
tagScanning 4 '`' = 1
tagScanning n _ = n
normalTagScanning :: Int -> Char -> Int
normalTagScanning 0 '<' = 1
normalTagScanning 1 '>' = 0
normalTagScanning a b = tagScanning a b
reverseTagScanning :: Int -> Char -> Int
reverseTagScanning 0 '>' = 1
reverseTagScanning 1 '<' = 0
reverseTagScanning a b = tagScanning a b
tagFolding :: (Char, Int) -> (String, Int) -> (String, Int)
tagFolding (c, i) (s, _) = (c : s, i)
tagGrouping :: (Char, Int) -> (Char, Int) -> Bool
tagGrouping (_, a) (_, b) = (0 < a && a < b) || (a == 0 && a == b)
contents :: String -> String
contents "" = ""
contents [c] = ""
contents s = init . tail $ s
trim :: String -> String
trim = f . f
where
f = reverse . dropWhile isSpace

134
src/Types.hs Normal file
View File

@ -0,0 +1,134 @@
module Types where
import Data.Char (toLower)
import Data.List (find)
import qualified Data.Maybe
data HTML = Text String | Node {htmlType :: Tag, attributes :: [Attr], children :: [HTML]} | Root {ch :: [HTML]} deriving (Eq, Show, Read)
data ParsedTag = OpeningTag {tag :: Tag, attrs :: [Attr]} | ClosingTag {tag :: Tag} | TextTag {text :: String} deriving (Eq, Show, Read)
data Attr = K String | KV String String deriving (Eq, Show, Read)
data Tag = Normal {name :: String} | Void {name :: String} | Invalid {name :: String} deriving (Eq, Show, Read)
getTag :: String -> Tag
getTag t = Data.Maybe.fromMaybe (Invalid $ map toLower t) (find ((== map toLower t) . name) tags)
tags :: [Tag]
tags =
[ Void "!doctype",
Normal "a",
Normal "abbr",
Normal "address",
Void "area",
Normal "article",
Normal "aside",
Normal "audio",
Normal "b",
Void "base",
Normal "bdi",
Normal "bdo",
Normal "blockquote",
Normal "body",
Void "br",
Normal "button",
Normal "canvas",
Normal "caption",
Normal "cite",
Normal "code",
Void "col",
Normal "colgroup",
Normal "data",
Normal "datalist",
Normal "dd",
Normal "del",
Normal "details",
Normal "dfn",
Normal "dialog",
Normal "div",
Normal "dl",
Normal "dt",
Normal "em",
Void "embed",
Normal "fieldset",
Normal "figure",
Normal "footer",
Normal "form",
Normal "h1",
Normal "h2",
Normal "h3",
Normal "h4",
Normal "h5",
Normal "h6",
Normal "head",
Normal "header",
Normal "hgroup",
Void "hr",
Normal "html",
Normal "i",
Normal "iframe",
Void "img",
Void "input",
Normal "ins",
Normal "kbd",
Normal "keygen",
Normal "label",
Normal "legend",
Normal "li",
Void "link",
Normal "main",
Normal "map",
Normal "mark",
Normal "menu",
Void "menuitem",
Void "meta",
Normal "meter",
Normal "nav",
Normal "noscript",
Normal "object",
Normal "ol",
Normal "optgroup",
Normal "option",
Normal "output",
Normal "p",
Void "param",
Normal "pre",
Normal "progress",
Normal "q",
Normal "rb",
Normal "rp",
Normal "rt",
Normal "rtc",
Normal "ruby",
Normal "s",
Normal "samp",
Normal "script",
Normal "section",
Normal "select",
Normal "small",
Void "source",
Normal "span",
Normal "strong",
Normal "style",
Normal "sub",
Normal "summary",
Normal "sup",
Normal "table",
Normal "tbody",
Normal "td",
Normal "template",
Normal "textarea",
Normal "tfoot",
Normal "th",
Normal "thead",
Normal "time",
Normal "title",
Normal "tr",
Void "track",
Normal "u",
Normal "ul",
Normal "var",
Normal "video",
Void "wbr"
]

View File

@ -3,3 +3,5 @@ resolver:
packages: packages:
- . - .
system-ghc: true