#!/usr/local/bin/runhugs module Main(main) where -- change above to runhugs pathname, if wrong. import Pretty import System(getArgs, exitWith, ExitCode(ExitSuccess,ExitFailure)) import GenParse(Parser,ap,rep,rep1,opt,sat,lit,eof,residue) -- import Char -- HBC versionString = "halfp.gs Time-stamp: <99/02/03 11:24:11 peter>\n" {- Program to layout half programs in a beaurocratic style. The following incantations may build a compiled version, if you this file is called halfpp.gs, and you have hbc installed. (You also need GenParse.lhs.) sed -e '/^#!/d' -e '/HBC/s/^-- //' < halfpp.gs > halfpp.hs hbcmake halfpp But you might also need a copy of Peyton-Jones's Pretty.lhs, maybe in /usr/local/share/hugs/lib/hugs/Pretty.lhs The one I use is in my root directory in Chalmers. I think someone has recently implemented another pretty printing module, based on an idea of Phil Wadler's. The program was written partly to get acquainted with pretty printing. It is very hacky, and (here and there) rather broken. -} ----- say hi to the customer fill = fsep . map text . words . unlines -- doesn't work like I expect.. verb = vcat . map text help = vcat [ blurb , usage , options , commentBlurb , text "\n" ] blurb = fill [ "Experimental Filter that reads free-form half code on stdin" , "and rewrites it in a beaurocratic style on stdout." , "Doesn't tackle decl, or #include.\n"] usage = text "Usage" $$ nest 20 (text " halfp [-V|--version|-h|--help|[-w|--width]|[-n] ]* ") options = text "Options" $$ nest 20 optiontab where optiontab = verb [opth, optv, optl, optn] opth = "-h or --help prints this message (implies -n)" optv = "-V or --version gives the version time-stamp" optl = "-w (or --width) sets the page width" optn = "-n means don't do anything" commentBlurb = text "Comment handling" $$ nest 20 (vcat [ commenthd , comment1 , comment_example , comment2 ]) commenthd = vcat [ text "All comments are ignored, except for two kinds."] comment1 = fill [ " First, those which begin and end " , " with *two* minus signs, as in \" {-- blah --} \". " , " However, such a comment will cause a parser error " , " UNLESS it is just in front of the left hand side" , " of a definition. " ] comment_example = verb [ " let { {-- blah blah foo blah --}" , " foo = ... , " , " {-- yadda yadda goo yadda --} " , " goo = ... } in ... " ] comment2 = fill [ "Second, the magic comment \"{-*-}\" is treated " , " as a marker which you may use to delimit " , " an expression whose layout you wish " , " to retain: as in " , " \"{-*-} .. nice expression {-*-}\". " , " Please experiment with this to see if it is " , " useful. Note that a piece of text marked in this" , " way must be an *expression*. (Not a definition, or " , " some random agrammatical fragment.) " ] ------------------------------- main :: IO () main = do args <- getArgs (lineLength,exec) <- pa args if not exec then exitWith ExitSuccess else skip contents <- getContents let Just((ok,doc),[]) = ap realtop (scan contents) putdoc lineLength doc -- putStr (render' lineLength doc) putStr "\n" exitWith (if ok then ExitSuccess else ExitFailure 1) where skip = return () putdoc ll = putStr . render' ll pa args = process_flags args 76 True -- defaults. -- always succeeds, consuming entire input. Extremely rude error behaviour. realtop :: Parser Token (Bool,Doc) realtop = do es <- reps defp rest <- residue if null rest then return (True, top2doc es $$ text "") else return (False, top2doc es $$ text "unconsumed: " $$ fsep (map text rest)) process_flags :: [ String ] -> Int -> Bool -> IO (Int,Bool) process_flags args = let pf = process_flags in case args of [] -> \ n f -> return (n,f) ("-h":args) -> \ n f -> do putStr (show help) pf args n False ("--help":args) -> pf ("-h":args) ("-V":args) -> \ n f -> do putStr versionString pf args n f ("--version":args) -> pf ("-V":args) ("-n":args) -> \ n f -> pf args n False ("--noexec":args) -> pf ("-n":args) ("-w":a:args) -> \ n f -> do let n :: Int ; n = read a pf args n f ("--width":a:args ) -> pf ("-w":a:args) (a:args) -> \ n f -> do putStr ("unrecognised: "++a++"\n") pf args n False -- if we do it this way, we get to set the length render' w = fullRender PageMode w (0.33) string_txt "" string_txt (Chr c) = (c :) string_txt (Str s1) = (s1 ++) string_txt (PStr s1) = (s1 ++) -- `abstract' syntax data E = Ap E E | ApM E [E] | Var String | Data [ (String, [(String,E)]) ] | Case E [ (String,[String],E) ] | Con String | Fun String E E | FunM [(String,E)] E | Abs String E | AbsM [ String ] E | Sig [ (String,E) ] | Struct [ (String, E) ] | Sel E String | Set | Type | TheoryT | Theory [ (String, String, E, E) ] | Let (String, String, E, E) E | LetM [(String, String, E, E)] E | Goal String | Nice String deriving Show ----------------------------------- lexer ---------------------------- -- adapt this to stamp lexemes with their line and column? type Token = String scan :: String -> [ Token ] scan [] = [] scan (' ':cs) = scan cs scan ('\t':cs) = scan cs scan ('\n':cs) = scan cs scan ('"':cs) = gather "\"" "\"" cs scan ('{':'-':'*':'-':'}':cs) = ("!{-*-}\n"++t2++"{-*-}"):ts where t:ts = gather "{-*-}" "{-*-}" cs n = length t t1 = drop 5 (take (n - 5) t) t2 = unlines (indent (lines t1)) --ul [l] = l --ul (l:ls) = l ++ "\n" ++ ul ls scan ('{':'-':'-':cs) = gather "{--" "--}" cs scan ('{':'-':cs) = tail (gather "{-" "-}" cs ) scan ('-':'>':cs) = "->" : scan cs scan (c:cs) | c `elem` ".{}=,():\\" = [c]:scan cs scan ('[':cs) = bal cs ('[':) 1 scan (c:cs) | (isAlpha(c) || c == '$' || c == '#') = scan_id cs (c:) scan (c:cs) = if isAscii c then [c]:scan cs else scan cs scan_id :: String -> (String -> String) -> [ Token ] scan_id cs b = case cs of [] -> f (b []) : scan cs (c:cs') -> if (isAlphanum c || c == '\'' || c == '_') then scan_id cs' (b . (c:)) else f (b []) : scan cs -- NB: not cs' where f s = if s `my_elem` keywords then '*':s else s bal :: String -> (String -> String) -> Int -> [ Token ] bal cs = case cs of [] -> bal "]" (c:cs') -> \ b -> case c of '[' -> \n -> bal cs' (b . (c:)) (n+1) ']' -> \n -> if n == 1 then (b[c]): scan cs' else bal cs' (b . (c:)) (n-1) _ -> bal cs' (b . (c:)) gather :: String -> String -> String -> [ Token ] gather init finit str = f str (init++) where emit b = b finit tack c b = b . (c:) (ff:fr) = finit frn = length fr f s = case s of [] -> emit f -- f finit (x:xs) -> if ff == x && fr `prefix` xs then \ b -> emit b : scan (drop frn xs) else f xs . tack x white = " \t\n" gatherWhile :: (a -> Bool) -> [a] -> ([a],[a]) gatherWhile p cs = f cs init where f cs = case cs of (c:cs') | p c -> f cs' . tack c _ -> emit cs init b = b tack c b = b . (c:) emit cs b = (b[], cs) -- we sort the keywords for the sake of a faster lookup. keywords = sort [ "data", "case", "of", "sig", "struct" , "Set","Type" , "let", "in", "theory", "Theory" ] -- see whether something occurs in a sorted list. my_elem x xs = case xs of [] -> False (y:ys) -> if y < x then my_elem x ys else y == x -- a convenience so you can say (take n spaces) to get n spaces. spaces = repeat ' ' -- the amount of white space at the start of a line. indentation :: String -> (Int,String) indentation line = f line 0 where f l = case l of (' ':cs) -> f cs . (+1) ('\t':cs) -> f cs . tab _ -> \n->(n,l) tab n = n + (8 - n `rem` 8) -- remove common indentation, and generally tidy up a bunch of lines. indent :: [String] -> [String] indent [] = [] indent ls = ls' -- trim null (map (drop n) ls') where ils = [ indentation l | l <- ls ] ils'= trim (\(i,l)->null l) ils n = -- smallest indentation foldr min 1000 [ n | (n,l) <- ils', not (null l) ] ls' = [ take (i-n) spaces ++ l | (i,l) <- ils' ] -- white c = c `elem` " \t" trim p = let revto [] = \x->x revto (y:ys) = revto ys . (y:) reverse = (`revto` []) f = reverse . dropWhile p in f . f -- the list of `breaks' of the input, where a break is a pair of -- strings whose concatenation equals the input, and the first -- character of the second string satisfies the predicate. brk :: (a -> Bool) -> [a] -> [([a],[a])] brk p str = f str (\x->x) where f xs b = case xs of [] -> [] (x:xs') | p x -> (b[],xs): f xs' (b . (x:)) (x:xs') | True -> f xs' (b . (x:)) prefix [] xs = True prefix (x:xs) [] = False prefix (x:xs) (y:ys) = x == y && prefix xs ys -------------------------------- parsing ---------------------------- -- general parsing stuff {- type Token = String newtype Parser a = P ([Token] -> Maybe (a,[Token])) ap (P f) s = f s instance Monad Parser where x >>= k = P (\s -> case ap x s of Nothing -> Nothing Just (x,s') -> ap (k x) s' ) return x = P (\s -> Just (x,s)) instance MonadZero Parser where zero = P (\s -> Nothing ) instance MonadPlus Parser where (P f) ++ (P g) = P (\s -> let x = f s in case x of { Nothing -> g s ; _ -> x }) sat :: (Token -> Bool) -> Parser Token sat p = P (\ts-> case ts of (t:ts) -> if p t then Just (t,ts) else Nothing _ -> Nothing) eof :: Parser () eof = P (\ss-> case ss of [] -> Just((),ss) ; _ -> Nothing) residue :: Parser [Token] residue = P (\s-> Just (s,[])) rep, rep1 :: Parser a -> Parser [a] opt :: Parser [a] -> Parser [a] rep p = opt (rep1 p) rep1 p = do x <- p xs <- rep p return (x:xs) opt p = p ++ return [] lit :: String -> Parser Token lit = sat . (==) -} ---------------- parsing stuff specific to half -- reps (maybe empty) lists of p's separated by commas reps = opt . reps1 reps1 p = do x <- p xs <- rep (do lit "," ; p ) return (x:xs) type Parser' a = Parser Token a identifier, constructor :: Parser' String identifier = sat (\(c:cs)-> isAlpha c) constructor = sat (\(c:cs)-> c == '$') goal = sat (\(c:cs)-> c == '[') comment = sat (\(c:cs)-> c == '{') leave = sat (\(c:cs)-> c == '!') quotedp = sat (\(c:cs)-> c == '"') -- constructs that are bracketed in some way brackg :: (Parser' a,Parser' b) -> Parser' c -> Parser' c brackg (bra,ket) p = do { bra ; x <- p ; ket ; return x } curly = brackg (lit "{",lit "}") include_linep = do lit "#include" ; quotedp defp :: Parser' (String, String, E, E ) defp = do -- opt include_linep -- allow an include line anywhere cmt <- comment ++ return "" nm <- identifier tl <- opt telep lit "=" ; body <- rhsp lit ":" ; t <- rhsp return (nm, cmt, foldr (\(nm,ty)-> Fun nm ty) t tl, foldr (\(nm,ty)-> Abs nm) body tl) dclp :: Parser' (String,E) dclp = do nm <- identifier lit ":" ; t <- rhsp return (nm,t) -- subatomic expressions : no proper prefix accepted sexp = concat [ identifier >>= return . Var , constructor >>= return . Con , lit "*Set" >> return Set , lit "*Type" >> return Type , lit "*Theory" >> return TheoryT , brackg (lit "(", lit ")") rhsp , goal >>= return . Goal , nicep ] -- atomic expressions : can have selections aexp = do e <- sexp nms <- rep (lit "." >> identifier) return (foldl Sel e nms) -- molecular expressions : can have applications mexp = do e <- aexp es <- rep aexp return (foldl Ap e es) -- right hand side of a definition. (Mustn't fail.) rhsp = concat [ mexp , do abs <- absp e <- rhsp return (foldr Abs e abs) , do dcs <- funp e <- rhsp return (foldr (\(nm,ty)->Fun nm ty) e dcs) , do dfs <- letp e <- rhsp return (foldr Let e dfs) , sigp , structp , datap , theoryp , casep ] -- maybe put some useful syntax error pattern here letp = brackg (lit "*let", lit "*in" ) (curly (reps defp)) absp = brackg (lit "\\" , lit "->" ) (rep identifier) funp = brackg (lit "(" , (lit ")" >> lit "->")) (reps1 dclp) nicep= leave >>= return . Nice . tail sigp = do lit "*sig" ; curly (reps sige) >>= return . Sig structp = do lit "*struct" ; curly (reps structe) >>= return . Struct datap = do lit "*data" ; curly (reps datae) >>= return . Data theoryp = do lit "*theory" ; curly (reps defp) >>= return . Theory casep = do disc <- brackg (lit "*case",lit "*of") rhsp bd <- curly (reps caseb) return (Case disc bd) sige :: Parser' (String,E) sige = dclp structe :: Parser' (String,E) structe = do nm <- identifier lit "=" ; e <- rhsp return (nm,e) caseb :: Parser' (String,[String],E) caseb = do cn <- constructor nms <- rep identifier lit "->" ; r <- rhsp return (cn,nms,r) datae :: Parser' (String,[(String,E)]) datae = do cnm <- constructor t <- telep return (cnm,t) telep :: Parser' [(String,E)] telep = opt (brackg (lit "(",lit ")") (reps1 dclp)) --------------------------------- printing ------------------------------ -- transform an expression into one in which Ap, Fun, Abs and Let -- do not occur, but only ApM, FunM, AbsM, LetM. toM e = case e of Theory z -> Theory (map g z) where g (nm,cmt,e1,e2) = (nm,cmt,toM e1,toM e2) Ap e1 e2 -> toM (ApM e1 [e2]) ApM e1 es -> f e1 es where f (ApM e es) = f e . (es++) f (Ap e1 e2) = f e1 . (e2:) f e = ApM (toM e) . map toM Sel e nm -> Sel (toM e) nm Var nm -> e Con nm -> e Data z -> Data (map (\(cn,t)->(cn,map(\(nm,e)->(nm,toM e))t)) z) Case e z -> Case (toM e) (map (\(nm,nms,b)->(nm,nms,toM b)) z) Sig z -> Sig (map (\(nm,t)->(nm,toM t)) z) Struct z -> Struct (map (\(nm,t)->(nm,toM t)) z) Fun nm d r-> toM (FunM [(nm,d)] r) FunM nmds r -> f (nmds++) r where f b (Fun nm d e) = f (b . ((nm,d):)) e f b (FunM nmds e)= f (b . (nmds++)) e f b e = FunM (map g (b [])) (toM e) g (nm,d) = (nm,toM d) Abs nm e -> toM (AbsM [nm] e) AbsM nms e-> f (nms++) e where f b (Abs nm e) = f (b . (nm:)) e f b (AbsM nms e) = f (b . (nms++)) e f b e = AbsM (b []) (toM e) Let def e -> toM (LetM [def] e) LetM defs e3 -> f (defs++) e3 where f b (Let def e3) = f (b.(def:)) e3 f b (LetM defs e3) = f (b.(defs++)) e3 f b e = LetM (map g (b [])) (toM e) g (nm,cmt,e1,e2) = (nm,cmt,toM e1,toM e2) Set -> e Type -> e TheoryT -> e Goal str -> e Nice str -> e _ -> error ("toM unimplemented "++ show e) parenthesize e = (case e of Theory z -> parens Sig z -> parens Struct z -> parens Fun nm d e -> parens FunM dcls e -> parens AbsM nms e -> parens Abs nm e -> parens ApM e es -> parens Ap e1 e2 -> parens -- Sel e nm -> parens LetM ds e -> parens Case e z -> parens _ -> \x-> x ) (toDoc e) -- transform an expression into a `Doc', so as to use the Pretty library. toDoc :: E -> Doc toDoc e = case e of Fun nm d e -> (toDoc . toM) (FunM [(nm,d)] e) FunM dcls e -> cat [ tele dcls , text "->" <+> toDoc e ] AbsM nms e -> sep [ text "\\" <+> sep (map text nms) <+> text "->" , toDoc e ] Abs nm e -> toDoc (AbsM [nm] e) ApM e es -> fsep (toDoc e :map parenthesize es) Ap e1 e2 -> toDoc (ApM e1 [e2]) Theory ds -> sep [ text "theory {" , letpart ds ] LetM ds e -> sep [ sep [ text "let {" , letpart ds ] , text "in " <> toDoc e ] Let d e -> toDoc (LetM [ d ] e) Var nm -> text nm Con nm -> text nm Case e z -> block head b where head = text "case" <+> toDoc e <+> text "of" fe (nm,nms,e) = hang (text nm <+> sep (map text nms) ) k (text "->" <+> toDoc e) fnms = map (\(nm,nms,_)->nm++concat (map (' ':) nms) ) z nmls = map length fnms k = 1 + aesth 9 3 fnms b = map fe z Data x -> block (text "data") b where nms = map (\(nm,z)->nm) x es = map (\(nm,z)->nest 0 (tele z)) x b = tab nms es Sig x -> block (text "sig") b where nms = map (\(nm,z)->nm) x es = map (\(nm,z)->text ":" <+> toDoc z) x b = tab nms es Struct x -> block (text "struct") b where nms = map (\(nm,z)->nm) x es = map (\(nm,z)->text "=" <+> toDoc z) x b = tab nms es Sel e m -> parenthesize e <> text "." <> text m Set -> text "Set" Type -> text "Type" TheoryT -> text "Theory" Goal str -> text str Nice str -> (vcat . map text . lines) str _ -> error ("toDoc unimplemented: "++show e) -- make a document for a telescope tele z = let nms = map (\(nm,e)->nm) z es = map (\(nm,e)->text ":" <+> toDoc e) z p = (take ((length nms) - 1) (repeat (text ",")))++[text " )"] k = 1 + aesth 6 3 nms b = zipWith (<>) [ nest 2 (text nm $$ nest k (text ":" <+> toDoc e) ) | (nm,e) <- z ] p b' = case b of [] -> empty -- text "" _ -> text "(" $$ sep b in b' -- make a document for an equation layeqn k (nm,cmt,ty,bd) = if cmt == "" then x else vcat [ text cmt , text "", x ] where x = lhs $$ nest (k+1) rhs lhs = text nm rhs = vcat [ text "=" <+> toDoc (toM bd) , text ":" <+> toDoc (toM ty) ] -- assumes "{" has been done. letpart ds = if (null ds) then text "}" else nest 2 b where nms = map (\(nm,cmt,t,e)->nm) ds ne = length ds commas = repeat (text ",\n") punct = take (ne - 1) commas ++ [text " }"] k = aesth 5 2 nms eqns = map (layeqn k) ds b = vcat (zipWith (<>) eqns punct) -- format a list of definitions for the top level top2doc ds = vcat b where nms = map (\(nm,cmt,t,e)->nm) ds ne = length ds commas = repeat (text ",\n") punct = take (ne - 1) commas ++ [empty] k = aesth 4 2 nms eqns = map (layeqn k) ds b = zipWith (<>) eqns punct tab nms es = let ls = map length nms lc = map text nms rc = map (nest k) es t = zipWith ($$) lc rc k = 1 + aesth 6 2 nms in t -- try to pick an aesthetically pleasing (!) split point, given a list of names aesth mx stp nms = let lls = map length nms x = sort lls k' = case x of [] -> 0 [x] -> if x < 1 + mx then x else stp (x:xs) -> f xs x f [] n = n f (x:xs)n = if x < 1 + (n+stp) `min` mx then f xs x else n in k' -- if k > 10 then 1 else k -- layout a description table block lab ds = sep ( lab <+> text "{" : ds' ) where n = length ds pnct = take (n-1) (repeat (text ",")) ++ [text " }"] ds' = if n == 0 then [ text "}" ] else map (nest 2) (zipWith (<>) ds pnct) ----------------------------------------------------------- -- utilities and administrivia. sort :: Ord a => [a]->[a] sort [] = [] sort (x:xs) = sort [ y | y <- xs, y < x ] ++ [x] ++ sort [ y | y <- xs, y > x ] {- data T = TN | TC String T T my_elem' x TN = False my_elem' x (TC y t1 t2) = if x < y then my_elem' x t1 else x == y || my_elem' x t2 sort' [] = TN sort' (x:xs) = TC x (sort' [ y | y <- xs, y < x ]) (sort' [ y | y <- xs, y > x ]) -- need a function to balance a tree. -}