-- -- File Name -- dnet.hs -- -- Author / Date -- Jude Nagurney / 02002-02003 -- -- Description -- Searching discrimination net space... -- -- Copyright and License Info -- Copyright (C) 02002-02003 Jude Nagurney -- -- This program is free software; you can redistribute it and/or modify -- it under the terms of the GNU General Public License as published by -- the Free Software Foundation; either version 2, or (at your option) -- any later version. -- -- This program is distributed in the hope that it will be useful, -- but WITHOUT ANY WARRANTY; without even the implied warranty of -- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -- GNU General Public License for more details. -- -- You should have received a copy of the GNU General Public License -- along with this program; if not, write to the Free Software -- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. -- -- The original author can be contacted at gazook@atdot.org, -- Jude Nagurney, 1001 North Randolph Street, #622, Arlington, -- VA, 22201-5607, USA. -- -- Change Log -- First Deployed ??? -- 02-27-02003: Added convertHtmlChar and convertHtmlString functions -- 06-17-02003: Added "Last built at" line. -- ================================= import System(getArgs) import MyUtils(getCmdOutput) -- ----------------- -- Utility Functions doFirst :: [a] -> (a->Bool) -> (a->a) -> [a] doFirst [] _ _ = [] doFirst (a:rest) doTest doMod = if (doTest a) then ((doMod a):rest) else (a:(doFirst rest doTest doMod)) isMember :: Eq a => a -> [a] -> Bool isMember x [] = False isMember x (a:rest) = if x == a then True else isMember x rest -- The empty subset is a subset of every set.... isSubset :: Eq a => [a] -> [a] -> Bool isSubset [] b = True isSubset (a:rest) b = (isMember a b) && (isSubset rest b) removeItem :: Eq a => [a] -> a -> [a] removeItem [] b = [] removeItem (a:rest) b = if a == b then removeItem rest b else (a:(removeItem rest b)) isNL :: Char -> Bool isNL '\n' = True isNL _ = False isNotNL :: Char -> Bool isNotNL '\n' = False isNotNL _ = True addComma :: String -> String -> String addComma a "" = a addComma a b = a ++ "," ++ b -- TODO: Get rid of these character-specific functions, and -- come up with a better way to test for "no commas or plus signs in s for -- the last case of readSearchWord isComma :: Char -> Bool isComma ',' = True isComma _ = False isPlusSign :: Char -> Bool isPlusSign '+' = True isPlusSign _ = False matchChar :: Char -> Char -> Bool matchChar a b = a == b isSearchWordDelim :: Char -> Bool isSearchWordDelim s = isComma s || isPlusSign s notSearchWordDelim :: Char -> Bool notSearchWordDelim s = not (isSearchWordDelim s) -- A utility for escaping special HTML characters -- There area lot more items I could add here, but "<" and ">" are the ones that caused problems recently convertHtmlChar :: Char -> String convertHtmlChar '<' = "<" convertHtmlChar '>' = ">" convertHtmlChar '&' = "&" convertHtmlChar '"' = """ convertHtmlChar x = (x:[]) convertHtmlString :: String -> String convertHtmlString (c:rest) = (convertHtmlChar c) ++ (convertHtmlString rest) convertHtmlString [] = "" -- ------------------- -- The discnet goodies data SearchWord = SWord String deriving (Eq, Ord) printSearchWord :: SearchWord -> String printSearchWord (SWord a) = a printSearchWordList :: [SearchWord] -> String printSearchWordList [] = "" printSearchWordList [a] = show a printSearchWordList (a:rest) = show a ++ " " ++ show rest instance Show SearchWord where show a = printSearchWord a showList a = showString (printSearchWordList a) readSearchWord :: ReadS SearchWord readSearchWord s = [((SWord x),t) | (x,t) <- [span notSearchWordDelim s]] instance Read SearchWord where readsPrec p = readSearchWord -- ---------------------------------------- data SearchList = SList [SearchWord] deriving (Eq) printSearchList :: SearchList -> String printSearchList (SList a) = printSearchWordList a instance Show SearchList where show a = printSearchList a readSearchList :: ReadS SearchList readSearchList s = [((SList (x:xs)),v) | (x,t) <- reads s, ("+",u) <- [span (matchChar '+') t], ((SList xs),v) <- reads u] -- Handle the 1-word base case. -- Not checking the span allowed the parsing to stop too soon -- TODO: There has to be a no-lookahead version of this. ++ [ ((SList [x]),t) | (x,t) <- reads s, ("",t) <- [span (matchChar '+') t]] instance Read SearchList where readsPrec p = readSearchList -- ----------------------------------------- data SearchResult = SResult String deriving (Eq) printSearchResult :: SearchResult -> String printSearchResult (SResult a) = a -- TODO: start using mapping functions here printSearchResultList :: [SearchResult] -> String printSearchResultList [] = "" printSearchResultList [(SResult a)] = a printSearchResultList ((SResult a):rest) = a ++ "," ++ show rest instance Show SearchResult where show a = printSearchResult a showList a = showString (printSearchResultList a) readSearchResult :: ReadS SearchResult readSearchResult s = [((SResult x),t) | (x,t) <- [span isNotNL s]] instance Read SearchResult where readsPrec p = readSearchResult -- ----------------------------------------- data SearchTerm = Term SearchList SearchResult deriving (Show,Eq) readSearchTerm :: ReadS SearchTerm readSearchTerm s = [((Term sl r),r1) | ("q",t1) <- [span (matchChar 'q') s], ("=",t2) <- [span (matchChar '=') t1], (sl,u1) <- reads t2, (",",u2) <- [span (matchChar ',') u1], ("v",u3) <- [span (matchChar 'v') u2], ("=",v) <- [span (matchChar '=') u3], (r,r1) <- reads v] readSearchTermList :: ReadS [SearchTerm] readSearchTermList s = [(x:xs,v) | (x,t) <- reads s, ("\n",t2) <- [span (matchChar '\n') t], (xs,v) <- reads t2] ++ [([x],"") | (x,t) <- reads s, ("","") <- lex t] debugSearchTermList :: String -> [SearchTerm] debugSearchTermList s = x where [(x,"")] = readSearchTermList s instance Read SearchTerm where readsPrec p = readSearchTerm readList = readSearchTermList -- ----------------------------------------- -- Discremenation Net Nodes data Dnode = Node [SearchWord] [SearchResult] [Dnode] root :: Dnode root = Node [(SWord "")] [(SResult "")] [] -- ----------------------------------------- data AnchorTag = ATag String String printAtag :: AnchorTag -> String printAtag (ATag [] _) = error "Href Url is null" printAtag (ATag _ []) = error "Href Description is null" printAtag (ATag s_Url s_Descr) = "" ++ s_Descr ++ "" instance Show AnchorTag where show a = printAtag a -- ==================================================== buildDnet :: SearchTerm -> [Dnode] -> [Dnode] -- Input sanity case: All search terms need at least one search word buildDnet (Term (SList []) _) [] = error "SearchTerm Needs Nonnull SearchList" -- Basest of the base cases: one search word with an empty dlist buildDnet (Term (SList [a]) result) [] = [Node [a] [result] []] -- Second least base case: List of words, with an empty dlist (first SearchTerm) buildDnet (Term (SList (a:rest)) result) [] = [Node [a] [] (buildDnet (Term (SList rest) result) [])] -- Third Base Case: Running out of seach words in a non-empty dlist -- Either an existing node can hold the url, or a new leaf needs to be added for it. buildDnet (Term (SList [a]) r) ((Node [s] results children):drest) = if (a == s) then ((Node [s] (r:results) children):drest) else ((Node [s] results children):(buildDnet (Term (SList [a]) r) drest)) -- Non-base case -- Either the word is a substring of the current node's children (remove the matched searchword, -- and push matching down to the kids) -- Or the current node's children are checked. buildDnet (Term (SList searchList) r) ((Node [s] results children):drest) = if (isMember s searchList) then ((Node [s] results (buildDnet (Term (SList (removeItem searchList s)) r) children)):drest) else (Node [s] results children):(buildDnet (Term (SList searchList) r) drest) -- ================================================================== myBuildDnetList [] d = d myBuildDnetList [s] d = buildDnet s d myBuildDnetList (s:rest) d = myBuildDnetList rest (buildDnet s d) buildDnetList :: [SearchTerm] -> [Dnode] buildDnetList anyList = myBuildDnetList anyList [] buildDnetList2 :: [SearchTerm] -> [Dnode] -> [Dnode] buildDnetList2 anyList start = myBuildDnetList anyList start -- ================================================================== -- Prune out internal nodes with no results and only 1 child. -- TODO: Instead of a separate pruning stage, have buildDnet handle [SWord] -- dynamically, splittng up the list as needed ! compressDnetItem :: Dnode -> Dnode compressDnetItem (Node s [] [k]) = (Node (s++s1) r2 k3) where (Node s1 r2 k3) = compressDnetItem k compressDnetItem (Node s r k) = (Node s r (compressDnetList k)) compressDnetList :: [Dnode] -> [Dnode] compressDnetList a = map compressDnetItem a -- ================================================================== -- Order the nodes alphabetically orderDnetList :: [Dnode] -> [Dnode] orderDnetList [] = [] orderDnetList [Node s r kids] = [Node s r (orderDnetList kids)] orderDnetList (d:drest) = orderDnetList smaller ++ [(Node (s:srest) r (orderDnetList kids))] ++ orderDnetList larger where (Node (s:srest) r kids) = d smaller = [(Node (bs:bsrest) br bkids) | (Node (bs:bsrest) br bkids) <-drest, bs <= s] larger = [(Node (bs:bsrest) br bkids) | (Node (bs:bsrest) br bkids) <-drest, bs > s] -- ================================================================== -- Dnode output -- ================================================================== myIndent :: Int -> String myIndent x | x < 0 = error "Only Nonegative Indent Values Allowed" | x == 0 = "" | otherwise = " " ++ (myIndent (x-1)) dots :: Int -> String dots x | x < 0 = error "Only Nonegative Indent Values Allowed" | x == 0 = "" | otherwise = "." ++ (dots (x-1)) printDnodeSub :: [SearchResult] -> String printDnodeSub results = if theSubResults == "" then "" else " => [" ++ theSubResults ++ "]" where theSubResults = show results printDnode :: Dnode -> Int -> String printDnode (Node [] [] []) i = "" printDnode (Node searchList results kids) i = (myIndent i) ++ (show searchList) ++ (printDnodeSub results) ++ (if theKidsResults == "" then "" else "\n") ++ theKidsResults where theKidsResults = printDnodeList kids (i+2) -- TODO: Should be able to use mapping functions here printDnodeList :: [Dnode] -> Int -> String printDnodeList [] i = "" printDnodeList [a] i = (myIndent i) ++ (printDnode a i) printDnodeList (a:rest) i = (myIndent i) ++ (printDnode a i) ++ "\n" ++ (printDnodeList rest i) instance Show Dnode where show a = printDnode a 0 showList a = showString (printDnodeList a 0) -- ====================================================== toHtmlSearchResults :: [SearchResult] -> String toHtmlSearchResults [] = "" toHtmlSearchResults ((SResult a):rest) = "X " ++ (toHtmlSearchResults rest) toHtmlDnodeSub :: [SearchResult] -> String toHtmlDnodeSub results = if theSubResults == "" then "" else ": " ++ theSubResults where theSubResults = toHtmlSearchResults results toHtmlDnode :: Dnode -> Int -> String toHtmlDnode (Node [] [] []) i = "" toHtmlDnode (Node searchList results kids) i = (dots i) ++ (convertHtmlString (show searchList)) ++ (toHtmlDnodeSub results) ++ (if theKidsResults == "" then "" else "
\n") ++ theKidsResults where theKidsResults = toHtmlDnodeList2 kids (i+2) toHtmlDnodeList2 :: [Dnode] -> Int -> String toHtmlDnodeList2 [] i = "" toHtmlDnodeList2 [a] i = (dots i) ++ (toHtmlDnode a i) toHtmlDnodeList2 (a:rest) i = (dots i) ++ (toHtmlDnode a i) ++ "
\n" ++ (toHtmlDnodeList2 rest i) -- TODO: Introduce some html specific functions ... toHtmlDnodeList :: [Dnode] -> IO String toHtmlDnodeList dlist = do s_BuildDate <- getCmdOutput "date" return("

" ++ (toHtmlDnodeList2 dlist 0) ++ "

Last built at " ++ s_BuildDate ++ "") -- ======================================================= -- Given a filename, open it and output the searchterms contained inside inputSearchTermList :: String -> IO [([SearchTerm],String)] inputSearchTermList [] = error "No filename given" inputSearchTermList s_FileName = do s_Contents <- readFile s_FileName return(reads s_Contents) inputDnode :: String -> IO [Dnode] inputDnode [] = error "No filename given" inputDnode s_FileName = do s_Contents <- readFile s_FileName let x = reads s_Contents in case x of [] -> error ("No parse of " ++ s_FileName) [(theSList,_)] -> return(orderDnetList(compressDnetList (buildDnetList theSList))) (a:arest) -> error ("Ambigious parse of " ++ s_FileName) outputDnode :: String -> String -> IO () outputDnode [] _ = error "No input filename given" outputDnode _ [] = error "No output filename given" outputDnode s_InputFileName s_OutputFileName = do s_Contents <- readFile s_InputFileName let x = reads s_Contents in case x of [] -> error ("No parse of " ++ s_InputFileName ++ "contents") [(theSList,_)] -> do s_Output <- (toHtmlDnodeList (orderDnetList(compressDnetList( buildDnetList theSList)))) writeFile s_OutputFileName s_Output (a:arest) -> error ("Ambigious parse of " ++ s_InputFileName ++ "contents") -- ---------------------------------- -- RUN THE DISCNET.DAT TRANSFORMATION -- ---------------------------------- begin :: String -> String -> IO() begin a b = outputDnode a b -- TODO: Use some of the mapping functions in those print functions -- TODO: Introduce an Href type, that takes an s_Url and s_Descr and outputs "s_Descr -- TODO: Write a read function that will also import strings of this type and make Href types -- TODO: (This is probably too hard to do with the current list-based approach... -- TODO: Make Dnode generic with respect to SearchResult... e.i. allow a list of anything to be stored -- TODO: Get outputDnode working when the input string is reading in a list of STerms with Href results !!:q main :: IO() main = do (a:b:rest) <- getArgs begin a b