--
-- File Name
-- dnet.hs
--
-- Author / Date
-- Jude Nagurney / 02002-02003
--
-- Description
-- Searching discrimination net space...
--
-- Copyright and License Info
-- Copyright (C) 02002-02003 Jude Nagurney
--
-- This program is free software; you can redistribute it and/or modify
-- it under the terms of the GNU General Public License as published by
-- the Free Software Foundation; either version 2, or (at your option)
-- any later version.
--
-- This program is distributed in the hope that it will be useful,
-- but WITHOUT ANY WARRANTY; without even the implied warranty of
-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-- GNU General Public License for more details.
--
-- You should have received a copy of the GNU General Public License
-- along with this program; if not, write to the Free Software
-- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
--
-- The original author can be contacted at gazook@atdot.org,
-- Jude Nagurney, 1001 North Randolph Street, #622, Arlington,
-- VA, 22201-5607, USA.
--
-- Change Log
-- First Deployed ???
-- 02-27-02003: Added convertHtmlChar and convertHtmlString functions
-- 06-17-02003: Added "Last built at" line.
-- =================================
import System(getArgs)
import MyUtils(getCmdOutput)
-- -----------------
-- Utility Functions
doFirst :: [a] -> (a->Bool) -> (a->a) -> [a]
doFirst [] _ _ = []
doFirst (a:rest) doTest doMod =
if (doTest a) then
((doMod a):rest)
else
(a:(doFirst rest doTest doMod))
isMember :: Eq a => a -> [a] -> Bool
isMember x [] = False
isMember x (a:rest) = if x == a then True else isMember x rest
-- The empty subset is a subset of every set....
isSubset :: Eq a => [a] -> [a] -> Bool
isSubset [] b = True
isSubset (a:rest) b =
(isMember a b) && (isSubset rest b)
removeItem :: Eq a => [a] -> a -> [a]
removeItem [] b = []
removeItem (a:rest) b =
if a == b then removeItem rest b
else (a:(removeItem rest b))
isNL :: Char -> Bool
isNL '\n' = True
isNL _ = False
isNotNL :: Char -> Bool
isNotNL '\n' = False
isNotNL _ = True
addComma :: String -> String -> String
addComma a "" = a
addComma a b = a ++ "," ++ b
-- TODO: Get rid of these character-specific functions, and
-- come up with a better way to test for "no commas or plus signs in s for
-- the last case of readSearchWord
isComma :: Char -> Bool
isComma ',' = True
isComma _ = False
isPlusSign :: Char -> Bool
isPlusSign '+' = True
isPlusSign _ = False
matchChar :: Char -> Char -> Bool
matchChar a b = a == b
isSearchWordDelim :: Char -> Bool
isSearchWordDelim s = isComma s || isPlusSign s
notSearchWordDelim :: Char -> Bool
notSearchWordDelim s = not (isSearchWordDelim s)
-- A utility for escaping special HTML characters
-- There area lot more items I could add here, but "<" and ">" are the ones that caused problems recently
convertHtmlChar :: Char -> String
convertHtmlChar '<' = "<"
convertHtmlChar '>' = ">"
convertHtmlChar '&' = "&"
convertHtmlChar '"' = """
convertHtmlChar x = (x:[])
convertHtmlString :: String -> String
convertHtmlString (c:rest) = (convertHtmlChar c) ++ (convertHtmlString rest)
convertHtmlString [] = ""
-- -------------------
-- The discnet goodies
data SearchWord = SWord String
deriving (Eq, Ord)
printSearchWord :: SearchWord -> String
printSearchWord (SWord a) = a
printSearchWordList :: [SearchWord] -> String
printSearchWordList [] = ""
printSearchWordList [a] = show a
printSearchWordList (a:rest) = show a ++ " " ++ show rest
instance Show SearchWord where
show a = printSearchWord a
showList a = showString (printSearchWordList a)
readSearchWord :: ReadS SearchWord
readSearchWord s = [((SWord x),t)
| (x,t) <- [span notSearchWordDelim s]]
instance Read SearchWord where
readsPrec p = readSearchWord
-- ----------------------------------------
data SearchList = SList [SearchWord]
deriving (Eq)
printSearchList :: SearchList -> String
printSearchList (SList a) = printSearchWordList a
instance Show SearchList where
show a = printSearchList a
readSearchList :: ReadS SearchList
readSearchList s = [((SList (x:xs)),v)
| (x,t) <- reads s,
("+",u) <- [span (matchChar '+') t],
((SList xs),v) <- reads u]
-- Handle the 1-word base case.
-- Not checking the span allowed the parsing to stop too soon
-- TODO: There has to be a no-lookahead version of this.
++ [ ((SList [x]),t)
| (x,t) <- reads s,
("",t) <- [span (matchChar '+') t]]
instance Read SearchList where
readsPrec p = readSearchList
-- -----------------------------------------
data SearchResult = SResult String
deriving (Eq)
printSearchResult :: SearchResult -> String
printSearchResult (SResult a) = a
-- TODO: start using mapping functions here
printSearchResultList :: [SearchResult] -> String
printSearchResultList [] = ""
printSearchResultList [(SResult a)] = a
printSearchResultList ((SResult a):rest) = a ++ "," ++ show rest
instance Show SearchResult where
show a = printSearchResult a
showList a = showString (printSearchResultList a)
readSearchResult :: ReadS SearchResult
readSearchResult s = [((SResult x),t)
| (x,t) <- [span isNotNL s]]
instance Read SearchResult where
readsPrec p = readSearchResult
-- -----------------------------------------
data SearchTerm = Term SearchList SearchResult
deriving (Show,Eq)
readSearchTerm :: ReadS SearchTerm
readSearchTerm s = [((Term sl r),r1)
| ("q",t1) <- [span (matchChar 'q') s],
("=",t2) <- [span (matchChar '=') t1],
(sl,u1) <- reads t2,
(",",u2) <- [span (matchChar ',') u1],
("v",u3) <- [span (matchChar 'v') u2],
("=",v) <- [span (matchChar '=') u3],
(r,r1) <- reads v]
readSearchTermList :: ReadS [SearchTerm]
readSearchTermList s = [(x:xs,v) | (x,t) <- reads s,
("\n",t2) <- [span (matchChar '\n') t],
(xs,v) <- reads t2]
++
[([x],"") | (x,t) <- reads s,
("","") <- lex t]
debugSearchTermList :: String -> [SearchTerm]
debugSearchTermList s = x
where
[(x,"")] = readSearchTermList s
instance Read SearchTerm where
readsPrec p = readSearchTerm
readList = readSearchTermList
-- -----------------------------------------
-- Discremenation Net Nodes
data Dnode = Node [SearchWord] [SearchResult] [Dnode]
root :: Dnode
root = Node [(SWord "")] [(SResult "")] []
-- -----------------------------------------
data AnchorTag = ATag String String
printAtag :: AnchorTag -> String
printAtag (ATag [] _) = error "Href Url is null"
printAtag (ATag _ []) = error "Href Description is null"
printAtag (ATag s_Url s_Descr) = "" ++ s_Descr ++ ""
instance Show AnchorTag where
show a = printAtag a
-- ====================================================
buildDnet :: SearchTerm -> [Dnode] -> [Dnode]
-- Input sanity case: All search terms need at least one search word
buildDnet (Term (SList []) _) [] = error "SearchTerm Needs Nonnull SearchList"
-- Basest of the base cases: one search word with an empty dlist
buildDnet (Term (SList [a]) result) [] = [Node [a] [result] []]
-- Second least base case: List of words, with an empty dlist (first SearchTerm)
buildDnet (Term (SList (a:rest)) result) [] = [Node [a] [] (buildDnet (Term (SList rest) result) [])]
-- Third Base Case: Running out of seach words in a non-empty dlist
-- Either an existing node can hold the url, or a new leaf needs to be added for it.
buildDnet (Term (SList [a]) r) ((Node [s] results children):drest) =
if (a == s) then
((Node [s] (r:results) children):drest)
else
((Node [s] results children):(buildDnet (Term (SList [a]) r) drest))
-- Non-base case
-- Either the word is a substring of the current node's children (remove the matched searchword,
-- and push matching down to the kids)
-- Or the current node's children are checked.
buildDnet (Term (SList searchList) r) ((Node [s] results children):drest) =
if (isMember s searchList) then
((Node [s] results (buildDnet (Term (SList (removeItem searchList s)) r) children)):drest)
else
(Node [s] results children):(buildDnet (Term (SList searchList) r) drest)
-- ==================================================================
myBuildDnetList [] d = d
myBuildDnetList [s] d = buildDnet s d
myBuildDnetList (s:rest) d = myBuildDnetList rest (buildDnet s d)
buildDnetList :: [SearchTerm] -> [Dnode]
buildDnetList anyList = myBuildDnetList anyList []
buildDnetList2 :: [SearchTerm] -> [Dnode] -> [Dnode]
buildDnetList2 anyList start = myBuildDnetList anyList start
-- ==================================================================
-- Prune out internal nodes with no results and only 1 child.
-- TODO: Instead of a separate pruning stage, have buildDnet handle [SWord]
-- dynamically, splittng up the list as needed !
compressDnetItem :: Dnode -> Dnode
compressDnetItem (Node s [] [k]) = (Node (s++s1) r2 k3)
where (Node s1 r2 k3) = compressDnetItem k
compressDnetItem (Node s r k) = (Node s r (compressDnetList k))
compressDnetList :: [Dnode] -> [Dnode]
compressDnetList a = map compressDnetItem a
-- ==================================================================
-- Order the nodes alphabetically
orderDnetList :: [Dnode] -> [Dnode]
orderDnetList [] = []
orderDnetList [Node s r kids] = [Node s r (orderDnetList kids)]
orderDnetList (d:drest) =
orderDnetList smaller ++ [(Node (s:srest) r (orderDnetList kids))] ++ orderDnetList larger
where
(Node (s:srest) r kids) = d
smaller = [(Node (bs:bsrest) br bkids) | (Node (bs:bsrest) br bkids) <-drest, bs <= s]
larger = [(Node (bs:bsrest) br bkids) | (Node (bs:bsrest) br bkids) <-drest, bs > s]
-- ==================================================================
-- Dnode output
-- ==================================================================
myIndent :: Int -> String
myIndent x
| x < 0 = error "Only Nonegative Indent Values Allowed"
| x == 0 = ""
| otherwise = " " ++ (myIndent (x-1))
dots :: Int -> String
dots x
| x < 0 = error "Only Nonegative Indent Values Allowed"
| x == 0 = ""
| otherwise = "." ++ (dots (x-1))
printDnodeSub :: [SearchResult] -> String
printDnodeSub results
= if theSubResults == "" then "" else " => [" ++ theSubResults ++ "]"
where
theSubResults = show results
printDnode :: Dnode -> Int -> String
printDnode (Node [] [] []) i = ""
printDnode (Node searchList results kids) i =
(myIndent i) ++ (show searchList) ++ (printDnodeSub results) ++ (if theKidsResults == "" then "" else "\n") ++ theKidsResults
where
theKidsResults = printDnodeList kids (i+2)
-- TODO: Should be able to use mapping functions here
printDnodeList :: [Dnode] -> Int -> String
printDnodeList [] i = ""
printDnodeList [a] i = (myIndent i) ++ (printDnode a i)
printDnodeList (a:rest) i = (myIndent i) ++ (printDnode a i) ++ "\n" ++
(printDnodeList rest i)
instance Show Dnode where
show a = printDnode a 0
showList a = showString (printDnodeList a 0)
-- ======================================================
toHtmlSearchResults :: [SearchResult] -> String
toHtmlSearchResults [] = ""
toHtmlSearchResults ((SResult a):rest) = "X " ++ (toHtmlSearchResults rest)
toHtmlDnodeSub :: [SearchResult] -> String
toHtmlDnodeSub results
= if theSubResults == "" then "" else ": " ++ theSubResults
where
theSubResults = toHtmlSearchResults results
toHtmlDnode :: Dnode -> Int -> String
toHtmlDnode (Node [] [] []) i = ""
toHtmlDnode (Node searchList results kids) i =
(dots i) ++ (convertHtmlString (show searchList)) ++ (toHtmlDnodeSub results) ++ (if theKidsResults == "" then "" else "
\n") ++ theKidsResults
where
theKidsResults = toHtmlDnodeList2 kids (i+2)
toHtmlDnodeList2 :: [Dnode] -> Int -> String
toHtmlDnodeList2 [] i = ""
toHtmlDnodeList2 [a] i = (dots i) ++ (toHtmlDnode a i)
toHtmlDnodeList2 (a:rest) i = (dots i) ++ (toHtmlDnode a i) ++ "
\n" ++ (toHtmlDnodeList2 rest i)
-- TODO: Introduce some html specific functions ...
toHtmlDnodeList :: [Dnode] -> IO String
toHtmlDnodeList dlist = do
s_BuildDate <- getCmdOutput "date"
return("
" ++ (toHtmlDnodeList2 dlist 0) ++ "
Last built at " ++ s_BuildDate ++ "") -- ======================================================= -- Given a filename, open it and output the searchterms contained inside inputSearchTermList :: String -> IO [([SearchTerm],String)] inputSearchTermList [] = error "No filename given" inputSearchTermList s_FileName = do s_Contents <- readFile s_FileName return(reads s_Contents) inputDnode :: String -> IO [Dnode] inputDnode [] = error "No filename given" inputDnode s_FileName = do s_Contents <- readFile s_FileName let x = reads s_Contents in case x of [] -> error ("No parse of " ++ s_FileName) [(theSList,_)] -> return(orderDnetList(compressDnetList (buildDnetList theSList))) (a:arest) -> error ("Ambigious parse of " ++ s_FileName) outputDnode :: String -> String -> IO () outputDnode [] _ = error "No input filename given" outputDnode _ [] = error "No output filename given" outputDnode s_InputFileName s_OutputFileName = do s_Contents <- readFile s_InputFileName let x = reads s_Contents in case x of [] -> error ("No parse of " ++ s_InputFileName ++ "contents") [(theSList,_)] -> do s_Output <- (toHtmlDnodeList (orderDnetList(compressDnetList( buildDnetList theSList)))) writeFile s_OutputFileName s_Output (a:arest) -> error ("Ambigious parse of " ++ s_InputFileName ++ "contents") -- ---------------------------------- -- RUN THE DISCNET.DAT TRANSFORMATION -- ---------------------------------- begin :: String -> String -> IO() begin a b = outputDnode a b -- TODO: Use some of the mapping functions in those print functions -- TODO: Introduce an Href type, that takes an s_Url and s_Descr and outputs "s_Descr -- TODO: Write a read function that will also import strings of this type and make Href types -- TODO: (This is probably too hard to do with the current list-based approach... -- TODO: Make Dnode generic with respect to SearchResult... e.i. allow a list of anything to be stored -- TODO: Get outputDnode working when the input string is reading in a list of STerms with Href results !!:q main :: IO() main = do (a:b:rest) <- getArgs begin a b