Never been to DZone Snippets before?

Snippets is a public source code repository. Easily build up your personal collection of code snippets, categorize them with tags / keywords, and share them with the world

« Newer Snippets
Older Snippets »
Showing 1-5 of 5 total  RSS 

Haskell Regular Expression Matcher

Basic implementation of Regular Expressions based on "Derivatives of Regular Expressions" by Janusz A. Brzozowski (Journal of Association for Computing Machinery, October 1964)

Not really intended for serious use. Just a proof of concept.

module Regexp
where

import Data.Set (Set)
import Data.Map (Map)
import Monad
import List
import Maybe
import qualified Data.Set as Set
import qualified Data.Map as Map

data Regexp = 
    Zero
  | Match Char       -- matches a single char
  | Not Regexp       -- matches the negation of its argument
  | Prod [Regexp]    -- matches a concatentation of its arguments
  | Sum (Set Regexp) -- matches either of its arguments
  | Star Regexp      -- matches repetitions of its argument (including 0 repetitions)
  deriving (Eq, Ord)

instance Show Regexp where
  show Zero = "0"
  show (Match c) = [c]
  show (Not x)   = '~' : show x
  show (Prod x) = join . (map show)  $ x
  show (Sum x) = "(" ++ ( join . intersperse "|" . (map show) . Set.toList $ x ) ++ ")"
  show (Star x) = "(" ++ show x ++ ")*" 

-- Flagrant abuse of type classes to allow implicit conversion of datatypes into regular
-- expressions.
class Match a where
  match :: a -> Regexp

instance Match Char where
  match c = Match c

instance (Match a) => Match [a] where
  match = con

instance Match Regexp where
  match = id

-- "smart" versions of the constructors, which perform normalisation of the datatype.
-- As long as all regular expressions are built up using these and the match instance
-- for char we can guarantee that structural equality of terms == similarity.
-- This is important to make sure we only generate a finite number of states.
zero :: Regexp 
zero = Zero 

one :: Regexp
one = Prod []

(<+>) :: (Match a, Match b) => a -> b -> Regexp
x <+> y = 
  case (match x, match y) of
    (Zero, b)      -> b
    (a, Zero)      -> a
    (Sum a, Sum b) -> Sum (Set.union a b)
    (Sum a, b)     -> Sum (Set.insert b a)
    (a, Sum b)     -> Sum (Set.insert a b)    
    (a, b)         -> Sum $ Set.fromList [a, b]
  
oneOf :: (Match a) => [a] -> Regexp
oneOf = foldr (<+>) zero

(<*>) :: (Match a, Match b) => a -> b -> Regexp
u <*> v = 
  case (match u, match v) of 
    (Zero, _)         -> zero
    (_, Zero)         -> zero
    (Prod x, Prod y)  -> Prod (x ++ y)
    (Prod x, y)       -> Prod (x ++ [y])
    (x, Prod y)       -> Prod (x : y)
    (x, y)            -> Prod [x, y]

con :: (Match a) => [a] -> Regexp
con = foldr (<*>) one

neg :: (Match a) => a -> Regexp
neg x = 
  case (match x) of
  (Not y) -> y
  y       -> Not y

star :: (Match a) => a -> Regexp
star x =
  case (match x) of
    (Zero)   -> Zero
    (Star y) -> Star y
    y        -> Star y

-- Returns if the regex matches the empty string.
del :: Regexp -> Bool
del (Zero)    = False
del (Sum x)   = or . map del $ Set.toList x
del (Prod x)  = and . map del $ x
del (Match _) = False
del (Not x)   = not $ del x;
del (Star _)  = True

-- The derivative of a regular language A with respect to a character
-- c is dA/dc = { s : cs \in A } 
diff :: Char -> Regexp -> Regexp
diff _ (Zero)  = zero
diff c (Match d) | (c == d) = one
diff c (Match d) = zero
diff c (Sum x) = oneOf $ (map $ diff c) (Set.toList x)
diff c (Prod []) = zero
diff c (Prod (x:xs)) | del x = (diff c x <*> xs) <+> diff c (Prod xs)
diff c (Prod (x:xs)) = diff c x <*> xs
diff c (Not x) = Not (diff c x)
diff c (Star x) = diff c x <*> Star x

flattenSet :: (Ord a) => Set (Set a) -> Set a
flattenSet = Set.fold Set.union Set.empty

(/>>=) :: (Ord a, Ord b) => Set a -> (a -> Set b) -> Set b
x />>= f = flattenSet (Set.map f x)

-- The alphabet of all characters that appear in this regexp
alphabet :: Regexp -> Set Char
alphabet (Zero) = Set.empty
alphabet (Sum x) = flattenSet (Set.map alphabet x) 
alphabet (Prod x) = Set.unions $ map alphabet x
alphabet (Not x) = alphabet x
alphabet (Star x) = alphabet x
alphabet (Match c) = Set.singleton c

-- Set of all derivatives of a regular expression (including itself, and higher order derivatives).
derivatives :: Regexp -> [Regexp]
derivatives exp = Set.toList $ enlarge (Set.singleton exp) (Set.singleton exp) 
  where
    alpha = alphabet exp
    firstDerivatives x = Set.map (`diff` x) alpha 
    enlarge :: Set Regexp -> Set Regexp -> Set Regexp
    enlarge new found = 
      if Set.null new
        then found
        else
          let nextNew   = (new />>= firstDerivatives) Set.\\ found
              nextFound = found `Set.union` nextNew
          in enlarge nextNew nextFound

-- A simple finite state machine type 
data FSM = State { transitions :: (Map Char FSM), isFinal :: Bool } 

-- Converts a Regexp into a finite state machine by using the derivatives
-- with respect to specific characters as the transitions. Essentially at 
-- each stage we build up a regular expression that the remaining characters
-- have to match. Due to Cunning Mathematics, only finitely many such regular
-- expressions (up to similarity) result.
compile :: Regexp -> FSM
compile x = fromJust $ Map.lookup x states
  where
    states :: Map Regexp FSM
    states = Map.fromList $
      do re <- derivatives x -- Totally gratuitious use of list monad. :)
         let trans = do c <- Set.toList $ alphabet re
                        let d = diff c re
                        return (c, fromJust $ Map.lookup d states)
         let state = State (Map.fromList trans) (del re) 
         return (re, state) 

runFSM :: FSM -> String -> Bool
runFSM x []     = isFinal x
runFSM x (c:cs) = case (Map.lookup c $ transitions x) of
                    Nothing -> False
                    Just y  -> runFSM y cs
               
matches :: (Match a) => String -> a -> Bool
matches cs exp = runFSM (compile $  match exp) cs

Simple Haskell script for word counting

This is just a simple piece of code I put together to play with some Haskell when I realised I've not been writing nearly enough of the stuff.

It reads text from stdin and prints the words it finds together with how many times each one occurred.

module Main
where

import List
import Control.Arrow

type Comparator a = (a -> a -> Ordering)

ascending :: (Ord a) => (b -> a) -> Comparator b
ascending f x y = compare (f x) (f y)

descending :: (Ord a) => (b -> a) -> Comparator b
descending = flip . ascending

secondary :: Comparator a -> Comparator a -> Comparator a
secondary f g x y = case f x y of {
                    EQ -> g x y;
                    z  -> z; }

-- Returns a list of unique elements together with their frequency. Listed in decreasing order of frequency, followed by
increasing order of the elements.
count :: (Ord a) => [a] -> [(a, Int)]
count = map (head &&& length) . sortBy (descending length `secondary` ascending head) . group . sort

main :: IO ()
main = interact $ unlines . map (\(x, y) -> (take 20 $ x ++ repeat ' ')  ++ " : " ++ show y) . count . words

Putlines in Haskell

This is an illustrative example of

a) Point free style
b) How Haskell IO works.

If you don't understand Haskell IO, it might be helpful to try and unpick the definition here to see how it works. :-)

import System

main :: IO ()
main = getArgs >>= sequence_ . (map putStrLn) 

JSON Parser in Haskell

I've been having trouble writing parsers recently, and I've been meaning to get to grips with Haskell at some point, so I figured I'd write a simple JSON parser using Haskell's Parsec library. Here's the code for it:

Update: I've modified this to use Data.Map instead of a list of key value pairs for the record / object implementation. I've also removed the 'identifier' feature as it isn't really part of JSON proper. Also, I've noticed that this seems to have acquired some sort of presence on google. This isn't really very good code - it's more a demonstration of parsec than it is an actually useful parser. (I mean, it works fine, and it's probably sufficient for trivial uses, but I wouldn't e.g. guarantee it to be bug free). I strongly recommend using this one instead.

import Text.ParserCombinators.Parsec
import System
import qualified Data.Map as Map

-- Main method. Currently not very interesting - just a test piece of code which accepts
-- a file and prints out a representation of the parsed type (or an error message if it is
-- invalid.
mainParser = do {
    val <- valueParser
    ; skipMany space
    ; eof
    ; return val }

main :: IO ()
main = do {
    args <- getArgs 
  ; val <- parseFromFile mainParser $ args !! 0 
  ; print(val) } 

-- Matches string literals. 
literalString :: Parser JSON 
literalString = do {
        char '"'
      ;  val <-  many1 letter
      ;  char '"'
      ; return $ LiteralString val}


-- Data type representing a JSON AST. Roughly corresponds to a Javascript object.
data JSON = ListValue [JSON] 
          | LiteralString String
          | LiteralInt Integer
          | LiteralBoolean Bool
          | RecordValue (Map.Map String JSON)
            deriving Show         


-- Combined parser.
valueParser :: Parser JSON
valueParser =       
        literalString
    <|> literalInt
    <|> literalBoolean
    <|> recordParser
    <|> listParser 

-- Matches literal integers.
literalInt :: Parser JSON
literalInt = do {
    ; val <- many1 digit
    ; return $ LiteralInt (read val)
        }

-- Matches boolean literals
literalBoolean :: Parser JSON
literalBoolean =
                do{ 
                  string "true"
                ; return $ LiteralBoolean True}
            <|> do{
                  string "false"
                ; return $ LiteralBoolean False}

-- Code for parsing lists.
-- Matches comma separated lists enclosed in [ ]
listParser :: Parser JSON 
listParser = do{ 
                char '['
              ; words <- sepBy1 valueParser listSeparator
              ; char ']'
              ; return $ ListValue words
              }

-- Matches ',' with any amount of space on either side.
listSeparator :: Parser ()
listSeparator = do{ 
      skipMany space 
    ; char ','
    ; skipMany space
}

-- Code for parsing records.
-- Matches { word : JSON; word : JSON; word : value; ... }
recordParser :: Parser JSON 
recordParser = do{
      char '{'
    ; defs <- endBy definitionParser definitionSeparator
    ; char '}'
    ; return $ RecordValue $ Map.fromList defs 
}

-- Matches things of the form word : JSON
definitionParser :: Parser (String, JSON)
definitionParser = do{
      skipMany space
    ; key <- many1 letter 
    ; skipMany space
    ; char ':'
    ; skipMany space
    ; val <- valueParser
    ; return (key, val)
}

-- Matches ';' with any amount of space on either side.
definitionSeparator :: Parser ()
definitionSeparator = do {
      skipMany space
    ; char ';'
    ; skipMany space
    ; return () 
}


&lt;- and let .. = confusion

// description of your code here

module Main
   where

import Random


main = do
   -- Either of these work
   --rnum <- oneRandNum
   let rnum = oneRandNum

   -- But only let works here. Why?
   --numbers <- randArray
   let numbers = randArray

   -- Required for successful compilation.
   print "foo"


oneRandNum :: IO Int
oneRandNum = getStdRandom( randomR( 0, 9 ) )

randArray :: [IO Int]
randArray = [oneRandNum]
« Newer Snippets
Older Snippets »
Showing 1-5 of 5 total  RSS