summaryrefslogtreecommitdiffstats
path: root/news.hs
blob: 7432391b70e5dba29f006ef496bac338a4e46eca (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
{-# LANGUAGE OverloadedStrings #-}
import Data.Monoid
import Prelude hiding (elem)
import Network.Wreq
import Control.Lens
import Text.Feed.Import
import qualified Text.Atom.Feed as Atom
import qualified Data.ByteString.Lazy.Char8 as LBS8
import qualified Data.ByteString.Char8 as BS8
import qualified Data.ByteString as BS
import Text.RSS.Syntax
import qualified Text.Feed.Types as Feed
import qualified Data.BloomFilter as Bloom
import qualified Data.BloomFilter.Easy as Bloom
import qualified Data.BloomFilter.Hash as Bloom
import Control.Concurrent
import System.Environment
import System.IO
import Control.Exception


data Item = Item
    { ni_title :: BS.ByteString
    , ni_link :: BS.ByteString
    }
  deriving Show


atomEntryToItems :: Atom.Entry -> [Item]
atomEntryToItems entry =
    map (Item title) links
  where
    title = BS8.pack $ Atom.txtToString (Atom.entryTitle entry)
    links = map (BS8.pack . Atom.linkHref) (Atom.entryLinks entry)

rssItemToItems :: RSSItem -> [Item]
rssItemToItems item =
    map (Item title) links
  where
    title = maybe "untitled" BS8.pack (rssItemTitle item)
    links = maybe [] ((:[]) . BS8.pack) (rssItemLink item)


main :: IO ()
main = do
    hSetBuffering stdout LineBuffering
    [url, delayString] <- getArgs
    let delay = read delayString :: Int
    let bloom0 = Bloom.fromList (Bloom.cheapHashes 17) (2^10 * 1000) [""]
    rec url delay bloom0
    return ()
  where
    rec url delay bloom =
      ( do
        r <- get url
        let f = parseFeedString $ LBS8.unpack $ r ^. responseBody
            items =
              case f of
                Just (Feed.RSSFeed rss) ->
                    concatMap rssItemToItems (rssItems (rssChannel rss))
                Just (Feed.AtomFeed atom) ->
                    concatMap atomEntryToItems (Atom.feedEntries atom)
                _ -> []

            bloom' = Bloom.insertList (map ni_link items) bloom
            newLinkItems = filter (\item ->
                    let link = ni_link item in
                    link `Bloom.notElem` bloom
                ) items
        mapM_ BS8.putStrLn (map (\item ->
                let title = ni_title item
                    link = ni_link item
                in title <> " " <> link
            ) newLinkItems)
        threadDelay (delay * 10^6)
        rec url delay bloom'
      )
        `catch` \e -> hPutStrLn stderr ("Caught " ++ show (e :: SomeException)) >> rec url delay bloom