commit dc2920aa8f747ace426c926abc4c8c2bc631cf56 Author: rnhmjoj Date: Fri Oct 9 15:36:53 2015 +0200 Initial commit diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..fbc8b8f --- /dev/null +++ b/LICENSE @@ -0,0 +1,20 @@ +Copyright (c) 2015 rnhmjoj + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be included +in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/Main.hs b/Main.hs new file mode 100755 index 0000000..6123096 --- /dev/null +++ b/Main.hs @@ -0,0 +1,95 @@ +{-# LANGUAGE OverloadedStrings, RecordWildCards, ScopedTypeVariables #-} + +import Control.Monad (when, forM_) +import Control.Exception (try) +import Data.Text (Text, unpack) +import qualified Data.Text.IO as T +import Data.Monoid +import Data.ByteString.Lazy (hPut) +import System.IO +import System.IO.Temp (withSystemTempFile) +import System.Environment (getEnv, lookupEnv) +import Network.HTTP.Conduit (HttpException, simpleHttp) + +import Telegram +import Parser + +url :: String +url = "http://oglaf.com" + + +commands :: String -> Comic -> [Command] +commands path Comic{..} = + [ if page == 1 + then message user title + else nothing + , sendFile user path + , message user descr ] + + +cache :: IO String +cache = do + path <- lookupEnv "XDG_CACHE_HOME" + case path of + Just x -> return (x ++ "/oglaf") + Nothing -> (++ "/.cache/oglaf") <$> getEnv "HOME" + + +lastComic :: IO Text +lastComic = do + res <- try (T.readFile =<< cache) + case res of + Left (err :: IOError) -> do + putStrLn ("Can't read saved state " ++ show err) + putStrLn "Send anyway" + return "" + Right title -> return title + + +setLastComic :: Text -> IO () +setLastComic title = cache >>= flip T.writeFile title + + +findPages :: Text -> IO [Comic] +findPages = findPages' 1 where + findPages' n link = do + res <- try $ simpleHttp (url <> page) + case res of + Left (_ :: HttpException) -> return [] + Right cur -> (parseComic cur n :) <$> findPages' (succ n) link + where + page = unpack link <> show n + + +main :: IO () +main = do + comics <- parseArchive <$> simpleHttp (url <> "/archive") + when (null comics) (fail "error parsing comics archive") + + let latest = head comics + last <- lastComic + + if latest == last + then T.putStrLn ("Already sent " <> last <> ", stopping") + else do + pages <- findPages latest + process <- telegramProcess + socket <- telegramSocket + + send socket connect + + forM_ pages $ \comic -> + withSystemTempFile "oglaf.png" $ \path temp -> do + simpleHttp (imgUrl comic) >>= hPut temp + hClose temp + result <- try $ mapM (send socket) (commands path comic) + + case result of + Left (err :: IOError) -> do + putStrLn "failed" >> print err + fail "Could not send the comic :(" + Right _ -> do + setLastComic latest + putStrLn ("Sent " <> show (page comic)) + + closeTelegram socket process diff --git a/Parser.hs b/Parser.hs new file mode 100644 index 0000000..bc57f89 --- /dev/null +++ b/Parser.hs @@ -0,0 +1,54 @@ +{-# LANGUAGE OverloadedStrings #-} + +module Parser where + +import Data.Monoid +import Data.ByteString.Lazy (ByteString) +import qualified Data.Text as T +import Data.Text (Text, unpack) +import Text.XML.Cursor +import Text.HTML.DOM (parseLBS) +import Text.HTML.TagSoup.Entity (lookupEntity) + +data Comic = Comic + { title :: String + , descr :: String + , imgUrl :: String + , page :: Int + } + + +parseArchive :: ByteString -> [Text] +parseArchive str = mkCursor str $// selectLinks &| extractUrl + where + selectLinks = element "a" &/ element "img" >=> parent + extractUrl = T.concat . attribute "href" + + +parseComic :: ByteString -> Int -> Comic +parseComic str n = Comic + { title = decodeHtml $ unpack $ head $ cur $// selectTitle &| extractContent + , descr = decodeHtml $ unpack $ head $ cur $// selectImg &| extractDescr + , imgUrl = unpack $ head $ cur $// selectImg &| extractUrl + , page = n + } where + selectTitle = element "title" >=> child + selectImg = element "img" >=> attributeIs "id" "strip" + extractUrl = T.concat . attribute "src" + extractDescr = T.concat . attribute "title" + extractContent = T.concat . content + cur = mkCursor str + + +mkCursor :: ByteString -> Cursor +mkCursor = fromDocument . parseLBS + + +decodeHtml :: String -> String +decodeHtml [] = [] +decodeHtml ('&' : xs) = + let (b, a) = break (== ';') xs in + case (lookupEntity b, a) of + (Just c, ';' : as) -> c <> decodeHtml as + _ -> '&' : decodeHtml xs +decodeHtml (x : xs) = x : decodeHtml xs diff --git a/Setup.hs b/Setup.hs new file mode 100644 index 0000000..9a994af --- /dev/null +++ b/Setup.hs @@ -0,0 +1,2 @@ +import Distribution.Simple +main = defaultMain diff --git a/Telegram.hs b/Telegram.hs new file mode 100644 index 0000000..53ed487 --- /dev/null +++ b/Telegram.hs @@ -0,0 +1,76 @@ +module Telegram where + +import Network +import System.IO +import System.Process +import System.Process.Internals +import System.Posix.Signals (signalProcess, sigKILL) +import Data.List.Utils (replace) +import Control.Monad (when, replicateM_) +import Control.Retry + +-- Parameters +user = "Oglaf" +program = "telegram-cli" +args = ["-d", "-P", show port] +(host, port) = ("localhost", 2391) + + +telegramProcess :: IO ProcessHandle +telegramProcess = do + (_,_,_, handle) <- createProcess (proc program args) + return handle + + +telegramSocket :: IO Handle +telegramSocket = recoverAll timeout connect + where connect = connectTo host (PortNumber port) + timeout = fibonacciBackoff 100000 <> limitRetries 5 + + +closeTelegram :: Handle -> ProcessHandle -> IO () +closeTelegram socket handle = + hClose socket >> getPid handle >>= signalProcess sigKILL + where + go (OpenHandle h) = h + getPid h = withProcessHandle h (return . go) + + +send :: Handle -> String -> IO () +send handle command = do + hPutStrLn handle command + putStr (command ++ " -> ") + + -- wait for a reply + reply <- hWaitForInput handle 10000 + when (not reply) (fail "No reply from telegram") + + -- read the whole response + size <- read . last . words <$> hGetLine handle + replicateM_ (size+1) (hGetChar handle) + putStrLn "ok" + + +-- Commands +type Name = String +type Arg = String +type Command = String + + +mkCommand :: Name -> Arg -> Arg -> Command +mkCommand name a b = + name ++ " " ++ escape a ++ " '" ++ escape b ++ "'" + where escape = replace "\'" "\\\'" + + +connect :: Command +connect = "dialog_list" + +sendFile :: Arg -> Arg -> Command +sendFile = mkCommand "send_file" + +message :: Arg -> Arg -> Command +message = mkCommand "msg" + +nothing :: Command +nothing = "help" \ No newline at end of file diff --git a/oglaf.cabal b/oglaf.cabal new file mode 100644 index 0000000..346f847 --- /dev/null +++ b/oglaf.cabal @@ -0,0 +1,20 @@ +name: oglaf +version: 0.1.0.0 +synopsis: Script that fetches the latest oglaf and sends it over to me via telegram +license: MIT +license-file: LICENSE +author: rnhmjoj +maintainer: micheleguerinirocco@me.com +copyright: copyright (C) Michele Guerini Rocco +category: Network +build-type: Simple +cabal-version: >=1.10 + +executable oglaf + main-is: Main.hs + build-depends: base >=4.8 && <4.9, network, + bytestring, text, retry, tagsoup, + MissingH, process, unix, temporary, + http-conduit, xml-conduit, html-conduit + + default-language: Haskell2010