bisc/Main.hs

308 lines
9.8 KiB
Haskell

{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE OverloadedLabels #-}
{-# LANGUAGE FlexibleContexts #-}
import Data.List (nub, foldl')
import Data.Maybe (mapMaybe)
import Data.Function ((&))
import Data.Default (def)
import Data.Text.Encoding (decodeUtf8)
import Control.Monad (mapM_, when, (>=>))
import Control.Monad.Reader (ReaderT, runReaderT, asks)
import Control.Monad.Except (ExceptT, runExceptT, throwError)
import System.FilePath (joinPath, takeBaseName, (</>))
import Database.Selda (Text, liftIO, (.||), (!))
import Database.Selda.SQLite (withSQLite)
import qualified Database.Selda as S
import qualified Database.LevelDB as L
import qualified Database.LevelDB.Streaming as LS
import qualified System.Directory as D
import qualified Data.Configurator as C
import qualified Data.Text as T
import qualified Data.Text.IO as T
import qualified Data.ByteString as B
import Debug.Trace
-- | Bisc settings
data Settings = Settings
{ whitelistPath :: FilePath -- ^ whitelist file
, webenginePath :: FilePath -- ^ webengine data directory
, whitelist :: [Text] -- ^ whitelisted domains
}
-- SQL records
-- | Just a cookie
data Cookie = Cookie
{ host_key :: Text -- ^ cookie domain
, creation_utc :: Int -- ^ creation date
} deriving (S.Generic, Show)
-- | The origin (domain) of a quota
data QuotaOrigin = QuotaOrigin
{ origin :: Text -- ^ URL
, last_modified_time :: Int -- ^ creation date
} deriving (S.Generic, Show)
instance S.SqlRow Cookie
instance S.SqlRow QuotaOrigin
-- SQL tables
-- | Cookies table
cookies :: S.Table Cookie
cookies = S.table "cookies" []
-- | QuotaManager origins table
quotaOrigins :: S.Table QuotaOrigin
quotaOrigins = S.table "OriginInfoTable" []
-- | Main monad stack
--
-- * 'ReaderT' for accessing settings
-- * 'ExceptT' for custom errors
type Action = ReaderT Settings (ExceptT Text IO)
-- | Number of removed domains, list of domains
type Result = (Int, [Text])
-- * Main
-- | Clears all means of permanent storage
main :: IO ()
main = do
config <- D.getXdgDirectory D.XdgConfig ("bisc" </> "bisc.conf")
run <- runAction <$> loadSettings config
run "Cookies" deleteCookies
run "QuotaManager" deleteQuotaOrigins
run "IndexedDB" deleteIndexedDB
run "LocalStorage" deleteLocalStorage
run "SessionStorage" deleteSessionStorage
-- | Runs an 'Action' and pretty-prints the results
runAction :: Settings -> Text -> Action Result -> IO ()
runAction settings name x = do
a <- runExceptT (runReaderT x settings)
case a of
Left err -> T.putStrLn (name <> " cleaning failed: " <> err)
Right res -> printResult res
where
printResult (n, bad)
| n > 0 = do
T.putStrLn (name <> ": deleted " <> T.pack (show n) <> " entries for:")
T.putStrLn (T.unlines $ map (" * " <>) bad)
| otherwise = T.putStrLn (name <> ": nothing to delete.")
-- * Cleaning actions
-- | Deletes records in the Cookies database
deleteCookies :: Action Result
deleteCookies = do
database <- (</> "Cookies") <$> asks webenginePath
whitelist <- map S.text <$> asks whitelist
withSQLite database $ do
bad <- S.query $ do
cookie <- S.select cookies
S.restrict (by whitelist cookie)
return (cookie ! #host_key)
n <- S.deleteFrom cookies (by whitelist)
return (n, nub bad)
where
by set x = S.not_ (x ! #host_key `S.isIn` set)
-- | Deletes records in the QuotaManager API database
deleteQuotaOrigins :: Action Result
deleteQuotaOrigins = do
database <- (</> "QuotaManager") <$> asks webenginePath
whitelist <- map pattern <$> asks whitelist
withSQLite database $ do
bad <- S.query $ do
quota <- S.select quotaOrigins
S.restrict (by whitelist quota)
return (quota ! #origin)
n <- S.deleteFrom quotaOrigins (by whitelist)
return (n, nub bad)
where
-- check if x ∉ set
by set x = S.not_ . any_ . map (S.like (x ! #origin)) $ set
-- turns domains into patterns to match a url
pattern domain = S.text ("http%://%" <> domain <> "/")
any_ = foldl' (.||) S.false
-- | Deletes per-domain files under the IndexedDB directory
--
-- For example:
--
-- https_example.com_0.indexeddb.leveldb
-- https_www.example.com_0.indexeddb.leveldb
--
deleteIndexedDB :: Action Result
deleteIndexedDB = do
webengine <- asks webenginePath
unlisted <- (\domains -> not . (`elem` domains)) <$> asks whitelist
entries <- liftIO $ listDirectoryAbs (webengine </> "IndexedDB")
let
badFiles = filterMaybe (fmap unlisted . domain) entries
badDomains = mapMaybe domain badFiles
liftIO $ mapM_ D.removePathForcibly badFiles
return (length badFiles, nub badDomains)
where
listDirectoryAbs :: FilePath -> IO [FilePath]
listDirectoryAbs dir = map (dir </>) <$> D.listDirectory dir
maybeToBool :: Maybe Bool -> Bool
maybeToBool Nothing = False
maybeToBool (Just x) = x
filterMaybe :: (a -> Maybe Bool) -> [a] -> [a]
filterMaybe f = filter (maybeToBool . f)
domain :: FilePath -> Maybe Text
domain = extract . url where
extract [] = Nothing
extract (x:[]) = Nothing
extract (x:xs) = Just $ T.unwords (init xs)
url = T.splitOn "_" . T.pack . takeBaseName
-- | Deletes records from the local storage levelDB database
--
-- The schema consists of two (or more) records for each url:
--
-- "META:<url>" which stores metadata
-- "_<url>\NUL\SOH<key>" which stores the actual data
--
-- See https://source.chromium.org/chromium/chromium/src/+/master:components/services/storage/dom_storage/local_storage_impl.cc;l=51
--
deleteLocalStorage :: Action Result
deleteLocalStorage = do
webengine <- asks webenginePath
whitelist <- asks whitelist
let path = webengine </> "Local Storage" </> "leveldb"
version <- withDB path (\db -> L.get db def "VERSION")
when (version /= Just "1") (throwError "Unsupported schema version")
withDB path $ \db ->
L.withIterator db def $ \iter -> do
L.iterFirst iter
scanKeys db (by whitelist) iter
where
-- extract domains from the keys
domain = snd . T.breakOnEnd "://" . decodeUtf8
metaDomain = domain . B.drop 5
recDomain = domain . head . B.split 0 . B.drop 1
-- scan the database and delete keys from unlisted domain
scanKeys db checker i = go 0 [] where
go n domains = do
mkey <- L.iterKey i
case mkey of
-- end of database
Nothing -> return (n, domains)
Just key -> do
let (bad, origin) = checker key
let m = if bad then n+1 else n
when bad (L.delete db def key)
L.iterNext i
go m (maybe domains (:domains) origin)
-- check if unlisted and return the domain if a meta record
by whitelist key
| "META:" `B.isPrefixOf` key
&& not (metaDomain key `elem` whitelist) = (True, Just (metaDomain key))
| "_" `B.isPrefixOf` key
&& "\NUL\SOH" `B.isInfixOf` key
&& not (recDomain key `elem` whitelist) = (True, Nothing)
| otherwise = (False, Nothing)
-- | Deletes records from the session storage levelDB database
--
-- The schema consists of a map `url -> id` and records under `id`:
--
-- namespace-<session-uid>-<url> = <id>
-- map-<id>-<key> = <value>
--
-- See https://source.chromium.org/chromium/chromium/src/+/master:components/services/storage/dom_storage/session_storage_metadata.cc;l=21
--
deleteSessionStorage :: Action Result
deleteSessionStorage = do
webengine <- asks webenginePath
whitelist <- asks whitelist
let path = webengine </> "Session Storage"
version <- withDB path (\db -> L.get db def "version")
when (version /= Just "1") (throwError "Unsupported schema version")
withDB path $ \db -> do
-- map of id -> isBad
badMap <- L.withIterator db def $ \i ->
LS.keySlice i LS.AllKeys LS.Asc
& LS.filter (B.isPrefixOf "namespace")
& LS.mapM (\k -> (,) <$> L.get db def k <*> pure (isBad whitelist k))
& LS.toList
-- delete the unlisted domains map
badDomains <- L.withIterator db def $ \i ->
LS.keySlice i LS.AllKeys LS.Asc
& LS.filter (B.isPrefixOf "namespace")
& LS.filter (isBad whitelist)
& LS.mapM (\k -> L.delete db def k >> return (domain k))
& LS.toList
-- and their records
n <- L.withIterator db def $ \i ->
LS.keySlice i LS.AllKeys LS.Asc
& LS.filter (B.isPrefixOf "map-")
& LS.mapM (\k ->
case lookup (originId k) badMap of
Just True -> L.delete db def k >> return 1
_ -> return 0)
& LS.sum
return (n, nub badDomains)
where
isBad whitelist = not . flip elem whitelist . domain
-- extract domain from keys (47 = length "namespace-<uid>-")
url = decodeUtf8 . B.drop 47
domain = (!! 2). T.splitOn "/" . url
-- extract id from key: drop "map-", take until "-" (ascii 45)
originId = Just . B.takeWhile (/= 45). B.drop 4
-- * Helper functions
-- | Loads a leveldb database and runs a resourceT action
--
-- withDB :: FilePath -> (L.DB -> ResourceT IO a) -> Action a
withDB path f = liftIO $ L.runResourceT (L.open path def >>= f)
-- | Loads the config from a file
loadSettings :: FilePath -> IO Settings
loadSettings path = do
configdir <- D.getXdgDirectory D.XdgConfig "qutebrowser"
datadir <- D.getXdgDirectory D.XdgData "qutebrowser"
let
defaultWhitelist = joinPath [configdir, "whitelists", "cookies"]
defaultWebengine = joinPath [datadir, "webengine"]
config <- C.load [C.Optional path]
whitelist <- C.lookupDefault defaultWhitelist config "whitelist-path"
webengine <- C.lookupDefault defaultWebengine config "webengine-path"
domains <- T.lines <$> T.readFile whitelist
return (Settings whitelist webengine domains)