{-# LANGUAGE DeriveGeneric #-} {-# LANGUAGE TypeFamilies #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE OverloadedLabels #-} {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE MultiWayIf #-} -- Databases import Database.Selda (Text, liftIO, (!)) import Database.Selda.SQLite (withSQLite) import qualified Database.Selda as S import qualified Database.LevelDB as L import qualified Database.LevelDB.Streaming as LS -- Error handling import Control.Exception as BE import Control.Monad.Catch as CE import qualified System.Exit as E -- Configuration import qualified Options.Applicative as O import qualified System.Directory as D import qualified Data.Configurator as C -- Text converion import Data.Text.Encoding (decodeUtf8) import qualified Data.Text as T import qualified Data.Text.IO as T import qualified Data.ByteString as B -- Version information import qualified Paths_bisc as Bisc import Data.Version (showVersion) -- File locking bypass import qualified System.Posix.Files as Posix -- Misc import Data.List (nub, isInfixOf) import Data.Maybe (mapMaybe) import Data.Function ((&)) import Data.Default (def) import Control.Monad (when) import Control.Monad.Reader (ReaderT, runReaderT, asks) import Control.Monad.Except (ExceptT, runExceptT, throwError) import System.FilePath (joinPath, takeBaseName, ()) -- Options -- | Configuration file settings data Settings = Settings { webenginePath :: FilePath -- ^ webengine data directory , whitelist :: [Text] -- ^ whitelisted domains , options :: Options -- ^ cli options } -- | Command line options data Options = Options { version :: Bool -- ^ print version number , dryRun :: Bool -- ^ don't delete anything , unsafe :: Bool -- ^ ignore locks , configPath :: FilePath -- ^ config file path } -- | Command line parser cliParser :: FilePath -> O.ParserInfo Options cliParser defConfig = O.info (O.helper <*> parser) infos where parser = Options <$> O.switch ( O.long "version" <> O.short 'v' <> O.help "Print the version number and exit" ) <*> O.switch ( O.long "dry-run" <> O.short 'n' <> O.help ("Don't actually remove anything, "<> "just show what would be done") ) <*> O.switch ( O.long "unsafe" <> O.short 'u' <> O.help ("Ignore database locks. " <> "This will probably corrupt the databases, but " <> "works while the browser is running.") ) <*> O.strOption ( O.long "config" <> O.short 'c' <> O.value defConfig <> O.help "Specify a configuration file" ) infos = O.fullDesc <> O.progDesc "A small tool that clears cookies (and more)" -- SQL records -- | Just a cookie data Cookie = Cookie { host_key :: Text -- ^ cookie domain , creation_utc :: Int -- ^ creation date } deriving (S.Generic, Show) -- | The origin (domain) of a quota data QuotaOrigin = QuotaOrigin { origin :: Text -- ^ URL , last_modified_time :: Int -- ^ creation date } deriving (S.Generic, Show) instance S.SqlRow Cookie instance S.SqlRow QuotaOrigin -- SQL tables -- | Cookies table cookies :: S.Table Cookie cookies = S.table "cookies" [] -- | QuotaManager origins table quotaOrigins :: S.Table QuotaOrigin quotaOrigins = S.table "OriginInfoTable" [] -- | Main monad stack -- -- * 'ReaderT' for accessing settings -- * 'ExceptT' for custom errors type Action = ReaderT Settings (ExceptT Text IO) -- | Number of removed domains, list of domains type Result = (Int, [Text]) -- * Main -- | Clears all means of permanent storage main :: IO () main = do defConfig <- D.getXdgDirectory D.XdgConfig ("bisc" "bisc.conf") opts <- O.execParser (cliParser defConfig) when (version opts) $ do putStrLn ("bisc " <> showVersion Bisc.version) E.exitSuccess run <- runAction <$> loadSettings opts numFailures <- sum <$> mapM (uncurry run) actions if numFailures == 0 then E.exitSuccess else do putStrLn ("\nwarning: " <> show numFailures <> " actions have failed") E.exitWith (E.ExitFailure numFailures) -- | Runs an 'Action' and pretty-prints the results runAction :: Settings -> Text -> Action Result -> IO Int runAction settings name x = do a <- BE.try $ runExceptT (runReaderT x settings) case a of Right (Right res) -> printResult res >> return 0 Right (Left msg) -> printFailed msg >> return 1 Left (err :: BE.IOException) -> printFailed (T.pack $ BE.displayException err) >> return 1 where printFailed msg = T.putStrLn ("- " <> name <> " cleaning failed:\n " <> msg) printResult (n, bad) | n > 0 = do T.putStrLn ("- " <> name <> ": " <> verb <> " " <> T.pack (show n) <> " entries for:") T.putStrLn (T.unlines $ map (" * " <>) bad) | otherwise = T.putStrLn ("- " <> name <> ": nothing to delete") verb = if (dryRun . options $ settings) then "would delete" else "deleted" -- * Cleaning actions -- | List of actions and their names actions :: [(Text, Action Result)] actions = [ ("Cookies", deleteCookies) , ("QuotaManager", deleteQuotaOrigins) , ("IndexedDB", deleteIndexedDB) , ("LocalStorage", deleteLocalStorage) , ("SessionStorage", deleteSessionStorage) ] -- | Deletes records in the Cookies database deleteCookies :: Action Result deleteCookies = do dir <- asks webenginePath dry <- asks (dryRun . options) unsafe <- asks (unsafe . options) let database = dir "Cookies" context = if unsafe then bypassLocks "Cookies" else ($ database) exists <- liftIO $ D.doesFileExist database when (not exists) (throwError "database is missing") whitelist <- map S.text <$> asks whitelist context $ \database -> do CE.handle dbErrors $ withSQLite database $ do bad <- S.query $ do cookie <- S.select cookies S.restrict (by whitelist cookie) return (cookie ! #host_key) when (not dry) $ S.deleteFrom_ cookies (by whitelist) return (length bad, nub bad) where by set x = S.not_ (x ! #host_key `S.isIn` set) -- | Deletes records in the QuotaManager API database deleteQuotaOrigins :: Action Result deleteQuotaOrigins = do dir <- asks webenginePath dry <- asks (dryRun . options) unsafe <- asks (unsafe . options) let database = dir "QuotaManager" context = if unsafe then bypassLocks "QuotaManager" else ($ database) exists <- liftIO $ D.doesFileExist database when (not exists) (throwError "database is missing") whitelist <- map pattern <$> asks whitelist context $ \database -> do CE.handle dbErrors $ withSQLite database $ do bad <- S.query $ do quota <- S.select quotaOrigins S.restrict (by whitelist quota) return (quota ! #origin) when (not dry) $ S.deleteFrom_ quotaOrigins (by whitelist) return (length bad, nub bad) where -- check if quota is not whitelisted by whitelist quota = S.not_ (S.true `S.isIn` matches) where url = quota ! #origin matches = do pattern <- S.selectValues (map S.Only whitelist) S.restrict (url `S.like` S.the pattern) return S.true -- turns domains into patterns to match a url pattern domain = "http%://%" <> domain <> "/" -- | Deletes per-domain files under the IndexedDB directory -- -- For example: -- -- https_example.com_0.indexeddb.leveldb -- https_www.example.com_0.indexeddb.leveldb -- deleteIndexedDB :: Action Result deleteIndexedDB = do webengine <- asks webenginePath dry <- asks (dryRun . options) exists <- liftIO $ D.doesDirectoryExist (webengine "IndexedDB") when (not exists) $ throwError "directory is missing" entries <- listDirectoryAbs (webengine "IndexedDB") unlisted <- (\domains -> not . (`elem` domains)) <$> asks whitelist let badFiles = filterMaybe (fmap unlisted . domain) entries badDomains = mapMaybe domain badFiles when (not dry) $ liftIO $ mapM_ D.removePathForcibly badFiles return (length badFiles, nub badDomains) where listDirectoryAbs :: FilePath -> Action [FilePath] listDirectoryAbs dir = liftIO $ map (dir ) <$> D.listDirectory dir maybeToBool :: Maybe Bool -> Bool maybeToBool Nothing = False maybeToBool (Just x) = x filterMaybe :: (a -> Maybe Bool) -> [a] -> [a] filterMaybe f = filter (maybeToBool . f) domain :: FilePath -> Maybe Text domain = extract . url where extract [] = Nothing extract (_:[]) = Nothing extract (_:xs) = Just $ T.unwords (init xs) url = T.splitOn "_" . T.pack . takeBaseName -- | Deletes records from the local storage levelDB database -- -- The schema consists of two (or more) records for each url: -- -- "META:" which stores metadata -- "_\NUL\SOH" which stores the actual data -- -- See https://source.chromium.org/chromium/chromium/src/+/master:components/services/storage/dom_storage/local_storage_impl.cc;l=51 -- deleteLocalStorage :: Action Result deleteLocalStorage = do webengine <- asks webenginePath whitelist <- asks whitelist let path = webengine "Local Storage" "leveldb" dry <- asks (dryRun . options) unsafe <- asks (unsafe . options) when (not dry && unsafe) $ liftIO $ do -- delete and recreate the lock file to bypass POSIX locks D.removeFile (path "LOCK") T.writeFile (path "LOCK") "" dbIsOk <- liftIO $ D.doesFileExist (path "LOCK") when (not dbIsOk) (throwError "database is missing or corrupted") version <- withRetryDB path (\db -> L.get db def "VERSION") when (version /= Just "1") (throwError "database is empty or the schema unsupported") -- when dry running replace the delete function with a nop let delete = if dry then (\_ _ _ -> pure ()) else L.delete withDB path $ \db -> do badDomains <- L.withIterator db def $ \i -> LS.keySlice i LS.AllKeys LS.Asc & LS.filter (\k -> "META:" `B.isPrefixOf ` k && (metaDomain k) `notElem` whitelist) & LS.mapM (\k -> delete db def k >> return (metaDomain k)) & LS.toList n <- L.withIterator db def $ \i -> LS.keySlice i LS.AllKeys LS.Asc & LS.filter (\k -> "_" `B.isPrefixOf` k && "\NUL\SOH" `B.isInfixOf` k && (recDomain k) `notElem` whitelist) & LS.mapM (delete db def) & LS.length return (n, badDomains) where -- extract domains from the keys domain = snd . T.breakOnEnd "://" . decodeUtf8 metaDomain = domain . B.drop 5 recDomain = domain . head . B.split 0 . B.drop 1 -- | Deletes records from the session storage levelDB database -- -- The schema consists of a map `url -> id` and records under `id`: -- -- namespace-- = -- map-- = -- -- See https://source.chromium.org/chromium/chromium/src/+/master:components/services/storage/dom_storage/session_storage_metadata.cc;l=21 -- deleteSessionStorage :: Action Result deleteSessionStorage = do webengine <- asks webenginePath whitelist <- asks whitelist let path = webengine "Session Storage" dry <- asks (dryRun . options) unsafe <- asks (unsafe . options) when (not dry && unsafe) $ liftIO $ do -- delete and recreate the lock file to bypass POSIX locks D.removeFile (path "LOCK") T.writeFile (path "LOCK") "" dbIsOk <- liftIO $ D.doesFileExist (path "LOCK") when (not dbIsOk) (throwError "database is missing or corrupted") version <- withRetryDB path (\db -> L.get db def "version") when (version /= Just "1") (throwError "database is empty or the schema unsupported") -- when dry running replace the delete function with a nop let delete = if dry then (\_ _ _ -> pure ()) else L.delete withDB path $ \db -> do -- map of id -> isBad badMap <- L.withIterator db def $ \i -> LS.keySlice i LS.AllKeys LS.Asc & LS.filter (B.isPrefixOf "namespace") & LS.mapM (\k -> (,) <$> L.get db def k <*> pure (isBad whitelist k)) & LS.toList -- delete the unlisted domains map badDomains <- L.withIterator db def $ \i -> LS.keySlice i LS.AllKeys LS.Asc & LS.filter (B.isPrefixOf "namespace") & LS.filter (isBad whitelist) & LS.mapM (\k -> delete db def k >> return (domain k)) & LS.toList -- and their records n <- L.withIterator db def $ \i -> LS.keySlice i LS.AllKeys LS.Asc & LS.filter (B.isPrefixOf "map-") & LS.mapM (\k -> case lookup (originId k) badMap of Just True -> delete db def k >> return 1 _ -> return 0) & LS.sum return (n, nub badDomains) where isBad whitelist = not . flip elem whitelist . domain -- extract domain from keys (47 = length "namespace--") url = decodeUtf8 . B.drop 47 domain = (!! 2). T.splitOn "/" . url -- extract id from key: drop "map-", take until "-" (ascii 45) originId = Just . B.takeWhile (/= 45). B.drop 4 -- * Helper functions -- | Loads a leveldb database and runs a resourceT action -- -- withDB :: FilePath -> (L.DB -> ResourceT IO a) -> Action a withDB path f = liftIO $ L.runResourceT (L.open path def >>= f) -- | Like 'withDB' but retry the action after repairing the db -- -- withRetryDB :: FilePath -> (L.DB -> ResourceT IO a) -> Action a withRetryDB path action = do res <- CE.try (withDB path action) case res of Right b -> return b Left (e :: BE.IOException) -> if | "Corruption" `T.isInfixOf` msg -> do -- try repairing before giving up liftIO $ L.repair path def withDB path action | "unavailable" `T.isInfixOf` msg -> throwError "database is locked (in use by another process)" | otherwise -> throwError ("error opening the database:\n " <> msg) where msg = T.pack (BE.displayException e) -- | Bypass SQLite locking mechanism -- -- SQLite manages concurrent access via POSIX locks: these are tied to a -- specific file and pid. They can be bypassed by simply creating a hard -- link (pointing to the same inode), editing the link and then removing it. bypassLocks :: String -> (FilePath -> Action a) -> Action a bypassLocks dbName cont = do dir <- asks webenginePath let real = dir dbName link = real <> "-bypass" -- bypass the SQLite POSIX locks with hard links liftIO (Posix.createLink real link) res <- cont database -- remove the hard links liftIO (Posix.removeLink link) return res -- | Loads the config file/cli options loadSettings :: Options -> IO Settings loadSettings opts = do configdir <- D.getXdgDirectory D.XdgConfig "qutebrowser" datadir <- D.getXdgDirectory D.XdgData "qutebrowser" let defaultWhitelist = joinPath [configdir, "whitelists", "cookies"] defaultWebengine = joinPath [datadir, "webengine"] config <- C.load [C.Optional (configPath opts)] whitelist <- C.lookupDefault defaultWhitelist config "whitelist-path" webengine <- C.lookupDefault defaultWebengine config "webengine-path" domains <- T.lines <$> T.readFile whitelist return (Settings webengine domains opts) -- | Catches any Selda error dbErrors :: S.SeldaError -> Action a dbErrors (S.DbError msg) = throwError $ "error opening database: " <> T.pack msg dbErrors e = if "ErrorBusy" `isInfixOf` msg then throwError "database is locked (in use by another process)" else throwError $ "database operation failed: " <> T.pack msg where msg = BE.displayException e