{-# LANGUAGE DeriveGeneric #-} {-# LANGUAGE TypeFamilies #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE OverloadedLabels #-} {-# LANGUAGE FlexibleContexts #-} import Data.List (nub, foldl') import Data.Maybe (mapMaybe) import Data.Function ((&)) import Data.Default (def) import Data.Text.Encoding (decodeUtf8) import Control.Monad (mapM_, when, (>=>)) import Control.Monad.Reader (ReaderT, runReaderT, asks) import Control.Monad.Except (ExceptT, runExceptT, throwError) import System.FilePath (joinPath, takeBaseName, ()) import Database.Selda (Text, liftIO, (.||), (!)) import Database.Selda.SQLite (withSQLite) import qualified Database.Selda as S import qualified Database.LevelDB as L import qualified Database.LevelDB.Streaming as LS import qualified System.Directory as D import qualified Data.Configurator as C import qualified Data.Text as T import qualified Data.Text.IO as T import qualified Data.ByteString as B import Debug.Trace -- | Bisc settings data Settings = Settings { whitelistPath :: FilePath -- ^ whitelist file , webenginePath :: FilePath -- ^ webengine data directory , whitelist :: [Text] -- ^ whitelisted domains } -- SQL records -- | Just a cookie data Cookie = Cookie { host_key :: Text -- ^ cookie domain , creation_utc :: Int -- ^ creation date } deriving (S.Generic, Show) -- | The origin (domain) of a quota data QuotaOrigin = QuotaOrigin { origin :: Text -- ^ URL , last_modified_time :: Int -- ^ creation date } deriving (S.Generic, Show) instance S.SqlRow Cookie instance S.SqlRow QuotaOrigin -- SQL tables -- | Cookies table cookies :: S.Table Cookie cookies = S.table "cookies" [] -- | QuotaManager origins table quotaOrigins :: S.Table QuotaOrigin quotaOrigins = S.table "OriginInfoTable" [] -- | Main monad stack -- -- * 'ReaderT' for accessing settings -- * 'ExceptT' for custom errors type Action = ReaderT Settings (ExceptT Text IO) -- | Number of removed domains, list of domains type Result = (Int, [Text]) -- * Main -- | Clears all means of permanent storage main :: IO () main = do config <- D.getXdgDirectory D.XdgConfig ("bisc" "bisc.conf") run <- runAction <$> loadSettings config run "Cookies" deleteCookies run "QuotaManager" deleteQuotaOrigins run "IndexedDB" deleteIndexedDB run "LocalStorage" deleteLocalStorage run "SessionStorage" deleteSessionStorage -- | Runs an 'Action' and pretty-prints the results runAction :: Settings -> Text -> Action Result -> IO () runAction settings name x = do a <- runExceptT (runReaderT x settings) case a of Left err -> T.putStrLn (name <> " cleaning failed: " <> err) Right res -> printResult res where printResult (n, bad) | n > 0 = do T.putStrLn (name <> ": deleted " <> T.pack (show n) <> " entries for:") T.putStrLn (T.unlines $ map (" * " <>) bad) | otherwise = T.putStrLn (name <> ": nothing to delete.") -- * Cleaning actions -- | Deletes records in the Cookies database deleteCookies :: Action Result deleteCookies = do database <- ( "Cookies") <$> asks webenginePath whitelist <- map S.text <$> asks whitelist withSQLite database $ do bad <- S.query $ do cookie <- S.select cookies S.restrict (by whitelist cookie) return (cookie ! #host_key) n <- S.deleteFrom cookies (by whitelist) return (n, nub bad) where by set x = S.not_ (x ! #host_key `S.isIn` set) -- | Deletes records in the QuotaManager API database deleteQuotaOrigins :: Action Result deleteQuotaOrigins = do database <- ( "QuotaManager") <$> asks webenginePath whitelist <- map pattern <$> asks whitelist withSQLite database $ do bad <- S.query $ do quota <- S.select quotaOrigins S.restrict (by whitelist quota) return (quota ! #origin) n <- S.deleteFrom quotaOrigins (by whitelist) return (n, nub bad) where -- check if x ∉ set by set x = S.not_ . any_ . map (S.like (x ! #origin)) $ set -- turns domains into patterns to match a url pattern domain = S.text ("http%://%" <> domain <> "/") any_ = foldl' (.||) S.false -- | Deletes per-domain files under the IndexedDB directory -- -- For example: -- -- https_example.com_0.indexeddb.leveldb -- https_www.example.com_0.indexeddb.leveldb -- deleteIndexedDB :: Action Result deleteIndexedDB = do webengine <- asks webenginePath unlisted <- (\domains -> not . (`elem` domains)) <$> asks whitelist entries <- liftIO $ listDirectoryAbs (webengine "IndexedDB") let badFiles = filterMaybe (fmap unlisted . domain) entries badDomains = mapMaybe domain badFiles liftIO $ mapM_ D.removePathForcibly badFiles return (length badFiles, nub badDomains) where listDirectoryAbs :: FilePath -> IO [FilePath] listDirectoryAbs dir = map (dir ) <$> D.listDirectory dir maybeToBool :: Maybe Bool -> Bool maybeToBool Nothing = False maybeToBool (Just x) = x filterMaybe :: (a -> Maybe Bool) -> [a] -> [a] filterMaybe f = filter (maybeToBool . f) domain :: FilePath -> Maybe Text domain = extract . url where extract [] = Nothing extract (x:[]) = Nothing extract (x:xs) = Just $ T.unwords (init xs) url = T.splitOn "_" . T.pack . takeBaseName -- | Deletes records from the local storage levelDB database -- -- The schema consists of two (or more) records for each url: -- -- "META:" which stores metadata -- "_\NUL\SOH" which stores the actual data -- -- See https://source.chromium.org/chromium/chromium/src/+/master:components/services/storage/dom_storage/local_storage_impl.cc;l=51 -- deleteLocalStorage :: Action Result deleteLocalStorage = do webengine <- asks webenginePath whitelist <- asks whitelist let path = webengine "Local Storage" "leveldb" version <- withDB path (\db -> L.get db def "VERSION") when (version /= Just "1") (throwError "Unsupported schema version") withDB path $ \db -> L.withIterator db def $ \iter -> do L.iterFirst iter scanKeys db (by whitelist) iter where -- extract domains from the keys domain = snd . T.breakOnEnd "://" . decodeUtf8 metaDomain = domain . B.drop 5 recDomain = domain . head . B.split 0 . B.drop 1 -- scan the database and delete keys from unlisted domain scanKeys db checker i = go 0 [] where go n domains = do mkey <- L.iterKey i case mkey of -- end of database Nothing -> return (n, domains) Just key -> do let (bad, origin) = checker key let m = if bad then n+1 else n when bad (L.delete db def key) L.iterNext i go m (maybe domains (:domains) origin) -- check if unlisted and return the domain if a meta record by whitelist key | "META:" `B.isPrefixOf` key && not (metaDomain key `elem` whitelist) = (True, Just (metaDomain key)) | "_" `B.isPrefixOf` key && "\NUL\SOH" `B.isInfixOf` key && not (recDomain key `elem` whitelist) = (True, Nothing) | otherwise = (False, Nothing) -- | Deletes records from the session storage levelDB database -- -- The schema consists of a map `url -> id` and records under `id`: -- -- namespace-- = -- map-- = -- -- See https://source.chromium.org/chromium/chromium/src/+/master:components/services/storage/dom_storage/session_storage_metadata.cc;l=21 -- deleteSessionStorage :: Action Result deleteSessionStorage = do webengine <- asks webenginePath whitelist <- asks whitelist let path = webengine "Session Storage" version <- withDB path (\db -> L.get db def "version") when (version /= Just "1") (throwError "Unsupported schema version") withDB path $ \db -> do -- map of id -> isBad badMap <- L.withIterator db def $ \i -> LS.keySlice i LS.AllKeys LS.Asc & LS.filter (B.isPrefixOf "namespace") & LS.mapM (\k -> (,) <$> L.get db def k <*> pure (isBad whitelist k)) & LS.toList -- delete the unlisted domains map badDomains <- L.withIterator db def $ \i -> LS.keySlice i LS.AllKeys LS.Asc & LS.filter (B.isPrefixOf "namespace") & LS.filter (isBad whitelist) & LS.mapM (\k -> L.delete db def k >> return (domain k)) & LS.toList -- and their records n <- L.withIterator db def $ \i -> LS.keySlice i LS.AllKeys LS.Asc & LS.filter (B.isPrefixOf "map-") & LS.mapM (\k -> case lookup (originId k) badMap of Just True -> L.delete db def k >> return 1 _ -> return 0) & LS.sum return (n, nub badDomains) where isBad whitelist = not . flip elem whitelist . domain -- extract domain from keys (47 = length "namespace--") url = decodeUtf8 . B.drop 47 domain = (!! 2). T.splitOn "/" . url -- extract id from key: drop "map-", take until "-" (ascii 45) originId = Just . B.takeWhile (/= 45). B.drop 4 -- * Helper functions -- | Loads a leveldb database and runs a resourceT action -- -- withDB :: FilePath -> (L.DB -> ResourceT IO a) -> Action a withDB path f = liftIO $ L.runResourceT (L.open path def >>= f) -- | Loads the config from a file loadSettings :: FilePath -> IO Settings loadSettings path = do configdir <- D.getXdgDirectory D.XdgConfig "qutebrowser" datadir <- D.getXdgDirectory D.XdgData "qutebrowser" let defaultWhitelist = joinPath [configdir, "whitelists", "cookies"] defaultWebengine = joinPath [datadir, "webengine"] config <- C.load [C.Optional path] whitelist <- C.lookupDefault defaultWhitelist config "whitelist-path" webengine <- C.lookupDefault defaultWebengine config "webengine-path" domains <- T.lines <$> T.readFile whitelist return (Settings whitelist webengine domains)