2021-05-10 17:12:44 +02:00
|
|
|
{-# LANGUAGE DeriveGeneric #-}
|
|
|
|
{-# LANGUAGE TypeFamilies #-}
|
|
|
|
{-# LANGUAGE OverloadedStrings #-}
|
|
|
|
{-# LANGUAGE OverloadedLabels #-}
|
|
|
|
{-# LANGUAGE FlexibleContexts #-}
|
|
|
|
{-# LANGUAGE ScopedTypeVariables #-}
|
2018-09-21 21:34:31 +02:00
|
|
|
|
2021-03-23 16:09:03 +01:00
|
|
|
import Data.List (nub, foldl')
|
|
|
|
import Data.Maybe (mapMaybe)
|
2021-03-23 18:55:27 +01:00
|
|
|
import Data.Function ((&))
|
2021-03-23 16:09:03 +01:00
|
|
|
import Data.Default (def)
|
|
|
|
import Data.Text.Encoding (decodeUtf8)
|
|
|
|
import Control.Monad (mapM_, when, (>=>))
|
|
|
|
import Control.Monad.Reader (ReaderT, runReaderT, asks)
|
|
|
|
import Control.Monad.Except (ExceptT, runExceptT, throwError)
|
|
|
|
import System.FilePath (joinPath, takeBaseName, (</>))
|
|
|
|
import Database.Selda (Text, liftIO, (.||), (!))
|
2021-03-21 19:02:54 +01:00
|
|
|
import Database.Selda.SQLite (withSQLite)
|
2019-03-25 00:02:38 +01:00
|
|
|
|
2021-03-23 16:09:03 +01:00
|
|
|
import qualified Database.Selda as S
|
|
|
|
import qualified Database.LevelDB as L
|
|
|
|
import qualified Database.LevelDB.Streaming as LS
|
|
|
|
|
2021-05-10 17:12:44 +02:00
|
|
|
import Control.Exception as BE
|
|
|
|
import Control.Monad.Catch as CE
|
|
|
|
|
2021-03-23 16:09:03 +01:00
|
|
|
import qualified System.Directory as D
|
|
|
|
import qualified Data.Configurator as C
|
|
|
|
import qualified Data.Text as T
|
|
|
|
import qualified Data.Text.IO as T
|
|
|
|
import qualified Data.ByteString as B
|
2019-03-25 00:02:38 +01:00
|
|
|
|
2021-03-23 18:55:27 +01:00
|
|
|
import Debug.Trace
|
2019-03-25 00:02:38 +01:00
|
|
|
|
2021-03-21 19:02:54 +01:00
|
|
|
-- | Bisc settings
|
2019-03-25 00:02:38 +01:00
|
|
|
data Settings = Settings
|
2021-03-21 19:02:54 +01:00
|
|
|
{ whitelistPath :: FilePath -- ^ whitelist file
|
|
|
|
, webenginePath :: FilePath -- ^ webengine data directory
|
|
|
|
, whitelist :: [Text] -- ^ whitelisted domains
|
2019-03-25 00:02:38 +01:00
|
|
|
}
|
2018-09-21 21:34:31 +02:00
|
|
|
|
2021-03-21 19:02:54 +01:00
|
|
|
|
|
|
|
-- SQL records
|
|
|
|
|
|
|
|
-- | Just a cookie
|
2018-09-21 21:34:31 +02:00
|
|
|
data Cookie = Cookie
|
2021-03-21 19:02:54 +01:00
|
|
|
{ host_key :: Text -- ^ cookie domain
|
|
|
|
, creation_utc :: Int -- ^ creation date
|
2021-03-23 16:09:03 +01:00
|
|
|
} deriving (S.Generic, Show)
|
2021-03-21 19:02:54 +01:00
|
|
|
|
|
|
|
-- | The origin (domain) of a quota
|
|
|
|
data QuotaOrigin = QuotaOrigin
|
|
|
|
{ origin :: Text -- ^ URL
|
|
|
|
, last_modified_time :: Int -- ^ creation date
|
2021-03-23 16:09:03 +01:00
|
|
|
} deriving (S.Generic, Show)
|
2018-09-21 21:34:31 +02:00
|
|
|
|
2021-03-23 16:09:03 +01:00
|
|
|
instance S.SqlRow Cookie
|
|
|
|
instance S.SqlRow QuotaOrigin
|
2021-03-21 19:02:54 +01:00
|
|
|
|
|
|
|
|
|
|
|
-- SQL tables
|
|
|
|
|
|
|
|
-- | Cookies table
|
2021-03-23 16:09:03 +01:00
|
|
|
cookies :: S.Table Cookie
|
|
|
|
cookies = S.table "cookies" []
|
2021-03-21 19:02:54 +01:00
|
|
|
|
|
|
|
-- | QuotaManager origins table
|
2021-03-23 16:09:03 +01:00
|
|
|
quotaOrigins :: S.Table QuotaOrigin
|
|
|
|
quotaOrigins = S.table "OriginInfoTable" []
|
2018-09-21 21:34:31 +02:00
|
|
|
|
2021-03-23 16:09:03 +01:00
|
|
|
-- | Main monad stack
|
|
|
|
--
|
|
|
|
-- * 'ReaderT' for accessing settings
|
|
|
|
-- * 'ExceptT' for custom errors
|
|
|
|
type Action = ReaderT Settings (ExceptT Text IO)
|
2018-09-23 13:40:12 +02:00
|
|
|
|
2021-03-23 16:09:03 +01:00
|
|
|
-- | Number of removed domains, list of domains
|
|
|
|
type Result = (Int, [Text])
|
2018-09-21 21:34:31 +02:00
|
|
|
|
2018-09-23 13:40:12 +02:00
|
|
|
|
2021-03-23 16:09:03 +01:00
|
|
|
-- * Main
|
|
|
|
|
|
|
|
-- | Clears all means of permanent storage
|
2019-03-25 00:02:38 +01:00
|
|
|
main :: IO ()
|
|
|
|
main = do
|
2021-03-23 16:09:03 +01:00
|
|
|
config <- D.getXdgDirectory D.XdgConfig ("bisc" </> "bisc.conf")
|
|
|
|
run <- runAction <$> loadSettings config
|
|
|
|
run "Cookies" deleteCookies
|
|
|
|
run "QuotaManager" deleteQuotaOrigins
|
|
|
|
run "IndexedDB" deleteIndexedDB
|
|
|
|
run "LocalStorage" deleteLocalStorage
|
|
|
|
run "SessionStorage" deleteSessionStorage
|
|
|
|
|
|
|
|
|
|
|
|
-- | Runs an 'Action' and pretty-prints the results
|
|
|
|
runAction :: Settings -> Text -> Action Result -> IO ()
|
|
|
|
runAction settings name x = do
|
2021-05-10 17:12:44 +02:00
|
|
|
a <- BE.try $ runExceptT (runReaderT x settings)
|
2021-03-23 16:09:03 +01:00
|
|
|
case a of
|
2021-05-10 17:12:44 +02:00
|
|
|
Right (Right res) -> printResult res
|
|
|
|
Right (Left msg) -> printFailed msg
|
|
|
|
Left (err :: BE.IOException) -> printFailed (T.pack $ BE.displayException err)
|
2021-03-21 19:02:54 +01:00
|
|
|
where
|
2021-05-10 17:12:44 +02:00
|
|
|
printFailed msg = T.putStrLn ("- " <> name <> " cleaning failed:\n " <> msg)
|
2021-03-23 16:09:03 +01:00
|
|
|
printResult (n, bad)
|
2021-03-21 19:02:54 +01:00
|
|
|
| n > 0 = do
|
2021-05-10 17:12:44 +02:00
|
|
|
T.putStrLn ("- " <> name <> ": deleted " <> T.pack (show n) <> " entries for:")
|
2021-03-23 16:09:03 +01:00
|
|
|
T.putStrLn (T.unlines $ map (" * " <>) bad)
|
2021-05-10 17:12:44 +02:00
|
|
|
| otherwise = T.putStrLn ("- " <> name <> ": nothing to delete")
|
2021-03-23 16:09:03 +01:00
|
|
|
|
2021-03-21 19:02:54 +01:00
|
|
|
|
2021-03-23 16:09:03 +01:00
|
|
|
-- * Cleaning actions
|
2021-03-21 19:02:54 +01:00
|
|
|
|
|
|
|
-- | Deletes records in the Cookies database
|
2021-03-23 16:09:03 +01:00
|
|
|
deleteCookies :: Action Result
|
2021-03-21 19:02:54 +01:00
|
|
|
deleteCookies = do
|
|
|
|
database <- (</> "Cookies") <$> asks webenginePath
|
2021-05-10 17:12:44 +02:00
|
|
|
exists <- liftIO $ D.doesFileExist database
|
|
|
|
when (not exists) (throwError "database is missing")
|
|
|
|
|
2021-03-23 16:09:03 +01:00
|
|
|
whitelist <- map S.text <$> asks whitelist
|
2021-05-10 17:12:44 +02:00
|
|
|
CE.handle dbErrors $ withSQLite database $ do
|
2021-03-23 16:09:03 +01:00
|
|
|
bad <- S.query $ do
|
|
|
|
cookie <- S.select cookies
|
|
|
|
S.restrict (by whitelist cookie)
|
2018-09-21 21:34:31 +02:00
|
|
|
return (cookie ! #host_key)
|
2021-03-23 16:09:03 +01:00
|
|
|
n <- S.deleteFrom cookies (by whitelist)
|
2018-09-23 13:40:12 +02:00
|
|
|
return (n, nub bad)
|
|
|
|
where
|
2021-03-23 16:09:03 +01:00
|
|
|
by set x = S.not_ (x ! #host_key `S.isIn` set)
|
2018-09-23 13:40:12 +02:00
|
|
|
|
|
|
|
|
2021-03-21 19:02:54 +01:00
|
|
|
-- | Deletes records in the QuotaManager API database
|
2021-03-23 16:09:03 +01:00
|
|
|
deleteQuotaOrigins :: Action Result
|
2021-03-21 19:02:54 +01:00
|
|
|
deleteQuotaOrigins = do
|
|
|
|
database <- (</> "QuotaManager") <$> asks webenginePath
|
2021-05-10 17:12:44 +02:00
|
|
|
exists <- liftIO $ D.doesFileExist database
|
|
|
|
when (not exists) (throwError "database is missing")
|
|
|
|
|
2021-03-21 19:02:54 +01:00
|
|
|
whitelist <- map pattern <$> asks whitelist
|
2021-05-10 17:12:44 +02:00
|
|
|
CE.handle dbErrors $ withSQLite database $ do
|
2021-03-23 16:09:03 +01:00
|
|
|
bad <- S.query $ do
|
|
|
|
quota <- S.select quotaOrigins
|
|
|
|
S.restrict (by whitelist quota)
|
2021-03-21 19:02:54 +01:00
|
|
|
return (quota ! #origin)
|
2021-03-23 16:09:03 +01:00
|
|
|
n <- S.deleteFrom quotaOrigins (by whitelist)
|
2021-03-21 19:02:54 +01:00
|
|
|
return (n, nub bad)
|
|
|
|
where
|
2021-03-23 16:09:03 +01:00
|
|
|
-- check if x ∉ set
|
|
|
|
by set x = S.not_ . any_ . map (S.like (x ! #origin)) $ set
|
2021-03-21 19:02:54 +01:00
|
|
|
-- turns domains into patterns to match a url
|
2021-03-23 16:09:03 +01:00
|
|
|
pattern domain = S.text ("http%://%" <> domain <> "/")
|
|
|
|
any_ = foldl' (.||) S.false
|
2021-03-21 19:02:54 +01:00
|
|
|
|
|
|
|
|
|
|
|
-- | Deletes per-domain files under the IndexedDB directory
|
|
|
|
--
|
|
|
|
-- For example:
|
|
|
|
--
|
|
|
|
-- https_example.com_0.indexeddb.leveldb
|
|
|
|
-- https_www.example.com_0.indexeddb.leveldb
|
|
|
|
--
|
2021-03-23 16:09:03 +01:00
|
|
|
deleteIndexedDB :: Action Result
|
2021-03-21 19:02:54 +01:00
|
|
|
deleteIndexedDB = do
|
2019-03-25 00:02:38 +01:00
|
|
|
webengine <- asks webenginePath
|
2021-05-10 17:12:44 +02:00
|
|
|
exists <- liftIO $ D.doesDirectoryExist (webengine </> "IndexedDB")
|
|
|
|
when (not exists) $ throwError "directory is missing"
|
|
|
|
|
|
|
|
entries <- listDirectoryAbs (webengine </> "IndexedDB")
|
2021-03-21 19:02:54 +01:00
|
|
|
unlisted <- (\domains -> not . (`elem` domains)) <$> asks whitelist
|
2018-09-23 13:40:12 +02:00
|
|
|
let
|
|
|
|
badFiles = filterMaybe (fmap unlisted . domain) entries
|
|
|
|
badDomains = mapMaybe domain badFiles
|
2021-03-21 19:02:54 +01:00
|
|
|
liftIO $ mapM_ D.removePathForcibly badFiles
|
2018-09-23 13:40:12 +02:00
|
|
|
return (length badFiles, nub badDomains)
|
|
|
|
where
|
2021-05-10 17:12:44 +02:00
|
|
|
listDirectoryAbs :: FilePath -> Action [FilePath]
|
|
|
|
listDirectoryAbs dir = liftIO $ map (dir </>) <$> D.listDirectory dir
|
2019-06-02 10:40:13 +02:00
|
|
|
|
2018-09-23 13:40:12 +02:00
|
|
|
maybeToBool :: Maybe Bool -> Bool
|
|
|
|
maybeToBool Nothing = False
|
|
|
|
maybeToBool (Just x) = x
|
|
|
|
|
|
|
|
filterMaybe :: (a -> Maybe Bool) -> [a] -> [a]
|
|
|
|
filterMaybe f = filter (maybeToBool . f)
|
|
|
|
|
|
|
|
domain :: FilePath -> Maybe Text
|
|
|
|
domain = extract . url where
|
|
|
|
extract [] = Nothing
|
|
|
|
extract (x:[]) = Nothing
|
|
|
|
extract (x:xs) = Just $ T.unwords (init xs)
|
|
|
|
url = T.splitOn "_" . T.pack . takeBaseName
|
|
|
|
|
|
|
|
|
2021-03-23 16:09:03 +01:00
|
|
|
-- | Deletes records from the local storage levelDB database
|
|
|
|
--
|
|
|
|
-- The schema consists of two (or more) records for each url:
|
|
|
|
--
|
|
|
|
-- "META:<url>" which stores metadata
|
|
|
|
-- "_<url>\NUL\SOH<key>" which stores the actual data
|
|
|
|
--
|
|
|
|
-- See https://source.chromium.org/chromium/chromium/src/+/master:components/services/storage/dom_storage/local_storage_impl.cc;l=51
|
|
|
|
--
|
|
|
|
deleteLocalStorage :: Action Result
|
|
|
|
deleteLocalStorage = do
|
|
|
|
webengine <- asks webenginePath
|
|
|
|
whitelist <- asks whitelist
|
|
|
|
let path = webengine </> "Local Storage" </> "leveldb"
|
|
|
|
|
2021-03-24 10:33:05 +01:00
|
|
|
dbIsOk <- liftIO $ D.doesFileExist (path </> "LOCK")
|
2021-05-10 17:12:44 +02:00
|
|
|
when (not dbIsOk) (throwError "database is missing or corrupted")
|
2021-03-24 10:33:05 +01:00
|
|
|
|
2021-03-23 16:09:03 +01:00
|
|
|
version <- withDB path (\db -> L.get db def "VERSION")
|
2021-05-10 17:12:44 +02:00
|
|
|
when (version /= Just "1") (throwError "database is empty or the schema unsupported")
|
2021-03-23 16:09:03 +01:00
|
|
|
|
2021-03-23 19:52:50 +01:00
|
|
|
withDB path $ \db -> do
|
|
|
|
badDomains <- L.withIterator db def $ \i ->
|
|
|
|
LS.keySlice i LS.AllKeys LS.Asc
|
|
|
|
& LS.filter (\k -> "META:" `B.isPrefixOf ` k
|
|
|
|
&& (metaDomain k) `notElem` whitelist )
|
|
|
|
& LS.mapM (\k -> L.delete db def k >> return (metaDomain k))
|
|
|
|
& LS.toList
|
|
|
|
|
|
|
|
n <- L.withIterator db def $ \i ->
|
|
|
|
LS.keySlice i LS.AllKeys LS.Asc
|
|
|
|
& LS.filter (\k -> "_" `B.isPrefixOf` k
|
|
|
|
&& "\NUL\SOH" `B.isInfixOf` k
|
|
|
|
&& (recDomain k) `notElem` whitelist)
|
|
|
|
& LS.mapM (L.delete db def)
|
|
|
|
& LS.length
|
|
|
|
|
|
|
|
return (n, badDomains)
|
2021-03-23 16:09:03 +01:00
|
|
|
where
|
|
|
|
-- extract domains from the keys
|
|
|
|
domain = snd . T.breakOnEnd "://" . decodeUtf8
|
|
|
|
metaDomain = domain . B.drop 5
|
|
|
|
recDomain = domain . head . B.split 0 . B.drop 1
|
|
|
|
|
|
|
|
|
2021-03-23 18:55:27 +01:00
|
|
|
-- | Deletes records from the session storage levelDB database
|
|
|
|
--
|
|
|
|
-- The schema consists of a map `url -> id` and records under `id`:
|
|
|
|
--
|
|
|
|
-- namespace-<session-uid>-<url> = <id>
|
|
|
|
-- map-<id>-<key> = <value>
|
|
|
|
--
|
|
|
|
-- See https://source.chromium.org/chromium/chromium/src/+/master:components/services/storage/dom_storage/session_storage_metadata.cc;l=21
|
|
|
|
--
|
|
|
|
deleteSessionStorage :: Action Result
|
|
|
|
deleteSessionStorage = do
|
|
|
|
webengine <- asks webenginePath
|
|
|
|
whitelist <- asks whitelist
|
|
|
|
let path = webengine </> "Session Storage"
|
|
|
|
|
2021-03-24 10:33:05 +01:00
|
|
|
dbIsOk <- liftIO $ D.doesFileExist (path </> "LOCK")
|
2021-05-10 17:12:44 +02:00
|
|
|
when (not dbIsOk) (throwError "database is missing or corrupted")
|
2021-03-24 10:33:05 +01:00
|
|
|
|
2021-03-23 18:55:27 +01:00
|
|
|
version <- withDB path (\db -> L.get db def "version")
|
2021-05-10 17:12:44 +02:00
|
|
|
when (version /= Just "1") (throwError "database is empty or the schema unsupported")
|
2021-03-23 18:55:27 +01:00
|
|
|
|
|
|
|
withDB path $ \db -> do
|
|
|
|
-- map of id -> isBad
|
|
|
|
badMap <- L.withIterator db def $ \i ->
|
|
|
|
LS.keySlice i LS.AllKeys LS.Asc
|
|
|
|
& LS.filter (B.isPrefixOf "namespace")
|
|
|
|
& LS.mapM (\k -> (,) <$> L.get db def k <*> pure (isBad whitelist k))
|
|
|
|
& LS.toList
|
|
|
|
|
|
|
|
-- delete the unlisted domains map
|
|
|
|
badDomains <- L.withIterator db def $ \i ->
|
|
|
|
LS.keySlice i LS.AllKeys LS.Asc
|
|
|
|
& LS.filter (B.isPrefixOf "namespace")
|
|
|
|
& LS.filter (isBad whitelist)
|
|
|
|
& LS.mapM (\k -> L.delete db def k >> return (domain k))
|
|
|
|
& LS.toList
|
|
|
|
|
|
|
|
-- and their records
|
|
|
|
n <- L.withIterator db def $ \i ->
|
|
|
|
LS.keySlice i LS.AllKeys LS.Asc
|
|
|
|
& LS.filter (B.isPrefixOf "map-")
|
|
|
|
& LS.mapM (\k ->
|
|
|
|
case lookup (originId k) badMap of
|
|
|
|
Just True -> L.delete db def k >> return 1
|
|
|
|
_ -> return 0)
|
|
|
|
& LS.sum
|
|
|
|
return (n, nub badDomains)
|
|
|
|
where
|
|
|
|
isBad whitelist = not . flip elem whitelist . domain
|
|
|
|
-- extract domain from keys (47 = length "namespace-<uid>-")
|
|
|
|
url = decodeUtf8 . B.drop 47
|
|
|
|
domain = (!! 2). T.splitOn "/" . url
|
|
|
|
-- extract id from key: drop "map-", take until "-" (ascii 45)
|
|
|
|
originId = Just . B.takeWhile (/= 45). B.drop 4
|
|
|
|
|
|
|
|
|
2021-03-23 16:09:03 +01:00
|
|
|
-- * Helper functions
|
|
|
|
|
|
|
|
-- | Loads a leveldb database and runs a resourceT action
|
|
|
|
--
|
|
|
|
-- withDB :: FilePath -> (L.DB -> ResourceT IO a) -> Action a
|
|
|
|
withDB path f = liftIO $ L.runResourceT (L.open path def >>= f)
|
|
|
|
|
2021-03-21 19:02:54 +01:00
|
|
|
-- | Loads the config from a file
|
2019-03-25 00:02:38 +01:00
|
|
|
loadSettings :: FilePath -> IO Settings
|
|
|
|
loadSettings path = do
|
2021-03-21 19:02:54 +01:00
|
|
|
configdir <- D.getXdgDirectory D.XdgConfig "qutebrowser"
|
|
|
|
datadir <- D.getXdgDirectory D.XdgData "qutebrowser"
|
2019-03-25 00:02:38 +01:00
|
|
|
let
|
|
|
|
defaultWhitelist = joinPath [configdir, "whitelists", "cookies"]
|
|
|
|
defaultWebengine = joinPath [datadir, "webengine"]
|
2018-09-21 21:34:31 +02:00
|
|
|
|
2021-03-21 19:02:54 +01:00
|
|
|
config <- C.load [C.Optional path]
|
|
|
|
whitelist <- C.lookupDefault defaultWhitelist config "whitelist-path"
|
|
|
|
webengine <- C.lookupDefault defaultWebengine config "webengine-path"
|
|
|
|
domains <- T.lines <$> T.readFile whitelist
|
2019-03-25 00:02:38 +01:00
|
|
|
|
2021-03-21 19:02:54 +01:00
|
|
|
return (Settings whitelist webengine domains)
|
2021-05-10 17:12:44 +02:00
|
|
|
|
|
|
|
-- | Catches any Selda error
|
|
|
|
dbErrors :: S.SeldaError -> Action a
|
|
|
|
dbErrors e = throwError ("database operation failed: " <> T.pack (show e))
|