[Got this tool working properly Dino Morelli **20160319202437 Ignore-this: 86ab9fbfbb6698cec0c7d9ec157b66b3 Have now used it successfully on test data to correct the dates of all inspections and also correct bad URLs. ] hunk ./src/ks-datefix.hs 10 ---import Control.Monad ( forM ) +import Control.Arrow ( (***) ) hunk ./src/ks-datefix.hs 12 ---import Control.Monad.Trans ( liftIO ) ---import Data.Aeson ( decodeStrict ) ---import Data.Aeson.Bson ( toBson ) hunk ./src/ks-datefix.hs 13 -import Data.Bson.Generic ( fromBSON ) ---import qualified Data.ByteString.Char8 as B +import Data.Bson.Generic ( fromBSON, toBSON ) hunk ./src/ks-datefix.hs 18 ---import Data.Time.Format ( defaultTimeLocale ) hunk ./src/ks-datefix.hs 20 ---import KS.Data.Common ( utcTimeToEpoch ) hunk ./src/ks-datefix.hs 26 ---import System.Exit ( ExitCode (..), exitFailure, exitSuccess, exitWith ) hunk ./src/ks-datefix.hs 35 ---import KS.Database.Mongo.Util ( parseLastError ) - - ---coll_regional_data :: Collection - ---coll_regional_data = "regional_data" ---coll_regional_data = "test_regional_data" -- For development hunk ./src/ks-datefix.hs 39 +--coll_inspections = "inspections" +--coll_inspections = "recent_inspections" hunk ./src/ks-datefix.hs 68 -type Fix a = (StateT D.Document IO) a - -runFix :: D.Document -> Fix a -> IO a -runFix st ev = evalStateT ev st +type Fix a = (StateT (BSON.Document, D.Document) IO) a + +runFix :: (BSON.Document, D.Document) -> Fix (BSON.Document, D.Document) + -> IO (BSON.Document, D.Document) +runFix st ev = execStateT ev st hunk ./src/ks-datefix.hs 83 - --let ds = (map (fromJust . fromBSON) $ take 50 rs) :: [D.Document] - let ds = (map (fromJust . fromBSON) rs) :: [D.Document] - --print ds - mapM_ updateRecord ds - {- - _ <- forM ds $ (\d -> do - --let placeID = P.place_id . D.place $ d - let detail = I.detail . D.inspection $ d - - let ut = posixSecondsToUTCTime . realToFrac . I.date . D.inspection $ d - date <- formatTime defaultTimeLocale "%FT%T%z" <$> utcToLocalZonedTime ut - - --printf "%s %s\n" (T.unpack placeID) date - --printf "%s %s\n" date detail - printf "%s%s\n" date detail detail + -- Take the _id apart and deserialize the rest into a KS Document + let ts = map (\r -> (id *** (fromJust . fromBSON)) $ separateId r) rs + + _ <- forM ts $ (\t -> do + (_id, newDoc) <- updateRecord t + when (snd t /= newDoc) $ do + liftIO $ putStrLn " Document changed, updating" + + -- write to mongo now + let newBSON = combineId (_id, toBSON newDoc) + access pipe slaveOk (MC.database mc) $ + save coll_inspections newBSON hunk ./src/ks-datefix.hs 96 - -} - hunk ./src/ks-datefix.hs 100 -{- -updateRecord :: D.Document -> IO () -updateRecord oldDoc = runFix oldDoc $ do +updateRecord :: (BSON.Document, D.Document) -> IO (BSON.Document, D.Document) +updateRecord oldDocT = runFix oldDocT $ do hunk ./src/ks-datefix.hs 105 - saveInspection oldDoc --} ---updateRecord :: D.Document -> IO () -updateRecord oldDoc = runFix oldDoc $ do - display - fixHost - fixDate - saveInspection oldDoc + return =<< get + + +updateSnd :: D.Document -> Fix () +updateSnd d = do + (i, _) <- get + put (i, d) hunk ./src/ks-datefix.hs 116 - doc <- get + doc <- gets snd hunk ./src/ks-datefix.hs 119 + let detail = I.detail . D.inspection $ doc hunk ./src/ks-datefix.hs 122 - printf "----------\nInspection %s %s\n" (T.unpack placeID) date + printf "----------\nInspection %s %s\n %s\n" (T.unpack placeID) date detail hunk ./src/ks-datefix.hs 127 - oldDoc <- get + oldDoc <- gets snd hunk ./src/ks-datefix.hs 136 - put newDoc + updateSnd newDoc hunk ./src/ks-datefix.hs 144 - oldDoc <- get + oldDoc <- gets snd hunk ./src/ks-datefix.hs 151 - when (oldHour /= 0) $ do + when (oldHour /= 0 && oldHour /= 1) $ do hunk ./src/ks-datefix.hs 160 - put newDoc + updateSnd newDoc hunk ./src/ks-datefix.hs 167 -saveInspection :: D.Document -> Fix () -saveInspection oldDoc = do - newDoc <- get - when (oldDoc /= newDoc) $ do - liftIO $ putStrLn " Document changed, updating" - -- write to mongo now - - -{- - -- Get the stats for all regions in recent_inspections - computedStats <- access pipe slaveOk (MC.database mc) $ aggregate - "recent_inspections" [mkStatsQuery] - - -- Get the date right now - now <- utcTimeToEpoch <$> getCurrentTime - - -- Construct the regional_stats documents - newDocs <- mapM (mkRegionalStats now confDir) computedStats - - -- Report what we're about to do - infoM lname $ printf "Inserting these stats into the %s collection:" - (T.unpack coll_regional_data) - mapM_ (infoM lname . show) newDocs - - -- Upsert them into the regional_data collection - statsResults <- liftIO $ mapM (updateStatsDocument mc pipe) newDocs - - -- If this is the first of the month, - -- insert the documents into regional_data_history as well - -- historyResults <- ... - - return $ all (== True) statsResults --} - - -{- -updateStatsDocument :: MC.MongoConfig -> Pipe -> Document -> IO Bool -updateStatsDocument mc pipe doc = do - result <- access pipe slaveOk (MC.database mc) $ do - upsert (select [ "source" =: (("source" `at` doc) :: T.Text) ] - coll_regional_data) doc - parseLastError <$> runCommand [ "getLastError" =: (1::Int) ] - - either - (\e -> errorM lname e >> return False) - (\m -> noticeM lname m >> return True) - result - - -mkRegionalStats :: Int -> FilePath -> Document -> IO Document -mkRegionalStats now confDir stats = do - let source = "_id" `at` stats - sourceConfig <- loadConfig confDir source - let (county : state : _) = - fromJust - . matchRegex (mkRegex "^(.+) County, (.+)$") - . T.unpack - $ displayName sourceConfig - - return $ - [ "source" =: source - , "doctype" =: ("regional_stats" :: T.Text) - , "date" =: now - , "location" =: centroid sourceConfig - , "display_name" =: displayName sourceConfig - , "state" =: state - , "county" =: county - , "count_total" =: (("count_total" `at` stats) :: Int) - , "count_a1" =: (("count_a1" `at` stats) :: Int) - , "count_a2" =: (("count_a2" `at` stats) :: Int) - , "count_a3" =: (("count_a3" `at` stats) :: Int) - , "count_a4" =: (("count_a4" `at` stats) :: Int) - , "count_b" =: (("count_b" `at` stats) :: Int) - , "count_c" =: (("count_c" `at` stats) :: Int) - , "min_score" =: (("min_score" `at` stats) :: Float) - , "max_score" =: (("max_score" `at` stats) :: Float) - , "avg_score" =: (("avg_score" `at` stats) :: Float) - ] - - -mkStatsQuery :: Document -mkStatsQuery = toBson . fromJust . decodeStrict . B.pack . unlines $ - [ " { \"$group\":" - , " { \"_id\": \"$inspection.inspection_source\"" - , " , \"min_score\": { \"$min\": \"$inspection.score\" }" - , " , \"max_score\": { \"$max\": \"$inspection.score\" }" - , " , \"avg_score\": { \"$avg\": \"$inspection.score\" }" - , " , \"count_total\": { \"$sum\": 1 }" - , " , \"count_a4\": { \"$sum\": { \"$cond\":" - , " [ { \"$gte\": [\"$inspection.score\", 97.5] }" - , " , 1" - , " , 0" - , " ] } }" - , " , \"count_a3\": { \"$sum\": { \"$cond\":" - , " [ { \"$and\":" - , " [ { \"$gte\": [\"$inspection.score\", 95.0] }" - , " , { \"$lt\": [\"$inspection.score\", 97.5] }" - , " ] }" - , " , 1" - , " , 0" - , " ] } }" - , " , \"count_a2\": { \"$sum\": { \"$cond\":" - , " [ { \"$and\":" - , " [ { \"$gte\": [\"$inspection.score\", 92.5] }" - , " , { \"$lt\": [\"$inspection.score\", 95.0] }" - , " ] }" - , " , 1" - , " , 0" - , " ] } }" - , " , \"count_a1\": { \"$sum\": { \"$cond\":" - , " [ { \"$and\":" - , " [ { \"$gte\": [\"$inspection.score\", 90.0] }" - , " , { \"$lt\": [\"$inspection.score\", 92.5] }" - , " ] }" - , " , 1" - , " , 0" - , " ] } }" - , " , \"count_b\": { \"$sum\": { \"$cond\":" - , " [ { \"$and\":" - , " [ { \"$gte\": [\"$inspection.score\", 80.0] }" - , " , { \"$lt\": [\"$inspection.score\", 90.0] }" - , " ] }" - , " , 1" - , " , 0" - , " ] } }" - , " , \"count_c\": { \"$sum\": { \"$cond\":" - , " [ { \"$lt\": [\"$inspection.score\", 80.0] }" - , " , 1" - , " , 0" - , " ] } }" - , " }" - , " }" - ] --} - -