implement cache

This commit is contained in:
2020-07-12 08:25:11 +00:00
parent 90de05182a
commit 689a9429cd
3 changed files with 27 additions and 6 deletions
+1
View File
@@ -0,0 +1 @@
.cache.db
+4
View File
@@ -38,3 +38,7 @@ Updated data should be regularly saved to the version control system.
The script `analyze.py` can be used to locally process the data. It can be run
with Python 3.7 or higher. If called with no arguments, it will list all
available operating modes.
**Due to the size of the dataset,** the first invocation of `analyze.py` might
take a minute or two to complete. All subsequent invocations should be nearly
instantaneous thanks to the cache.
+22 -6
View File
@@ -18,8 +18,10 @@ from collections import defaultdict
from decimal import Decimal
from datetime import datetime as DateTime
from dataclasses import dataclass
import os
from pathlib import Path
import re
import shelve
# a sum in US dollars; an alias for Decimal, the type parse_donations
# builds each donation amount with
Amount = Decimal
@@ -90,23 +92,37 @@ def parse_donations(lines: Iterable[str]) -> Iterable[Donation]:
amount=Decimal(x['amount'].replace(',', '')),
)
def all_files():
def all_source_files():
    '''iterate over every `*.txt` file directly inside the current directory'''
    here = Path()
    return here.glob('*.txt')
# on-disk shelve database caching parsed donations, keyed by source file name
CACHE_FILE = Path('.cache.db')
def load_data() -> Dict[str, List[Donation]]:
    '''load all the parsed data, using the cache if possible

    Returns a dict mapping each source file's name to the list of donations
    parsed from it. A source file is (re-)parsed when it has no cache entry
    yet, or when its mtime is not older than the mtime the cache file had
    when it was opened; otherwise the cached list is reused.
    '''
    with shelve.open(os.fspath(CACHE_FILE), protocol=4) as cache:
        try:
            fresh_time = CACHE_FILE.stat().st_mtime
        except FileNotFoundError:
            # shelve/dbm backends may append an extension to the file name,
            # so CACHE_FILE itself need not exist; with no timestamp to
            # compare against, treat every cached entry as stale
            fresh_time = float('-inf')
        # list the directory once so the refresh pass and the result are
        # built from exactly the same set of files
        sources = list(all_source_files())
        for f in sources:
            if f.name not in cache or f.stat().st_mtime >= fresh_time:
                # close the handle as soon as parsing is done
                with f.open() as lines:
                    cache[f.name] = list(parse_donations(lines))
        # must be read while the shelf is still open
        data = {f.name: cache[f.name] for f in sources}
    return data
def grand(args):
    '''print the sum of all donations across every source file'''
    donations_by_file = load_data()
    total = sum(
        donation.amount
        for donations in donations_by_file.values()
        for donation in donations
    )
    print(total)
def files(args):
    '''print each source file's name followed by its donation total'''
    per_file = load_data()
    # file names are unique dict keys, so ordering by name alone matches
    # ordering the (name, donations) pairs
    for name in sorted(per_file):
        subtotal = sum(donation.amount for donation in per_file[name])
        print(name, subtotal)
def paypigs(args):
    '''print each paypig's donation total, smallest total first'''
    totals = defaultdict(Amount)
    for donations in load_data().values():
        for donation in donations:
            totals[donation.paypig] += donation.amount
    ranked = sorted(totals.items(), key=lambda entry: entry[1])
    for name, total in ranked:
        print(name, total)