#!/usr/bin/env python
# This is free and unencumbered software released into the public domain.
'''extract and process donation data from Whiteleaf logs.

This only accounts for donations made through the WL system.
'''

import sys
# checked before the remaining imports: `dataclasses` only exists on 3.7+
if sys.version_info < (3, 7):
    sys.exit("Python 3.7 or higher is required")

from typing import *

import argparse
from collections import defaultdict
from decimal import Decimal
from datetime import datetime as DateTime
from dataclasses import dataclass
import os
from pathlib import Path
import re
import shelve


# a sum in US dollars
Amount = Decimal


@dataclass(frozen=True)
class Donation:
    '''common part of every donation event found in a log'''
    paypig: str     # name of the donor, as captured from the log line
    date: DateTime  # timestamp captured from the log line


@dataclass(frozen=True)
class SimpleDonation(Donation):
    '''a one-off donation of an explicit amount'''
    amount: Amount


@dataclass(frozen=True)
class Subscription(Donation):
    '''a subscription; its amount is derived from the tier'''
    tier: int

    # tier number -> price in US dollars
    # reference: http://xanderhal.com/subscribe
    # (left unannotated on purpose so that it is a plain class attribute
    # and not a dataclass field)
    TIERS = {
        1: 5,
        2: 10,
        3: 20,
        4: 40,
    }

    @property
    def amount(self) -> Amount:
        '''price of this subscription's tier.

        Raises KeyError when the tier is not listed in TIERS.
        '''
        # converted so that the value really is an Amount, as annotated
        return Amount(Subscription.TIERS[self.tier])


@dataclass(frozen=True)
class GiftedSubscription(Subscription):
    '''a subscription bought by `paypig` for somebody else'''
    victim: str  # name of the recipient, as captured from the log line


# One pattern per kind of broadcast line.  parse_donations() tells the
# kinds apart by which named groups the matching pattern defines, so the
# group names must stay in sync with the keys it reads.
PATTERNS = [re.compile(r) for r in (
    r'\[(?P<date>.*?)\] Broadcast: (?P<paypig>\w+) has donated \$(?P<amount>.+?)!',
    r'\[(?P<date>.*?)\] Broadcast: (?P<paypig>\w+) got a Tier (?P<tier>\d)',
    r'\[(?P<date>.*?)\] Broadcast: (?P<paypig>\w+) gave (?P<victim>\w+) a Tier (?P<tier>\d)',
)]


def parse_donations(lines: Iterable[str]) -> Iterable[Donation]:
    '''yield a Donation for every line that matches one of PATTERNS.

    Lines that match no pattern are skipped silently.  A matching line
    whose date or amount cannot be parsed raises the underlying error
    from DateTime.strptime or Decimal.
    '''
    for line in lines:
        for pattern in PATTERNS:
            m = pattern.match(line)
            if m:
                break
        else:
            # not a donation line
            continue

        # only the groups defined by the pattern that matched are present
        x = m.groupdict()
        date = DateTime.strptime(x['date'], '%Y-%m-%d %H:%M:%S %Z')

        if 'victim' in x:
            yield GiftedSubscription(
                date=date,
                paypig=x['paypig'],
                tier=int(x['tier']),
                victim=x['victim'],
            )
        elif 'tier' in x:
            yield Subscription(
                date=date,
                paypig=x['paypig'],
                tier=int(x['tier']),
            )
        else:
            yield SimpleDonation(
                date=date,
                paypig=x['paypig'],
                # drop thousands separators before converting
                amount=Decimal(x['amount'].replace(',', '')),
            )


def all_source_files():
    '''return the *.txt files of the current directory'''
    return Path().glob('*.txt')


CACHE_FILE = Path('.cache.db')


def load_data() -> Dict[str, List[Donation]]:
    '''load all the parsed data, using the cache if possible.

    Returns a dict mapping each source file name to the donations parsed
    from it.  A file is parsed again when it is missing from the cache or
    when it was modified at or after the cache file's modification time.
    '''
    # list the directory once so that both passes below see the same files
    sources = list(all_source_files())

    with shelve.open(os.fspath(CACHE_FILE), protocol=4) as cache:
        try:
            fresh_time = CACHE_FILE.stat().st_mtime
        except FileNotFoundError:
            # The dbm backend behind shelve may store the shelf under a
            # suffixed file name, in which case CACHE_FILE itself does
            # not exist.  Treat every source as stale instead of crashing.
            fresh_time = float('-inf')

        for f in sources:
            if f.name not in cache or f.stat().st_mtime >= fresh_time:
                # close the log file as soon as it has been parsed
                with f.open() as lines:
                    cache[f.name] = list(parse_donations(lines))

        data = {f.name: cache[f.name] for f in sources}

    return data


def grand(args):
    '''print grand total'''
    data = load_data()
    print(sum(d.amount for ds in data.values() for d in ds))


def files(args):
    '''print total per file'''
    data = load_data()
    for (f, ds) in sorted(data.items()):
        print(f, sum(d.amount for d in ds))


def paypigs(args):
    '''print total per paypig'''
    sums = defaultdict(Amount)  # Amount() is a zero Decimal
    data = load_data()
    for ds in data.values():
        for d in ds:
            sums[d.paypig] += d.amount

    # smallest total first
    for paypig, amount in sorted(sums.items(), key=lambda i: i[1]):
        print(paypig, amount)


top = argparse.ArgumentParser()
commands = top.add_subparsers()

argparser_grand = commands.add_parser("grand", help="compute grand total of donations")
argparser_grand.set_defaults(func=grand)

argparser_files = commands.add_parser("files", help="tabulate sum for each logfile")
argparser_files.set_defaults(func=files)

argparser_paypigs = commands.add_parser("paypigs", help="tabulate sum for each paypig")
argparser_paypigs.set_defaults(func=paypigs)


def main(argv: Optional[Sequence[str]] = None) -> None:
    '''parse the command line (sys.argv when argv is None) and dispatch.

    Prints the help text and exits with status 2 when no subcommand
    was given.
    '''
    args = top.parse_args(argv)
    try:
        func = args.func
    except AttributeError:
        # no subcommand selected, so no `func` default was set
        top.print_help()
        sys.exit(2)
    else:
        func(args)


# only act as a command-line tool when executed, not when imported
if __name__ == '__main__':
    main()