Files
clownlib/generate_markdown.py

62 lines
2.0 KiB
Python

import os
import re
from data import Clownlib
# Matches every character that must be dropped from a heading before it can be
# used as an anchor: anything that is not whitespace, a dot, an ASCII letter
# (case-insensitive via the inline (?i) flag) or a digit.
# Inside a character class only the FIRST '^' negates; any further '^' is a
# literal caret, so the class lists each allowed range exactly once.
ANCHOR_SPECIAL = r'(?i)[^\s.a-z0-9]'
# Matches a run of dots and/or spaces, which is collapsed into a single dash.
ANCHOR_TO_DASH = r'[\. ]+'

# One bullet in the table of contents, linking a site name to its section.
MARKDOWN_CONTENTS_ENTRY = '- [{name}]({anchor})'
# Top-level heading followed by the table of contents.
MARKDOWN_CONTENTS_SECTION = '# Website Archives\n{contents}'
# Per-site section; its back link targets the "Website Archives" heading above.
MARKDOWN_SITE_SECTION = '# {name}\n[Back to Top](#website-archives)\n\n{table}'

# The generated document is written next to this script.
SAVE_PATH = os.path.join(os.path.dirname(__file__), 'website_archives.md')

# Building blocks of the two-column Markdown table emitted for each site.
TABLE_HEADER = '| Original URL | Archive |'
TABLE_SEPARATOR = '|--------------|---------|'
# Filled from a record's 'original' and 'archived' fields.
TABLE_RECORD = '| {original} | [Archive]({archived}) |'
def markdown_anchor(name):
    """Return a '#'-prefixed section anchor for ``name``, usable as a link target.

    Characters matched by ANCHOR_SPECIAL are removed, every run matched by
    ANCHOR_TO_DASH is replaced with a single dash, and the result is lowercased.
    """
    stripped = re.sub(ANCHOR_SPECIAL, '', name)
    dashed = re.sub(ANCHOR_TO_DASH, '-', stripped)
    return '#{}'.format(dashed.lower())
def markdown_contents_entry(name):
    """Return one table-of-contents bullet that links ``name`` to its section."""
    link_target = markdown_anchor(name)
    return MARKDOWN_CONTENTS_ENTRY.format(anchor=link_target, name=name)
def markdown_site_entry(name, records):
    """Return the Markdown section for one group/site.

    ``records`` is an iterable of mappings; they are ordered by their
    'original' value before being rendered as a table under the ``name``
    heading.
    """
    ordered = list(records)
    ordered.sort(key=lambda entry: entry['original'])
    return MARKDOWN_SITE_SECTION.format(table=markdown_table(ordered), name=name)
def markdown_table(records):
    """Return a Markdown table with one row per record.

    Each record is unpacked into TABLE_RECORD, so it must supply the
    'original' and 'archived' fields that template refers to.
    """
    rows = [TABLE_RECORD.format(**entry) for entry in records]
    return '\n'.join([TABLE_HEADER, TABLE_SEPARATOR] + rows)
def main():
    """Generate the website-archives Markdown document and write it to SAVE_PATH.

    Builds a table of contents plus one section per group in the Clownlib
    data (groups processed in sorted name order), joins them with newlines
    and overwrites the file at SAVE_PATH.
    """
    clown = Clownlib()

    # Generate the table-of-contents entry and the section for every group.
    contents = []
    sections = []
    for name in sorted(clown.groups):
        contents.append(markdown_contents_entry(name))
        sections.append(markdown_site_entry(name, clown[name]))

    # Contents block first, then each site section, one per line group.
    parts = [MARKDOWN_CONTENTS_SECTION.format(contents='\n'.join(contents))]
    parts += sections
    output = '\n'.join(parts)

    # Write-only mode is enough (the file is never read back), and an explicit
    # UTF-8 encoding keeps the output identical across platforms instead of
    # depending on the locale's default, which may reject non-ASCII names/URLs.
    with open(SAVE_PATH, 'w', encoding='utf-8') as md_file:
        md_file.write(output)
    print('Successfully wrote updated Markdown to file {}'.format(SAVE_PATH))
# Run the generator only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()