# bookshelf-generator/main.py

import argparse
import os
import shutil
import subprocess

import jinja2
import requests
import yaml


# LIST_URL = "https://bookwyrm.social/list/2458/s/notplants-2023"
# LIST_URL = "https://bookwyrm.social/list/2404/s/2024"

# directory that contains this script
PROJECT_PATH = os.path.abspath(os.path.dirname(__file__))

# output dir and input static folder, both located next to this script
OUTPUT_DIR = os.path.join(PROJECT_PATH, 'dist')
STATIC_DIR = os.path.join(PROJECT_PATH, 'static')

# create whichever of the two directories is missing when the module loads
for _required_dir in (OUTPUT_DIR, STATIC_DIR):
    if not os.path.exists(_required_dir):
        os.makedirs(_required_dir)


def build_site(output_dir, images_dir, relative_template_path, template_vars):
    """Copy the cover images into the site and render the bookshelf page.

    ``images_dir`` is copied to ``<output_dir>/static`` (replacing anything
    that was there before), then the template is rendered with
    ``template_vars`` and written to ``<output_dir>/bookshelf.html``.

    Args:
        output_dir: directory the generated site is written to.
        images_dir: directory containing the cover images to publish.
        relative_template_path: template path, looked up relative to
            PROJECT_PATH.
        template_vars: mapping of variables passed to the template.

    Raises:
        OSError: if ``images_dir`` cannot be copied (for example because it
            does not exist) or the page cannot be written.
    """
    # copy over static files; shutil is used instead of shelling out to
    # `rm`/`cp` so that paths containing spaces or shell metacharacters are
    # handled correctly and a failed copy is reported instead of ignored
    output_static_dir = os.path.join(output_dir, 'static')
    if os.path.isdir(output_static_dir) and not os.path.islink(output_static_dir):
        shutil.rmtree(output_static_dir)
    elif os.path.lexists(output_static_dir):
        # a plain file or a symlink is in the way: remove just that entry
        os.remove(output_static_dir)
    print('copying static to {}'.format(output_static_dir))
    shutil.copytree(images_dir, output_static_dir)

    # render html
    template_loader = jinja2.FileSystemLoader(searchpath=PROJECT_PATH)
    template_env = jinja2.Environment(loader=template_loader)

    page_template = template_env.get_template(relative_template_path)
    page_text = page_template.render(template_vars)

    # explicit encoding so titles with non-ASCII characters can be written
    # regardless of the platform's default locale
    output_page_path = os.path.join(output_dir, "bookshelf.html")
    with open(output_page_path, 'w', encoding='utf-8') as output_file:
        output_file.write(page_text)


def fetch_list_page(list_url, page, timeout=30):
    """Fetch one page of a bookwyrm list as parsed JSON.

    Args:
        list_url: URL of the list.
        page: page number, sent as the ``page`` query parameter.
        timeout: seconds to wait for the server before giving up.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.RequestException: on connection problems, a timeout, or an
            HTTP error status.
        ValueError: if the response body is not valid JSON.
    """
    headers = {"accept": "application/ld+json"}
    # let requests build the query string, so a list_url that already has
    # query parameters is extended instead of getting a second "?"
    result = requests.get(
        list_url,
        params={"page": page},
        headers=headers,
        timeout=timeout,
    )
    # fail loudly on 4xx/5xx rather than trying to parse an error page
    result.raise_for_status()
    return result.json()


def get_apub_object(object_url, timeout=30):
    """Fetch the object at ``object_url`` as parsed JSON.

    The request asks for ``application/ld+json``.

    Args:
        object_url: URL of the object to fetch.
        timeout: seconds to wait for the server before giving up; without a
            timeout an unresponsive server would block the program forever.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.RequestException: on connection problems or a timeout.
        ValueError: if the response body is not valid JSON.
    """
    headers = {"accept": "application/ld+json"}
    result = requests.get(object_url, headers=headers, timeout=timeout)
    return result.json()


def fetch_list(list_url):
    """Collect the items from every page of a bookwyrm list.

    Pages are requested starting at 1 and fetching continues for as long as
    a page contains "orderedItems" and a "next" field. Fetching is
    best-effort: when a page cannot be retrieved or parsed, a message is
    printed and the items collected so far are returned.

    Args:
        list_url: URL of the list.

    Returns:
        A list with the "orderedItems" entries of all fetched pages.
    """
    all_items = []
    page = 1
    while True:
        try:
            results = fetch_list_page(list_url=list_url, page=page)
        except (requests.RequestException, ValueError) as error:
            # network failure or a body that is not JSON: keep what we have,
            # but say so instead of silently returning a partial list
            print("stopped fetching {} at page {}: {}".format(list_url, page, error))
            break

        if not isinstance(results, dict):
            break
        items = results.get("orderedItems")
        if not items:
            break
        all_items.extend(items)

        if not results.get("next"):
            break
        # TODO: alternatively, this could perhaps be re-written more generally
        # to use this next field to direct the pagination instead of an increment
        page += 1
    return all_items


def _format_title(book):
    """Return the book's title, with ": <subtitle>" appended when present."""
    base_title = book.get("title")
    subtitle = book.get("subtitle")
    if subtitle:
        return "{}: {}".format(base_title, subtitle)
    return base_title


def _cover_image_filename(book):
    """Return the file name under which the book's cover image is stored."""
    image_title = book.get("title") or "untitled"
    image_title = image_title.replace(" ", "-").replace(":", "")
    # titles are remote data: a path separator would make the name point
    # into a different (probably non-existent) directory
    image_title = image_title.replace("/", "-").replace("\\", "-")
    return "{}.jpg".format(image_title)


def process_list(list_url):
    """Fetch a bookwyrm list and reduce every book to the fields we display.

    Each processed book is also printed as a progress line.

    Args:
        list_url: URL of the list.

    Returns:
        A list of dicts with the keys "title", "author", "published_year",
        "cover_url" and "cover_image_path". "author", "published_year" and
        "cover_url" are None when the book does not provide them.

    Raises:
        Whatever get_apub_object raises when an author cannot be fetched.
    """
    list_items = fetch_list(list_url)
    # author url -> name, so an author shared by several books is fetched once
    author_names = {}
    processed_items = []
    for index, book in enumerate(list_items):
        title = _format_title(book)

        # only the first listed author is used; books without authors are kept
        authors = book.get("authors") or []
        author_name = None
        if authors:
            first_author = authors[0]
            if first_author not in author_names:
                author_names[first_author] = get_apub_object(first_author).get("name")
            author_name = author_names[first_author]

        # the year is taken as the part of the date before the first "-"
        published_date = book.get("publishedDate")
        published_year = published_date.split("-")[0] if published_date else None

        # books without a cover have no "cover" object to read the url from
        cover = book.get("cover") or {}
        cover_url = cover.get("url")

        # print
        to_print = "{}. {} by {}".format(index, title, author_name)
        if published_year:
            to_print += " ({})".format(published_year)
        print(to_print)
        print("cover: {}".format(cover_url))

        processed_items.append({
            "title": title,
            "author": author_name,
            "published_year": published_year,
            "cover_url": cover_url,
            "cover_image_path": _cover_image_filename(book),
        })
    return processed_items


def download_images(processed_items, output_dir):
    """Download the cover image of every item into ``output_dir`` with wget.

    Items without a "cover_url" or "cover_image_path" are skipped. Downloads
    are best-effort: a failing download prints a message and the remaining
    items are still processed.

    Args:
        processed_items: iterable of dicts as produced by process_list.
        output_dir: directory the images are saved to.
    """
    for item in processed_items:
        image_url = item.get("cover_url")
        image_name = item.get("cover_image_path")
        if not image_url or not image_name:
            continue
        image_path = os.path.join(output_dir, image_name)
        print("++ downloading {}".format(image_url))
        # The url comes from a remote server, so it is passed as a separate
        # argument without a shell: characters such as "&" or ";" stay part
        # of the url, and "--" keeps a url starting with "-" from being read
        # as an option.
        try:
            completed = subprocess.run(["wget", "-O", image_path, "--", image_url])
        except OSError as error:
            print("!! could not run wget: {}".format(error))
            continue
        if completed.returncode != 0:
            print("!! wget exited with status {} for {}".format(
                completed.returncode, image_url))


def write_yaml(items, output_dir):
    """Serialize ``items`` to YAML and save them as ``books.yaml`` in ``output_dir``."""
    # serialize first, so the file is only opened once the dump has succeeded
    serialized = yaml.dump(items, Dumper=yaml.Dumper)
    destination = os.path.join(output_dir, "books.yaml")
    with open(destination, 'w') as yaml_file:
        yaml_file.write(serialized)


def load_yaml(yaml_path):
    """Load the books from a YAML file as variables for the page template.

    Args:
        yaml_path: path of the YAML file to read.

    Returns:
        A dict with the single key "books" holding the parsed document. An
        empty file yields an empty list, so the result can always be
        iterated.
    """
    with open(yaml_path) as f:
        contents = f.read()
    # NOTE(review): yaml.Loader can construct arbitrary Python objects. That
    # is fine for files written by write_yaml, but a file from an untrusted
    # source should be read with yaml.safe_load instead.
    books = yaml.load(contents, Loader=yaml.Loader)
    if books is None:
        # an empty document parses to None
        books = []
    return {
        "books": books
    }


def main():
    """Parse the command line and run the requested sub-command.

    ``bookwyrm-download`` fetches a bookwyrm list, downloads the cover images
    and writes ``books.yaml``; ``generate-html`` renders the bookshelf page
    from such a YAML file. Invoking the program without a sub-command prints
    a usage error instead of failing on a missing argument attribute.
    """
    # create parser
    parser = argparse.ArgumentParser(
        prog='bookshelf-generator',
        description='generates HTML for a web-page to display a bookshelf using bookwyrm or a csv as input',
        epilog='<3')

    # one subparser per sub-command
    subparsers = parser.add_subparsers(dest="subparser")
    build_parser = subparsers.add_parser('generate-html')
    bookwyrm_parser = subparsers.add_parser('bookwyrm-download')

    # bookwyrm-download subparser
    bookwyrm_parser.add_argument(
        'list_url',
        help='bookwyrm list url to use as a source input')
    bookwyrm_parser.add_argument(
        'output_dir',
        help='path where images should be downloaded and books.yaml should be written')

    # generate-html subparser
    build_parser.add_argument(
        'images_dir',
        help='path to folder containing cover images')
    build_parser.add_argument(
        'yaml_path',
        help='yaml path to use as input')
    # the template is needed to render anything, so ask for it up front
    # rather than failing later inside the template loader
    build_parser.add_argument(
        '-t', '--template',
        required=True,
        help='path to jinja file to use as a template for the html, '
             'relative to the directory containing this script')

    # parse args
    args = parser.parse_args()

    if args.subparser == "bookwyrm-download":
        print(args.list_url)
        print(args.output_dir)

        os.makedirs(args.output_dir, exist_ok=True)

        processed_items = process_list(args.list_url)
        download_images(processed_items, output_dir=args.output_dir)
        write_yaml(processed_items, output_dir=args.output_dir)

    elif args.subparser == "generate-html":
        template_vars = load_yaml(args.yaml_path)
        build_site(
            output_dir=OUTPUT_DIR,
            images_dir=args.images_dir,
            relative_template_path=args.template,
            template_vars=template_vars,
        )

    else:
        # no sub-command was given
        parser.error("a sub-command is required: generate-html or bookwyrm-download")


if __name__ == '__main__':
    main()