import argparse
import os

import jinja2
import requests
import yaml

# LIST_URL = "https://bookwyrm.social/list/2458/s/notplants-2023"
# LIST_URL = "https://bookwyrm.social/list/2404/s/2024"

PROJECT_PATH = os.path.abspath(os.path.dirname(__file__))

# output dirs
OUTPUT_DIR = os.path.join(PROJECT_PATH, 'dist')
if not os.path.exists(OUTPUT_DIR):
    os.makedirs(OUTPUT_DIR)

# input static folder
STATIC_DIR = os.path.join(PROJECT_PATH, 'static')
if not os.path.exists(STATIC_DIR):
    os.makedirs(STATIC_DIR)


def build_site(output_dir, images_dir, relative_template_path, template_vars):
    # copy over static files
    input_static_dir = images_dir
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    output_static_dir = os.path.join(output_dir, 'static')
    if os.path.exists(output_static_dir):
        os.system('rm -r {}'.format(output_static_dir))
    print('copying static to {}'.format(output_static_dir))
    os.system('cp -r {} {}'.format(input_static_dir, output_static_dir))

    # render html
    template_loader = jinja2.FileSystemLoader(searchpath=PROJECT_PATH)
    template_env = jinja2.Environment(loader=template_loader)
    page_template = template_env.get_template(relative_template_path)
    page_text = page_template.render(template_vars)
    output_page_path = os.path.join(output_dir, "bookshelf.html")
    with open(output_page_path, 'w') as output_file:
        output_file.write(page_text)


def fetch_list_page(list_url, page):
    headers = {"accept": "application/ld+json"}
    url = list_url + "?page={}".format(page)
    result = requests.get(url, headers=headers)
    return result.json()


def get_apub_object(object_url):
    headers = {"accept": "application/ld+json"}
    url = object_url
    result = requests.get(url, headers=headers)
    return result.json()


def fetch_list(list_url):
    more_results = True
    page = 1
    all_items = []
    while more_results:
        try:
            results = fetch_list_page(list_url=list_url, page=page)
            items = results.get("orderedItems")
            if items:
                all_items += items
                next = results.get("next")
                if next:
                    # TODO: alternatively, this could perhaps be re-written more generally
                    # to use this next field to direct the pagination instead of an increment
                    page += 1
                if not next:
                    more_results = False
            else:
                more_results = False
        except Exception:
            more_results = False
    return all_items


def process_list(list_url):
    list_items = fetch_list(list_url)
    processed_items = []
    for index, book in enumerate(list_items):
        base_title = book.get("title")
        subtitle = book.get("subtitle")
        if subtitle:
            title = "{}: {}".format(base_title, subtitle)
        else:
            title = base_title
        authors = book.get("authors")
        first_author = authors[0]
        author = get_apub_object(first_author)
        published_date = book.get("publishedDate")
        cover_url = book.get("cover").get("url")
        if published_date:
            published_year = published_date.split("-")[0]
        else:
            published_year = None

        # print
        to_print = "{}. {} by {}".format(index, title, author.get("name"))
        if published_year:
            to_print += " ({})".format(published_year)
        print(to_print)
        print("cover: {}".format(cover_url))

        image_title = book.get("title")
        image_title = ''.join(c for c in image_title if (c.isalnum() or c == " "))
        image_title = image_title.replace(" ", "-")
        image_title = image_title.replace(":", "")
        image_path = "{}.jpg".format(image_title)

        # return item
        item = {
            "title": title,
            "author": author.get("name"),
            "published_year": published_year,
            "cover_url": cover_url,
            "cover_image_path": image_path
        }
        processed_items.append(item)
    return processed_items


def download_images(processed_items, output_dir):
    for item in processed_items:
        image_url = item.get("cover_url")
        image_path = os.path.join(output_dir, item.get("cover_image_path"))
        if image_url:
            print("++ downloading {}".format(image_url))
            os.system('wget "{}" -O "{}"'.format(image_url, image_path))


def write_yaml(items, output_dir):
    yaml_path = os.path.join(output_dir, "books.yaml")
    output = yaml.dump(items, Dumper=yaml.Dumper)
    with open(yaml_path, 'w') as f:
        f.write(output)


def load_yaml(yaml_path):
    with open(yaml_path) as f:
        contents = f.read()
    books = yaml.load(contents, Loader=yaml.Loader)
    return {
        "books": books
    }


if __name__ == '__main__':
    # create parser
    parser = argparse.ArgumentParser(
        prog='bookshelf-generator',
        description='generates HTML for a web page to display a bookshelf using bookwyrm or a yaml as input',
        epilog='<3')

    # subparser for building website
    subparsers = parser.add_subparsers(dest="subparser")
    build_parser = subparsers.add_parser('generate-html')
    bookwyrm_parser = subparsers.add_parser('bookwyrm-download')

    # bookwyrm-download subparser
    required_bookwyrm_args = bookwyrm_parser.add_argument_group('required named arguments')
    required_bookwyrm_args.add_argument(
        '-l', '--list-url',
        help="bookwyrm list url to use as a source input",
        required=True)
    required_bookwyrm_args.add_argument(
        '-o', '--output-dir',
        help="path where images should be downloaded and yaml should be written",
        required=True)

    # generate-html subparser
    required_build_args = build_parser.add_argument_group('required named arguments')
    required_build_args.add_argument(
        '-i', '--images-dir',
        help="path to folder containing cover images",
        required=True)
    required_build_args.add_argument(
        '-y', '--yaml-path',
        help="yaml path to use as input",
        required=True)
    required_build_args.add_argument(
        '-t', '--template',
        help="path to jinja file to use as a template for the html",
        required=True)
    required_build_args.add_argument(
        '-o', '--output-path',
        help="path to generate html inside of",
        required=True)

    # parse args
    args = parser.parse_args()

    if args.subparser == "bookwyrm-download":
        if not os.path.exists(args.output_dir):
            os.makedirs(args.output_dir)
        processed_items = process_list(args.list_url)
        download_images(processed_items, output_dir=args.output_dir)
        write_yaml(processed_items, output_dir=args.output_dir)
    elif args.subparser == "generate-html":
        yaml_path = args.yaml_path
        images_dir = args.images_dir
        output_path = args.output_path
        template_vars = load_yaml(yaml_path)
        build_site(
            output_dir=output_path,
            images_dir=images_dir,
            relative_template_path=args.template,
            template_vars=template_vars)
    else:
        raise Exception("invalid subcommand. must be either bookwyrm-download or generate-html")