From ce3bfc58b0fdc476249671140285e54ca15427d1 Mon Sep 17 00:00:00 2001 From: rra Date: Thu, 3 Mar 2022 07:00:09 +0100 Subject: [PATCH 01/23] remove orphaned " --- lumbunglib/templates/hashtag.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/lumbunglib/templates/hashtag.md b/lumbunglib/templates/hashtag.md index 786ff6b..49e2fea 100644 --- a/lumbunglib/templates/hashtag.md +++ b/lumbunglib/templates/hashtag.md @@ -1,12 +1,12 @@ --- -date: "{{ post_metadata.created_at }}" #2021-06-10T10:46:33+02:00 +date: {{ post_metadata.created_at }} #2021-06-10T10:46:33+02:00 draft: false -author: "{{ post_metadata.account.display_name }}" -avatar: "{{ post_metadata.account.avatar }}" +author: {{ post_metadata.account.display_name }} +avatar: {{ post_metadata.account.avatar }} categories: ["shouts"] -images: [{% for i in post_metadata.media_attachments %} "{{ i.url }}", {% endfor %}] -title: "{{ post_metadata.account.display_name }}" -tags: [{% for i in post_metadata.tags %} "{{ i.name }}", {% endfor %}] +images: [{% for i in post_metadata.media_attachments %} {{ i.url }}, {% endfor %}] +title: {{ post_metadata.account.display_name }} +tags: [{% for i in post_metadata.tags %} {{ i.name }} {% endfor %}] --- {% for item in post_metadata.media_attachments %} -- 2.49.0 From 3b390d1ecbb60a8a5ed7c91efb4328d88503c50f Mon Sep 17 00:00:00 2001 From: rra Date: Tue, 24 May 2022 08:42:56 +0200 Subject: [PATCH 02/23] change template to authors to accomodate author taxonomy --- lumbunglib/templates/feed.md | 2 +- lumbunglib/templates/hashtag.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/lumbunglib/templates/feed.md b/lumbunglib/templates/feed.md index ddef11d..f744bff 100644 --- a/lumbunglib/templates/feed.md +++ b/lumbunglib/templates/feed.md @@ -3,7 +3,7 @@ title: "{{ frontmatter.title }}" date: "{{ frontmatter.date }}" #2021-06-10T10:46:33+02:00 draft: false summary: "{{ frontmatter.summary }}" -author: "{{ frontmatter.author }}" 
+authors: ["{{ frontmatter.author }}"] original_link: "{{ frontmatter.original_link }}" feed_name: "{{ frontmatter.feed_name}}" categories: ["{{ frontmatter.card_type }}", "{{ frontmatter.feed_name}}"] diff --git a/lumbunglib/templates/hashtag.md b/lumbunglib/templates/hashtag.md index 49e2fea..bdcacba 100644 --- a/lumbunglib/templates/hashtag.md +++ b/lumbunglib/templates/hashtag.md @@ -1,7 +1,7 @@ --- date: {{ post_metadata.created_at }} #2021-06-10T10:46:33+02:00 draft: false -author: {{ post_metadata.account.display_name }} +authors: ["{{ post_metadata.account.display_name }}"] avatar: {{ post_metadata.account.avatar }} categories: ["shouts"] images: [{% for i in post_metadata.media_attachments %} {{ i.url }}, {% endfor %}] -- 2.49.0 From 6e64d6477295dcb41cf04fcd6675476353f3f160 Mon Sep 17 00:00:00 2001 From: rra Date: Tue, 24 May 2022 10:00:58 +0200 Subject: [PATCH 03/23] only return an author if there is one --- lumbunglib/templates/feed.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lumbunglib/templates/feed.md b/lumbunglib/templates/feed.md index f744bff..d9f3f9a 100644 --- a/lumbunglib/templates/feed.md +++ b/lumbunglib/templates/feed.md @@ -3,7 +3,7 @@ title: "{{ frontmatter.title }}" date: "{{ frontmatter.date }}" #2021-06-10T10:46:33+02:00 draft: false summary: "{{ frontmatter.summary }}" -authors: ["{{ frontmatter.author }}"] +authors: {% if frontmatter.author %} ["{{ frontmatter.author }}"] {% endif %} original_link: "{{ frontmatter.original_link }}" feed_name: "{{ frontmatter.feed_name}}" categories: ["{{ frontmatter.card_type }}", "{{ frontmatter.feed_name}}"] -- 2.49.0 From fecf5cd64e669fec8f488d92baadf404661d029e Mon Sep 17 00:00:00 2001 From: rra Date: Tue, 24 May 2022 15:39:11 +0200 Subject: [PATCH 04/23] add rudimentary support for enclosures & featured images --- lumbunglib/feed.py | 28 +++++++++++++++++++++++++++- lumbunglib/templates/feed.md | 1 + 2 files changed, 28 insertions(+), 1 deletion(-) diff --git 
a/lumbunglib/feed.py b/lumbunglib/feed.py index 1e25fdc..4113e52 100644 --- a/lumbunglib/feed.py +++ b/lumbunglib/feed.py @@ -85,6 +85,11 @@ def create_frontmatter(entry): for t in entry.tags: tags.append(t['term']) + if "featured_image" in entry: + featured_image = entry.featured_image + else: + featured_image = '' + card_type = "network" if entry.feed_name == "pen.lumbung.space": card_type = "pen" @@ -110,7 +115,8 @@ def create_frontmatter(entry): 'original_link': entry.link, 'feed_name': entry['feed_name'], 'tags': str(tags), - 'card_type': card_type + 'card_type': card_type, + 'featured_image': featured_image } return frontmatter @@ -136,11 +142,30 @@ def sanitize_yaml (frontmatter): return frontmatter +def parse_enclosures(post_dir, entry): + """ + Parses feed enclosures which are featured media + Can be featured image but also podcast entries + https://pythonhosted.org/feedparser/reference-entry-enclosures.html + """ + #TODO parse more than images + #TODO handle the fact it could be multiple items + + for e in entry.enclosures: + print("found enclosed media", e.type) + if "image/" in e.type: + featured_image = grab_media(post_dir, e.href) + entry["featured_image"] = featured_image + return entry + def create_post(post_dir, entry): """ write hugo post based on RSS entry """ + if "enclosures" in entry: + entry = parse_enclosures(post_dir, entry) + frontmatter = create_frontmatter(entry) if not os.path.exists(post_dir): @@ -202,6 +227,7 @@ def parse_posts(post_dir, post_content): for img in soup(["img", "object"]): if img.get("src") != None: + local_image = grab_media(post_dir, img["src"]) if img["src"] != local_image: img["src"] = local_image diff --git a/lumbunglib/templates/feed.md b/lumbunglib/templates/feed.md index d9f3f9a..71d984b 100644 --- a/lumbunglib/templates/feed.md +++ b/lumbunglib/templates/feed.md @@ -8,6 +8,7 @@ original_link: "{{ frontmatter.original_link }}" feed_name: "{{ frontmatter.feed_name}}" categories: ["{{ frontmatter.card_type }}", 
"{{ frontmatter.feed_name}}"] tags: {{ frontmatter.tags }} +{% if frontmatter.featured_image %}featured_image: "{{frontmatter.featured_image}}"{% endif %} --- {{ content }} -- 2.49.0 From c84a9758871766897b08b9f6ae2eae536bc26a26 Mon Sep 17 00:00:00 2001 From: rra Date: Sun, 29 May 2022 12:30:55 +0200 Subject: [PATCH 05/23] add reason for failure --- lumbunglib/feed.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/lumbunglib/feed.py b/lumbunglib/feed.py index 4113e52..a78c032 100644 --- a/lumbunglib/feed.py +++ b/lumbunglib/feed.py @@ -206,6 +206,9 @@ def grab_media(post_directory, url, prefered_name=None): shutil.copyfileobj(response.raw, media_file) print('Downloaded media item', media_item) return media_item + else: + print("Download failed", response.status_code) + return url return media_item elif os.path.exists(os.path.join(post_directory, media_item)): return media_item @@ -227,7 +230,6 @@ def parse_posts(post_dir, post_content): for img in soup(["img", "object"]): if img.get("src") != None: - local_image = grab_media(post_dir, img["src"]) if img["src"] != local_image: img["src"] = local_image -- 2.49.0 From cab36c8ac63acd7ceadcfcf8856cf2f3607bf8ed Mon Sep 17 00:00:00 2001 From: rra Date: Sun, 29 May 2022 14:45:11 +0200 Subject: [PATCH 06/23] add less generic headers --- lumbunglib/feed.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/lumbunglib/feed.py b/lumbunglib/feed.py index a78c032..f5a8d42 100644 --- a/lumbunglib/feed.py +++ b/lumbunglib/feed.py @@ -194,13 +194,17 @@ def grab_media(post_directory, url, prefered_name=None): """ media_item = urlparse(url).path.split('/')[-1] + headers = { + 'User-Agent': 'https://git.autonomic.zone/ruangrupa/lumbunglib', + 'From': 'info@lumbung.space' # This is another valid field + } if prefered_name: media_item = prefered_name try: if not os.path.exists(os.path.join(post_directory, media_item)): #TODO: stream is true is a conditional so we could check the headers for 
things, mimetype etc - response = requests.get(url, stream=True) + response = requests.get(url, headers=headers, stream=True) if response.ok: with open(os.path.join(post_directory, media_item), 'wb') as media_file: shutil.copyfileobj(response.raw, media_file) -- 2.49.0 From ad591ea9cf89d896a53fdc3b649f0b45f9371c00 Mon Sep 17 00:00:00 2001 From: rra Date: Wed, 1 Jun 2022 05:51:25 +0200 Subject: [PATCH 07/23] add more checks for failures --- lumbunglib/feed.py | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/lumbunglib/feed.py b/lumbunglib/feed.py index f5a8d42..f51d3e9 100644 --- a/lumbunglib/feed.py +++ b/lumbunglib/feed.py @@ -152,10 +152,13 @@ def parse_enclosures(post_dir, entry): #TODO handle the fact it could be multiple items for e in entry.enclosures: - print("found enclosed media", e.type) - if "image/" in e.type: - featured_image = grab_media(post_dir, e.href) - entry["featured_image"] = featured_image + if "type" in e: + print("found enclosed media", e.type) + if "image/" in e.type: + featured_image = grab_media(post_dir, e.href) + entry["featured_image"] = featured_image + else: + print("FIXME:ignoring enclosed", e.type) return entry @@ -267,11 +270,12 @@ def grab_feed(feed_url): print(e) return False - print(data.status, feed_url) - if data.status == 200: - # 304 means the feed has not been modified since we last checked - write_etag(feed_name, data) - return data + if "status" in data: + print(data.status, feed_url) + if data.status == 200: + # 304 means the feed has not been modified since we last checked + write_etag(feed_name, data) + return data return False def create_opds_post(post_dir, entry): -- 2.49.0 From b0f77831bd10be9566a95cee889a3c8061295a6e Mon Sep 17 00:00:00 2001 From: rra Date: Thu, 2 Jun 2022 06:45:54 +0200 Subject: [PATCH 08/23] add 'contributors' as metadata category --- lumbunglib/templates/feed.md | 1 + lumbunglib/templates/hashtag.md | 3 ++- lumbunglib/templates/video.md | 1 + 3 files 
changed, 4 insertions(+), 1 deletion(-) diff --git a/lumbunglib/templates/feed.md b/lumbunglib/templates/feed.md index 71d984b..29d8e91 100644 --- a/lumbunglib/templates/feed.md +++ b/lumbunglib/templates/feed.md @@ -7,6 +7,7 @@ authors: {% if frontmatter.author %} ["{{ frontmatter.author }}"] {% endif %} original_link: "{{ frontmatter.original_link }}" feed_name: "{{ frontmatter.feed_name}}" categories: ["{{ frontmatter.card_type }}", "{{ frontmatter.feed_name}}"] +contributors: ["{{ frontmatter.feed_name}}"] tags: {{ frontmatter.tags }} {% if frontmatter.featured_image %}featured_image: "{{frontmatter.featured_image}}"{% endif %} --- diff --git a/lumbunglib/templates/hashtag.md b/lumbunglib/templates/hashtag.md index bdcacba..4a61a7a 100644 --- a/lumbunglib/templates/hashtag.md +++ b/lumbunglib/templates/hashtag.md @@ -2,11 +2,12 @@ date: {{ post_metadata.created_at }} #2021-06-10T10:46:33+02:00 draft: false authors: ["{{ post_metadata.account.display_name }}"] +contributors: ["{{ post_metadata.account.acct}}"] avatar: {{ post_metadata.account.avatar }} categories: ["shouts"] images: [{% for i in post_metadata.media_attachments %} {{ i.url }}, {% endfor %}] title: {{ post_metadata.account.display_name }} -tags: [{% for i in post_metadata.tags %} {{ i.name }} {% endfor %}] +tags: [{% for i in post_metadata.tags %} "{{ i.name }}", {% endfor %}] --- {% for item in post_metadata.media_attachments %} diff --git a/lumbunglib/templates/video.md b/lumbunglib/templates/video.md index 96761ec..076a1bf 100644 --- a/lumbunglib/templates/video.md +++ b/lumbunglib/templates/video.md @@ -6,6 +6,7 @@ uuid: "{{v.uuid}}" video_duration: "{{ v.duration | duration }} " video_channel: "{{ v.channel.display_name }}" channel_url: "{{ v.channel.url }}" +contributors: ["{{ v.account.display_name }}"] preview_image: "{{ preview_image }}" images: ["./{{ preview_image }}"] categories: ["tv","{{ v.channel.display_name }}"] -- 2.49.0 From 00f795f16de4018632109b6e1415f95afe9d3e75 Mon Sep 17 
00:00:00 2001 From: rra Date: Thu, 2 Jun 2022 09:23:58 +0200 Subject: [PATCH 09/23] rename project to konfluks for legibility, add docs --- README.md | 54 ++++++++++++++++++- docs/konfluks.svg | 31 +++++++++++ {lumbunglib => konfluks}/cloudcal.py | 0 {lumbunglib => konfluks}/feed.py | 0 {lumbunglib => konfluks}/hashtag.py | 0 .../templates/calendar.md | 0 {lumbunglib => konfluks}/templates/feed.md | 0 {lumbunglib => konfluks}/templates/hashtag.md | 0 {lumbunglib => konfluks}/templates/video.md | 0 {lumbunglib => konfluks}/video.py | 0 pyproject.toml | 12 ++--- setup.py | 14 ++--- 12 files changed, 96 insertions(+), 15 deletions(-) create mode 100644 docs/konfluks.svg rename {lumbunglib => konfluks}/cloudcal.py (100%) rename {lumbunglib => konfluks}/feed.py (100%) rename {lumbunglib => konfluks}/hashtag.py (100%) rename {lumbunglib => konfluks}/templates/calendar.md (100%) rename {lumbunglib => konfluks}/templates/feed.md (100%) rename {lumbunglib => konfluks}/templates/hashtag.md (100%) rename {lumbunglib => konfluks}/templates/video.md (100%) rename {lumbunglib => konfluks}/video.py (100%) diff --git a/README.md b/README.md index 3deeeb2..85a1be8 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,56 @@ -# lumbunglib +![Konfluks logo is a stylized and schematic representation of a drainage basin](docs/Konfluks.svg) + +# Konfluks + +A drainage basin is a geographical feature that collects all precipitation in an area, first in to smaller streams and finally together in to the large river. Similarly, Konfluks can bring together small and dispersed streams of web content from different applications and websites together in a single large stream. + +Specifically, Konfluks turns Peertube videos, iCal calendar events, other websites through their RSS and OPDS feeds and Mastodon posts under a hashtag in to Hugo page bundles. This allows one to publish from diverse sources to a single stream. 
+ +Konfluks was first made by Roel Roscam Abbing as part of , together with ruangrupa and Autonomic. + +## Philosophy + +Konfluks tries to act as a mirror representation of the input sources. That means that whenever something remote is deleted, changed or becomes unavailable, it is also changed or deleted by Konfluks. + +Konfluks tries to preserve intention. That means the above, but also by requiring explicit ways of publishing. + +Konfluks works by periodically polling the remote sources, taking care not to duplicate work. It caches files, asks for last-modified headers, and skips things it has already. This makes every poll as fast and as light as possible. + +Konfluks is written for clarity, not brevity nor cleverness. + +Konfluks is extendable, a work in progress and a messy undertaking. + +## High-level overview + +Konfluks consists of different Python scripts which each poll a particular service, say, a Peertube server, to download information and convert it in to [Hugo Page Bundles](https://gohugo.io/content-management/page-bundles/) + +Each script part of Konfluks will essentially to the following: + * Parse a source and request posts/updates/videos/a feed + * Taking care of publish ques + * Create a Hugo post for each item returned, by: + * Making a folder per post in the `output` directory + * Formatting post metadata as [Hugo Post Frontmatter](https://gohugo.io/content-management/front-matter/) in a file called `index.md` + * Grabbing local copies of media and saving them in the post folder + * Adding the post content to `index.md` + * According to jinja2 templates (see `Konfluks/templates/`) + +The page bundles created, where possible, are given human friendly names. 
+ + +Here is a typical output structure: +`user@server: ~/Konfluks/output: tree tv/ +tv/ +├── forum-27an-mother-earth-353f93f3-5fee-49d6-b71d-8aef753f7041 +│   ├── 86ccae63-3df9-443c-91f3-edce146055db.jpg +│   └── index.md +├── keroncong-tugu-cafrinho-live-at-ruru-gallery-ruangrupa-jakarta-19-august-2014-e6d5bb2a-d77f-4a00-a449-992a579c8c0d +│   ├── 32291aa2-a391-4219-a413-87521ff373ba.jpg +│   └── index.md +├── lecture-series-1-camp-notes-on-education-8d54d3c9-0322-42af-ab6e-e954d251e076 +│   ├── 0f3c835b-42c2-48a3-a2a3-a75ddac8688a.jpg +│   └── index.md +` -> Python lib which powers `lumbung.space` automation ## hacking diff --git a/docs/konfluks.svg b/docs/konfluks.svg new file mode 100644 index 0000000..1a91ef3 --- /dev/null +++ b/docs/konfluks.svg @@ -0,0 +1,31 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/lumbunglib/cloudcal.py b/konfluks/cloudcal.py similarity index 100% rename from lumbunglib/cloudcal.py rename to konfluks/cloudcal.py diff --git a/lumbunglib/feed.py b/konfluks/feed.py similarity index 100% rename from lumbunglib/feed.py rename to konfluks/feed.py diff --git a/lumbunglib/hashtag.py b/konfluks/hashtag.py similarity index 100% rename from lumbunglib/hashtag.py rename to konfluks/hashtag.py diff --git a/lumbunglib/templates/calendar.md b/konfluks/templates/calendar.md similarity index 100% rename from lumbunglib/templates/calendar.md rename to konfluks/templates/calendar.md diff --git a/lumbunglib/templates/feed.md b/konfluks/templates/feed.md similarity index 100% rename from lumbunglib/templates/feed.md rename to konfluks/templates/feed.md diff --git a/lumbunglib/templates/hashtag.md b/konfluks/templates/hashtag.md similarity index 100% rename from lumbunglib/templates/hashtag.md rename to konfluks/templates/hashtag.md diff --git a/lumbunglib/templates/video.md b/konfluks/templates/video.md similarity index 100% rename from lumbunglib/templates/video.md rename to konfluks/templates/video.md diff --git 
a/lumbunglib/video.py b/konfluks/video.py similarity index 100% rename from lumbunglib/video.py rename to konfluks/video.py diff --git a/pyproject.toml b/pyproject.toml index 20ba70a..c94fcf0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,9 +1,9 @@ [tool.poetry] -name = "lumbunglib" +name = "konfluks" version = "0.1.0" description = "Python lib which powers lumbung[dot]space automation" authors = ["rra", "decentral1se"] -license = "GPLv3+" +license = "AGPLv3+" [tool.poetry.dependencies] python = "^3.9" @@ -25,7 +25,7 @@ requires = ["poetry-core>=1.0.0"] build-backend = "poetry.core.masonry.api" [tool.poetry.scripts] -lumbunglib-cal = "lumbunglib.cloudcal:main" -lumbunglib-vid = "lumbunglib.video:main" -lumbunglib-feed = "lumbunglib.feed:main" -lumbunglib-hash = "lumbunglib.hashtag:main" +konfluks-cal = "konfluks.cloudcal:main" +konfluks-vid = "konfluks.video:main" +konfluks-feed = "konfluks.feed:main" +konfluks-hash = "konfluks.hashtag:main" diff --git a/setup.py b/setup.py index e1e1af7..34c0444 100644 --- a/setup.py +++ b/setup.py @@ -2,10 +2,10 @@ from setuptools import setup packages = \ -['lumbunglib'] +['konfluks'] package_data = \ -{'': ['*'], 'lumbunglib': ['templates/*']} +{'': ['*'], 'konfluks': ['templates/*']} install_requires = \ ['Jinja2>=3.0.3,<4.0.0', @@ -20,13 +20,13 @@ install_requires = \ 'requests>=2.26.0,<3.0.0'] entry_points = \ -{'console_scripts': ['lumbunglib-cal = lumbunglib.cloudcal:main', - 'lumbunglib-feed = lumbunglib.feed:main', - 'lumbunglib-hash = lumbunglib.hashtag:main', - 'lumbunglib-vid = lumbunglib.video:main']} +{'console_scripts': ['konfluks-cal = konfluks.cloudcal:main', + 'konfluks-feed = konfluks.feed:main', + 'konfluks-hash = konfluks.hashtag:main', + 'konfluks-vid = konfluks.video:main']} setup_kwargs = { - 'name': 'lumbunglib', + 'name': 'konfluks', 'version': '0.1.0', 'description': 'Python lib which powers lumbung[dot]space automation', 'long_description': None, -- 2.49.0 From 
f162bb946a4e8b61f81d3511f7cb3432eb0712db Mon Sep 17 00:00:00 2001 From: rra Date: Thu, 2 Jun 2022 09:28:37 +0200 Subject: [PATCH 10/23] Update 'README.md' correcting markup / styling --- README.md | 46 ++++++++++++++++++++++++---------------------- 1 file changed, 24 insertions(+), 22 deletions(-) diff --git a/README.md b/README.md index 85a1be8..f8b2b2b 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -![Konfluks logo is a stylized and schematic representation of a drainage basin](docs/Konfluks.svg) +![Konfluks logo is a stylized and schematic representation of a drainage basin](https://git.autonomic.zone/r/konfluks/raw/branch/konfluks-renaming/docs/konfluks.svg) # Konfluks @@ -25,34 +25,36 @@ Konfluks is extendable, a work in progress and a messy undertaking. Konfluks consists of different Python scripts which each poll a particular service, say, a Peertube server, to download information and convert it in to [Hugo Page Bundles](https://gohugo.io/content-management/page-bundles/) Each script part of Konfluks will essentially to the following: - * Parse a source and request posts/updates/videos/a feed - * Taking care of publish ques - * Create a Hugo post for each item returned, by: - * Making a folder per post in the `output` directory - * Formatting post metadata as [Hugo Post Frontmatter](https://gohugo.io/content-management/front-matter/) in a file called `index.md` - * Grabbing local copies of media and saving them in the post folder - * Adding the post content to `index.md` - * According to jinja2 templates (see `Konfluks/templates/`) +* Parse a source and request posts/updates/videos/a feed + * Taking care of publish ques +* Create a Hugo post for each item returned, by: + * Making a folder per post in the `output` directory + * Formatting post metadata as [Hugo Post Frontmatter](https://gohugo.io/content-management/front-matter/) in a file called `index.md` + * Grabbing local copies of media and saving them in the post folder + * Adding the post 
content to `index.md` + * According to jinja2 templates (see `Konfluks/templates/`) The page bundles created, where possible, are given human friendly names. Here is a typical output structure: -`user@server: ~/Konfluks/output: tree tv/ -tv/ -├── forum-27an-mother-earth-353f93f3-5fee-49d6-b71d-8aef753f7041 -│   ├── 86ccae63-3df9-443c-91f3-edce146055db.jpg -│   └── index.md -├── keroncong-tugu-cafrinho-live-at-ruru-gallery-ruangrupa-jakarta-19-august-2014-e6d5bb2a-d77f-4a00-a449-992a579c8c0d -│   ├── 32291aa2-a391-4219-a413-87521ff373ba.jpg -│   └── index.md -├── lecture-series-1-camp-notes-on-education-8d54d3c9-0322-42af-ab6e-e954d251e076 -│   ├── 0f3c835b-42c2-48a3-a2a3-a75ddac8688a.jpg -│   └── index.md -` +``` + user@server: ~/Konfluks/output: tree tv/ + tv/ + ├── forum-27an-mother-earth-353f93f3-5fee-49d6-b71d-8aef753f7041 + │   ├── 86ccae63-3df9-443c-91f3-edce146055db.jpg + │   └── index.md + ├── keroncong-tugu-cafrinho-live-at-ruru-gallery-ruangrupa-jakarta-19-august-2014-e6d5bb2a-d77f-4a00-a449-992a579c8c0d + │   ├── 32291aa2-a391-4219-a413-87521ff373ba.jpg + │   └── index.md + ├── lecture-series-1-camp-notes-on-education-8d54d3c9-0322-42af-ab6e-e954d251e076 + │   ├── 0f3c835b-42c2-48a3-a2a3-a75ddac8688a.jpg + │   └── index.md +``` -## hacking + +## Hacking Install [poetry](https://python-poetry.org/docs/#osx--linux--bashonwindows-install-instructions): -- 2.49.0 From 845a54787b9dc0b0d272b50c78e072aa04e4add6 Mon Sep 17 00:00:00 2001 From: rra Date: Thu, 2 Jun 2022 09:29:20 +0200 Subject: [PATCH 11/23] Update 'README.md' --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index f8b2b2b..1af1154 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ A drainage basin is a geographical feature that collects all precipitation in an Specifically, Konfluks turns Peertube videos, iCal calendar events, other websites through their RSS and OPDS feeds and Mastodon posts under a hashtag in to Hugo page bundles. 
This allows one to publish from diverse sources to a single stream. -Konfluks was first made by Roel Roscam Abbing as part of , together with ruangrupa and Autonomic. +Konfluks was first made by Roel Roscam Abbing as part of [lumbung.space](https://lumbung.space), together with ruangrupa and Autonomic. ## Philosophy -- 2.49.0 From 41bc532ebc2238917633bb19d828507e15e1efdb Mon Sep 17 00:00:00 2001 From: Aadil Ayub Date: Fri, 10 Jun 2022 15:55:17 +0500 Subject: [PATCH 12/23] separate hashtags by comma --- lumbunglib/templates/hashtag.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lumbunglib/templates/hashtag.md b/lumbunglib/templates/hashtag.md index bdcacba..36a9308 100644 --- a/lumbunglib/templates/hashtag.md +++ b/lumbunglib/templates/hashtag.md @@ -6,7 +6,7 @@ avatar: {{ post_metadata.account.avatar }} categories: ["shouts"] images: [{% for i in post_metadata.media_attachments %} {{ i.url }}, {% endfor %}] title: {{ post_metadata.account.display_name }} -tags: [{% for i in post_metadata.tags %} {{ i.name }} {% endfor %}] +tags: [{% for i in post_metadata.tags %} "{{ i.name }}", {% endfor %}] --- {% for item in post_metadata.media_attachments %} -- 2.49.0 From ff76378cdd2657a8820625a71f1c09e398c46ae6 Mon Sep 17 00:00:00 2001 From: Aadil Ayub Date: Tue, 14 Jun 2022 19:27:31 +0500 Subject: [PATCH 13/23] merge christopher's changes pulling the timeline from pen.lumbung.space --- lumbunglib/templates/timeline.md | 14 ++ lumbunglib/timeline.py | 381 +++++++++++++++++++++++++++++++ pyproject.toml | 1 + setup.py | 1 + 4 files changed, 397 insertions(+) create mode 100644 lumbunglib/templates/timeline.md create mode 100644 lumbunglib/timeline.py diff --git a/lumbunglib/templates/timeline.md b/lumbunglib/templates/timeline.md new file mode 100644 index 0000000..0cc9c2b --- /dev/null +++ b/lumbunglib/templates/timeline.md @@ -0,0 +1,14 @@ +--- +title: "{{ frontmatter.title }}" +date: "{{ frontmatter.date }}" #2021-06-10T10:46:33+02:00 +draft: false 
+summary: "{{ frontmatter.summary }}" +authors: {% if frontmatter.author %} ["{{ frontmatter.author }}"] {% endif %} +original_link: "{{ frontmatter.original_link }}" +feed_name: "{{ frontmatter.feed_name}}" +categories: ["timeline", "{{ frontmatter.feed_name}}"] +timelines: {{ frontmatter.timelines }} +hidden: true +--- + +{{ content }} \ No newline at end of file diff --git a/lumbunglib/timeline.py b/lumbunglib/timeline.py new file mode 100644 index 0000000..d1382de --- /dev/null +++ b/lumbunglib/timeline.py @@ -0,0 +1,381 @@ +import os +import shutil +import time +from hashlib import md5 +from ast import literal_eval as make_tuple +from pathlib import Path +from urllib.parse import urlparse +from re import sub + +import arrow +import feedparser +import jinja2 +import requests +from bs4 import BeautifulSoup +from slugify import slugify +from re import compile as re_compile +yamlre = re_compile('"') + + +def write_etag(feed_name, feed_data): + """ + save timestamp of when feed was last modified + """ + etag = "" + modified = "" + + if "etag" in feed_data: + etag = feed_data.etag + if "modified" in feed_data: + modified = feed_data.modified + + if etag or modified: + with open(os.path.join("etags", feed_name + ".txt"), "w") as f: + f.write(str((etag, modified))) + + +def get_etag(feed_name): + """ + return timestamp of when feed was last modified + """ + fn = os.path.join("etags", feed_name + ".txt") + etag = "" + modified = "" + + if os.path.exists(fn): + etag, modified = make_tuple(open(fn, "r").read()) + + return etag, modified + + +def create_frontmatter(entry): + """ + parse RSS metadata and return as frontmatter + """ + if 'published' in entry: + published = entry.published_parsed + if 'updated' in entry: + published = entry.updated_parsed + + published = arrow.get(published) + + if 'author' in entry: + author = entry.author + else: + author = '' + + if 'authors' in entry: + authors = [] + for a in entry.authors: + authors.append(a['name']) + + if 'summary' 
in entry: + summary = entry.summary + else: + summary = '' + + if 'publisher' in entry: + publisher = entry.publisher + else: + publisher = '' + + tags = [] + if 'tags' in entry: + #TODO finish categories + for t in entry.tags: + tags.append(t['term']) + + frontmatter = { + 'title':entry.title, + 'date': published.format(), + 'summary': '', + 'author': author, + 'original_link': entry.link, + 'feed_name': entry['feed_name'], + 'timelines': str(tags), + } + + return frontmatter + +def sanitize_yaml (frontmatter): + """ + Escapes any occurences of double quotes + in any of the frontmatter fields + See: https://docs.octoprint.org/en/master/configuration/yaml.html#interesting-data-types + """ + for k, v in frontmatter.items(): + if type(v) == type([]): + #some fields are lists + l = [] + for i in v: + i = yamlre.sub('\\"', i) + l.append(i) + frontmatter[k] = l + + else: + v = yamlre.sub('\\"', v) + frontmatter[k] = v + + return frontmatter + + +def create_post(post_dir, entry): + """ + write hugo post based on RSS entry + """ + frontmatter = create_frontmatter(entry) + + if not os.path.exists(post_dir): + os.makedirs(post_dir) + + if "content" in entry: + post_content = entry.content[0].value + else: + post_content = entry.summary + + parsed_content = parse_posts(post_dir, post_content) + + template_dir = os.path.join(Path(__file__).parent.resolve(), "templates") + env = jinja2.Environment(loader=jinja2.FileSystemLoader(template_dir)) + template = env.get_template("timeline.md") + with open(os.path.join(post_dir, "index.html"), "w") as f: # n.b. 
.html + post = template.render(frontmatter=sanitize_yaml(frontmatter), content=parsed_content) + f.write(post) + print("created post for", entry.title, "({})".format(entry.link)) + + +def grab_media(post_directory, url, prefered_name=None): + """ + download media linked in post to have local copy + if download succeeds return new local path otherwise return url + """ + media_item = urlparse(url).path.split('/')[-1] + + if prefered_name: + media_item = prefered_name + + try: + if not os.path.exists(os.path.join(post_directory, media_item)): + #TODO: stream is true is a conditional so we could check the headers for things, mimetype etc + response = requests.get(url, stream=True) + if response.ok: + with open(os.path.join(post_directory, media_item), 'wb') as media_file: + shutil.copyfileobj(response.raw, media_file) + print('Downloaded media item', media_item) + return media_item + return media_item + elif os.path.exists(os.path.join(post_directory, media_item)): + return media_item + + except Exception as e: + print('Failed to download image', url) + print(e) + return url + + +def parse_posts(post_dir, post_content): + """ + parse the post content to for media items + replace foreign image with local copy + filter out iframe sources not in allowlist + """ + soup = BeautifulSoup(post_content, "html.parser") + allowed_iframe_sources = ["youtube.com", "vimeo.com", "tv.lumbung.space"] + + for img in soup(["img", "object"]): + if img.get("src") != None: + local_image = grab_media(post_dir, img["src"]) + if img["src"] != local_image: + img["src"] = local_image + + for iframe in soup(["iframe"]): + if not any(source in iframe["src"] for source in allowed_iframe_sources): + print("filtered iframe: {}...".format(iframe["src"][:25])) + iframe.decompose() + return soup.decode() + + +def grab_feed(feed_url): + """ + check whether feed has been updated + download & return it if it has + """ + feed_name = urlparse(feed_url).netloc + + etag, modified = get_etag(feed_name) + + try: 
+ if modified: + data = feedparser.parse(feed_url, modified=modified) + elif etag: + data = feedparser.parse(feed_url, etag=etag) + else: + data = feedparser.parse(feed_url) + except Exception as e: + print("Error grabbing feed") + print(feed_name) + print(e) + return False + + print(data.status, feed_url) + if data.status == 200: + # 304 means the feed has not been modified since we last checked + write_etag(feed_name, data) + return data + return False + +def create_opds_post(post_dir, entry): + """ + create a HUGO post based on OPDS entry + or update it if the timestamp is newer + Downloads the cover & file + """ + + frontmatter = create_frontmatter(entry) + + template_dir = os.path.join(Path(__file__).parent.resolve(), "templates") + env = jinja2.Environment(loader=jinja2.FileSystemLoader(template_dir)) + template = env.get_template("feed.md") + + if not os.path.exists(post_dir): + os.makedirs(post_dir) + + if os.path.exists(os.path.join(post_dir, '.timestamp')): + old_timestamp = open(os.path.join(post_dir, '.timestamp')).read() + old_timestamp = arrow.get(float(old_timestamp)) + current_timestamp = arrow.get(entry['updated_parsed']) + + if current_timestamp > old_timestamp: + pass + else: + print('Book "{}..." 
already up to date'.format(entry['title'][:32])) + return + + for item in entry.links: + ft = item['type'].split('/')[-1] + fn = item['rel'].split('/')[-1] + + if fn == "acquisition": + fn = "publication" #calling the publications acquisition is weird + + prefered_name = "{}-{}.{}".format(fn, slugify(entry['title']), ft) + + grab_media(post_dir, item['href'], prefered_name) + + if "summary" in entry: + summary = entry.summary + else: + summary = "" + + with open(os.path.join(post_dir,'index.md'),'w') as f: + post = template.render(frontmatter=sanitize_yaml(frontmatter), content=summary) + f.write(post) + print('created post for Book', entry.title) + + with open(os.path.join(post_dir, '.timestamp'), 'w') as f: + timestamp = arrow.get(entry['updated_parsed']) + f.write(timestamp.format('X')) + + +def main(): + feed_urls = open("feeds_list_timeline.txt", "r").read().splitlines() + + start = time.time() + + if not os.path.exists("etags"): + os.mkdir("etags") + + output_dir = os.environ.get("OUTPUT_DIR") + + if not os.path.exists(output_dir): + os.makedirs(output_dir) + + feed_dict = dict() + for url in feed_urls: + feed_name = urlparse(url).netloc + feed_dict[url] = feed_name + + feed_names = feed_dict.values() + content_dirs = os.listdir(output_dir) + for i in content_dirs: + if i not in feed_names: + shutil.rmtree(os.path.join(output_dir, i)) + print("%s not in feeds_list.txt, removing local data" %(i)) + + # add iframe to the allowlist of feedparser's sanitizer, + # this is now handled in parse_post() + feedparser.sanitizer._HTMLSanitizer.acceptable_elements |= {"iframe"} + + for feed_url in feed_urls: + + feed_name = feed_dict[feed_url] + + feed_dir = os.path.join(output_dir, feed_name) + + if not os.path.exists(feed_dir): + os.makedirs(feed_dir) + + existing_posts = os.listdir(feed_dir) + + data = grab_feed(feed_url) + + if data: + + opds_feed = False + for i in data.feed['links']: + if i['rel'] == 'self': + if 'opds' in i['type']: + opds_feed = True + print("OPDS 
type feed!") + + + for entry in data.entries: + # if 'tags' in entry: + # for tag in entry.tags: + # for x in ['lumbung.space', 'D15', 'lumbung']: + # if x in tag['term']: + # print(entry.title) + entry["feed_name"] = feed_name + + post_name = slugify(entry.title) + + # pixelfed returns the whole post text as the post name. max + # filename length is 255 on many systems. here we're shortening + # the name and adding a hash to it to avoid a conflict in a + # situation where 2 posts start with exactly the same text. + if len(post_name) > 150: + post_hash = md5(bytes(post_name, "utf-8")) + post_name = post_name[:150] + "-" + post_hash.hexdigest() + + if opds_feed: + entry['opds'] = True + #format: Beyond-Debiasing-Report_Online-75535a4886e3 + post_name = slugify(entry['title'])+'-'+entry['id'].split('-')[-1] + + post_dir = os.path.join(output_dir, feed_name, post_name) + + if post_name not in existing_posts: + # if there is a blog entry we dont already have, make it + if opds_feed: + create_opds_post(post_dir, entry) + else: + create_post(post_dir, entry) + + elif post_name in existing_posts: + # if we already have it, update it + if opds_feed: + create_opds_post(post_dir, entry) + else: + create_post(post_dir, entry) + existing_posts.remove( + post_name + ) # create list of posts which have not been returned by the feed + + for post in existing_posts: + # remove blog posts no longer returned by the RSS feed + print("deleted", post) + shutil.rmtree(os.path.join(feed_dir, slugify(post))) + + end = time.time() + + print(end - start) diff --git a/pyproject.toml b/pyproject.toml index 20ba70a..75e14df 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -28,4 +28,5 @@ build-backend = "poetry.core.masonry.api" lumbunglib-cal = "lumbunglib.cloudcal:main" lumbunglib-vid = "lumbunglib.video:main" lumbunglib-feed = "lumbunglib.feed:main" +lumbunglib-timeline = "lumbunglib.timeline:main" lumbunglib-hash = "lumbunglib.hashtag:main" diff --git a/setup.py b/setup.py index 
e1e1af7..9aae1b7 100644 --- a/setup.py +++ b/setup.py @@ -22,6 +22,7 @@ install_requires = \ entry_points = \ {'console_scripts': ['lumbunglib-cal = lumbunglib.cloudcal:main', 'lumbunglib-feed = lumbunglib.feed:main', + 'lumbunglib-timeline = lumbunglib.timeline:main', 'lumbunglib-hash = lumbunglib.hashtag:main', 'lumbunglib-vid = lumbunglib.video:main']} -- 2.49.0 From e66e3202dae46fbbc65688ea33e9fdcf9586afd1 Mon Sep 17 00:00:00 2001 From: decentral1se Date: Tue, 21 Jun 2022 00:00:32 +0200 Subject: [PATCH 14/23] add new hashtag --- lumbunglib/hashtag.py | 1 + 1 file changed, 1 insertion(+) diff --git a/lumbunglib/hashtag.py b/lumbunglib/hashtag.py index baf21c9..ff29707 100644 --- a/lumbunglib/hashtag.py +++ b/lumbunglib/hashtag.py @@ -23,6 +23,7 @@ hashtags = [ "ruruhaus", "offbeatentrack_kassel", "lumbungofpublishers", + "lumbungkiosproducts", ] -- 2.49.0 From 6020db4d15a881dfdd07b52f3298f1d072bd6b79 Mon Sep 17 00:00:00 2001 From: decentral1se Date: Mon, 18 Jul 2022 12:16:52 +0200 Subject: [PATCH 15/23] additional gardening for konfluks rename --- README.md | 41 ++++++++++---------------- docs/konfluks.svg => konfluks.svg | 0 konfluks/{cloudcal.py => calendars.py} | 0 pyproject.toml | 6 ++-- setup.py | 2 +- 5 files changed, 19 insertions(+), 30 deletions(-) rename docs/konfluks.svg => konfluks.svg (100%) rename konfluks/{cloudcal.py => calendars.py} (100%) diff --git a/README.md b/README.md index 1af1154..acb2c2e 100644 --- a/README.md +++ b/README.md @@ -1,12 +1,12 @@ -![Konfluks logo is a stylized and schematic representation of a drainage basin](https://git.autonomic.zone/r/konfluks/raw/branch/konfluks-renaming/docs/konfluks.svg) +![Konfluks logo is a stylized and schematic representation of a drainage basin](./konfluks.svg) # Konfluks -A drainage basin is a geographical feature that collects all precipitation in an area, first in to smaller streams and finally together in to the large river. 
Similarly, Konfluks can bring together small and dispersed streams of web content from different applications and websites together in a single large stream. +A drainage basin is a geographical feature that collects all precipitation in an area, first in to smaller streams and finally together in to the large river. Similarly, Konfluks can bring together small and dispersed streams of web content from different applications and websites together in a single large stream. -Specifically, Konfluks turns Peertube videos, iCal calendar events, other websites through their RSS and OPDS feeds and Mastodon posts under a hashtag in to Hugo page bundles. This allows one to publish from diverse sources to a single stream. +Specifically, Konfluks turns Peertube videos, iCal calendar events, other websites through their RSS and OPDS feeds and Mastodon posts under a hashtag in to Hugo page bundles. This allows one to publish from diverse sources to a single stream. -Konfluks was first made by Roel Roscam Abbing as part of [lumbung.space](https://lumbung.space), together with ruangrupa and Autonomic. +Konfluks was first made by [Roel Roscam Abbing](https://test.roelof.info/) as part of [lumbung.space](https://lumbung.space), together with [ruangrupa](https://ruangrupa.id) and [Autonomic](https://autonomic.zone). ## Philosophy @@ -22,24 +22,26 @@ Konfluks is extendable, a work in progress and a messy undertaking. 
## High-level overview -Konfluks consists of different Python scripts which each poll a particular service, say, a Peertube server, to download information and convert it in to [Hugo Page Bundles](https://gohugo.io/content-management/page-bundles/) +Konfluks consists of different Python scripts which each poll a particular service, say, a [Peertube](https://joinpeertube.org) server, to download information and convert it in to [Hugo Page Bundles](https://gohugo.io/content-management/page-bundles/) Each script part of Konfluks will essentially to the following: + * Parse a source and request posts/updates/videos/a feed * Taking care of publish ques + * Create a Hugo post for each item returned, by: * Making a folder per post in the `output` directory * Formatting post metadata as [Hugo Post Frontmatter](https://gohugo.io/content-management/front-matter/) in a file called `index.md` * Grabbing local copies of media and saving them in the post folder * Adding the post content to `index.md` - * According to jinja2 templates (see `Konfluks/templates/`) + * According to jinja2 templates (see `konfluks/templates/`) The page bundles created, where possible, are given human friendly names. - Here is a typical output structure: + ``` - user@server: ~/Konfluks/output: tree tv/ + user@server: ~/konfluks/output: tree tv/ tv/ ├── forum-27an-mother-earth-353f93f3-5fee-49d6-b71d-8aef753f7041 │   ├── 86ccae63-3df9-443c-91f3-edce146055db.jpg @@ -52,8 +54,6 @@ Here is a typical output structure: │   └── index.md ``` - - ## Hacking Install [poetry](https://python-poetry.org/docs/#osx--linux--bashonwindows-install-instructions): @@ -62,31 +62,20 @@ Install [poetry](https://python-poetry.org/docs/#osx--linux--bashonwindows-insta curl -sSL https://raw.githubusercontent.com/python-poetry/poetry/master/get-poetry.py | python - ``` -We use Poetry because it locks the dependencies all the way down and makes it -easier to manage installation & maintenance in the long-term. 
Then install the -dependencies & have them managed by Poetry: +We use Poetry because it locks the dependencies all the way down and makes it easier to manage installation & maintenance in the long-term. Then install the dependencies & have them managed by Poetry: ``` poetry install ``` -Each script requires some environment variables to run, you can see the latest -deployment configuration over -[here](https://git.autonomic.zone/ruangrupa/lumbung.space/src/branch/main/compose.yml), -look for the values under the `environment: ...` stanza. +Each script requires some environment variables to run, you can see the latest deployment configuration over [here](https://git.autonomic.zone/ruangrupa/lumbung.space/src/branch/main/compose.yml), look for the values under the `environment: ...` stanza. -All scripts have an entrypoint described in the -[`pypoetry.toml`](https://git.autonomic.zone/ruangrupa/lumbunglib/src/commit/40bf9416b8792c08683ad8ac878093c7ef1b2f5d/pyproject.toml#L27-L31) -which you can run via `poetry run ...`. For example, if you want to run the -[`lumbunglib/video.py`](./lumbunglib/video.py) script, you'd do: +All scripts have an entrypoint described in the [`pypoetry.toml`](https://git.autonomic.zone/ruangrupa/konfluks/src/commit/40bf9416b8792c08683ad8ac878093c7ef1b2f5d/pyproject.toml#L27-L31) which you can run via `poetry run ...`. For example, if you want to run the [`konfluks/video.py`](./knofluks/video.py) script, you'd do: ``` mkdir -p testdir export OUTPUT_DIR=/testdir -poetry run lumbunglib-vid +poetry run konfluks-vid ``` -Run `poetry run poetry2setup > setup.py` if updating the poetry dependencies. -This allows us to run `pip install .` in the deployment and Pip will understand -that it is just a regular Python package. If adding a new cli command, extend -`pyproject.toml` with a new `[tool.poetry.scripts]` entry. +Run `poetry run poetry2setup > setup.py` if updating the poetry dependencies. 
This allows us to run `pip install .` in the deployment and Pip will understand that it is just a regular Python package. If adding a new cli command, extend `pyproject.toml` with a new `[tool.poetry.scripts]` entry. diff --git a/docs/konfluks.svg b/konfluks.svg similarity index 100% rename from docs/konfluks.svg rename to konfluks.svg diff --git a/konfluks/cloudcal.py b/konfluks/calendars.py similarity index 100% rename from konfluks/cloudcal.py rename to konfluks/calendars.py diff --git a/pyproject.toml b/pyproject.toml index 33b9ded..14e518a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,8 +1,8 @@ [tool.poetry] name = "konfluks" version = "0.1.0" -description = "Python lib which powers lumbung[dot]space automation" -authors = ["rra", "decentral1se"] +description = "Brings together small and dispersed streams of web content from different applications and websites together in a single large stream." +authors = ["rra", "decentral1se", "knoflook"] license = "AGPLv3+" [tool.poetry.dependencies] @@ -25,7 +25,7 @@ requires = ["poetry-core>=1.0.0"] build-backend = "poetry.core.masonry.api" [tool.poetry.scripts] -konfluks-cal = "konfluks.cloudcal:main" +konfluks-cal = "konfluks.calendars:main" konfluks-vid = "konfluks.video:main" konfluks-feed = "konfluks.feed:main" konfluks-timeline = "konfluks.timeline:main" diff --git a/setup.py b/setup.py index b76bc40..2384fd2 100644 --- a/setup.py +++ b/setup.py @@ -20,7 +20,7 @@ install_requires = \ 'requests>=2.26.0,<3.0.0'] entry_points = \ -{'console_scripts': ['konfluks-cal = konfluks.cloudcal:main', +{'console_scripts': ['konfluks-cal = konfluks.calendars:main', 'konfluks-feed = konfluks.feed:main', 'konfluks-timeline = lumbunglib.timeline:main', 'konfluks-hash = konfluks.hashtag:main', -- 2.49.0 From 98299daa1be35aecfc0fc82da0b6c7a34ba6aa73 Mon Sep 17 00:00:00 2001 From: decentral1se Date: Mon, 18 Jul 2022 12:21:01 +0200 Subject: [PATCH 16/23] fix links --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 
deletion(-) diff --git a/README.md b/README.md index acb2c2e..311271d 100644 --- a/README.md +++ b/README.md @@ -70,7 +70,7 @@ poetry install Each script requires some environment variables to run, you can see the latest deployment configuration over [here](https://git.autonomic.zone/ruangrupa/lumbung.space/src/branch/main/compose.yml), look for the values under the `environment: ...` stanza. -All scripts have an entrypoint described in the [`pypoetry.toml`](https://git.autonomic.zone/ruangrupa/konfluks/src/commit/40bf9416b8792c08683ad8ac878093c7ef1b2f5d/pyproject.toml#L27-L31) which you can run via `poetry run ...`. For example, if you want to run the [`konfluks/video.py`](./knofluks/video.py) script, you'd do: +All scripts have an entrypoint described in the [`pyproject.toml`](./pyproject.toml) which you can run via `poetry run ...`. For example, if you want to run the [`konfluks/video.py`](./konfluks/video.py) script, you'd do: ``` mkdir -p testdir -- 2.49.0 From d21158eb91ebf8c77082c2547a3d0b5262f474b8 Mon Sep 17 00:00:00 2001 From: rra Date: Fri, 9 Sep 2022 13:22:32 +0200 Subject: [PATCH 17/23] add support for videos in posts --- konfluks/hashtag.py | 44 +++++++++++++++++++++++------------ konfluks/templates/hashtag.md | 14 ++++++++++- 2 files changed, 42 insertions(+), 16 deletions(-) diff --git a/konfluks/hashtag.py b/konfluks/hashtag.py index ff29707..39c2a00 100644 --- a/konfluks/hashtag.py +++ b/konfluks/hashtag.py @@ -11,19 +11,19 @@ instance = "https://social.lumbung.space" email = "" password = "" hashtags = [ - "documentafifteen", - "harvestedbyputra", - "jalansesama", - "lumbungdotspace", - "majelisakakbar", - "majelisakbar", - "warungkopi", - "lumbungkios", - "kassel_ecosystem", - "ruruhaus", - "offbeatentrack_kassel", - "lumbungofpublishers", - "lumbungkiosproducts", + "documentafifteen"#, + # "harvestedbyputra", + # "jalansesama", + # "lumbungdotspace", + # "majelisakakbar", + # "majelisakbar", + # "warungkopi", + # "lumbungkios", + #
"kassel_ecosystem", + # "ruruhaus", + # "offbeatentrack_kassel", + # "lumbungofpublishers", + # "lumbungkiosproducts", ] @@ -60,6 +60,21 @@ def download_media(post_directory, media_attachments): with open(os.path.join(post_directory, image), "wb") as img_file: shutil.copyfileobj(response.raw, img_file) print("Downloaded cover image", image) + elif item["type"] == "video": + video = localize_media_url(item["url"]) + if not os.path.exists(os.path.join(post_directory, video)): + # download video file + response = requests.get(item["url"], stream=True) + with open(os.path.join(post_directory, video), "wb") as video_file: + shutil.copyfileobj(response.raw, video_file) + print("Downloaded video in post", video) + if not os.path.exists(os.path.join(post_directory, "thumbnail.png")): + #download video preview + response = requests.get(item["preview_url"], stream=True) + with open(os.path.join(post_directory, "thumbnail.png"), "wb") as thumbnail: + shutil.copyfileobj(response.raw, thumbnail) + print("Downloaded thumbnail for", video) + def create_post(post_directory, post_metadata): @@ -78,7 +93,6 @@ def create_post(post_directory, post_metadata): post_metadata["account"]["display_name"] = name env.filters["localize_media_url"] = localize_media_url env.filters["filter_mastodon_urls"] = filter_mastodon_urls - template = env.get_template("hashtag.md") with open(os.path.join(post_directory, "index.html"), "w") as f: @@ -130,7 +144,7 @@ def main(): timeline ) # returns all the rest n.b. 
can take a while because of rate limit - for post_metadata in timeline: + for post_metadata in timeline[:6]: post_dir = os.path.join(hashtag_dir, str(post_metadata["id"])) # if there is a post in the feed we dont already have locally, make it if str(post_metadata["id"]) not in all_existing_posts: diff --git a/konfluks/templates/hashtag.md b/konfluks/templates/hashtag.md index 4a61a7a..1148b79 100644 --- a/konfluks/templates/hashtag.md +++ b/konfluks/templates/hashtag.md @@ -5,13 +5,25 @@ authors: ["{{ post_metadata.account.display_name }}"] contributors: ["{{ post_metadata.account.acct}}"] avatar: {{ post_metadata.account.avatar }} categories: ["shouts"] -images: [{% for i in post_metadata.media_attachments %} {{ i.url }}, {% endfor %}] title: {{ post_metadata.account.display_name }} tags: [{% for i in post_metadata.tags %} "{{ i.name }}", {% endfor %}] +images: [{% for i in post_metadata.media_attachments %}{% if i.type == "image" %}"{{ i.url | localize_media_url }}", {%endif%}{% endfor %}] +videos: [{% for i in post_metadata.media_attachments %}{% if i.type == "video" %}"{{ i.url | localize_media_url }}", {%endif%}{% endfor %}] --- {% for item in post_metadata.media_attachments %} +{% if item.type == "image" %} {{item.description}} +{% endif %} +{% endfor %} + +{% for item in post_metadata.media_attachments %} +{% if item.type == "video" %} + +{% endif %} {% endfor %} {{ post_metadata.content | filter_mastodon_urls }} -- 2.49.0 From 657ced1ceb7dce8d6a0fe1d7fee0bf33f78b7207 Mon Sep 17 00:00:00 2001 From: rra Date: Fri, 9 Sep 2022 13:27:29 +0200 Subject: [PATCH 18/23] undo dev setup changes --- konfluks/hashtag.py | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/konfluks/hashtag.py b/konfluks/hashtag.py index 39c2a00..baa39ed 100644 --- a/konfluks/hashtag.py +++ b/konfluks/hashtag.py @@ -11,19 +11,19 @@ instance = "https://social.lumbung.space" email = "" password = "" hashtags = [ - "documentafifteen"#, - # 
"harvestedbyputra", - # "jalansesama", - # "lumbungdotspace", - # "majelisakakbar", - # "majelisakbar", - # "warungkopi", - # "lumbungkios", - # "kassel_ecosystem", - # "ruruhaus", - # "offbeatentrack_kassel", - # "lumbungofpublishers", - # "lumbungkiosproducts", + "documentafifteen", + "harvestedbyputra", + "jalansesama", + "lumbungdotspace", + "majelisakakbar", + "majelisakbar", + "warungkopi", + "lumbungkios", + "kassel_ecosystem", + "ruruhaus", + "offbeatentrack_kassel", + "lumbungofpublishers", + "lumbungkiosproducts", ] @@ -144,7 +144,7 @@ def main(): timeline ) # returns all the rest n.b. can take a while because of rate limit - for post_metadata in timeline[:6]: + for post_metadata in timeline: post_dir = os.path.join(hashtag_dir, str(post_metadata["id"])) # if there is a post in the feed we dont already have locally, make it if str(post_metadata["id"]) not in all_existing_posts: -- 2.49.0 From 0ecc0ecd3ab2307485d7bd31375f50d5dcf8dd8a Mon Sep 17 00:00:00 2001 From: rra Date: Fri, 9 Sep 2022 14:19:19 +0200 Subject: [PATCH 19/23] handle paths and extensions properly, fix #41 --- konfluks/calendars.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/konfluks/calendars.py b/konfluks/calendars.py index 0645e45..6f53656 100644 --- a/konfluks/calendars.py +++ b/konfluks/calendars.py @@ -138,9 +138,9 @@ def create_event_post(post_dir, event): for img in event_metadata["images"]: # parse img url to safe local image name - img_name = img.split("/")[-1] - fn, ext = img_name.split(".") - img_name = slugify(fn) + "." + ext + img_name = os.path.basename(img) + fn, ext = os.path.splitext(img_name) + img_name = slugify(fn) + '.' 
+ ext local_image = os.path.join(post_dir, img_name) -- 2.49.0 From a4f749ebd7383f1aa80722e1b72bd4f405bcd56d Mon Sep 17 00:00:00 2001 From: rra Date: Thu, 15 Sep 2022 18:37:25 +0200 Subject: [PATCH 20/23] adjust templates to new taxonomy --- konfluks/templates/calendar.md | 2 +- konfluks/templates/feed.md | 6 +++--- konfluks/templates/hashtag.md | 4 +--- konfluks/templates/timeline.md | 4 ++-- konfluks/templates/video.md | 2 +- 5 files changed, 8 insertions(+), 10 deletions(-) diff --git a/konfluks/templates/calendar.md b/konfluks/templates/calendar.md index e27acbe..f1072a2 100644 --- a/konfluks/templates/calendar.md +++ b/konfluks/templates/calendar.md @@ -2,7 +2,7 @@ title: "{{ event.name }}" date: "{{ event.begin }}" #2021-06-10T10:46:33+02:00 draft: false -categories: "calendar" +source: "lumbung calendar" event_begin: "{{ event.begin }}" event_end: "{{ event.end }}" duration: "{{ event.duration }}" diff --git a/konfluks/templates/feed.md b/konfluks/templates/feed.md index 29d8e91..54492f1 100644 --- a/konfluks/templates/feed.md +++ b/konfluks/templates/feed.md @@ -3,11 +3,11 @@ title: "{{ frontmatter.title }}" date: "{{ frontmatter.date }}" #2021-06-10T10:46:33+02:00 draft: false summary: "{{ frontmatter.summary }}" -authors: {% if frontmatter.author %} ["{{ frontmatter.author }}"] {% endif %} +contributors: {% if frontmatter.author %} ["{{ frontmatter.author }}"] {% endif %} original_link: "{{ frontmatter.original_link }}" feed_name: "{{ frontmatter.feed_name}}" -categories: ["{{ frontmatter.card_type }}", "{{ frontmatter.feed_name}}"] -contributors: ["{{ frontmatter.feed_name}}"] +card_type: "{{ frontmatter.card_type }}" +sources: ["{{ frontmatter.feed_name}}"] tags: {{ frontmatter.tags }} {% if frontmatter.featured_image %}featured_image: "{{frontmatter.featured_image}}"{% endif %} --- diff --git a/konfluks/templates/hashtag.md b/konfluks/templates/hashtag.md index 1148b79..ed41ffe 100644 --- a/konfluks/templates/hashtag.md +++ 
b/konfluks/templates/hashtag.md @@ -1,10 +1,8 @@ --- date: {{ post_metadata.created_at }} #2021-06-10T10:46:33+02:00 draft: false -authors: ["{{ post_metadata.account.display_name }}"] -contributors: ["{{ post_metadata.account.acct}}"] +contributors: ["{{ post_metadata.account.display_name }}"] avatar: {{ post_metadata.account.avatar }} -categories: ["shouts"] title: {{ post_metadata.account.display_name }} tags: [{% for i in post_metadata.tags %} "{{ i.name }}", {% endfor %}] images: [{% for i in post_metadata.media_attachments %}{% if i.type == "image" %}"{{ i.url | localize_media_url }}", {%endif%}{% endfor %}] diff --git a/konfluks/templates/timeline.md b/konfluks/templates/timeline.md index 0cc9c2b..b089a76 100644 --- a/konfluks/templates/timeline.md +++ b/konfluks/templates/timeline.md @@ -3,10 +3,10 @@ title: "{{ frontmatter.title }}" date: "{{ frontmatter.date }}" #2021-06-10T10:46:33+02:00 draft: false summary: "{{ frontmatter.summary }}" -authors: {% if frontmatter.author %} ["{{ frontmatter.author }}"] {% endif %} +contributors: {% if frontmatter.author %} ["{{ frontmatter.author }}"] {% endif %} original_link: "{{ frontmatter.original_link }}" feed_name: "{{ frontmatter.feed_name}}" -categories: ["timeline", "{{ frontmatter.feed_name}}"] +sources: ["timeline", "{{ frontmatter.feed_name}}"] timelines: {{ frontmatter.timelines }} hidden: true --- diff --git a/konfluks/templates/video.md b/konfluks/templates/video.md index 076a1bf..eafa1a8 100644 --- a/konfluks/templates/video.md +++ b/konfluks/templates/video.md @@ -9,7 +9,7 @@ channel_url: "{{ v.channel.url }}" contributors: ["{{ v.account.display_name }}"] preview_image: "{{ preview_image }}" images: ["./{{ preview_image }}"] -categories: ["tv","{{ v.channel.display_name }}"] +sources: ["{{ v.channel.display_name }}"] is_live: {{ v.is_live }} --- -- 2.49.0 From e01aa9a607bd7fd3f35d3c42a2df289a06d63c53 Mon Sep 17 00:00:00 2001 From: rra Date: Wed, 6 Nov 2024 16:48:41 +0100 Subject: [PATCH 21/23] Test 
whether a url still returns a feed, pass right filename as featured_image when handling enclosure, pass post_dir to existing_posts --- konfluks/feed.py | 100 ++++++++++++++++++++++++----------------------- 1 file changed, 52 insertions(+), 48 deletions(-) diff --git a/konfluks/feed.py b/konfluks/feed.py index f51d3e9..08606d2 100644 --- a/konfluks/feed.py +++ b/konfluks/feed.py @@ -156,7 +156,8 @@ def parse_enclosures(post_dir, entry): print("found enclosed media", e.type) if "image/" in e.type: featured_image = grab_media(post_dir, e.href) - entry["featured_image"] = featured_image + media_item = urlparse(e.href).path.split('/')[-1] + entry["featured_image"] = media_item else: print("FIXME:ignoring enclosed", e.type) return entry @@ -374,61 +375,64 @@ def main(): data = grab_feed(feed_url) if data: - opds_feed = False - for i in data.feed['links']: - if i['rel'] == 'self': - if 'opds' in i['type']: - opds_feed = True - print("OPDS type feed!") + if 'links' in data.feed: + for i in data.feed['links']: + if i['rel'] == 'self': + if 'opds' in i['type']: + opds_feed = True + print("OPDS type feed!") + if data.feed: + for entry in data.entries: + # if 'tags' in entry: + # for tag in entry.tags: + # for x in ['lumbung.space', 'D15', 'lumbung']: + # if x in tag['term']: + # print(entry.title) + entry["feed_name"] = feed_name - for entry in data.entries: - # if 'tags' in entry: - # for tag in entry.tags: - # for x in ['lumbung.space', 'D15', 'lumbung']: - # if x in tag['term']: - # print(entry.title) - entry["feed_name"] = feed_name + post_name = slugify(entry.title) - post_name = slugify(entry.title) + # pixelfed returns the whole post text as the post name. max + # filename length is 255 on many systems. here we're shortening + # the name and adding a hash to it to avoid a conflict in a + # situation where 2 posts start with exactly the same text. 
+ if len(post_name) > 150: + post_hash = md5(bytes(post_name, "utf-8")) + post_name = post_name[:150] + "-" + post_hash.hexdigest() - # pixelfed returns the whole post text as the post name. max - # filename length is 255 on many systems. here we're shortening - # the name and adding a hash to it to avoid a conflict in a - # situation where 2 posts start with exactly the same text. - if len(post_name) > 150: - post_hash = md5(bytes(post_name, "utf-8")) - post_name = post_name[:150] + "-" + post_hash.hexdigest() - - if opds_feed: - entry['opds'] = True - #format: Beyond-Debiasing-Report_Online-75535a4886e3 - post_name = slugify(entry['title'])+'-'+entry['id'].split('-')[-1] - - post_dir = os.path.join(output_dir, feed_name, post_name) - - if post_name not in existing_posts: - # if there is a blog entry we dont already have, make it if opds_feed: - create_opds_post(post_dir, entry) - else: - create_post(post_dir, entry) + entry['opds'] = True + #format: Beyond-Debiasing-Report_Online-75535a4886e3 + post_name = slugify(entry['title'])+'-'+entry['id'].split('-')[-1] - elif post_name in existing_posts: - # if we already have it, update it - if opds_feed: - create_opds_post(post_dir, entry) - else: - create_post(post_dir, entry) - existing_posts.remove( - post_name - ) # create list of posts which have not been returned by the feed + post_dir = os.path.join(output_dir, feed_name, post_name) - for post in existing_posts: - # remove blog posts no longer returned by the RSS feed - print("deleted", post) - shutil.rmtree(os.path.join(feed_dir, slugify(post))) + if post_name not in existing_posts: + # if there is a blog entry we dont already have, make it + if opds_feed: + create_opds_post(post_dir, entry) + else: + create_post(post_dir, entry) + + elif post_name in existing_posts: + # if we already have it, update it + if opds_feed: + create_opds_post(post_dir, entry) + else: + create_post(post_dir, entry) + existing_posts.remove( + post_dir + ) # create list of posts which 
have not been returned by the feed + + + for post in existing_posts: + # remove blog posts no longer returned by the RSS feed + print("deleted", post) + shutil.rmtree(post) + else: + print(feed_url, "is not or no longer a feed!") end = time.time() -- 2.49.0 From 9d9f8f6d72840f6e90aa9e621237d559936b5a69 Mon Sep 17 00:00:00 2001 From: rra Date: Wed, 6 Nov 2024 17:24:55 +0100 Subject: [PATCH 22/23] do proper deletion --- konfluks/feed.py | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/konfluks/feed.py b/konfluks/feed.py index 08606d2..53435ba 100644 --- a/konfluks/feed.py +++ b/konfluks/feed.py @@ -374,16 +374,16 @@ def main(): data = grab_feed(feed_url) - if data: - opds_feed = False - if 'links' in data.feed: - for i in data.feed['links']: - if i['rel'] == 'self': - if 'opds' in i['type']: - opds_feed = True - print("OPDS type feed!") + if data: #whenever we get a 200 + if data.feed: #only if it is an actual feed + opds_feed = False + if 'links' in data.feed: + for i in data.feed['links']: + if i['rel'] == 'self': + if 'opds' in i['type']: + opds_feed = True + print("OPDS type feed!") - if data.feed: for entry in data.entries: # if 'tags' in entry: # for tag in entry.tags: @@ -423,14 +423,15 @@ def main(): else: create_post(post_dir, entry) existing_posts.remove( - post_dir + post_name ) # create list of posts which have not been returned by the feed for post in existing_posts: # remove blog posts no longer returned by the RSS feed - print("deleted", post) - shutil.rmtree(post) + post_dir = os.path.join(output_dir, feed_name, post) + shutil.rmtree(post_dir) + print("deleted", post_dir) else: print(feed_url, "is not or no longer a feed!") -- 2.49.0 From 028bc1df84c9089391bd20f4c90d4db8a3f3ec16 Mon Sep 17 00:00:00 2001 From: rra Date: Wed, 20 Nov 2024 11:25:26 +0100 Subject: [PATCH 23/23] fix issue where posts with enclosures would not download files because of missing dir --- konfluks/feed.py | 2 ++ 1 file changed, 2 
insertions(+) diff --git a/konfluks/feed.py b/konfluks/feed.py index 53435ba..844e0da 100644 --- a/konfluks/feed.py +++ b/konfluks/feed.py @@ -155,6 +155,8 @@ def parse_enclosures(post_dir, entry): if "type" in e: print("found enclosed media", e.type) if "image/" in e.type: + if not os.path.exists(post_dir): #this might be redundant with create_post + os.makedirs(post_dir) featured_image = grab_media(post_dir, e.href) media_item = urlparse(e.href).path.split('/')[-1] entry["featured_image"] = media_item -- 2.49.0