add part 5 draft tutorial

This commit is contained in:
glyph 2022-09-05 13:58:17 +01:00
parent 198ecafb78
commit b141b53648
8 changed files with 421 additions and 56 deletions

3
Cargo.lock generated
View File

@ -1640,11 +1640,14 @@ version = "0.1.0"
dependencies = [
"async-std",
"bincode",
"chrono",
"futures",
"golgi",
"log",
"rocket",
"rocket_dyn_templates",
"serde",
"serde_json",
"sled",
"xdg",
]

View File

@ -8,10 +8,13 @@ edition = "2021"
[dependencies]
async-std = "1.10"
bincode = "1.3"
chrono = "0.4"
futures = "0.3"
golgi = { git = "https://git.coopcloud.tech/golgi-ssb/golgi.git" }
log = "0.4"
rocket = "0.5.0-rc.1"
rocket_dyn_templates = { version = "0.1.0-rc.1", features = ["tera"] }
serde = "1"
serde_json = "1"
sled = "0.34"
xdg = "2.4.1"

View File

@ -4,7 +4,7 @@
### Introduction
In the last installment we added support to our key-value database for dealing with Scuttlebutt posts and wrote code to create and filter streams of Scuttlebutt messages. Since our peers may have authored tens of thousands of messages, it's useful to create a way of fetching and filtering message streams as a background process. Today we'll do just that; writing a task loop that we can be invoked from our web application route handlers and used to execute potentially long-running processes.
In the last installment we added support to our key-value database for dealing with Scuttlebutt posts and wrote code to create and filter streams of Scuttlebutt messages. Since our peers may have authored tens of thousands of messages, it's useful to create a way of fetching and filtering message streams as a background process. Today we'll do just that; writing a task loop that can be invoked from our web application route handlers and used to execute potentially long-running processes.
### Outline
@ -13,14 +13,9 @@ Here's what we'll tackle in this fifth part of the series:
- Create an asynchronous task loop
- Create a message passing channel and spawn the task loop
- Write sbot-related task functions
- Pass database instance into task loop
- Fetch root posts on subscription
### Libraries
The following libraries are introduced in this part:
- [`async-std`](https://crates.io/crates/async-std)
### Create an Asynchronous Task Loop
Let's start by defining a task type that enumerates the various tasks we might want to carry out. We'll create a separate module for our task loop:
@ -118,18 +113,64 @@ use crate::task_loop::Task;
}
```
Reviewing the code above: first an unbounded, asynchronous channel is created and split into transmitting (`tx`) and receiving (`rx`) ends, after which the transmitting channel is cloned. The task loop is then spawned and takes with it the receiving end of the channel. As we did previously with the `db` instance, the transmitting half of the channel is added to the managed state of the Rocket application; this will allow us to transmit tasks to the task loop from our web route handlers. And finaly, a shutdown handler is attached to the Rocket application in order to send a cancellation task to the task loop before the program ends. This ensures that the task loop closes cleanly.
Reviewing the code above: first an unbounded, asynchronous channel is created and split into transmitting (`tx`) and receiving (`rx`) ends, after which the transmitting channel is cloned. The task loop is then spawned and takes with it the receiving end of the channel. As we did previously with the `db` instance, the transmitting half of the channel is added to the managed state of the Rocket application; this will allow us to transmit tasks to the task loop from our web route handlers. Finally, a shutdown handler is attached to the Rocket application in order to send a cancellation task to the task loop before the program ends. This ensures that the task loop closes cleanly.
### Write Sbot-Related Task Functions
Now it's time to write the functions that will be executed when the `FetchAllPosts` and `FetchLatestName` tasks are invoked. These functions will be responsible for retrieving data from the sbot and updating the database with the latest values:
Before we can write the sbot-related task functions, we first need to add a method to our database code to allow the retrieval of data for a specific peer. Since we serialized the peer data as bincode before inserting it into the database, we need to deserialize the value after fetching it.
`src/db.rs`
```rust
impl Database {
// pub fn add_peer(&self, peer: Peer) -> Result<Option<IVec>> {
// ...
// }
// Get a single peer from the peer tree, defined by the given public key.
// The byte value for the matching entry, if found, is deserialized from
// bincode into an instance of the Peer struct. `Ok(None)` is returned when
// no entry exists for the key.
//
// A failed database lookup is returned to the caller as an error. The
// function panics only if the stored bytes are not valid bincode for a
// Peer, which would mean the database is corrupted or incompatible.
pub fn get_peer(&self, public_key: &str) -> Result<Option<Peer>> {
    debug!(
        "Retrieving peer data for {} from 'peers' database tree",
        &public_key
    );

    // Propagate lookup failures with `?` instead of panicking; the function
    // already returns a `Result`, so the caller can decide how to react.
    let peer = self
        .peer_tree
        .get(public_key.as_bytes())?
        .map(|peer_bytes| {
            debug!("Deserializing peer data for {} from bincode", &public_key);
            bincode::deserialize(&peer_bytes)
                .expect("peer entry in 'peers' tree should be valid bincode")
        });

    Ok(peer)
}
// ...
}
```
Now it's time to write the functions that will be executed when the `FetchAllPosts` and `FetchLatestName` tasks are invoked. These functions will be responsible for retrieving data from the sbot and updating the database with the latest values. We can keep our task loop neat and readable by separating this logic into functions:
`src/task_loop.rs`
```rust
use log::warn;
use crate::{Database, sbot};
// Retrieve a set of posts from the local sbot instance and add them to the
// posts tree of the database.
//
// A stream of messages is first requested for the peer represented by the
// given public key (ID), starting after the given sequence number. The root
// posts are filtered from the set of messages and added to the database as a
// batch. Finally, the value of the latest sequence for the peer is updated
// and saved to the existing database entry.
async fn fetch_posts_and_update_db(db: &Database, peer_id: String, after_sequence: u64) {
let peer_msgs = sbot::get_message_stream(&peer_id, after_sequence).await;
let (latest_sequence, root_posts) = sbot::get_root_posts(peer_msgs).await;
let (_latest_sequence, root_posts) = sbot::get_root_posts(peer_msgs).await;
match db.add_post_batch(&peer_id, root_posts) {
Ok(_) => {
@ -143,17 +184,10 @@ async fn fetch_posts_and_update_db(db: &Database, peer_id: String, after_sequenc
&peer_id, e
),
}
// Update the value of the latest sequence number for
// the peer (this is stored in the database).
if let Ok(Some(peer)) = db.get_peer(&peer_id) {
db.add_peer(peer.set_latest_sequence(latest_sequence))
.unwrap();
}
}
/// Request the name of the peer represented by the given public key (ID)
/// and update the existing entry in the database.
// Request the name of the peer represented by the given public key (ID)
// and update the existing entry in the database.
async fn fetch_name_and_update_db(db: &Database, peer_id: String) {
match sbot::get_name(&peer_id).await {
Ok(name) => {
@ -172,8 +206,124 @@ async fn fetch_name_and_update_db(db: &Database, peer_id: String) {
}
```
These function calls can now be added to our task matching code in the task loop. Note that we also need to add the database instance as a parameter in the function signature:
`src/task_loop.rs`
```rust
// Spawn an asynchronous loop which receives tasks over the given channel
// and invokes the matching task function. The database instance is moved
// into the spawned task so that the task functions can update it.
pub async fn spawn(db: Database, rx: Receiver<Task>) {
task::spawn(async move {
// Keep receiving tasks until `recv()` returns an error or a
// `Task::Cancel` breaks out of the loop.
while let Ok(task) = rx.recv().await {
match task {
// Fetch the posts for the given peer, starting from sequence
// number 0, and add them to the database.
Task::FetchAllPosts(peer_id) => {
info!("Fetching all posts for peer: {}", peer_id);
fetch_posts_and_update_db(&db, peer_id, 0).await;
}
// Fetch the latest name for the given peer and update the
// matching peer entry in the database.
Task::FetchLatestName(peer_id) => {
info!("Fetching latest name for peer: {}", peer_id);
fetch_name_and_update_db(&db, peer_id).await;
}
// Break out of the task loop.
Task::Cancel => {
info!("Exiting task loop...");
break;
}
}
}
});
}
```
### Pass Database Instance Into Task Loop
As it currently stands, our code will fail to compile because `task_loop::spawn()` expects a database instance which has not yet been provided. We need to revisit the code in the root of our application to clone the database and pass it into the task loop:
`src/main.rs`
```rust
#[launch]
async fn rocket() -> _ {
// ...
let db = Database::init(&db_path);
// Clone the database instance.
let db_clone = db.clone();
// Create a message passing channel.
let (tx, rx) = channel::unbounded();
let tx_clone = tx.clone();
// Spawn the task loop.
info!("Spawning task loop");
// Pass the cloned database instance and the rx channel into the task loop.
task_loop::spawn(db_clone, rx).await;
// ...
}
```
### Fetch Root Posts on Subscription
Great, the task loop is primed and ready for action. We are very close to being able to initiate tasks from the route handler(s) of our web application. Earlier in this installment of the tutorial we created a message passing channel in `src/main.rs` and added the transmission end of the channel to the managed state of our Rocket instance. We need to add the transmitter as a parameter of the `subscribe_form` function before we can invoke tasks:
`src/routes.rs`
```rust
use async_std::channel::Sender;
use crate::task_loop::Task;
#[post("/subscribe", data = "<peer>")]
pub async fn subscribe_form(
db: &State<Database>,
tx: &State<Sender<Task>>,
peer: Form<PeerForm>,
) -> Result<Redirect, Flash<Redirect>> {
info!("Subscribing to peer {}", &peer.public_key);
// ...
}
```
Now, when a subscription event occurs (i.e. the subscribe form is submitted with a peer ID), we can trigger a task to fetch all the root posts for that peer and add them to the key-value database. Note that I've omitted most of the code we've already written from the sample below. The three most important lines are those beginning with `if let Err(e) = tx.send...`.
```rust
#[post("/subscribe", data = "<peer>")]
pub async fn subscribe_form(
db: &State<Database>,
tx: &State<Sender<Task>>,
peer: Form<PeerForm>,
) -> Result<Redirect, Flash<Redirect>> {
// ... {
match sbot::follow_if_not_following(&peer.public_key).await {
Ok(_) => {
if db.add_peer(peer_info).is_ok() {
// ...
// Fetch all root posts authored by the peer we're subscribing
// to. Posts will be added to the key-value database.
if let Err(e) = tx.send(Task::FetchAllPosts(peer_id)).await {
warn!("Task loop error: {}", e)
}
} else {
// ...
}
}
Err(e) => {
// ...
}
}
}
Ok(Redirect::to(uri!(home)))
}
```
### Conclusion
In this installment we wrote an asynchronous task loop and `Task` type to be able to execute background processes in our application. We created task variants and functions for two primary operations: 1. fetching all the root posts for a peer and adding them to the key-value database, and 2. fetching the latest name assigned to a peer. We created a message passing channel, passed the receiving end to the task loop and the transmitting end to the managed state of our web application, and invoked the fetch-all task from our subscription route handler.
The `Task` type and loop we wrote today can be easily extended by adding more variants. It's a part of the code we will return to in a future installment.
In the next tutorial installment we'll focus on updating the web interface. We'll add more templates to create a modular layout, write some CSS and populate a list of peers from the data in our key-value store. Soon the application will begin to take shape!
## Funding
This work has been funded by a Scuttlebutt Community Grant.

View File

@ -2,7 +2,7 @@ use std::path::Path;
use log::{debug, info};
use serde::{Deserialize, Serialize};
use sled::{Db, IVec, Result, Tree};
use sled::{Batch, Db, IVec, Result, Tree};
/// Scuttlebutt peer data.
#[derive(Debug, Deserialize, Serialize)]
@ -31,6 +31,49 @@ impl Peer {
}
}
/// The text and metadata of a Scuttlebutt root post.
///
/// Instances are serialized to bincode before being inserted into the post
/// tree, so changing the fields of this struct changes the serialized form
/// of stored entries.
#[derive(Debug, Deserialize, Serialize)]
pub struct Post {
/// The key of the post-type message, also known as a message reference.
pub key: String,
/// The text of the post (may be formatted as markdown).
pub text: String,
/// The date the post was published, formatted as day, abbreviated month
/// and year (e.g. 17 May 2021).
pub date: String,
/// The sequence number of the post-type message.
pub sequence: u64,
/// The read state of the post; true if read, false if unread.
pub read: bool,
/// The timestamp representing the date the post was published. This is
/// the message timestamp divided by 1000 (presumably Unix seconds).
pub timestamp: i64,
/// The subject of the post: a short prefix of the post text (at most 52
/// bytes as produced by `sbot::get_root_posts`), or `None` when the text
/// is shorter than that.
pub subject: Option<String>,
}
impl Post {
// Create a new instance of the Post struct. A default value of `false` is
// set for `read`.
pub fn new(
key: String,
text: String,
date: String,
sequence: u64,
timestamp: i64,
subject: Option<String>,
) -> Post {
Post {
key,
text,
date,
sequence,
timestamp,
subject,
read: false,
}
}
}
/// An instance of the key-value database and relevant trees.
#[allow(dead_code)]
#[derive(Clone)]
@ -40,6 +83,9 @@ pub struct Database {
/// A database tree containing Peer struct instances for all the peers
/// we are subscribed to.
peer_tree: Tree,
/// A database tree containing Post struct instances for all of the posts
/// we have downloaded from the peers to whom we subscribe.
pub post_tree: Tree,
}
impl Database {
@ -55,8 +101,16 @@ impl Database {
let peer_tree = db
.open_tree("peers")
.expect("Failed to open 'peers' database tree");
debug!("Opening 'posts' database tree");
let post_tree = db
.open_tree("posts")
.expect("Failed to open 'posts' database tree");
Database { db, peer_tree }
Database {
db,
peer_tree,
post_tree,
}
}
/// Add a peer to the database by inserting the public key into the peer
@ -72,10 +126,59 @@ impl Database {
self.peer_tree.insert(&peer.public_key, peer_bytes)
}
/// Get a single peer from the peer tree, defined by the given public key.
///
/// The byte value for the matching entry, if found, is deserialized from
/// bincode into an instance of the Peer struct. `Ok(None)` is returned when
/// no entry exists for the key.
///
/// # Errors
///
/// Returns the underlying database error if the lookup itself fails.
///
/// # Panics
///
/// Panics if the stored bytes cannot be deserialized into a `Peer`. Entries
/// are written as bincode by `add_peer`, so this indicates a corrupted or
/// incompatible database.
pub fn get_peer(&self, public_key: &str) -> Result<Option<Peer>> {
    debug!(
        "Retrieving peer data for {} from 'peers' database tree",
        &public_key
    );

    // Propagate lookup failures with `?` instead of panicking; the function
    // already returns a `Result`, so the caller can decide how to react.
    let peer = self
        .peer_tree
        .get(public_key.as_bytes())?
        .map(|peer_bytes| {
            debug!("Deserializing peer data for {} from bincode", &public_key);
            bincode::deserialize(&peer_bytes)
                .expect("peer entry in 'peers' tree should be valid bincode")
        });

    Ok(peer)
}
/// Delete the entry stored under the given public key from the peer tree.
///
/// The previously stored value, if any, is discarded; only the success or
/// failure of the removal is reported to the caller.
pub fn remove_peer(&self, public_key: &str) -> Result<()> {
    debug!("Removing peer {} from 'peers' database tree", &public_key);

    let _previous = self.peer_tree.remove(&public_key)?;

    Ok(())
}
/// Store a single post in the post tree.
///
/// The entry is keyed by the author's public key and the post key joined
/// with an underscore (`<public_key>_<post.key>`); the value is the
/// bincode-serialized `Post`. Returns whatever the tree insertion returns.
pub fn add_post(&self, public_key: &str, post: Post) -> Result<Option<IVec>> {
    let tree_key = [public_key, post.key.as_str()].join("_");

    debug!("Serializing post data for {} to bincode", &tree_key);
    let encoded = bincode::serialize(&post).unwrap();

    debug!("Inserting post {} into 'posts' database tree", &tree_key);
    let insertion = self.post_tree.insert(tree_key.as_bytes(), encoded);

    insertion
}
/// Store a collection of posts in the post tree as a single batch.
///
/// Each post is keyed as `<public_key>_<post.key>` and serialized to
/// bincode. All insertions are collected into one `Batch`, which is then
/// applied to the post tree in one call.
pub fn add_post_batch(&self, public_key: &str, posts: Vec<Post>) -> Result<()> {
    let mut pending = Batch::default();

    posts.into_iter().for_each(|entry| {
        let tree_key = format!("{}_{}", public_key, entry.key);

        debug!("Serializing post data for {} to bincode", &tree_key);
        let encoded = bincode::serialize(&entry).unwrap();

        debug!("Inserting post {} into 'posts' database tree", &tree_key);
        pending.insert(tree_key.as_bytes(), encoded);
    });

    debug!("Applying batch insertion into 'posts' database tree");
    self.post_tree.apply_batch(pending)
}
}

View File

@ -20,6 +20,7 @@ async fn rocket() -> _ {
.place_config_file("database")
.expect("cannot create database directory");
let db = Database::init(&db_path);
let db_clone = db.clone();
// Create a message passing channel.
let (tx, rx) = channel::unbounded();
@ -27,7 +28,7 @@ async fn rocket() -> _ {
// Spawn the task loop, passing in the receiver half of the channel.
info!("Spawning task loop");
task_loop::spawn(rx).await;
task_loop::spawn(db_clone, rx).await;
rocket::build()
.manage(db)

View File

@ -1,3 +1,4 @@
use async_std::channel::Sender;
use log::{info, warn};
use rocket::{
form::Form,
@ -10,7 +11,9 @@ use rocket_dyn_templates::{context, Template};
use crate::{
db::{Database, Peer},
sbot, utils,
sbot,
task_loop::Task,
utils,
};
#[derive(FromForm)]
@ -31,6 +34,7 @@ pub async fn home(flash: Option<FlashMessage<'_>>) -> Template {
#[post("/subscribe", data = "<peer>")]
pub async fn subscribe_form(
db: &State<Database>,
tx: &State<Sender<Task>>,
peer: Form<PeerForm>,
) -> Result<Redirect, Flash<Redirect>> {
if let Err(e) = utils::validate_public_key(&peer.public_key) {
@ -55,6 +59,13 @@ pub async fn subscribe_form(
// Add the peer to the database.
if db.add_peer(peer_info).is_ok() {
info!("Added {} to 'peers' database tree", &peer.public_key);
let peer_id = peer.public_key.to_string();
// Fetch all root posts authored by the peer we're subscribing
// to. Posts will be added to the key-value database.
if let Err(e) = tx.send(Task::FetchAllPosts(peer_id)).await {
warn!("Task loop error: {}", e)
}
} else {
let err_msg = format!(
"Failed to add peer {} to 'peers' database tree",
@ -109,33 +120,3 @@ pub async fn unsubscribe_form(
Ok(Redirect::to(uri!(home)))
}
/*
#[post("/subscribe", data = "<peer>")]
pub async fn subscribe_form(peer: Form<PeerForm>) -> Result<Redirect, Flash<Redirect>> {
if let Err(e) = utils::validate_public_key(&peer.public_key) {
let validation_err_msg = format!("Public key {} is invalid: {}", &peer.public_key, e);
warn!("{}", validation_err_msg);
return Err(Flash::error(Redirect::to(uri!(home)), validation_err_msg));
} else {
info!("Public key {} is valid", &peer.public_key);
sbot::follow_if_not_following(&peer.public_key).await;
}
Ok(Redirect::to(uri!(home)))
}
#[post("/unsubscribe", data = "<peer>")]
pub async fn unsubscribe_form(peer: Form<PeerForm>) -> Result<Redirect, Flash<Redirect>> {
if let Err(e) = utils::validate_public_key(&peer.public_key) {
let validation_err_msg = format!("Public key {} is invalid: {}", &peer.public_key, e);
warn!("{}", validation_err_msg);
return Err(Flash::error(Redirect::to(uri!(home)), validation_err_msg));
} else {
info!("Public key {} is valid", &peer.public_key);
sbot::unfollow_if_following(&peer.public_key).await;
}
Ok(Redirect::to(uri!(home)))
}
*/

View File

@ -1,7 +1,17 @@
use std::env;
use golgi::{api::friends::RelationshipQuery, sbot::Keystore, Sbot};
use async_std::stream::StreamExt;
use chrono::NaiveDateTime;
use golgi::{
api::{friends::RelationshipQuery, history_stream::CreateHistoryStream},
messages::{SsbMessageContentType, SsbMessageKVT},
sbot::Keystore,
GolgiError, Sbot,
};
use log::{info, warn};
use serde_json::value::Value;
use crate::db::Post;
/// Initialise a connection to a Scuttlebutt server.
pub async fn init_sbot() -> Result<Sbot, String> {
@ -128,3 +138,76 @@ pub async fn unfollow_if_following(remote_peer: &str) -> Result<(), String> {
Err(err_msg)
}
}
/// Open a history stream for the feed identified by the given public key.
///
/// The request asks for both keys and values and is limited to messages
/// after `sequence_number`. Messages of every type are included; no
/// filtering is applied here.
///
/// # Panics
///
/// Panics if the sbot connection cannot be initialised or if the history
/// stream request fails.
pub async fn get_message_stream(
    public_key: &str,
    sequence_number: u64,
) -> impl futures::Stream<Item = Result<SsbMessageKVT, GolgiError>> {
    let mut client = init_sbot().await.unwrap();

    let author = public_key.to_string();
    let request = CreateHistoryStream::new(author)
        .keys_values(true, true)
        .after_seq(sequence_number);

    client.create_history_stream(request).await.unwrap()
}
/// Filter a stream of messages and return the root posts it contains.
///
/// A message is treated as a root post when it is of the post content type
/// and its content object has no `root` key. Each returned `Post` includes
/// the key of the post, the content text, the date the post was published,
/// the sequence number of the post and an unread state.
///
/// Returns a tuple of:
///
/// - the sequence number of the last root post seen in the stream (`0` if
///   no root post was found), and
/// - the vector of root posts, in stream order.
///
/// Stream elements that are errors are logged as warnings and skipped.
pub async fn get_root_posts(
    history_stream: impl futures::Stream<Item = Result<SsbMessageKVT, GolgiError>>,
) -> (u64, Vec<Post>) {
    let mut latest_sequence = 0;
    let mut posts = Vec::new();

    futures::pin_mut!(history_stream);

    while let Some(res) = history_stream.next().await {
        match res {
            Ok(msg) => {
                if msg.value.is_message_type(SsbMessageContentType::Post) {
                    let content = msg.value.content.to_owned();
                    if let Value::Object(content_map) = content {
                        // Replies carry a `root` key; only root posts are kept.
                        if !content_map.contains_key("root") {
                            latest_sequence = msg.value.sequence;

                            // NOTE: `Value::to_string` yields the JSON
                            // encoding of the value, so a string text keeps
                            // its surrounding double quotes.
                            let text = content_map
                                .get("text")
                                .map(|value| value.to_string())
                                .unwrap_or_default();

                            // The message timestamp is divided by 1000
                            // before being handed to chrono as seconds.
                            let timestamp = msg.value.timestamp.round() as i64 / 1000;
                            let datetime = NaiveDateTime::from_timestamp(timestamp, 0);
                            let date = datetime.format("%d %b %Y").to_string();

                            // Use the leading 52 bytes of the text as the
                            // subject. If byte 52 falls inside a multi-byte
                            // character, back up to the previous character
                            // boundary instead of dropping the subject.
                            // Texts shorter than 52 bytes have no subject.
                            let subject = if text.len() >= 52 {
                                let mut end = 52;
                                while !text.is_char_boundary(end) {
                                    end -= 1;
                                }
                                Some(text[..end].to_string())
                            } else {
                                None
                            };

                            let post = Post::new(
                                msg.key.to_owned(),
                                text,
                                date,
                                msg.value.sequence,
                                timestamp,
                                subject,
                            );

                            posts.push(post)
                        }
                    }
                }
            }
            Err(err) => {
                // Log the `GolgiError` of this element and carry on with
                // the rest of the stream.
                warn!("err: {:?}", err);
            }
        }
    }

    (latest_sequence, posts)
}

View File

@ -1,5 +1,44 @@
use async_std::{channel::Receiver, task};
use log::info;
use log::{info, warn};
use crate::{sbot, Database};
/// Retrieve root posts for a peer from the local sbot instance and add them
/// to the posts tree of the database.
///
/// A stream of messages is requested for the peer represented by the given
/// public key (ID), starting after the given sequence number. The root
/// posts are filtered from the stream and inserted into the database as a
/// batch. The latest sequence number reported by the filter is currently
/// discarded. The outcome of the insertion is logged, not returned.
async fn fetch_posts_and_update_db(db: &Database, peer_id: String, after_sequence: u64) {
    let message_stream = sbot::get_message_stream(&peer_id, after_sequence).await;
    let (_, posts) = sbot::get_root_posts(message_stream).await;

    if let Err(e) = db.add_post_batch(&peer_id, posts) {
        warn!(
            "Failed to insert batch of posts into database post tree for peer: {}: {}",
            &peer_id, e
        );
    } else {
        info!(
            "Inserted batch of posts into database post tree for peer: {}",
            &peer_id
        );
    }
}
/// Ask the sbot for the name of the peer with the given public key (ID) and
/// write it back to that peer's existing database entry.
///
/// Nothing is returned: a failed name request or a failed database write is
/// logged as a warning. If the peer has no database entry, or the lookup
/// fails, the name is dropped without any log output.
async fn fetch_name_and_update_db(db: &Database, peer_id: String) {
    let name = match sbot::get_name(&peer_id).await {
        Ok(name) => name,
        Err(e) => {
            warn!("Failed to fetch name for {}: {}", &peer_id, e);
            return;
        }
    };

    // Only peers that already have an entry are updated.
    if let Ok(Some(peer)) = db.get_peer(&peer_id) {
        let renamed = peer.set_name(&name);

        if let Err(e) = db.add_peer(renamed) {
            warn!("Failed to update name for peer: {}: {}", &peer_id, e)
        } else {
            info!("Updated name for peer: {}", &peer_id)
        }
    }
}
pub enum Task {
Cancel,
@ -9,7 +48,7 @@ pub enum Task {
/// Spawn an asynchronous loop which receives tasks over an unbounded channel
/// and invokes task functions accordingly.
pub async fn spawn(rx: Receiver<Task>) {
pub async fn spawn(db: Database, rx: Receiver<Task>) {
task::spawn(async move {
while let Ok(task) = rx.recv().await {
match task {
@ -18,11 +57,13 @@ pub async fn spawn(rx: Receiver<Task>) {
// database.
Task::FetchAllPosts(peer_id) => {
info!("Fetching all posts for peer: {}", peer_id);
fetch_posts_and_update_db(&db, peer_id, 0).await;
}
// Fetch the latest name for the given peer and update the
// peer entry in the peers tree of the database.
Task::FetchLatestName(peer_id) => {
info!("Fetching latest name for peer: {}", peer_id);
fetch_name_and_update_db(&db, peer_id).await;
}
// Break out of the task loop.
Task::Cancel => {