From fca4095a3f384a699ac6540a9b7b095775cb0d6d Mon Sep 17 00:00:00 2001
From: brooke <brooke@myco.systems>
Date: Fri, 7 Mar 2025 17:11:18 -0500
Subject: [PATCH] init

---
 .env.sample         |  16 ++-
 README.md           |   9 +-
 about.html.tmpl     |  39 +++++
 abra.sh             |   5 +
 api.txt.tmpl        | 338 ++++++++++++++++++++++++++++++++++++++++++++
 compose.captcha.yml |  12 ++
 compose.yml         |  52 +++++--
 header.html.tmpl    |  36 +++++
 home.html.tmpl      |  36 +++++
 robots.txt.tmpl     |   4 +
 10 files changed, 535 insertions(+), 12 deletions(-)
 create mode 100644 about.html.tmpl
 create mode 100644 abra.sh
 create mode 100644 api.txt.tmpl
 create mode 100644 compose.captcha.yml
 create mode 100644 header.html.tmpl
 create mode 100644 home.html.tmpl
 create mode 100644 robots.txt.tmpl
diff --git a/.env.sample b/.env.sample
index f69efbb..a8204aa 100644
--- a/.env.sample
+++ b/.env.sample
@@ -2,7 +2,17 @@ TYPE=4get
 
 DOMAIN=4get.example.com
 
-## Domain aliases
-#EXTRA_DOMAINS=', `www.4get.example.com`'
-
 LETS_ENCRYPT_ENV=production
+
+FOURGET_SERVER_NAME=4get.example.com
+FOURGET_SHORT_DESCRIPTION="4get 4get 4get 4get"
+
+## uncomment to customize the instance list
+#FOURGET_INSTANCES=https://4get.ca
+
+## Enable captcha protection, please see readme.
+#COMPOSE_FILE="$COMPOSE_FILE:compose.captcha.yml"
+
+INCLUDE_NEWS_LINK=false
+INCLUDE_4GET_DONATION_LINK=true
+INCLUDE_SOURCE_LINK=false
\ No newline at end of file
diff --git a/README.md b/README.md
index 0ed3b10..2d3f76d 100644
--- a/README.md
+++ b/README.md
@@ -21,4 +21,11 @@
 * `abra app config <app-name>`
 * `abra app deploy <app-name>`
 
-For more, see [`docs.coopcloud.tech`](https://docs.coopcloud.tech).
+### Custom banners
+
+* `abra app cp <app-domain> /local/path/to/banners app:/var/www/html/4get/banner/`
+
+## Captcha
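+To enable captcha protection, run `abra app config <app-name>`, uncomment the `COMPOSE_FILE` line that adds `compose.captcha.yml`, then redeploy the app.
+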
+For more, see [`docs.coopcloud.tech`](https://docs.coopcloud.tech).
\ No newline at end of file
diff --git a/about.html.tmpl b/about.html.tmpl
new file mode 100644
index 0000000..c5fe675
--- /dev/null
+++ b/about.html.tmpl
@@ -0,0 +1,39 @@
+<a href="/" class="link">&lt; Go back</a>
+
+<h1>Set as default search engine</h1>
+<a href="#firefox"><h2 id="firefox">On Firefox and other Gecko based browsers</h2></a>
+To set this as your default search engine on Firefox, right click the URL bar and select <div class="code-inline">Add "4get"</div>. Then, visit <a href="about:preferences#search" target="_BLANK" class="link">about:preferences#search</a> and select <div class="code-inline">4get</div> in the dropdown menu.
+
+<a href="#chrome"><h2 id="chrome">On Chromium and Blink based browsers</h2></a>
+Click the three vertically stacked dots at the top right of the screen and click on <div class="code-inline">Settings</div>, then search for <div class="code-inline">default search engine</div>, or visit <a href="chrome://settings/searchEngines">chrome://settings/searchEngines</a>.<br><br>
+
+Once you're there, click the pencil on the last entry under "Search engines" (it's probably DuckDuckGo). Once you do that, a popup will appear. Populate it with the following information: 
+
+<table>
+	<tr>
+		<td><b>Field</b></td>
+		<td><b>Value</b></td>
+	</tr>
+	<tr>
+		<td>Search engine</td>
+		<td>{%server_name%}</td>
+	</tr>
+	<tr>
+		<td>Shortcut</td>
+		<td>{%server_name%}</td>
+	</tr>
+	<tr>
+		<td>Search URL</td>
+		<td>https://{{ env "DOMAIN" }}/web?s=%s</td>
+	</tr>
+	<tr>
+		<td>Autocomplete URL</td>
+		<td>https://{{ env "DOMAIN" }}/api/v1/ac?s=%s</td>
+	</tr>
+</table>
+
+Once that's done, click <div class="code-inline">Save</div>. Then, on the right-hand side of the newly created entry, open the dropdown menu and select <div class="code-inline">Make default</div>.
+
+<br><br>
+<h2 id="DMCA">Message to all DMCA enforcers</h2> 
+This server hosts no content. Everything you see here is <u>proxied</u> and this service provides no endorsement of any content. Please reach out to the service hosting the infringing content instead.<br><br>
\ No newline at end of file
diff --git a/abra.sh b/abra.sh
new file mode 100644
index 0000000..46e0804
--- /dev/null
+++ b/abra.sh
@@ -0,0 +1,5 @@
+export ROBOTS_TXT_VERSION=v1
+export ABOUT_HTML_VERSION=v1
+export HEADER_HTML_VERSION=v1
+export HOME_HTML_VERSION=v1
+export API_TXT_VERSION=v1
diff --git a/api.txt.tmpl b/api.txt.tmpl
new file mode 100644
index 0000000..09f57ed
--- /dev/null
+++ b/api.txt.tmpl
@@ -0,0 +1,338 @@
+                   44
+                 4444444      44   
+                 44444444   44444       444
+                 44444444  444444    444444444
+                  44444   44444444  444444444
+                         444444444   4444444
+                        4444444444    444444
+                      4444444444444
+                    444444444444444444
+                       444444444444444
+                          44444444
+                          4444
+                           44
+                 
+           + Welcome to the 4get API documentation +
+
++ Terms of use
+
+    - Human-Driven Requests Only: API requests must be initiated by 
+      humans (e.g., chatroom bots presenting user-driven search results).
+
+    - No Automated Scraping: Do not use the API to scrape website SEO 
+      content or engage in automated extraction without authorization.
+
+    - No Requests for Illegal Activities: Do not request or distribute 
+      content related to illegal activities as defined by the jurisdiction 
+      of the server hosting the service.
+      
+    - Limit Test Queries: Avoid submitting constant "test queries." 
+      Use caching for API responses during development to minimize 
+      unnecessary requests.
+
++ Passes
+    Depending on the instance, you may need to provide a "pass" token
+    in the cookies of your request. These can be obtained from solving
+    a captcha which will allow you to make 100 requests in the next 24
+    hours. In the future, you will be able to ask the server maintainer
+    for a "pass" which will allow you to bypass the captcha requirement.
+    
+    The captcha doesn't need javascript to work.
+
+
++ Decode the data
+    All payloads returned by the API are encoded in the JSON format. If
+    you don't know how to tackle the problem, maybe programming is not
+    for you.
+    
+    All of the endpoints use the GET method.
+
+
++ Check if an API call was successful
+    All API responses come with an array index named "status". If the
+    status is anything other than the string "ok", something went wrong.
+    You can supply the content of the "status" string back to your
+    application to inform the user of what went wrong.
+    
+    The HTTP code will be 429 if your pass is invalid. It is set to 200
+    otherwise.
+
+
++ Get the next page of results
+    All API responses come with an array index named "npt". To get the
+    next page of results, you must make another API call with &npt.
+    
+    Example ::
+        
+        + First API call
+            /api/v1/web?s=higurashi
+        
+        + Second API call
+            /api/v1/web?npt=ddg1._rJ2hWmYSjpI2hsXWmYajJx < ... >
+    
+    You shouldn't specify the search term, only the &npt parameter
+    suffices.
+    
+    The first part of the token before the dot (ddg1) refers to an
+    array position in the server's memory. The second part is an
+    encryption key used to decode the data at that position. This way,
+    it is impossible to supply invalid pagination data and it is
+    impossible for a 4get operator to peek at the private data of the
+    user after a request has been made.
+    
+    The tokens will expire as soon as they are used or after 15
+    minutes of inactivity, whichever comes first.
+
+
++ Beware of null values!
+    Most fields in the API responses can return "null". You don't need
+    to worry about unset values.
+
+
++ API Parameters
+    To construct a valid request, build the search in the 4get web
+    interface, then replace "/web" with "/api/v1/web" in the URL.
+
+
++ "date" and "time" parameters
+    "date" always refers to a calendar date.
+    "time" always refers to the duration of some media.
+    
+    They are both integers that use seconds as their unit. The "date"
+    parameter specifies the number of seconds that passed since January
+    1st 1970. 
+    
+
+             ______          __            _       __      
+            / ____/___  ____/ /___  ____  (_)___  / /______
+           / __/ / __ \/ __  / __ \/ __ \/ / __ \/ __/ ___/
+          / /___/ / / / /_/ / /_/ / /_/ / / / / / /_(__  ) 
+         /_____/_/ /_/\__,_/ .___/\____/_/_/ /_/\__/____/  
+                          /_/                              
+
++ /ami4get
+    Tells you basic information about the 4get instance. CORS requests
+    are allowed on this endpoint.
+
+
++ /api/v1/web
+    + &extendedsearch
+        When using the ddg(DuckDuckGo) scraper, you may make use of the
+        &extendedsearch parameter. If you need rich answer data from
+        additional sources like StackOverflow, music lyrics sites, etc.,
+        you need to specify the value of (string)"true".
+        
+        The default value is "false" for API calls.
+    
+    
+    + Parse the "spelling"
+        The array index named "spelling" contains 3 indexes ::
+            
+            spelling:
+                type:         "including"
+                using:        "4chan"
+                correction:   '"4cha"'
+        
+        
+        The "type" may be any of these 3 values. When rendering the
+        autocorrect text inside your application, it should look like
+        what follows right after the parameter value ::
+            
+            no_correction    <Empty>
+            including        Including results for %using%. Did you mean
+                             %correction%?
+                            
+            not_many         Not many results for %using%. Did you mean
+                             %correction%?
+        
+        
+        As of right now, the "spelling" is only available on
+        "/api/v1/web".
+        
+    
+    + Parse the "answer"
+        The array index named "answer" may contain a list of multiple
+        answers. The array index "description" contains a linear list of
+        nodes that can help you construct rich formatted data inside of
+        your application. The structure is similar to the one below:
+        
+        answer:
+            0:
+                title: "Higurashi"
+                description:
+                    0:
+                        type:     "text"
+                        value:    "Higurashi is a great show!"
+                    1:
+                        type:     "quote"
+                        value:    "Source: my ass"
+        
+        
+        Each "description" node contains an array index named "type".
+        Here is a list of them:
+            
+              text
+            + title
+              italic
+            + quote
+            + code
+              inline_code
+              link
+            + image
+            + audio
+        
+        
+        Each individual node prepended with a "+" should be prepended by
+        a newline when constructing the rendered description object.
+        
+        There are some nodes that differ from the type-value format.
+        Please parse them accordingly ::
+            
+            + link
+                type:     "link"
+                url:      "https://lolcat.ca"
+                value:    "Visit my website!"
+            
+            
+            + image
+                type:    "image"
+                url:     "https://lolcat.ca/static/pixels.png"
+            
+            
+            + audio
+                type:    "audio"
+                url:     "https://lolcat.ca/static/whatever.mp3"
+        
+        
+        The array index named "table" is an associative array. You can
+        loop over the data using this PHP code, for example ::
+            
+            foreach($table as $website_name => $url){ // ...
+        
+        
+        The rest of the JSON is pretty self-explanatory.
+        
+        
++ /api/v1/images
+    All images are contained within "image". The structure looks like
+    below ::
+        
+        image:
+            0:
+                title: "My awesome Higurashi image"
+                source:
+                    0:
+                        url: "https://lolcat.ca/static/profile_pix.png"
+                        width: 400
+                        height: 400
+                    1:
+                        url: "https://lolcat.ca/static/pixels.png"
+                        width: 640
+                        height: 640
+                    2:
+                        url: "https://tse1.mm.bing.net/th?id=OIP.VBM3BQg
+                        euf0-xScO1bl1UgHaGG"
+                        width: 194
+                        height: 160
+        
+    
+    The last image of the "source" array is always the thumbnail, and is
+    a good fallback to use when other sources fail to load. There can be
+    more than 1 source; this is especially true when using the Yandex
+    scraper, but beware of captcha rate limits.
+    
+    
++ /api/v1/videos
+    The "time" parameter for videos may be set to "_LIVE". For live
+    streams, the amount of people currently watching is passed in
+    "views".
+
+
++ /api/v1/news
+    Just make a request to "/api/v1/news?s=elon+musk". The payload
+    has nothing special about it and is very self explanatory, just like
+    the endpoint above.
+
+
++ /api/v1/music
+    Each entry under "song" contains an array index called "stream" that
+    looks like this ::
+    
+        endpoint: sc
+        url: https://api-v2.soundcloud <...>
+    
+    
+    When the endpoint is something else than "linear", you MUST use
+    the specified endpoint. Otherwise, you are free to handle that
+    json+m3u8 crap yourself. If the endpoint is equal to "linear", the
+    URL should return a valid HTTP audio stream. To access the endpoint,
+    you must add the following prefix in your request, like so:
+	
+        https://4get.ca/audio/<endpoint>?s=<url>
+
+
++ /favicon
+    Get the favicon for a website. The only parameter is "s", and must
+    include the protocol for fetching in case the favicon is not cached
+    yet.
+    
+    Example ::
+        
+        /favicon?s=https://lolcat.ca
+    
+    
+    If we had to revert to using Google's favicon cache, it will throw
+    an error in the X-Error header field. If Google's favicon cache
+    also failed to return an image, or if you did not specify
+    a valid domain name, a default placeholder image will be returned
+    alongside the "404" HTTP error code.
+
+
++ /proxy
+    Get a proxied image. Useful if you don't want to leak your user's IP
+    address. The parameters are "i" for the image link and "s" for the
+    size.
+    
+    Acceptable "s" parameters:
+        
+        portrait     90x160
+        landscape    160x90
+        square       90x90
+        thumb        236x180
+        cover        207x270
+        original     <Original resolution>
+	
+    You can also omit the "s" parameter if you wish to view the
+    original image. When an error occurs, an "X-Error" header field
+    is set.
+
+
++ /audio/linear
+    Get a proxied audio file. Does not support "Range" headers, as it's
+    only used to proxy small files (hence why it's called linear DUH)
+    
+    The parameter is "s" for the audio link.
+
+
++ /audio/sc
+    Get a proxied audio file for SoundCloud. Does not support downloads
+    through WGET or CURL, since it returns 30kb~160kb "206 Partial
+    Content" parts, due to technical limitations that come with
+    converting m3u8 playlists to seekable audio files. If you use this
+    endpoint, you must support these 206 codes and also handle the
+    initial 302 HTTP redirect. I used this method as I didn't want to
+    store information about your request needlessly. This method also
+    allows noJS users to access the files.
+    
+    The parameter is "s" for the SoundCloud JSON m3u8 abomination. It
+    does not support "normal" SoundCloud URLs at this time.
+
+
++ /audio/spotify
+    Get a proxied Spotify audio file. Accepts a track ID for the "s"
+    parameter. Will only allow you to fetch the 30 second preview.
+
+
++ Appendix
+    If you have any questions or need clarifications, please send an
+    email my way to will at lolcat.ca
diff --git a/compose.captcha.yml b/compose.captcha.yml
new file mode 100644
index 0000000..bbbe9f8
--- /dev/null
+++ b/compose.captcha.yml
@@ -0,0 +1,12 @@
+---
+version: "3.11"
+
+services:
+  app:
+    environment:
+      - FOURGET_BOT_PROTECTION=1
+    volumes:
+      - captcha:/var/www/html/4get/data/captcha
+
+volumes:
+  captcha:
\ No newline at end of file
diff --git a/compose.yml b/compose.yml
index a2c3805..9d0f60f 100644
--- a/compose.yml
+++ b/compose.yml
@@ -1,9 +1,21 @@
 ---
-version: "3.8"
+version: "3.11"
 
 services:
   app:
-    image: nginx:1.20.0
+    image: luuul/4get:1.0.21
+    environment:
+      - FOURGET_PROTO=http
+      # The golang-templated configs read these through `env`, which only
+      # sees the service environment, so the app .env values have to be
+      # passed through explicitly (unset ones are simply left out).
+      - DOMAIN
+      - FOURGET_SERVER_NAME
+      - FOURGET_SHORT_DESCRIPTION
+      - FOURGET_INSTANCES
+      - INCLUDE_NEWS_LINK
+      - INCLUDE_4GET_DONATION_LINK
+      - INCLUDE_SOURCE_LINK
     networks:
       - proxy
     deploy:
@@ -12,21 +24,57 @@ services:
       labels:
         - "traefik.enable=true"
         - "traefik.http.services.${STACK_NAME}.loadbalancer.server.port=80"
-        - "traefik.http.routers.${STACK_NAME}.rule=Host(`${DOMAIN}`${EXTRA_DOMAINS})"
+        - "traefik.http.routers.${STACK_NAME}.rule=Host(`${DOMAIN}`)"
         - "traefik.http.routers.${STACK_NAME}.entrypoints=web-secure"
         - "traefik.http.routers.${STACK_NAME}.tls.certresolver=${LETS_ENCRYPT_ENV}"
-        ## Redirect from EXTRA_DOMAINS to DOMAIN
-        #- "traefik.http.routers.${STACK_NAME}.middlewares=${STACK_NAME}-redirect"
-        #- "traefik.http.middlewares.${STACK_NAME}-redirect.headers.SSLForceHost=true"
-        #- "traefik.http.middlewares.${STACK_NAME}-redirect.headers.SSLHost=${DOMAIN}"
         - "coop-cloud.${STACK_NAME}.version="
+    volumes:
+      - banners:/var/www/html/4get/banner/
+    configs:
+      - source: robots_txt
+        target: /var/www/html/4get/robots.txt
+      - source: about_html
+        target: /var/www/html/4get/template/about.html
+      - source: home_html
+        target: /var/www/html/4get/template/home.html
+      - source: api_txt
+        target: /var/www/html/4get/api.txt
+      - source: header_html
+        target: /var/www/html/4get/template/header.html
     healthcheck:
-      test: ["CMD", "curl", "-f", "http://localhost"]
+      test: [ "CMD", "curl", "-f", "http://localhost" ]
       interval: 30s
       timeout: 10s
       retries: 10
       start_period: 1m
 
+configs:
+  # Config names carry the *_VERSION values exported by abra.sh, so bumping
+  # a version there produces a new config name when a template changes.
+  robots_txt:
+    name: ${STACK_NAME}_robots_txt_${ROBOTS_TXT_VERSION}
+    file: robots.txt.tmpl
+    template_driver: golang
+  about_html:
+    name: ${STACK_NAME}_about_html_${ABOUT_HTML_VERSION}
+    file: about.html.tmpl
+    template_driver: golang
+  home_html:
+    name: ${STACK_NAME}_home_html_${HOME_HTML_VERSION}
+    file: home.html.tmpl
+    template_driver: golang
+  api_txt:
+    name: ${STACK_NAME}_api_txt_${API_TXT_VERSION}
+    file: api.txt.tmpl
+    template_driver: golang
+  header_html:
+    name: ${STACK_NAME}_header_html_${HEADER_HTML_VERSION}
+    file: header.html.tmpl
+    template_driver: golang
+
+volumes:
+  banners:
+
 networks:
   proxy:
     external: true
diff --git a/header.html.tmpl b/header.html.tmpl
new file mode 100644
index 0000000..1f8c249
--- /dev/null
+++ b/header.html.tmpl
@@ -0,0 +1,36 @@
+<!DOCTYPE html>
+<html lang="en">
+	<head>
+		<meta http-equiv="Content-Type" content="text/html;charset=utf-8">
+		<title>{%title%}</title>
+		<link title="{%server_name%}" href="/opensearch{%ac%}" rel="search" type="application/opensearchdescription+xml">
+		<link rel="stylesheet" href="/static/style.css?v{%version%}">
+		{%style%}
+		<meta name="viewport" content="width=device-width,initial-scale=1">
+		<meta name="robots" content="{%index%}index,{%index%}follow">
+		<link rel="icon" type="image/x-icon" href="/favicon.ico">
+		<meta name="description" content="{%server_name%} - {{ env "FOURGET_SHORT_DESCRIPTION" }}">
+	</head>
+	<body>
+		<div class="navigation">
+			<a href="/">Home</a>
+			<a href="/settings">Settings</a>
+            {{ if eq (env "INCLUDE_NEWS_LINK" ) "true" }}
+			<a href="https://git.lolcat.ca/lolcat/4get_news" target="_BLANK">News</a>
+            {{ end }}
+		</div>
+		<form method="GET" autocomplete="off">
+			<div class="searchbox">
+				<input type="submit" value="Search" tabindex="-1">
+				<div class="wrapper">
+					<input type="text" value="{%search%}" maxlength="500" name="s" placeholder="Proxy search..." required>
+				</div>
+				<div class="autocomplete"></div>
+			</div>
+			<div class="tabs">
+				{%tabs%}
+			</div>
+			<div class="filters">
+				{%filters%}
+			</div>
+		</form>
diff --git a/home.html.tmpl b/home.html.tmpl
new file mode 100644
index 0000000..b784fde
--- /dev/null
+++ b/home.html.tmpl
@@ -0,0 +1,36 @@
+<!DOCTYPE html>
+<html lang="en">
+	<head>
+		<meta http-equiv="Content-Type" content="text/html;charset=utf-8">
+		<title>{%server_name%}</title>
+		<link title="{%server_name%}" href="/opensearch{%ac%}" rel="search" type="application/opensearchdescription+xml">
+		<link rel="sitemap" type="application/xml" title="Sitemap" href="/sitemap">
+		<meta name="viewport" content="width=device-width,initial-scale=1">
+		<link rel="stylesheet" href="/static/style.css?v{%version%}">
+		{%style%}
+		<meta name="robots" content="index,follow">
+		<link rel="icon" type="image/x-icon" href="/favicon.ico">
+		<meta name="description" content="{%server_name%} - {{ env "FOURGET_SHORT_DESCRIPTION" }}">
+	</head>
+	<body class="home">
+		<div id="center">
+			<form method="GET" autocomplete="off" action="web">
+				<div class="logo">
+					<a href="/">
+						<img src="{%banner%}" alt="4get">
+					</a>
+				</div>
+				<div class="searchbox">
+					<input type="submit" value="Search" tabindex="-1">
+					<div class="wrapper">
+						<input type="text" maxlength="500" name="s" placeholder="Proxy search..." required autofocus>
+					</div>
+					<div class="autocomplete"></div>
+				</div>
+			</form>
+			<a href="settings">Settings</a> • <a href="instances">Instances</a> • {{ if eq (env "INCLUDE_NEWS_LINK" ) "true" }} <a href="https://git.lolcat.ca/lolcat/4get_news">News</a> • {{ end }}<a href="api.txt">API</a> • <a href="about">About</a>{{ if eq (env "INCLUDE_SOURCE_LINK" ) "true" }} • <a href="https://git.lolcat.ca/lolcat/4get">Source</a><br>{{ end }}
+			{{/* Shown by default; hidden only when INCLUDE_4GET_DONATION_LINK is set to "false". */}}{{ if ne (env "INCLUDE_4GET_DONATION_LINK" ) "false" }}<br><br><a href="/donate"><b>Donate to 4get</b></a>{{ end }}
+		</div>
+		<script src="/static/client.js?v{%version%}"></script>
+	</body>
+</html>
diff --git a/robots.txt.tmpl b/robots.txt.tmpl
new file mode 100644
index 0000000..b2cc895
--- /dev/null
+++ b/robots.txt.tmpl
@@ -0,0 +1,4 @@
+User-agent: *
+Disallow:
+Host: {{ env "DOMAIN" }}
+Sitemap: https://{{ env "DOMAIN" }}/sitemap

Field	Value
Search engine	{%server_name%}
Shortcut	{%server_name%}
Search URL	https://{{ env "DOMAIN" }}/web?s=%s
Autocomplete URL	https://{{ env "DOMAIN" }}/api/v1/ac?s=%s