From fca4095a3f384a699ac6540a9b7b095775cb0d6d Mon Sep 17 00:00:00 2001 From: brooke Date: Fri, 7 Mar 2025 17:11:18 -0500 Subject: [PATCH] init --- .env.sample | 16 ++- README.md | 9 +- about.html.tmpl | 39 +++++ abra.sh | 5 + api.txt.tmpl | 338 ++++++++++++++++++++++++++++++++++++++++++++ compose.captcha.yml | 12 ++ compose.yml | 52 +++++-- header.html.tmpl | 36 +++++ home.html.tmpl | 36 +++++ robots.txt.tmpl | 4 + 10 files changed, 535 insertions(+), 12 deletions(-) create mode 100644 about.html.tmpl create mode 100644 abra.sh create mode 100644 api.txt.tmpl create mode 100644 compose.captcha.yml create mode 100644 header.html.tmpl create mode 100644 home.html.tmpl create mode 100644 robots.txt.tmpl diff --git a/.env.sample b/.env.sample index f69efbb..a8204aa 100644 --- a/.env.sample +++ b/.env.sample @@ -2,7 +2,17 @@ TYPE=4get DOMAIN=4get.example.com -## Domain aliases -#EXTRA_DOMAINS=', `www.4get.example.com`' - LETS_ENCRYPT_ENV=production + +FOURGET_SERVER_NAME=4get.example.com +FOURGET_SHORT_DESCRIPTION="4get 4get 4get 4get" + +## uncomment to customize the instance list +#FOURGET_INSTANCES=https://4get.ca + +## Enable captcha protection, please see readme. +#COMPOSE_FILE="$COMPOSE_FILE:compose.captcha.yml" + +INCLUDE_NEWS_LINK=false +INCLUDE_4GET_DONATION_LINK=true +INCLUDE_SOURCE_LINK=false \ No newline at end of file diff --git a/README.md b/README.md index 0ed3b10..2d3f76d 100644 --- a/README.md +++ b/README.md @@ -21,4 +21,11 @@ * `abra app config ` * `abra app deploy ` -For more, see [`docs.coopcloud.tech`](https://docs.coopcloud.tech). +### Custom banners + +* `abra app cp /local/path/to/banners app:/var/www/html/4get/banner/` + +## Captcha + + +For more, see [`docs.coopcloud.tech`](https://docs.coopcloud.tech). \ No newline at end of file diff --git a/about.html.tmpl b/about.html.tmpl new file mode 100644 index 0000000..c5fe675 --- /dev/null +++ b/about.html.tmpl @@ -0,0 +1,39 @@ +< Go back + +

Set as default search engine

+

On Firefox and other Gecko-based browsers

+To set this as your default search engine on Firefox, right-click the URL bar and select
Add "4get"
. Then, visit about:preferences#search and select
4get
in the dropdown menu. + +

On Chromium and Blink-based browsers

+Click the 3 vertically stacked dots at the top right of the screen and click on
Settings
, then search for
default search engine
, or visit chrome://settings/searchEngines.

+ +Once you're there, click the pencil on the last entry under "Search engines" (it's probably DuckDuckGo). Once you do that, a popup will appear. Populate it with the following information: + + + + + + + + + + + + + + + + + + + + + + +
FieldValue
Search engine{%server_name%}
Shortcut{%server_name%}
Search URLhttps://{{ env "DOMAIN" }}/web?s=%s
Autocomplete URLhttps://{{ env "DOMAIN" }}/api/v1/ac?s=%s
+ +Once that's done, click
Save
. Then, on the right-hand side of the newly created entry, open the dropdown menu and select
Make default
. + +

+

Message to all DMCA enforcers

+This server hosts no content. Everything you see here is proxied and this service provides no endorsement of any content. Please reach out to the service hosting the infringing content instead.

\ No newline at end of file diff --git a/abra.sh b/abra.sh new file mode 100644 index 0000000..46e0804 --- /dev/null +++ b/abra.sh @@ -0,0 +1,5 @@ +export ROBOTS_TXT_VERSION=v1 +export ABOUT_HTML_VERSION=v1 +export HEADER_HTML_VERSION=v1 +export HOME_HTML_VERSION=v1 +export API_TXT_VERSION=v1 diff --git a/api.txt.tmpl b/api.txt.tmpl new file mode 100644 index 0000000..09f57ed --- /dev/null +++ b/api.txt.tmpl @@ -0,0 +1,338 @@ + 44 + 4444444 44 + 44444444 44444 444 + 44444444 444444 444444444 + 44444 44444444 444444444 + 444444444 4444444 + 4444444444 444444 + 4444444444444 + 444444444444444444 + 444444444444444 + 44444444 + 4444 + 44 + + + Welcome to the 4get API documentation + + ++ Terms of use + + - Human-Driven Requests Only: API requests must be initiated by + humans (e.g., chatroom bots presenting user-driven search results). + + - No Automated Scraping: Do not use the API to scrape website SEO + content or engage in automated extraction without authorization. + + - No Requests for Illegal Activities: Do not request or distribute + content related to illegal activities as defined by the jurisdiction + of the server hosting the service. + + - Limit Test Queries: Avoid submitting constant "test queries." + Use caching for API responses during development to minimize + unnecessary requests. + ++ Passes + Depending of the instance, you may need to provide a "pass" token + in the cookies of your request. These can be obtained from solving + a captcha which will allow you to make 100 requests in the next 24 + hours. In the future, you will be able to ask the serber maintainer + for a "pass" which will allow you to bypass the captcha requirement. + + The captcha doesn't need javascript to work. + + ++ Decode the data + All payloads returned by the API are encoded in the JSON format. If + you don't know how to tackle the problem, maybe programming is not + for you. + + All of the endpoints use the GET method. 
+ + ++ Check if an API call was successful + All API responses come with an array index named "status". If the + status is anything other than the string "ok", something went wrong. + You can supply the content of the "status" string back to your + application to inform the user of what went wrong. + + The HTTP code will be 429 if your pass is invalid. It is set to 200 + otherwise. + + ++ Get the next page of results + All API responses come with an array index named "npt". To get the + next page of results, you must make another API call with &npt. + + Example :: + + + First API call + /api/v1/web?s=higurashi + + + Second API call + /api/v1/web?npt=ddg1._rJ2hWmYSjpI2hsXWmYajJx < ... > + + You shouldn't specify the search term, only the &npt parameter + suffices. + + The first part of the token before the dot (ddg1) refers to an + array position on the serber's memory. The second part is an + encryption key used to decode the data at that position. This way, + it is impossible to supply invalid pagination data and it is + impossible for a 4get operator to peek at the private data of the + user after a request has been made. + + The tokens will expire as soon as they are used or after a + 15-minute inactivity period, whichever comes first. + + ++ Beware of null values! + Most fields in the API responses can return "null". You don't need + to worry about unset values. + + ++ API Parameters + To construct a valid request, you can use the 4get web interface + to craft a valid request, and replace "/web" with "/api/v1/web". + + ++ "date" and "time" parameters + "date" always refers to a calendar date. + "time" always refers to the duration of some media. + + They are both integers that use seconds as their unit. The "date" + parameter specifies the number of seconds that passed since January + 1st 1970. 
+ + + ______ __ _ __ + / ____/___ ____/ /___ ____ (_)___ / /______ + / __/ / __ \/ __ / __ \/ __ \/ / __ \/ __/ ___/ + / /___/ / / / /_/ / /_/ / /_/ / / / / / /_(__ ) + /_____/_/ /_/\__,_/ .___/\____/_/_/ /_/\__/____/ + /_/ + ++ /ami4get + Tells you basic information about the 4get instance. CORS requests + are allowed on this endpoint. + + ++ /api/v1/web + + &extendedsearch + When using the ddg(DuckDuckGo) scraper, you may make use of the + &extendedsearch parameter. If you need rich answer data from + additional sources like StackOverflow, music lyrics sites, etc., + you need to specify the value of (string)"true". + + The default value is "false" for API calls. + + + + Parse the "spelling" + The array index named "spelling" contains 3 indexes :: + + spelling: + type: "including" + using: "4chan" + correction: '"4cha"' + + + The "type" may be any of these 3 values. When rendering the + autocorrect text inside your application, it should look like + what follows right after the parameter value :: + + no_correction + including Including results for %using%. Did you mean + %correction%? + + not_many Not many results for %using%. Did you mean + %correction%? + + + As of right now, the "spelling" is only available on + "/api/v1/web". + + + + Parse the "answer" + The array index named "answer" may contain a list of multiple + answers. The array index "description" contains a linear list of + nodes that can help you construct rich formatted data inside of + your application. The structure is similar to the one below: + + answer: + 0: + title: "Higurashi" + description: + 0: + type: "text" + value: "Higurashi is a great show!" + 1: + type: "quote" + value: "Source: my ass" + + + Each "description" node contains an array index named "type". 
+ Here is a list of them: + + text + + title + italic + + quote + + code + inline_code + link + + image + + audio + + + Each individual node prepended with a "+" should be prepended by + a newline when constructing the rendered description object. + + There are some nodes that differ from the type-value format. + Please parse them accordingly :: + + + link + type: "link" + url: "https://lolcat.ca" + value: "Visit my website!" + + + + image + type: "image" + url: "https://lolcat.ca/static/pixels.png" + + + + audio + type: "audio" + url: "https://lolcat.ca/static/whatever.mp3" + + + The array index named "table" is an associative array. You can + loop over the data using this PHP code, for example :: + + foreach($table as $website_name => $url){ // ... + + + The rest of the JSON is pretty self explanatory. + + ++ /api/v1/images + All images are contained within "image". The structure looks like + below :: + + image: + 0: + title: "My awesome Higurashi image" + source: + 0: + url: "https://lolcat.ca/static/profile_pix.png" + width: 400 + height: 400 + 1: + url: "https://lolcat.ca/static/pixels.png" + width: 640 + height: 640 + 2: + url: "https://tse1.mm.bing.net/th?id=OIP.VBM3BQg + euf0-xScO1bl1UgHaGG" + width: 194 + height: 160 + + + The last image of the "source" array is always the thumbnail, and is + a good fallback to use when other sources fail to load. There can be + more than 1 source; this is especially true when using the Yandex + scraper, but beware of captcha rate limits. + + ++ /api/v1/videos + The "time" parameter for videos may be set to "_LIVE". For live + streams, the amount of people currently watching is passed in + "views". + + ++ /api/v1/news + Just make a request to "/api/v1/news?s=elon+musk". The payload + has nothing special about it and is very self explanatory, just like + the endpoint above. 
+ + ++ /api/v1/music + Each entry under "song" contains an array index called "stream" that + looks like this :: + + endpoint: sc + url: https://api-v2.soundcloud <...> + + + When the endpoint is anything other than "linear", you MUST use + the specified endpoint. Otherwise, you are free to handle that + json+m3u8 crap yourself. If the endpoint is equal to "linear", the + URL should return a valid HTTP audio stream. To access the endpoint, + you must add the following prefix in your request, like so: + + https://4get.ca/audio/?s= + + ++ /favicon + Get the favicon for a website. The only parameter is "s", and must + include the protocol for fetching in case the favicon is not cached + yet. + + Example :: + + /favicon?s=https://lolcat.ca + + + If we had to revert to using Google's favicon cache, it will throw + an error in the X-Error header field. If Google's favicon cache + also failed to return an image, or if you did not specify + a valid domain name, a default placeholder image will be returned + alongside the "404" HTTP error code. + + ++ /proxy + Get a proxied image. Useful if you don't want to leak your user's IP + address. The parameters are "i" for the image link and "s" for the + size. + + Acceptable "s" parameters: + + portrait 90x160 + landscape 160x90 + square 90x90 + thumb 236x180 + cover 207x270 + original + + You can also omit the "s" parameter if you wish to view the + original image. When an error occurs, an "X-Error" header field + is set. + + ++ /audio/linear + Get a proxied audio file. Does not support "Range" headers, as it's + only used to proxy small files (hence why it's called linear DUH) + + The parameter is "s" for the audio link. + + ++ /audio/sc + Get a proxied audio file for SoundCloud. Does not support downloads + through WGET or CURL, since it returns 30kb~160kb "206 Partial + Content" parts, due to technical limitations that come with + converting m3u8 playlists to seekable audio files. 
If you use this + endpoint, you must support these 206 codes and also handle the + initial 302 HTTP redirect. I used this method as I didn't want to + store information about your request needlessly. This method also + allows noJS users to access the files. + + The parameter is "s" for the SoundCloud JSON m3u8 abomination. It + does not support "normal" SoundCloud URLs at this time. + + ++ /audio/spotify + Get a proxied Spotify audio file. Accepts a track ID for the "s" + parameter. Will only allow you to fetch the 30 second preview. + + ++ Appendix + If you have any questions or need clarifications, please send an + email my way to will at lolcat.ca diff --git a/compose.captcha.yml b/compose.captcha.yml new file mode 100644 index 0000000..bbbe9f8 --- /dev/null +++ b/compose.captcha.yml @@ -0,0 +1,12 @@ +--- +version: "3.11" + +services: + app: + environment: + - FOURGET_BOT_PROTECTION=1 + volumes: + - captcha:/var/www/html/4get/data/captcha + +volumes: + captcha: \ No newline at end of file diff --git a/compose.yml b/compose.yml index a2c3805..9d0f60f 100644 --- a/compose.yml +++ b/compose.yml @@ -1,9 +1,11 @@ --- -version: "3.8" +version: "3.11" services: app: - image: nginx:1.20.0 + image: luuul/4get:1.0.21 + environment: + - FOURGET_PROTO=http networks: - proxy deploy: @@ -12,21 +14,55 @@ services: labels: - "traefik.enable=true" - "traefik.http.services.${STACK_NAME}.loadbalancer.server.port=80" - - "traefik.http.routers.${STACK_NAME}.rule=Host(`${DOMAIN}`${EXTRA_DOMAINS})" + - "traefik.http.routers.${STACK_NAME}.rule=Host(`${DOMAIN}`)" - "traefik.http.routers.${STACK_NAME}.entrypoints=web-secure" - "traefik.http.routers.${STACK_NAME}.tls.certresolver=${LETS_ENCRYPT_ENV}" - ## Redirect from EXTRA_DOMAINS to DOMAIN - #- "traefik.http.routers.${STACK_NAME}.middlewares=${STACK_NAME}-redirect" - #- "traefik.http.middlewares.${STACK_NAME}-redirect.headers.SSLForceHost=true" - #- "traefik.http.middlewares.${STACK_NAME}-redirect.headers.SSLHost=${DOMAIN}" - 
"coop-cloud.${STACK_NAME}.version=" + volumes: + - banners:/var/www/html/4get/banner/ + configs: + - source: robots_txt + target: /robots.txt + - source: about_html + target: /var/www/html/4get/template/about.html + - source: home_html + target: /var/www/html/4get/template/home.html + - source: api_txt + target: /var/www/html/4get/api.txt + - source: header_html + target: /var/www/html/4get/template/header.html healthcheck: - test: ["CMD", "curl", "-f", "http://localhost"] + test: [ "CMD", "curl", "-f", "http://localhost" ] interval: 30s timeout: 10s retries: 10 start_period: 1m +configs: + robots_txt: + name: ${STACK_NAME}_entrypoint_conf_${ROBOTS_TXT_VERSION} + file: robots.txt.tmpl + template_driver: golang + about_html: + name: ${STACK_NAME}_about_html_${ABOUT_HTML_VERSION} + file: about.html.tmpl + template_driver: golang + home_html: + name: ${STACK_NAME}_home_html_${HOME_HTML_VERSION} + file: home.html.tmpl + template_driver: golang + api_txt: + name: ${STACK_NAME}_api_txt_${API_TXT_VERSION} + file: api.txt.tmpl + template_driver: golang + header_html: + name: ${STACK_NAME}_header_html_${HEADER_HTML_VERSION} + file: header.html.tmpl + template_driver: golang + +volumes: + banners: + networks: proxy: external: true diff --git a/header.html.tmpl b/header.html.tmpl new file mode 100644 index 0000000..1f8c249 --- /dev/null +++ b/header.html.tmpl @@ -0,0 +1,36 @@ + + + + + {%title%} + + + {%style%} + + + + + + + +
+ +
+ {%tabs%} +
+
+ {%filters%} +
+
diff --git a/home.html.tmpl b/home.html.tmpl new file mode 100644 index 0000000..b784fde --- /dev/null +++ b/home.html.tmpl @@ -0,0 +1,36 @@ + + + + + {%server_name%} + + + + + {%style%} + + + + + +
+
+ + +
+ SettingsInstances • {{ if eq (env "INCLUDE_NEWS_LINK" ) "true" }} News • {{ end }}APIAbout{{ if eq (env "INCLUDE_SOURCE_LINK" ) "true" }} • Source
{{ end }} +

Donate to 4get +
+ + + diff --git a/robots.txt.tmpl b/robots.txt.tmpl new file mode 100644 index 0000000..b2cc895 --- /dev/null +++ b/robots.txt.tmpl @@ -0,0 +1,4 @@ +User-agent: * +Disallow: +Host: {{ env "DOMAIN" }} +Sitemap: https://{{ env "DOMAIN" }}/sitemap