Compare commits

..

No commits in common. 'aaef6ed851ff20e3101a1a9d037a6353bc22e8f8' and 'b587cde27b88c134d41d1bf521f2865607ea57e4' have entirely different histories.

  1. 1
      .env
  2. 185
      Cargo.lock
  3. 6
      Cargo.toml
  4. 51
      README.md
  5. 49
      run.py
  6. 1
      src/configs.rs
  7. 32
      src/error.rs
  8. 113
      src/handlers.rs
  9. 107
      src/main.rs
  10. 40
      src/models.rs
  11. 77
      src/repo.rs

@ -1,2 +1 @@
export URL_HANDLERS_POOL_SIZE=10 export URL_HANDLERS_POOL_SIZE=10
export PORT=8000

185
Cargo.lock generated

@ -175,21 +175,6 @@ version = "0.22.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6"
[[package]]
name = "bincode"
version = "1.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b1f45e9417d87227c7a56d22e471c6206462cba514c7590c09aff4cf6d1ddcad"
dependencies = [
"serde",
]
[[package]]
name = "bitflags"
version = "1.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
[[package]] [[package]]
name = "bitflags" name = "bitflags"
version = "2.8.0" version = "2.8.0"
@ -202,12 +187,6 @@ version = "3.17.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1628fb46dfa0b37568d12e5edd512553eccf6a22a78e8bde00bb4aed84d5bdbf" checksum = "1628fb46dfa0b37568d12e5edd512553eccf6a22a78e8bde00bb4aed84d5bdbf"
[[package]]
name = "byteorder"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b"
[[package]] [[package]]
name = "bytes" name = "bytes"
version = "1.9.0" version = "1.9.0"
@ -251,30 +230,6 @@ version = "0.8.7"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b"
[[package]]
name = "crc32fast"
version = "1.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a97769d94ddab943e4510d138150169a2758b5ef3eb191a9ee688de3e23ef7b3"
dependencies = [
"cfg-if",
]
[[package]]
name = "crossbeam-epoch"
version = "0.9.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e"
dependencies = [
"crossbeam-utils",
]
[[package]]
name = "crossbeam-utils"
version = "0.8.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28"
[[package]] [[package]]
name = "displaydoc" name = "displaydoc"
version = "0.2.5" version = "0.2.5"
@ -385,16 +340,6 @@ dependencies = [
"percent-encoding", "percent-encoding",
] ]
[[package]]
name = "fs2"
version = "0.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9564fc758e15025b46aa6643b1b77d047d1a56a1aea6e01002ac0c7026876213"
dependencies = [
"libc",
"winapi",
]
[[package]] [[package]]
name = "futures" name = "futures"
version = "0.3.31" version = "0.3.31"
@ -484,15 +429,6 @@ dependencies = [
"slab", "slab",
] ]
[[package]]
name = "fxhash"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c31b6d751ae2c7f11320402d34e41349dd1016f8d5d45e48c4312bc8625af50c"
dependencies = [
"byteorder",
]
[[package]] [[package]]
name = "getrandom" name = "getrandom"
version = "0.2.15" version = "0.2.15"
@ -821,15 +757,6 @@ dependencies = [
"hashbrown", "hashbrown",
] ]
[[package]]
name = "instant"
version = "0.1.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e0242819d153cba4b4b05a5a8f2a7e9bbf97b6055b2a002b395c96b5ff3c0222"
dependencies = [
"cfg-if",
]
[[package]] [[package]]
name = "ipnet" name = "ipnet"
version = "2.11.0" version = "2.11.0"
@ -876,16 +803,6 @@ version = "0.7.4"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4ee93343901ab17bd981295f2cf0026d4ad018c7c31ba84549a4ddbb47a45104" checksum = "4ee93343901ab17bd981295f2cf0026d4ad018c7c31ba84549a4ddbb47a45104"
[[package]]
name = "lock_api"
version = "0.4.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "07af8b9cdd281b7915f413fa73f29ebd5d55d0d3f0155584dade1ff18cea1b17"
dependencies = [
"autocfg",
"scopeguard",
]
[[package]] [[package]]
name = "log" name = "log"
version = "0.4.25" version = "0.4.25"
@ -968,7 +885,7 @@ version = "0.10.69"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f5e534d133a060a3c19daec1eb3e98ec6f4685978834f2dbadfe2ec215bab64e" checksum = "f5e534d133a060a3c19daec1eb3e98ec6f4685978834f2dbadfe2ec215bab64e"
dependencies = [ dependencies = [
"bitflags 2.8.0", "bitflags",
"cfg-if", "cfg-if",
"foreign-types", "foreign-types",
"libc", "libc",
@ -1006,31 +923,6 @@ dependencies = [
"vcpkg", "vcpkg",
] ]
[[package]]
name = "parking_lot"
version = "0.11.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7d17b78036a60663b797adeaee46f5c9dfebb86948d1255007a1d6be0271ff99"
dependencies = [
"instant",
"lock_api",
"parking_lot_core",
]
[[package]]
name = "parking_lot_core"
version = "0.8.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "60a2cfe6f0ad2bfc16aefa463b497d5c7a5ecd44a23efa72aa342d90177356dc"
dependencies = [
"cfg-if",
"instant",
"libc",
"redox_syscall",
"smallvec",
"winapi",
]
[[package]] [[package]]
name = "percent-encoding" name = "percent-encoding"
version = "2.3.1" version = "2.3.1"
@ -1073,15 +965,6 @@ dependencies = [
"proc-macro2", "proc-macro2",
] ]
[[package]]
name = "redox_syscall"
version = "0.2.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fb5a58c1855b4b6819d59012155603f0b22ad30cad752600aadfcb695265519a"
dependencies = [
"bitflags 1.3.2",
]
[[package]] [[package]]
name = "regex" name = "regex"
version = "1.11.1" version = "1.11.1"
@ -1182,7 +1065,7 @@ version = "0.38.44"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fdb5bc1ae2baa591800df16c9ca78619bf65c0488b41b96ccec5d11220d8c154" checksum = "fdb5bc1ae2baa591800df16c9ca78619bf65c0488b41b96ccec5d11220d8c154"
dependencies = [ dependencies = [
"bitflags 2.8.0", "bitflags",
"errno", "errno",
"libc", "libc",
"linux-raw-sys", "linux-raw-sys",
@ -1249,19 +1132,13 @@ dependencies = [
"windows-sys 0.59.0", "windows-sys 0.59.0",
] ]
[[package]]
name = "scopeguard"
version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
[[package]] [[package]]
name = "security-framework" name = "security-framework"
version = "2.11.1" version = "2.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "897b2245f0b511c87893af39b033e5ca9cce68824c4d7e7630b5a1d339658d02" checksum = "897b2245f0b511c87893af39b033e5ca9cce68824c4d7e7630b5a1d339658d02"
dependencies = [ dependencies = [
"bitflags 2.8.0", "bitflags",
"core-foundation", "core-foundation",
"core-foundation-sys", "core-foundation-sys",
"libc", "libc",
@ -1338,15 +1215,6 @@ version = "1.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64"
[[package]]
name = "signal-hook-registry"
version = "1.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a9e9e0b4211b72e7b8b6e85c807d36c212bdb33ea8587f7569562a84df5465b1"
dependencies = [
"libc",
]
[[package]] [[package]]
name = "slab" name = "slab"
version = "0.4.9" version = "0.4.9"
@ -1356,22 +1224,6 @@ dependencies = [
"autocfg", "autocfg",
] ]
[[package]]
name = "sled"
version = "0.34.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7f96b4737c2ce5987354855aed3797279def4ebf734436c6aa4552cf8e169935"
dependencies = [
"crc32fast",
"crossbeam-epoch",
"crossbeam-utils",
"fs2",
"fxhash",
"libc",
"log",
"parking_lot",
]
[[package]] [[package]]
name = "smallvec" name = "smallvec"
version = "1.13.2" version = "1.13.2"
@ -1443,7 +1295,7 @@ version = "0.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3c879d448e9d986b661742763247d3693ed13609438cf3d006f51f5368a5ba6b" checksum = "3c879d448e9d986b661742763247d3693ed13609438cf3d006f51f5368a5ba6b"
dependencies = [ dependencies = [
"bitflags 2.8.0", "bitflags",
"core-foundation", "core-foundation",
"system-configuration-sys", "system-configuration-sys",
] ]
@ -1513,7 +1365,6 @@ dependencies = [
"libc", "libc",
"mio", "mio",
"pin-project-lite", "pin-project-lite",
"signal-hook-registry",
"socket2", "socket2",
"tokio-macros", "tokio-macros",
"windows-sys 0.52.0", "windows-sys 0.52.0",
@ -1645,7 +1496,6 @@ name = "url_parser"
version = "0.1.0" version = "0.1.0"
dependencies = [ dependencies = [
"axum", "axum",
"bincode",
"dotenv", "dotenv",
"env_logger", "env_logger",
"envy", "envy",
@ -1653,10 +1503,9 @@ dependencies = [
"log", "log",
"reqwest", "reqwest",
"serde", "serde",
"sled",
"thiserror", "thiserror",
"tokio", "tokio",
"tokio-util", "url",
] ]
[[package]] [[package]]
@ -1788,28 +1637,6 @@ dependencies = [
"wasm-bindgen", "wasm-bindgen",
] ]
[[package]]
name = "winapi"
version = "0.3.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
dependencies = [
"winapi-i686-pc-windows-gnu",
"winapi-x86_64-pc-windows-gnu",
]
[[package]]
name = "winapi-i686-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
[[package]]
name = "winapi-x86_64-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
[[package]] [[package]]
name = "windows-registry" name = "windows-registry"
version = "0.2.0" version = "0.2.0"
@ -1928,7 +1755,7 @@ version = "0.33.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3268f3d866458b787f390cf61f4bbb563b922d091359f9608842999eaee3943c" checksum = "3268f3d866458b787f390cf61f4bbb563b922d091359f9608842999eaee3943c"
dependencies = [ dependencies = [
"bitflags 2.8.0", "bitflags",
] ]
[[package]] [[package]]

@ -5,7 +5,6 @@ edition = "2021"
[dependencies] [dependencies]
axum = { version = "0.8.1", features = ["macros"] } axum = { version = "0.8.1", features = ["macros"] }
bincode = "1.3.3"
dotenv = "0.15.0" dotenv = "0.15.0"
env_logger = "0.11.6" env_logger = "0.11.6"
envy = "0.4.2" envy = "0.4.2"
@ -13,7 +12,6 @@ futures = "0.3.31"
log = "0.4.25" log = "0.4.25"
reqwest = "0.12.12" reqwest = "0.12.12"
serde = { version = "1.0.217", features = ["derive"] } serde = { version = "1.0.217", features = ["derive"] }
sled = "0.34.7"
thiserror = "2.0.11" thiserror = "2.0.11"
tokio = { version = "1.43.0", features = ["macros", "rt-multi-thread", "signal"] } tokio = { version = "1.43.0", features = ["macros", "rt-multi-thread"] }
tokio-util = "0.7.13" url = "2.5.4"

@ -1,51 +0,0 @@
# Тестовое задание для Stilsoft
Сервис для асинхронной обработки ссылок.
Для настройки сервиса используются переменные окружения в файле `.env`. Экспортировать их в терминал не нужно, они считаются автоматически.
Запуск осуществляется через `cargo run`.
Реализованные фичи:
- сервер на axum с API для управления ссылками (просмотр/загрузка/удаление, см. схему ниже)
- in-memory хранилище на sled
- из результата запроса ссылки достаётся количество символов
- кэширование результатов (бессрочное)
- удаление дубликатов ссылок
- возможность указать размер пула обработчиков ссылок через URL_HANDLERS_POOL_SIZE
- логирование и чтение настроек логирования через RUST_LOG
- отмена загрузки ссылок на SIGHUP
Для отправки HTTP-запросов к сервису можно использовать дополнительную утилиту run.py:
```
./run.py list <url1> <url2>
./run.py upload <url1> <url2>
./run.py del <url1> <url2>
```
Схема:
Получение списка сохранённых ссылок:
```
Без фильтров:
GET localhost:8000/url
С фильтром:
POST localhost:8000/url
body: {"urls": [url1, url2]}
```
Загрузка ссылок:
```
POST localhost:8000/url/upload
body: {"urls": [url1, url2]}
```
Удаление сохранённых ссылок:
```
Удаление всех ссылок:
GET localhost:8000/url/delete
Удаление заданных ссылок:
POST localhost:8000/url
body: {"urls": [url1, url2]}
```

@ -1,49 +0,0 @@
#!/usr/bin/env python3
import sys
import json
import requests
BASE_URL = "http://localhost:8000"
def usage():
print("Possible actions:\n list [url1] [url2] ...\n upload [url1] [url2] ...\n del [url1] [url2] ...")
sys.exit(1)
def main():
try:
action = sys.argv[1]
except IndexError:
usage()
try:
urls = sys.argv[2:]
except IndexError:
urls = []
if action == "list":
if len(urls) == 0:
result = requests.get(f"{BASE_URL}/urls")
else:
result = requests.post(f"{BASE_URL}/urls", json={"urls": urls})
elif action == "upload":
result = requests.post(f"{BASE_URL}/urls/upload", json={"urls": urls})
elif action == "del":
if len(urls) == 0:
result = requests.get(f"{BASE_URL}/urls/delete")
else:
result = requests.post(f"{BASE_URL}/urls/delete", json={"urls": urls})
else:
usage()
if result.status_code == 200:
if len(result.text) > 0:
print(json.dumps(result.json()))
else:
print("200 OK")
else:
print(result.text)
if __name__ == '__main__':
main()

@ -3,7 +3,6 @@ use serde::Deserialize;
#[derive(Debug, Deserialize)] #[derive(Debug, Deserialize)]
pub struct Config { pub struct Config {
pub url_handlers_pool_size: usize, pub url_handlers_pool_size: usize,
pub port: u16,
} }
pub fn get_configs() -> Result<Config, envy::Error> { pub fn get_configs() -> Result<Config, envy::Error> {

@ -1,7 +1,3 @@
use std::string::FromUtf8Error;
use axum::{http::StatusCode, response::IntoResponse};
pub type AppResult<R> = Result<R, AppError>; pub type AppResult<R> = Result<R, AppError>;
#[derive(Debug, thiserror::Error)] #[derive(Debug, thiserror::Error)]
@ -9,29 +5,17 @@ pub enum AppError {
#[error(transparent)] #[error(transparent)]
ConfigError(#[from] envy::Error), ConfigError(#[from] envy::Error),
#[error(transparent)] #[error(
IOError(#[from] std::io::Error), "Usage:\n\t url_parser -f <file with urls splitted by \\n> \n\t url_parser -u url1 url2 url3"
)]
#[error(transparent)] UsageError,
ReqwestError(#[from] reqwest::Error),
#[error(transparent)] #[error(transparent)]
SledError(#[from] sled::Error), IOError(#[from] std::io::Error),
#[error(transparent)] #[error("Bad url '{0}': {1}")]
BincodeError(#[from] bincode::Error), InvalidUrl(String, url::ParseError),
#[error(transparent)] #[error(transparent)]
Utf8Error(#[from] FromUtf8Error), ReqwestError(#[from] reqwest::Error),
#[error("Cancelled: {0}")]
Cancelled(String),
}
impl IntoResponse for AppError {
fn into_response(self) -> axum::response::Response {
let err = self.to_string();
(StatusCode::INTERNAL_SERVER_ERROR, err).into_response()
}
} }

@ -1,60 +1,17 @@
use std::collections::HashSet;
use axum::{extract::State, response::IntoResponse, Json}; use axum::{extract::State, response::IntoResponse, Json};
use tokio::select;
use crate::{ use crate::{
error::{AppError, AppResult}, error::AppResult,
models::{AppStateShared, UrlReportResponse, UrlRequest, UrlResult}, models::{AppStateShared, UrlReportResponse, UrlRequest, UrlResponse},
repo::Repo,
}; };
pub async fn get_urls<R: Repo>( pub async fn get_urls() {}
State(state): State<AppStateShared<R>>,
req: Option<Json<UrlRequest>>,
) -> AppResult<impl IntoResponse> {
let urls = req.map(|urls| {
urls.0
.urls
.into_iter()
.map(|u| u.to_string())
.collect::<HashSet<_>>()
});
let r_state = state.read().await;
let result = r_state.repo.get_urls(urls.as_ref())?;
Ok(Json(result))
}
pub async fn upload_urls<R: Repo>( pub async fn upload_urls(
State(state): State<AppStateShared<R>>, State(state): State<AppStateShared>,
Json(payload): Json<UrlRequest>, Json(payload): Json<UrlRequest>,
) -> AppResult<impl IntoResponse> { ) -> impl IntoResponse {
let mut successes = vec![]; let urls = payload.urls;
let mut failures = vec![];
let urls = {
let r_state = state.read().await;
let cached_urls = r_state.repo.get_urls(None)?;
payload
.urls
.into_iter()
.filter(|url| match cached_urls.get(&url.to_string()) {
Some(val) => {
info!("{} is cached, not fetching", url);
successes.push(UrlResult {
status: None,
url: url.clone(),
content_length: *val,
error_msg: None,
});
false
}
None => true,
})
.collect::<HashSet<_>>()
};
let tasks = { let tasks = {
urls.into_iter() urls.into_iter()
@ -63,33 +20,25 @@ pub async fn upload_urls<R: Repo>(
tokio::spawn(async move { tokio::spawn(async move {
let r_state = state.read().await; let r_state = state.read().await;
let _sem = r_state.url_handling_semaphore.acquire().await.unwrap(); let _sem = r_state.url_handling_semaphore.acquire().await.unwrap();
let ct = r_state.cancellation_token.clone();
info!("fetching {url}"); info!("fetching {url}");
let request = (|| async { let response: AppResult<_> = (|| async {
let resp = reqwest::get(url.to_string()).await?; let resp = reqwest::get(url.to_string()).await?;
let status = resp.status().as_u16(); let status = resp.status().as_u16();
let result = resp.text().await?; let result = resp.text().await?;
Ok((status, result)) Ok((status, result))
})(); })()
.await;
let response: AppResult<_> = select! {
r = request => r,
_ = ct.cancelled() => {
info!("fetching {url} cancelled");
Err(AppError::Cancelled(url.to_string()))
}
};
match response { match response {
Ok((status, resp)) => UrlResult { Ok((status, resp)) => UrlResponse {
status: Some(status), status: Some(status),
url, url,
error_msg: None, error_msg: None,
content_length: resp.len(), content_length: resp.len(),
}, },
Err(err) => UrlResult { Err(err) => UrlResponse {
status: None, status: None,
url, url,
error_msg: Some(err.to_string()), error_msg: Some(err.to_string()),
@ -101,14 +50,11 @@ pub async fn upload_urls<R: Repo>(
.collect::<Vec<_>>() .collect::<Vec<_>>()
}; };
let ct = state.read().await.cancellation_token.clone(); let mut successes = vec![];
for task in tasks { let mut failures = vec![];
let result = if let Some(result) = ct.run_until_cancelled(task).await {
result.unwrap()
} else {
continue;
};
for task in tasks {
let result = task.await.unwrap();
let status = result.status.unwrap_or(0); let status = result.status.unwrap_or(0);
if status >= 200 && status < 400 { if status >= 200 && status < 400 {
@ -117,32 +63,11 @@ pub async fn upload_urls<R: Repo>(
failures.push(result); failures.push(result);
} }
} }
info!("fetching completed");
let mut w_state = state.write().await; Json(UrlReportResponse {
w_state.repo.insert_urls(&successes)?;
w_state.regen_cancellation_token();
drop(w_state);
Ok(Json(UrlReportResponse {
successes, successes,
failures, failures,
})) })
} }
pub async fn delete_urls<R: Repo>( pub async fn delete_urls() {}
State(state): State<AppStateShared<R>>,
req: Option<Json<UrlRequest>>,
) -> AppResult<impl IntoResponse> {
let urls = req.map(|urls| {
urls.0
.urls
.into_iter()
.map(|u| u.to_string())
.collect::<HashSet<_>>()
});
let deleted = state.write().await.repo.delete_urls(urls.as_ref())?;
Ok(deleted.to_string())
}

@ -4,76 +4,95 @@ mod handlers;
mod models; mod models;
mod repo; mod repo;
use std::sync::Arc; use std::{fs, sync::Arc};
use axum::{routing::post, Router};
use axum::{routing::get, Router};
use configs::Config;
use error::{AppError, AppResult}; use error::{AppError, AppResult};
use models::AppState; use models::AppState;
use repo::Repo; use reqwest::Url;
use tokio::{net::TcpListener, sync::RwLock}; use tokio::{net::TcpListener, sync::RwLock};
use tokio_util::sync::CancellationToken;
#[macro_use] #[macro_use]
extern crate log; extern crate log;
/// Parse urls and return 2 lists: valid urls and errors
fn parse_urls(url_list: &[String]) -> (Vec<Url>, Vec<String>) {
let (valid, invalid): (Vec<_>, Vec<_>) = url_list
.into_iter()
.map(|url| Url::parse(&url).map_err(|e| AppError::InvalidUrl(url.to_string(), e)))
.partition(Result::is_ok);
(
valid.into_iter().map(Result::unwrap).collect(),
invalid
.into_iter()
.map(|e| e.unwrap_err().to_string())
.collect(),
)
}
/// Extract raw urls from different sources:
/// - from args list
/// - from file
fn parse_urls_from_args(
args: impl IntoIterator<Item = String>,
) -> AppResult<(Vec<Url>, Vec<String>)> {
let mut args_iter = args.into_iter();
match args_iter.nth(1) {
Some(action) => match action.as_str() {
"-f" => {
let Some(source) = args_iter.next() else {
return Err(AppError::UsageError);
};
println!("Reading file '{source}'");
let raw_urls = fs::read_to_string(source)?
.split('\n')
.map(|s| s.to_owned())
.collect::<Vec<_>>();
Ok(parse_urls(&raw_urls))
}
"-u" => {
let urls_list = args_iter.collect::<Vec<_>>();
Ok(parse_urls(&urls_list))
}
_ => return Err(AppError::UsageError),
},
None => return Err(AppError::UsageError),
}
}
async fn process_urls(config: &Config, urls: &[Url]) {}
async fn run() -> AppResult<()> { async fn run() -> AppResult<()> {
dotenv::dotenv().ok(); dotenv::dotenv().ok();
let cfg = configs::get_configs()?; let cfg = configs::get_configs()?;
info!("{cfg:?}"); info!("{cfg:?}");
let repo = create_repo()?; // let (valid_urls, errors) = parse_urls_from_args(args)?;
let ct = CancellationToken::new(); let state = Arc::new(RwLock::new(AppState::new(cfg.url_handlers_pool_size)));
let state = Arc::new(RwLock::new(AppState::new(
cfg.url_handlers_pool_size,
repo,
ct.clone(),
)));
#[cfg(unix)]
{
use tokio::signal::unix::{signal, SignalKind};
let state = state.clone();
tokio::spawn(async move {
let mut signal = signal(SignalKind::hangup()).unwrap();
loop {
signal.recv().await;
state.read().await.cancellation_token.cancel();
info!("SIGHUP");
}
});
}
let app = Router::new() let app = Router::new()
.route( .route(
"/urls", "/urls",
post(|state, body| handlers::get_urls(state, Some(body))) get(handlers::get_urls)
.get(|state| handlers::get_urls(state, None)), .post(handlers::upload_urls)
) .delete(handlers::delete_urls),
.route("/urls/upload", post(handlers::upload_urls))
.route(
"/urls/delete",
post(|state, req| handlers::delete_urls(state, Some(req)))
.get(|state| handlers::delete_urls(state, None)),
) )
.with_state(state); .with_state(state);
let listener = TcpListener::bind("0.0.0.0:8000").await?;
let bind_addr = format!("0.0.0.0:{}", cfg.port);
let listener = TcpListener::bind(bind_addr).await?;
axum::serve(listener, app).await.map_err(AppError::from) axum::serve(listener, app).await.map_err(AppError::from)
} }
fn create_repo() -> AppResult<impl Repo> {
let config = sled::Config::new().temporary(true);
config.open().map_err(AppError::from)
}
#[tokio::main] #[tokio::main]
async fn main() { async fn main() {
env_logger::init(); env_logger::init();
let _ = run().await.inspect_err(|e| println!("{e}")); if let Err(e) = run().await {
println!("{e}");
}
} }

@ -1,36 +1,28 @@
use std::{collections::HashSet, sync::Arc}; use std::{collections::HashMap, sync::Arc};
use axum::http::Uri; use axum::http::{uri::Scheme, Uri};
use serde::{Deserialize, Deserializer, Serialize, Serializer}; use serde::{Deserialize, Deserializer, Serialize, Serializer};
use tokio::sync::{RwLock, Semaphore}; use tokio::sync::{RwLock, Semaphore};
use tokio_util::sync::CancellationToken;
use crate::repo::Repo; pub type AppStateShared = Arc<RwLock<AppState>>;
pub type AppStateShared<R> = Arc<RwLock<AppState<R>>>; pub struct AppState {
/// url: content
pub struct AppState<R: Repo> { pub cache: HashMap<String, String>,
pub repo: R,
pub url_handling_semaphore: Semaphore, pub url_handling_semaphore: Semaphore,
pub cancellation_token: CancellationToken,
} }
impl<R: Repo> AppState<R> { impl AppState {
pub fn new(url_handlers_pool_size: usize, repo: R, ct: CancellationToken) -> Self { pub fn new(url_handlers_pool_size: usize) -> Self {
Self { Self {
repo, cache: HashMap::new(),
url_handling_semaphore: Semaphore::new(url_handlers_pool_size), url_handling_semaphore: Semaphore::new(url_handlers_pool_size),
cancellation_token: ct,
}
} }
pub fn regen_cancellation_token(&mut self) {
self.cancellation_token = CancellationToken::new();
} }
} }
#[derive(Serialize)] #[derive(Serialize)]
pub struct UrlResult { pub struct UrlResponse {
pub status: Option<u16>, pub status: Option<u16>,
#[serde(serialize_with = "serialize_uri")] #[serde(serialize_with = "serialize_uri")]
pub url: Uri, pub url: Uri,
@ -44,26 +36,26 @@ fn serialize_uri<S: Serializer>(uri: &Uri, serializer: S) -> Result<S::Ok, S::Er
#[derive(Serialize)] #[derive(Serialize)]
pub struct UrlReportResponse { pub struct UrlReportResponse {
pub successes: Vec<UrlResult>, pub successes: Vec<UrlResponse>,
pub failures: Vec<UrlResult>, pub failures: Vec<UrlResponse>,
} }
#[derive(Deserialize)] #[derive(Deserialize)]
pub struct UrlRequest { pub struct UrlRequest {
#[serde(deserialize_with = "deser_uri")] #[serde(deserialize_with = "deser_uri")]
pub urls: HashSet<Uri>, pub urls: Vec<Uri>,
} }
fn deser_uri<'de, D>(deserializer: D) -> Result<HashSet<Uri>, D::Error> fn deser_uri<'de, D>(deserializer: D) -> Result<Vec<Uri>, D::Error>
where where
D: Deserializer<'de>, D: Deserializer<'de>,
{ {
let raw_urls = HashSet::<String>::deserialize(deserializer)?; let raw_urls = Vec::<String>::deserialize(deserializer)?;
raw_urls raw_urls
.into_iter() .into_iter()
.map(|url| { .map(|url| {
Uri::try_from(&url) Uri::try_from(&url)
.map_err(|e| serde::de::Error::custom(format!("{}, url: '{url}'", e.to_string()))) .map_err(|e| serde::de::Error::custom(format!("{}, url: '{url}'", e.to_string())))
}) })
.collect::<Result<HashSet<_>, _>>() .collect::<Result<Vec<_>, _>>()
} }

@ -1,77 +0,0 @@
use std::collections::{HashMap, HashSet};
use sled::{Batch, Db};
use crate::{
error::{AppError, AppResult},
models::UrlResult,
};
pub trait Repo: Send + Sync + 'static {
fn insert_urls(&self, urls: &[UrlResult]) -> AppResult<()>;
fn delete_urls(&self, urls: Option<&HashSet<String>>) -> AppResult<usize>;
fn get_urls(&self, urls: Option<&HashSet<String>>) -> AppResult<HashMap<String, usize>>;
}
impl Repo for Db {
fn insert_urls(&self, urls: &[UrlResult]) -> AppResult<()> {
let mut batch = Batch::default();
for url in urls {
batch.insert(
url.url.to_string().as_bytes(),
bincode::serialize(&url.content_length)?,
);
}
self.apply_batch(batch).map_err(AppError::from)
}
fn delete_urls(&self, urls: Option<&HashSet<String>>) -> AppResult<usize> {
let mut deleted = 0;
match urls {
Some(urls) => {
for url in urls {
info!("deleting {url}");
let del = self.remove(url.as_bytes())?;
if del.is_some() {
deleted += 1
}
}
}
None => {
info!("deleting all urls");
deleted = self.len();
self.clear()?;
}
};
Ok(deleted)
}
fn get_urls(&self, urls: Option<&HashSet<String>>) -> AppResult<HashMap<String, usize>> {
match urls {
Some(urls) => {
let mut result = HashMap::new();
for url in urls {
let Some(val) = self.get(url.as_bytes())? else {
continue;
};
result.insert(url.to_owned(), bincode::deserialize(&val)?);
}
Ok(result)
}
None => self
.iter()
.map(|result| {
let (k, v) = result?;
let key = String::from_utf8(k.to_vec())?;
let val = bincode::deserialize(&v)?;
Ok((key, val))
})
.collect::<AppResult<_>>(),
}
}
}
Loading…
Cancel
Save