From a840789afc88af5446bad9916e658dcaff2bb03f Mon Sep 17 00:00:00 2001 From: plazmoid Date: Sat, 1 Feb 2025 19:41:41 +0300 Subject: [PATCH] implement repo --- .env | 3 +- Cargo.lock | 174 ++++++++++++++++++++++++++++++++++++++++++++++-- Cargo.toml | 3 +- src/configs.rs | 1 + src/error.rs | 29 +++++--- src/handlers.rs | 54 ++++++++++++--- src/main.rs | 82 ++++++----------------- src/models.rs | 33 ++++----- src/repo.rs | 67 +++++++++++++++++++ 9 files changed, 344 insertions(+), 102 deletions(-) diff --git a/.env b/.env index b78ad68..44cb232 100644 --- a/.env +++ b/.env @@ -1 +1,2 @@ -export URL_HANDLERS_POOL_SIZE=10 \ No newline at end of file +export URL_HANDLERS_POOL_SIZE=10 +export PORT=8000 \ No newline at end of file diff --git a/Cargo.lock b/Cargo.lock index ffb2b09..bcacca2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -175,6 +175,21 @@ version = "0.22.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" +[[package]] +name = "bincode" +version = "1.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1f45e9417d87227c7a56d22e471c6206462cba514c7590c09aff4cf6d1ddcad" +dependencies = [ + "serde", +] + +[[package]] +name = "bitflags" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" + [[package]] name = "bitflags" version = "2.8.0" @@ -187,6 +202,12 @@ version = "3.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1628fb46dfa0b37568d12e5edd512553eccf6a22a78e8bde00bb4aed84d5bdbf" +[[package]] +name = "byteorder" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" + [[package]] name = "bytes" version = "1.9.0" @@ -230,6 +251,30 @@ version = "0.8.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" +[[package]] +name = "crc32fast" +version = "1.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a97769d94ddab943e4510d138150169a2758b5ef3eb191a9ee688de3e23ef7b3" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.9.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-utils" +version = "0.8.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" + [[package]] name = "displaydoc" version = "0.2.5" @@ -340,6 +385,16 @@ dependencies = [ "percent-encoding", ] +[[package]] +name = "fs2" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9564fc758e15025b46aa6643b1b77d047d1a56a1aea6e01002ac0c7026876213" +dependencies = [ + "libc", + "winapi", +] + [[package]] name = "futures" version = "0.3.31" @@ -429,6 +484,15 @@ dependencies = [ "slab", ] +[[package]] +name = "fxhash" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c31b6d751ae2c7f11320402d34e41349dd1016f8d5d45e48c4312bc8625af50c" +dependencies = [ + "byteorder", +] + [[package]] name = "getrandom" version = "0.2.15" @@ -757,6 +821,15 @@ dependencies = [ "hashbrown", ] +[[package]] +name = "instant" +version = "0.1.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e0242819d153cba4b4b05a5a8f2a7e9bbf97b6055b2a002b395c96b5ff3c0222" +dependencies = [ + "cfg-if", +] + [[package]] name = "ipnet" version = "2.11.0" @@ -803,6 +876,16 @@ version = "0.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4ee93343901ab17bd981295f2cf0026d4ad018c7c31ba84549a4ddbb47a45104" +[[package]] +name = "lock_api" +version = "0.4.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07af8b9cdd281b7915f413fa73f29ebd5d55d0d3f0155584dade1ff18cea1b17" +dependencies = [ + "autocfg", + "scopeguard", +] + [[package]] name = "log" version = "0.4.25" @@ -885,7 +968,7 @@ version = "0.10.69" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f5e534d133a060a3c19daec1eb3e98ec6f4685978834f2dbadfe2ec215bab64e" dependencies = [ - "bitflags", + "bitflags 2.8.0", "cfg-if", "foreign-types", "libc", @@ -923,6 +1006,31 @@ dependencies = [ "vcpkg", ] +[[package]] +name = "parking_lot" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7d17b78036a60663b797adeaee46f5c9dfebb86948d1255007a1d6be0271ff99" +dependencies = [ + "instant", + "lock_api", + "parking_lot_core", +] + +[[package]] +name = "parking_lot_core" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "60a2cfe6f0ad2bfc16aefa463b497d5c7a5ecd44a23efa72aa342d90177356dc" +dependencies = [ + "cfg-if", + "instant", + "libc", + "redox_syscall", + "smallvec", + "winapi", +] + [[package]] name = "percent-encoding" version = "2.3.1" @@ -965,6 +1073,15 @@ dependencies = [ "proc-macro2", ] +[[package]] +name = "redox_syscall" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fb5a58c1855b4b6819d59012155603f0b22ad30cad752600aadfcb695265519a" +dependencies = [ + "bitflags 1.3.2", +] + [[package]] name = "regex" version = "1.11.1" @@ -1065,7 +1182,7 @@ version = "0.38.44" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fdb5bc1ae2baa591800df16c9ca78619bf65c0488b41b96ccec5d11220d8c154" dependencies = [ - "bitflags", + "bitflags 2.8.0", "errno", "libc", "linux-raw-sys", @@ -1132,13 +1249,19 @@ dependencies = [ "windows-sys 0.59.0", ] +[[package]] +name = "scopeguard" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" + [[package]] name = "security-framework" version = "2.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "897b2245f0b511c87893af39b033e5ca9cce68824c4d7e7630b5a1d339658d02" dependencies = [ - "bitflags", + "bitflags 2.8.0", "core-foundation", "core-foundation-sys", "libc", @@ -1224,6 +1347,22 @@ dependencies = [ "autocfg", ] +[[package]] +name = "sled" +version = "0.34.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f96b4737c2ce5987354855aed3797279def4ebf734436c6aa4552cf8e169935" +dependencies = [ + "crc32fast", + "crossbeam-epoch", + "crossbeam-utils", + "fs2", + "fxhash", + "libc", + "log", + "parking_lot", +] + [[package]] name = "smallvec" version = "1.13.2" @@ -1295,7 +1434,7 @@ version = "0.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3c879d448e9d986b661742763247d3693ed13609438cf3d006f51f5368a5ba6b" dependencies = [ - "bitflags", + "bitflags 2.8.0", "core-foundation", "system-configuration-sys", ] @@ -1496,6 +1635,7 @@ name = "url_parser" version = "0.1.0" dependencies = [ "axum", + "bincode", "dotenv", "env_logger", "envy", @@ -1503,9 +1643,9 @@ dependencies = [ "log", "reqwest", "serde", + "sled", "thiserror", "tokio", - "url", ] [[package]] @@ -1637,6 +1777,28 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + [[package]] name = "windows-registry" version = "0.2.0" @@ -1755,7 +1917,7 @@ version = "0.33.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3268f3d866458b787f390cf61f4bbb563b922d091359f9608842999eaee3943c" dependencies = [ - "bitflags", + "bitflags 2.8.0", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index a20d689..781dd0d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -5,6 +5,7 @@ edition = "2021" [dependencies] axum = { version = "0.8.1", features = ["macros"] } +bincode = "1.3.3" dotenv = "0.15.0" env_logger = "0.11.6" envy = "0.4.2" @@ -12,6 +13,6 @@ futures = "0.3.31" log = "0.4.25" reqwest = "0.12.12" serde = { version = "1.0.217", features = ["derive"] } +sled = "0.34.7" thiserror = "2.0.11" tokio = { version = "1.43.0", features = ["macros", "rt-multi-thread"] } -url = "2.5.4" diff --git a/src/configs.rs b/src/configs.rs index e36b2c8..7499d65 100644 --- a/src/configs.rs +++ b/src/configs.rs @@ -3,6 +3,7 @@ use serde::Deserialize; #[derive(Debug, Deserialize)] pub struct Config { pub url_handlers_pool_size: usize, + pub port: u16, } pub fn get_configs() -> Result { diff --git a/src/error.rs b/src/error.rs index 6623ada..0a09ad3 100644 --- a/src/error.rs +++ b/src/error.rs @@ -1,3 +1,7 @@ +use std::string::FromUtf8Error; + +use axum::{http::StatusCode, response::IntoResponse}; + pub type AppResult = Result; #[derive(Debug, thiserror::Error)] @@ -5,17 +9,26 @@ pub enum AppError { #[error(transparent)] ConfigError(#[from] envy::Error), - #[error( - "Usage:\n\t url_parser -f \n\t url_parser -u url1 url2 url3" - )] - UsageError, - #[error(transparent)] IOError(#[from] std::io::Error), - #[error("Bad url '{0}': {1}")] - InvalidUrl(String, url::ParseError), - #[error(transparent)] ReqwestError(#[from] reqwest::Error), + + #[error(transparent)] + SledError(#[from] sled::Error), + + #[error(transparent)] + BincodeError(#[from] bincode::Error), + + #[error(transparent)] + Utf8Error(#[from] FromUtf8Error), +} + +impl IntoResponse for AppError { + fn into_response(self) -> axum::response::Response { + let err = self.to_string(); + + (StatusCode::INTERNAL_SERVER_ERROR, err).into_response() + } } diff --git a/src/handlers.rs b/src/handlers.rs index bdca108..978eaf4 100644 --- a/src/handlers.rs +++ b/src/handlers.rs @@ -1,16 +1,34 @@ +use std::collections::HashSet; + use axum::{extract::State, response::IntoResponse, Json}; use crate::{ error::AppResult, - models::{AppStateShared, UrlReportResponse, UrlRequest, UrlResponse}, + models::{AppStateShared, UrlReportResponse, UrlRequest, UrlResult}, + repo::Repo, }; -pub async fn get_urls() {} +pub async fn get_urls( + State(state): State>, + req: Option>, +) -> AppResult { + let urls = req.map(|urls| { + urls.0 + .urls + .into_iter() + .map(|u| u.to_string()) + .collect::>() + }); + let r_state = state.read().await; + let result = r_state.repo.list_urls(urls.as_ref())?; + + Ok(Json(result)) +} -pub async fn upload_urls( - State(state): State, +pub async fn upload_urls( + State(state): State>, Json(payload): Json, -) -> impl IntoResponse { +) -> AppResult { let urls = payload.urls; let tasks = { @@ -32,13 +50,13 @@ pub async fn upload_urls( .await; match response { - Ok((status, resp)) => UrlResponse { + Ok((status, resp)) => UrlResult { status: Some(status), url, error_msg: None, content_length: resp.len(), }, - Err(err) => UrlResponse { + Err(err) => UrlResult { status: None, url, error_msg: Some(err.to_string()), @@ -63,11 +81,27 @@ pub async fn upload_urls( failures.push(result); } } + state.write().await.repo.insert_urls(&successes)?; - Json(UrlReportResponse { + Ok(Json(UrlReportResponse { successes, failures, - }) + })) } -pub async fn delete_urls() {} +pub async fn delete_urls( + State(state): State>, + req: Option>, +) -> AppResult<()> { + let urls = req.map(|urls| { + urls.0 + .urls + .into_iter() + .map(|u| u.to_string()) + .collect::>() + }); + + state.write().await.repo.delete_urls(urls.as_ref())?; + + Ok(()) +} diff --git a/src/main.rs b/src/main.rs index f5f2b03..35e1b0f 100644 --- a/src/main.rs +++ b/src/main.rs @@ -4,90 +4,52 @@ mod handlers; mod models; mod repo; -use std::{fs, sync::Arc}; +use std::sync::Arc; + +use axum::{routing::post, Router}; -use axum::{routing::get, Router}; -use configs::Config; use error::{AppError, AppResult}; use models::AppState; -use reqwest::Url; +use repo::Repo; use tokio::{net::TcpListener, sync::RwLock}; #[macro_use] extern crate log; -/// Parse urls and return 2 lists: valid urls and errors -fn parse_urls(url_list: &[String]) -> (Vec, Vec) { - let (valid, invalid): (Vec<_>, Vec<_>) = url_list - .into_iter() - .map(|url| Url::parse(&url).map_err(|e| AppError::InvalidUrl(url.to_string(), e))) - .partition(Result::is_ok); - - ( - valid.into_iter().map(Result::unwrap).collect(), - invalid - .into_iter() - .map(|e| e.unwrap_err().to_string()) - .collect(), - ) -} - -/// Extract raw urls from different sources: -/// - from args list -/// - from file -fn parse_urls_from_args( - args: impl IntoIterator, -) -> AppResult<(Vec, Vec)> { - let mut args_iter = args.into_iter(); - - match args_iter.nth(1) { - Some(action) => match action.as_str() { - "-f" => { - let Some(source) = args_iter.next() else { - return Err(AppError::UsageError); - }; - println!("Reading file '{source}'"); - let raw_urls = fs::read_to_string(source)? - .split('\n') - .map(|s| s.to_owned()) - .collect::>(); - - Ok(parse_urls(&raw_urls)) - } - "-u" => { - let urls_list = args_iter.collect::>(); - Ok(parse_urls(&urls_list)) - } - _ => return Err(AppError::UsageError), - }, - None => return Err(AppError::UsageError), - } -} - -async fn process_urls(config: &Config, urls: &[Url]) {} - async fn run() -> AppResult<()> { dotenv::dotenv().ok(); let cfg = configs::get_configs()?; info!("{cfg:?}"); - // let (valid_urls, errors) = parse_urls_from_args(args)?; - let state = Arc::new(RwLock::new(AppState::new(cfg.url_handlers_pool_size))); + let repo = create_repo()?; + let state = Arc::new(RwLock::new(AppState::new(cfg.url_handlers_pool_size, repo))); let app = Router::new() .route( "/urls", - get(handlers::get_urls) - .post(handlers::upload_urls) - .delete(handlers::delete_urls), + post(|state, body| handlers::get_urls(state, Some(body))) + .get(|state| handlers::get_urls(state, None)), + ) + .route("/urls/upload", post(handlers::upload_urls)) + .route( + "/urls/delete", + post(|state, req| handlers::delete_urls(state, Some(req))) + .get(|state| handlers::delete_urls(state, None)), ) .with_state(state); - let listener = TcpListener::bind("0.0.0.0:8000").await?; + + let bind_addr = format!("0.0.0.0:{}", cfg.port); + let listener = TcpListener::bind(bind_addr).await?; axum::serve(listener, app).await.map_err(AppError::from) } +fn create_repo() -> AppResult { + let config = sled::Config::new().temporary(true); + config.open().map_err(AppError::from) +} + #[tokio::main] async fn main() { env_logger::init(); diff --git a/src/models.rs b/src/models.rs index 8f7f260..b4f7b1a 100644 --- a/src/models.rs +++ b/src/models.rs @@ -1,28 +1,29 @@ -use std::{collections::HashMap, sync::Arc}; +use std::{collections::HashSet, sync::Arc}; -use axum::http::{uri::Scheme, Uri}; +use axum::http::Uri; use serde::{Deserialize, Deserializer, Serialize, Serializer}; use tokio::sync::{RwLock, Semaphore}; -pub type AppStateShared = Arc>; +use crate::repo::Repo; -pub struct AppState { - /// url: content - pub cache: HashMap, +pub type AppStateShared = Arc>>; + +pub struct AppState { + pub repo: R, pub url_handling_semaphore: Semaphore, } -impl AppState { - pub fn new(url_handlers_pool_size: usize) -> Self { +impl AppState { + pub fn new(url_handlers_pool_size: usize, repo: R) -> Self { Self { - cache: HashMap::new(), + repo, url_handling_semaphore: Semaphore::new(url_handlers_pool_size), } } } #[derive(Serialize)] -pub struct UrlResponse { +pub struct UrlResult { pub status: Option, #[serde(serialize_with = "serialize_uri")] pub url: Uri, @@ -36,26 +37,26 @@ fn serialize_uri(uri: &Uri, serializer: S) -> Result, - pub failures: Vec, + pub successes: Vec, + pub failures: Vec, } #[derive(Deserialize)] pub struct UrlRequest { #[serde(deserialize_with = "deser_uri")] - pub urls: Vec, + pub urls: HashSet, } -fn deser_uri<'de, D>(deserializer: D) -> Result, D::Error> +fn deser_uri<'de, D>(deserializer: D) -> Result, D::Error> where D: Deserializer<'de>, { - let raw_urls = Vec::::deserialize(deserializer)?; + let raw_urls = HashSet::::deserialize(deserializer)?; raw_urls .into_iter() .map(|url| { Uri::try_from(&url) .map_err(|e| serde::de::Error::custom(format!("{}, url: '{url}'", e.to_string()))) }) - .collect::, _>>() + .collect::, _>>() } diff --git a/src/repo.rs b/src/repo.rs index e69de29..e8aa4ab 100644 --- a/src/repo.rs +++ b/src/repo.rs @@ -0,0 +1,67 @@ +use std::collections::{HashMap, HashSet}; + +use sled::{Batch, Db}; + +use crate::{ + error::{AppError, AppResult}, + models::UrlResult, +}; + +pub trait Repo: Send + Sync + 'static { + fn insert_urls(&self, urls: &[UrlResult]) -> AppResult<()>; + + fn delete_urls(&self, urls: Option<&HashSet>) -> AppResult<()>; + + fn list_urls(&self, urls: Option<&HashSet>) -> AppResult>; +} + +impl Repo for Db { + fn insert_urls(&self, urls: &[UrlResult]) -> AppResult<()> { + let mut batch = Batch::default(); + for url in urls { + batch.insert( + url.url.to_string().as_bytes(), + bincode::serialize(&url.content_length)?, + ); + } + self.apply_batch(batch).map_err(AppError::from) + } + + fn delete_urls(&self, urls: Option<&HashSet>) -> AppResult<()> { + match urls { + Some(urls) => { + for url in urls { + self.remove(url.as_bytes())?; + } + + Ok(()) + } + None => self.clear().map_err(AppError::from), + } + } + + fn list_urls(&self, urls: Option<&HashSet>) -> AppResult> { + match urls { + Some(urls) => { + let mut result = HashMap::new(); + + for url in urls { + let Some(val) = self.get(url.as_bytes())? else { + continue; + }; + result.insert(url.to_owned(), bincode::deserialize(&val)?); + } + Ok(result) + } + None => self + .iter() + .map(|result| { + let (k, v) = result?; + let key = String::from_utf8(k.to_vec())?; + let val = bincode::deserialize(&v)?; + Ok((key, val)) + }) + .collect::>(), + } + } +}