implement tasks with axum

master
plazmoid 1 month ago
parent dfff391ebc
commit b587cde27b
  1. 1
      .env
  2. 1837
      Cargo.lock
  3. 11
      Cargo.toml
  4. 10
      src/configs.rs
  5. 21
      src/error.rs
  6. 73
      src/handlers.rs
  7. 99
      src/main.rs
  8. 61
      src/models.rs
  9. 0
      src/repo.rs

@ -0,0 +1 @@
export URL_HANDLERS_POOL_SIZE=10

1837
Cargo.lock generated

File diff suppressed because it is too large Load Diff

@ -4,3 +4,14 @@ version = "0.1.0"
edition = "2021"
[dependencies]
axum = { version = "0.8.1", features = ["macros"] }
dotenv = "0.15.0"
env_logger = "0.11.6"
envy = "0.4.2"
futures = "0.3.31"
log = "0.4.25"
reqwest = "0.12.12"
serde = { version = "1.0.217", features = ["derive"] }
thiserror = "2.0.11"
tokio = { version = "1.43.0", features = ["macros", "rt-multi-thread"] }
url = "2.5.4"

@ -0,0 +1,10 @@
use serde::Deserialize;
#[derive(Debug, Deserialize)]
pub struct Config {
pub url_handlers_pool_size: usize,
}
pub fn get_configs() -> Result<Config, envy::Error> {
envy::from_env()
}

@ -0,0 +1,21 @@
pub type AppResult<R> = Result<R, AppError>;
#[derive(Debug, thiserror::Error)]
pub enum AppError {
#[error(transparent)]
ConfigError(#[from] envy::Error),
#[error(
"Usage:\n\t url_parser -f <file with urls splitted by \\n> \n\t url_parser -u url1 url2 url3"
)]
UsageError,
#[error(transparent)]
IOError(#[from] std::io::Error),
#[error("Bad url '{0}': {1}")]
InvalidUrl(String, url::ParseError),
#[error(transparent)]
ReqwestError(#[from] reqwest::Error),
}

@ -0,0 +1,73 @@
use axum::{extract::State, response::IntoResponse, Json};
use crate::{
error::AppResult,
models::{AppStateShared, UrlReportResponse, UrlRequest, UrlResponse},
};
pub async fn get_urls() {}
pub async fn upload_urls(
State(state): State<AppStateShared>,
Json(payload): Json<UrlRequest>,
) -> impl IntoResponse {
let urls = payload.urls;
let tasks = {
urls.into_iter()
.map(|url| {
let state = state.clone();
tokio::spawn(async move {
let r_state = state.read().await;
let _sem = r_state.url_handling_semaphore.acquire().await.unwrap();
info!("fetching {url}");
let response: AppResult<_> = (|| async {
let resp = reqwest::get(url.to_string()).await?;
let status = resp.status().as_u16();
let result = resp.text().await?;
Ok((status, result))
})()
.await;
match response {
Ok((status, resp)) => UrlResponse {
status: Some(status),
url,
error_msg: None,
content_length: resp.len(),
},
Err(err) => UrlResponse {
status: None,
url,
error_msg: Some(err.to_string()),
content_length: 0,
},
}
})
})
.collect::<Vec<_>>()
};
let mut successes = vec![];
let mut failures = vec![];
for task in tasks {
let result = task.await.unwrap();
let status = result.status.unwrap_or(0);
if status >= 200 && status < 400 {
successes.push(result);
} else {
failures.push(result);
}
}
Json(UrlReportResponse {
successes,
failures,
})
}
pub async fn delete_urls() {}

@ -1,3 +1,98 @@
fn main() {
println!("Hello, world!");
mod configs;
mod error;
mod handlers;
mod models;
mod repo;
use std::{fs, sync::Arc};
use axum::{routing::get, Router};
use configs::Config;
use error::{AppError, AppResult};
use models::AppState;
use reqwest::Url;
use tokio::{net::TcpListener, sync::RwLock};
#[macro_use]
extern crate log;
/// Parse urls and return 2 lists: valid urls and errors
fn parse_urls(url_list: &[String]) -> (Vec<Url>, Vec<String>) {
let (valid, invalid): (Vec<_>, Vec<_>) = url_list
.into_iter()
.map(|url| Url::parse(&url).map_err(|e| AppError::InvalidUrl(url.to_string(), e)))
.partition(Result::is_ok);
(
valid.into_iter().map(Result::unwrap).collect(),
invalid
.into_iter()
.map(|e| e.unwrap_err().to_string())
.collect(),
)
}
/// Extract raw urls from different sources:
/// - from args list
/// - from file
fn parse_urls_from_args(
args: impl IntoIterator<Item = String>,
) -> AppResult<(Vec<Url>, Vec<String>)> {
let mut args_iter = args.into_iter();
match args_iter.nth(1) {
Some(action) => match action.as_str() {
"-f" => {
let Some(source) = args_iter.next() else {
return Err(AppError::UsageError);
};
println!("Reading file '{source}'");
let raw_urls = fs::read_to_string(source)?
.split('\n')
.map(|s| s.to_owned())
.collect::<Vec<_>>();
Ok(parse_urls(&raw_urls))
}
"-u" => {
let urls_list = args_iter.collect::<Vec<_>>();
Ok(parse_urls(&urls_list))
}
_ => return Err(AppError::UsageError),
},
None => return Err(AppError::UsageError),
}
}
async fn process_urls(config: &Config, urls: &[Url]) {}
async fn run() -> AppResult<()> {
dotenv::dotenv().ok();
let cfg = configs::get_configs()?;
info!("{cfg:?}");
// let (valid_urls, errors) = parse_urls_from_args(args)?;
let state = Arc::new(RwLock::new(AppState::new(cfg.url_handlers_pool_size)));
let app = Router::new()
.route(
"/urls",
get(handlers::get_urls)
.post(handlers::upload_urls)
.delete(handlers::delete_urls),
)
.with_state(state);
let listener = TcpListener::bind("0.0.0.0:8000").await?;
axum::serve(listener, app).await.map_err(AppError::from)
}
#[tokio::main]
async fn main() {
env_logger::init();
if let Err(e) = run().await {
println!("{e}");
}
}

@ -0,0 +1,61 @@
use std::{collections::HashMap, sync::Arc};
use axum::http::{uri::Scheme, Uri};
use serde::{Deserialize, Deserializer, Serialize, Serializer};
use tokio::sync::{RwLock, Semaphore};
pub type AppStateShared = Arc<RwLock<AppState>>;
pub struct AppState {
/// url: content
pub cache: HashMap<String, String>,
pub url_handling_semaphore: Semaphore,
}
impl AppState {
pub fn new(url_handlers_pool_size: usize) -> Self {
Self {
cache: HashMap::new(),
url_handling_semaphore: Semaphore::new(url_handlers_pool_size),
}
}
}
#[derive(Serialize)]
pub struct UrlResponse {
pub status: Option<u16>,
#[serde(serialize_with = "serialize_uri")]
pub url: Uri,
pub content_length: usize,
pub error_msg: Option<String>,
}
fn serialize_uri<S: Serializer>(uri: &Uri, serializer: S) -> Result<S::Ok, S::Error> {
serializer.serialize_str(&uri.to_string())
}
#[derive(Serialize)]
pub struct UrlReportResponse {
pub successes: Vec<UrlResponse>,
pub failures: Vec<UrlResponse>,
}
#[derive(Deserialize)]
pub struct UrlRequest {
#[serde(deserialize_with = "deser_uri")]
pub urls: Vec<Uri>,
}
fn deser_uri<'de, D>(deserializer: D) -> Result<Vec<Uri>, D::Error>
where
D: Deserializer<'de>,
{
let raw_urls = Vec::<String>::deserialize(deserializer)?;
raw_urls
.into_iter()
.map(|url| {
Uri::try_from(&url)
.map_err(|e| serde::de::Error::custom(format!("{}, url: '{url}'", e.to_string())))
})
.collect::<Result<Vec<_>, _>>()
}
Loading…
Cancel
Save