parent
dfff391ebc
commit
b587cde27b
9 changed files with 2111 additions and 2 deletions
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,10 @@ |
||||
use serde::Deserialize; |
||||
|
||||
#[derive(Debug, Deserialize)] |
||||
pub struct Config { |
||||
pub url_handlers_pool_size: usize, |
||||
} |
||||
|
||||
pub fn get_configs() -> Result<Config, envy::Error> { |
||||
envy::from_env() |
||||
} |
@ -0,0 +1,21 @@ |
||||
pub type AppResult<R> = Result<R, AppError>; |
||||
|
||||
#[derive(Debug, thiserror::Error)] |
||||
pub enum AppError { |
||||
#[error(transparent)] |
||||
ConfigError(#[from] envy::Error), |
||||
|
||||
#[error(
|
||||
"Usage:\n\t url_parser -f <file with urls splitted by \\n> \n\t url_parser -u url1 url2 url3" |
||||
)] |
||||
UsageError, |
||||
|
||||
#[error(transparent)] |
||||
IOError(#[from] std::io::Error), |
||||
|
||||
#[error("Bad url '{0}': {1}")] |
||||
InvalidUrl(String, url::ParseError), |
||||
|
||||
#[error(transparent)] |
||||
ReqwestError(#[from] reqwest::Error), |
||||
} |
@ -0,0 +1,73 @@ |
||||
use axum::{extract::State, response::IntoResponse, Json}; |
||||
|
||||
use crate::{ |
||||
error::AppResult, |
||||
models::{AppStateShared, UrlReportResponse, UrlRequest, UrlResponse}, |
||||
}; |
||||
|
||||
pub async fn get_urls() {} |
||||
|
||||
pub async fn upload_urls( |
||||
State(state): State<AppStateShared>, |
||||
Json(payload): Json<UrlRequest>, |
||||
) -> impl IntoResponse { |
||||
let urls = payload.urls; |
||||
|
||||
let tasks = { |
||||
urls.into_iter() |
||||
.map(|url| { |
||||
let state = state.clone(); |
||||
tokio::spawn(async move { |
||||
let r_state = state.read().await; |
||||
let _sem = r_state.url_handling_semaphore.acquire().await.unwrap(); |
||||
info!("fetching {url}"); |
||||
|
||||
let response: AppResult<_> = (|| async { |
||||
let resp = reqwest::get(url.to_string()).await?; |
||||
let status = resp.status().as_u16(); |
||||
let result = resp.text().await?; |
||||
|
||||
Ok((status, result)) |
||||
})() |
||||
.await; |
||||
|
||||
match response { |
||||
Ok((status, resp)) => UrlResponse { |
||||
status: Some(status), |
||||
url, |
||||
error_msg: None, |
||||
content_length: resp.len(), |
||||
}, |
||||
Err(err) => UrlResponse { |
||||
status: None, |
||||
url, |
||||
error_msg: Some(err.to_string()), |
||||
content_length: 0, |
||||
}, |
||||
} |
||||
}) |
||||
}) |
||||
.collect::<Vec<_>>() |
||||
}; |
||||
|
||||
let mut successes = vec![]; |
||||
let mut failures = vec![]; |
||||
|
||||
for task in tasks { |
||||
let result = task.await.unwrap(); |
||||
let status = result.status.unwrap_or(0); |
||||
|
||||
if status >= 200 && status < 400 { |
||||
successes.push(result); |
||||
} else { |
||||
failures.push(result); |
||||
} |
||||
} |
||||
|
||||
Json(UrlReportResponse { |
||||
successes, |
||||
failures, |
||||
}) |
||||
} |
||||
|
||||
pub async fn delete_urls() {} |
@ -1,3 +1,98 @@ |
||||
fn main() { |
||||
println!("Hello, world!"); |
||||
mod configs; |
||||
mod error; |
||||
mod handlers; |
||||
mod models; |
||||
mod repo; |
||||
|
||||
use std::{fs, sync::Arc}; |
||||
|
||||
use axum::{routing::get, Router}; |
||||
use configs::Config; |
||||
use error::{AppError, AppResult}; |
||||
use models::AppState; |
||||
use reqwest::Url; |
||||
use tokio::{net::TcpListener, sync::RwLock}; |
||||
|
||||
#[macro_use] |
||||
extern crate log; |
||||
|
||||
/// Parse urls and return 2 lists: valid urls and errors
|
||||
fn parse_urls(url_list: &[String]) -> (Vec<Url>, Vec<String>) { |
||||
let (valid, invalid): (Vec<_>, Vec<_>) = url_list |
||||
.into_iter() |
||||
.map(|url| Url::parse(&url).map_err(|e| AppError::InvalidUrl(url.to_string(), e))) |
||||
.partition(Result::is_ok); |
||||
|
||||
( |
||||
valid.into_iter().map(Result::unwrap).collect(), |
||||
invalid |
||||
.into_iter() |
||||
.map(|e| e.unwrap_err().to_string()) |
||||
.collect(), |
||||
) |
||||
} |
||||
|
||||
/// Extract raw urls from different sources:
|
||||
/// - from args list
|
||||
/// - from file
|
||||
fn parse_urls_from_args( |
||||
args: impl IntoIterator<Item = String>, |
||||
) -> AppResult<(Vec<Url>, Vec<String>)> { |
||||
let mut args_iter = args.into_iter(); |
||||
|
||||
match args_iter.nth(1) { |
||||
Some(action) => match action.as_str() { |
||||
"-f" => { |
||||
let Some(source) = args_iter.next() else { |
||||
return Err(AppError::UsageError); |
||||
}; |
||||
println!("Reading file '{source}'"); |
||||
let raw_urls = fs::read_to_string(source)? |
||||
.split('\n') |
||||
.map(|s| s.to_owned()) |
||||
.collect::<Vec<_>>(); |
||||
|
||||
Ok(parse_urls(&raw_urls)) |
||||
} |
||||
"-u" => { |
||||
let urls_list = args_iter.collect::<Vec<_>>(); |
||||
Ok(parse_urls(&urls_list)) |
||||
} |
||||
_ => return Err(AppError::UsageError), |
||||
}, |
||||
None => return Err(AppError::UsageError), |
||||
} |
||||
} |
||||
|
||||
async fn process_urls(config: &Config, urls: &[Url]) {} |
||||
|
||||
async fn run() -> AppResult<()> { |
||||
dotenv::dotenv().ok(); |
||||
|
||||
let cfg = configs::get_configs()?; |
||||
info!("{cfg:?}"); |
||||
|
||||
// let (valid_urls, errors) = parse_urls_from_args(args)?;
|
||||
let state = Arc::new(RwLock::new(AppState::new(cfg.url_handlers_pool_size))); |
||||
|
||||
let app = Router::new() |
||||
.route( |
||||
"/urls", |
||||
get(handlers::get_urls) |
||||
.post(handlers::upload_urls) |
||||
.delete(handlers::delete_urls), |
||||
) |
||||
.with_state(state); |
||||
let listener = TcpListener::bind("0.0.0.0:8000").await?; |
||||
|
||||
axum::serve(listener, app).await.map_err(AppError::from) |
||||
} |
||||
|
||||
#[tokio::main] |
||||
async fn main() { |
||||
env_logger::init(); |
||||
|
||||
if let Err(e) = run().await { |
||||
println!("{e}"); |
||||
} |
||||
} |
||||
|
@ -0,0 +1,61 @@ |
||||
use std::{collections::HashMap, sync::Arc}; |
||||
|
||||
use axum::http::{uri::Scheme, Uri}; |
||||
use serde::{Deserialize, Deserializer, Serialize, Serializer}; |
||||
use tokio::sync::{RwLock, Semaphore}; |
||||
|
||||
pub type AppStateShared = Arc<RwLock<AppState>>; |
||||
|
||||
pub struct AppState { |
||||
/// url: content
|
||||
pub cache: HashMap<String, String>, |
||||
pub url_handling_semaphore: Semaphore, |
||||
} |
||||
|
||||
impl AppState { |
||||
pub fn new(url_handlers_pool_size: usize) -> Self { |
||||
Self { |
||||
cache: HashMap::new(), |
||||
url_handling_semaphore: Semaphore::new(url_handlers_pool_size), |
||||
} |
||||
} |
||||
} |
||||
|
||||
#[derive(Serialize)] |
||||
pub struct UrlResponse { |
||||
pub status: Option<u16>, |
||||
#[serde(serialize_with = "serialize_uri")] |
||||
pub url: Uri, |
||||
pub content_length: usize, |
||||
pub error_msg: Option<String>, |
||||
} |
||||
|
||||
fn serialize_uri<S: Serializer>(uri: &Uri, serializer: S) -> Result<S::Ok, S::Error> { |
||||
serializer.serialize_str(&uri.to_string()) |
||||
} |
||||
|
||||
#[derive(Serialize)] |
||||
pub struct UrlReportResponse { |
||||
pub successes: Vec<UrlResponse>, |
||||
pub failures: Vec<UrlResponse>, |
||||
} |
||||
|
||||
#[derive(Deserialize)] |
||||
pub struct UrlRequest { |
||||
#[serde(deserialize_with = "deser_uri")] |
||||
pub urls: Vec<Uri>, |
||||
} |
||||
|
||||
fn deser_uri<'de, D>(deserializer: D) -> Result<Vec<Uri>, D::Error> |
||||
where |
||||
D: Deserializer<'de>, |
||||
{ |
||||
let raw_urls = Vec::<String>::deserialize(deserializer)?; |
||||
raw_urls |
||||
.into_iter() |
||||
.map(|url| { |
||||
Uri::try_from(&url) |
||||
.map_err(|e| serde::de::Error::custom(format!("{}, url: '{url}'", e.to_string()))) |
||||
}) |
||||
.collect::<Result<Vec<_>, _>>() |
||||
} |
Loading…
Reference in new issue