You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

268 lines
6.8 KiB

#[cfg(test)]
mod tests;
use std::iter::Peekable;
use std::ops::{Deref, RangeInclusive};
use std::usize;
use std::vec::IntoIter;
/// A position, or span of positions, within the source text being parsed.
#[derive(Clone, Debug)]
pub enum Location {
/// A single character position.
Char {
/// Line of the character (`tokenize` numbers lines starting at 1).
line: usize,
/// Column of the character within its line, counted in `char`s (`tokenize` starts at 1).
col: usize,
},
/// A span, as built by `Location::range`.
String {
/// First and last line covered by the span.
lines: RangeInclusive<usize>,
/// Column at which the span starts and column at which it ends. `Location::range`
/// takes the two bounds independently of the lines, so for a span covering several
/// lines the start column may be greater than the end column.
cols: RangeInclusive<usize>,
},
/// The whole input; this is what `Ast::location` reports for `Ast::Root`.
All,
}
/// Errors reported while turning a token stream into an `Ast`.
#[derive(Debug)]
pub enum ParseError {
/// A `)` appeared where an expression was expected; carries the location of that `)`.
UnexpectedParenClose(Location),
/// The tokens ran out where an expression was expected, for example inside a list
/// that is never closed.
UnexpectedEof,
}
impl Location {
    /// Builds a throwaway location for tests.
    ///
    /// [`Localised`] equality never looks at the location, so tests can attach this
    /// placeholder to any expected value.
    #[cfg(test)]
    pub fn dummy() -> Location {
        Location::Char { line: 0, col: 0 }
    }

    /// Returns the span that begins where `start` begins and ends where `end` ends.
    ///
    /// Line and column bounds are combined independently of each other.
    pub fn range(start: &Location, end: &Location) -> Location {
        let lines = start.line_start()..=end.line_end();
        let cols = start.col_start()..=end.col_end();
        Location::String { lines, cols }
    }

    /// First line covered by this location (`0` for [`Location::All`]).
    pub fn line_start(&self) -> usize {
        match self {
            Self::All => 0,
            Self::String { lines, .. } => *lines.start(),
            Self::Char { line, .. } => *line,
        }
    }

    /// Column at which this location starts (`0` for [`Location::All`]).
    pub fn col_start(&self) -> usize {
        match self {
            Self::All => 0,
            Self::String { cols, .. } => *cols.start(),
            Self::Char { col, .. } => *col,
        }
    }

    /// Last line covered by this location (`usize::MAX` for [`Location::All`]).
    pub fn line_end(&self) -> usize {
        match self {
            Self::All => usize::MAX,
            Self::String { lines, .. } => *lines.end(),
            Self::Char { line, .. } => *line,
        }
    }

    /// Column at which this location ends (`usize::MAX` for [`Location::All`]).
    pub fn col_end(&self) -> usize {
        match self {
            Self::All => usize::MAX,
            Self::String { cols, .. } => *cols.end(),
            Self::Char { col, .. } => *col,
        }
    }

    /// Collapses this location to the single character position at its end.
    ///
    /// A [`Location::Char`] maps to an equal position, a [`Location::String`] to its
    /// end line/column, and [`Location::All`] to `usize::MAX` for both coordinates.
    pub fn tail(&self) -> Self {
        Location::Char {
            line: self.line_end(),
            col: self.col_end(),
        }
    }
}
/// A value paired with the [`Location`] it was read from.
///
/// The type parameter carries no bound on the struct itself: the derived `Debug` and
/// `Clone` implementations add the bounds they need, so a bound here would only force
/// every user of the type to repeat it.
#[derive(Debug, Clone)]
pub struct Localised<T> {
    /// Where `item` was found in the source text.
    pub location: Location,
    /// The wrapped value.
    pub item: T,
}
impl<T: Clone> Localised<T> {
    /// Consumes the wrapper, dropping the location and returning the wrapped item.
    pub fn into_inner(self) -> T {
        let Self { item, .. } = self;
        item
    }
}
impl<T: Clone> Localised<T> {
    /// Wraps `item` with the placeholder position from [`Location::dummy`].
    ///
    /// Only compiled for tests.
    #[cfg(test)]
    pub fn dummy_location(item: T) -> Self {
        let location = Location::dummy();
        Localised { location, item }
    }
}
impl<T: Clone> Deref for Localised<T> {
    type Target = T;

    /// Borrows the wrapped item, so its methods can be called directly on the wrapper.
    fn deref(&self) -> &T {
        let Self { item, .. } = self;
        item
    }
}
impl<T: PartialEq + Clone> PartialEq for Localised<T> {
    /// Compares the wrapped items only; the locations are not compared.
    fn eq(&self, other: &Self) -> bool {
        self.item == other.item
    }
}
/// Lexical tokens produced by `tokenize`.
#[derive(Debug, PartialEq, Clone)]
pub enum Token {
/// An opening parenthesis, `(`.
LeftParen,
/// A closing parenthesis, `)`.
RightParen,
/// A run of consecutive characters that are neither whitespace nor parentheses.
Symbol(String),
}
/// Syntax tree built by `parse`.
///
/// The derived equality compares structure and symbol text only, because `Localised`
/// equality does not compare locations.
#[derive(Debug, PartialEq, Clone)]
pub enum Ast {
/// A bare symbol together with where it was found.
Symbol(Localised<String>),
/// A parenthesised list of sub-expressions; its location runs from the opening to the
/// closing parenthesis.
List(Localised<Vec<Ast>>),
/// All top-level expressions of the input, in order; `parse` returns this on success.
Root(Vec<Ast>),
}
impl Ast {
    /// Returns the symbol text if this node is an [`Ast::Symbol`], otherwise `None`.
    pub fn symbol(self) -> Option<String> {
        if let Ast::Symbol(name) = self {
            Some(name.item)
        } else {
            None
        }
    }

    /// Returns the child nodes if this node is an [`Ast::List`], otherwise `None`.
    pub fn list(self) -> Option<Vec<Ast>> {
        if let Ast::List(children) = self {
            Some(children.item)
        } else {
            None
        }
    }

    /// Returns where this node sits in the source; the root reports [`Location::All`].
    pub fn location(&self) -> &Location {
        match self {
            Ast::Root(_) => &Location::All,
            Ast::List(children) => &children.location,
            Ast::Symbol(name) => &name.location,
        }
    }
}
/// Splits `input` into parenthesis and symbol tokens, each tagged with its location.
///
/// Lines are separated by `'\n'`; lines and columns are numbered from 1 and columns are
/// counted in `char`s. Whitespace only separates tokens and produces none itself. A
/// symbol is a run of characters containing neither whitespace nor parentheses, and its
/// location spans from its first to its last character.
pub fn tokenize(input: &str) -> Vec<Localised<Token>> {
    // A symbol stops at whitespace or at either parenthesis.
    let is_delimiter = |ch: char| ch.is_whitespace() || matches!(ch, '(' | ')');

    // Every character of the input, paired with its 1-based line/column position.
    let mut stream = input
        .split('\n')
        .enumerate()
        .flat_map(|(row, text)| {
            text.chars().enumerate().map(move |(column, ch)| Localised {
                location: Location::Char {
                    line: row + 1,
                    col: column + 1,
                },
                item: ch,
            })
        })
        .peekable();

    let mut out = Vec::new();
    while let Some(Localised {
        location: here,
        item: ch,
    }) = stream.next()
    {
        if ch.is_whitespace() {
            continue;
        }
        let token = match ch {
            '(' => Localised {
                location: here,
                item: Token::LeftParen,
            },
            ')' => Localised {
                location: here,
                item: Token::RightParen,
            },
            first => {
                let mut text = String::from(first);
                let mut last = here.clone();
                // Keep consuming while the upcoming character still belongs to the symbol;
                // the delimiter itself stays in the stream for the outer loop.
                while let Some(next) = stream.next_if(|upcoming| !is_delimiter(upcoming.item)) {
                    text.push(next.item);
                    last = next.location;
                }
                Localised {
                    location: Location::range(&here, &last),
                    item: Token::Symbol(text),
                }
            }
        };
        out.push(token);
    }
    out
}
/// Parses exactly one expression from the front of `tokens`.
///
/// A symbol token becomes an [`Ast::Symbol`]. An opening parenthesis starts an
/// [`Ast::List`] whose children are parsed recursively up to the matching closing
/// parenthesis; the list's location runs from the opening to the closing parenthesis.
/// Tokens after the parsed expression are left in the iterator.
///
/// # Errors
///
/// * [`ParseError::UnexpectedParenClose`] if the expression starts with `)`.
/// * [`ParseError::UnexpectedEof`] if the tokens run out before an expression, or before
///   the end of an open list, has been read.
fn parse_expr(tokens: &mut Peekable<IntoIter<Localised<Token>>>) -> Result<Ast, ParseError> {
    let Localised { location, item } = tokens.next().ok_or(ParseError::UnexpectedEof)?;
    match item {
        Token::Symbol(name) => Ok(Ast::Symbol(Localised {
            location,
            item: name,
        })),
        Token::RightParen => Err(ParseError::UnexpectedParenClose(location)),
        Token::LeftParen => {
            let mut children = Vec::new();
            loop {
                // A closing parenthesis ends the list; anything else (including running
                // out of tokens) is handled by parsing one more child.
                if let Some(closing) =
                    tokens.next_if(|upcoming| matches!(upcoming.item, Token::RightParen))
                {
                    return Ok(Ast::List(Localised {
                        location: Location::range(&location, &closing.location),
                        item: children,
                    }));
                }
                children.push(parse_expr(tokens)?);
            }
        }
    }
}
/// Parses a whole token sequence into an [`Ast::Root`] holding every top-level
/// expression in order.
///
/// # Errors
///
/// Returns the first [`ParseError`] reported by `parse_expr`; expressions parsed before
/// the failure are discarded.
pub fn parse(tokens: Vec<Localised<Token>>) -> Result<Ast, ParseError> {
    let mut remaining = tokens.into_iter().peekable();
    let mut top_level: Vec<Ast> = Vec::new();
    loop {
        // Stop only between expressions, once every token has been consumed.
        if remaining.peek().is_none() {
            return Ok(Ast::Root(top_level));
        }
        top_level.push(parse_expr(&mut remaining)?);
    }
}
/// Tokenizes `src` and parses the resulting tokens in one step.
///
/// # Errors
///
/// Returns whatever [`ParseError`] `parse` reports for the tokens of `src`.
pub fn parse_string(src: &str) -> Result<Ast, ParseError> {
    parse(tokenize(src))
}