#[cfg(test)]
mod tests;

use std::iter::Peekable;
use std::ops::{Deref, RangeInclusive};
use std::usize;
use std::vec::IntoIter;

#[derive(Clone, Debug)]
pub enum Location {
    Char {
        line: usize,
        col: usize,
    },
    String {
        lines: RangeInclusive<usize>,
        cols: RangeInclusive<usize>,
    },
    All,
}

#[derive(Debug)]
pub enum ParseError {
    UnexpectedParenClose(Location),
    UnexpectedEof,
}

impl Location {
    /// Since [`Localised`]'s equality doesn't compare locations, allow creating
    /// simple dummy locations for testing.
    #[cfg(test)]
    pub fn dummy() -> Location {
        Self::Char { line: 0, col: 0 }
    }

    pub fn range(start: &Location, end: &Location) -> Location {
        Location::String {
            lines: start.line_start()..=end.line_end(),
            cols: start.col_start()..=end.col_end(),
        }
    }

    pub fn line_start(&self) -> usize {
        match self {
            Location::Char { line, .. } => *line,
            Location::String { lines, .. } => *lines.start(),
            Location::All => 0,
        }
    }

    pub fn col_start(&self) -> usize {
        match self {
            Location::Char { col, .. } => *col,
            Location::String { cols, .. } => *cols.start(),
            Location::All => 0,
        }
    }

    pub fn line_end(&self) -> usize {
        match self {
            Location::Char { line, .. } => *line,
            Location::String { lines, .. } => *lines.end(),
            Location::All => usize::MAX,
        }
    }

    pub fn col_end(&self) -> usize {
        match self {
            Location::Char { col, .. } => *col,
            Location::String { cols, .. } => *cols.end(),
            Location::All => usize::MAX,
        }
    }

    pub fn tail(&self) -> Self {
        match self {
            Location::Char { line, col } => Location::Char {
                line: *line,
                col: *col,
            },
            Location::String { lines, cols } => Location::Char {
                line: *lines.end(),
                col: *cols.end(),
            },
            Location::All => Location::Char {
                line: usize::MAX,
                col: usize::MAX,
            },
        }
    }
}

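// A minimal sketch (test-gated like the existing `mod tests`; the module name
// is illustrative) of how `Location::range` merges two endpoints into a single
// spanning location.
#[cfg(test)]
mod location_range_sketch {
    use super::*;

    #[test]
    fn range_spans_both_endpoints() {
        let start = Location::Char { line: 1, col: 2 };
        let end = Location::Char { line: 3, col: 7 };
        let merged = Location::range(&start, &end);
        assert_eq!(merged.line_start(), 1);
        assert_eq!(merged.col_start(), 2);
        assert_eq!(merged.line_end(), 3);
        assert_eq!(merged.col_end(), 7);
    }
}
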
#[derive(Debug, Clone)]
pub struct Localised<T: Clone> {
    pub location: Location,
    pub item: T,
}

impl<T: Clone> Localised<T> {
    pub fn into_inner(self) -> T {
        self.item
    }
}

impl<T: Clone> Localised<T> {
    #[cfg(test)]
    pub fn dummy_location(item: T) -> Self {
        Self {
            location: Location::dummy(),
            item,
        }
    }
}

impl<T: Clone> Deref for Localised<T> {
    type Target = T;

    fn deref(&self) -> &Self::Target {
        &self.item
    }
}

impl<T: PartialEq + Clone> PartialEq for Localised<T> {
    fn eq(&self, other: &Self) -> bool {
        self.item.eq(&other.item)
    }
}

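// A minimal sketch (module name illustrative) of the `Localised` ergonomics:
// `Deref` exposes the wrapped item and `PartialEq` ignores the location, which
// is what lets the dummy-location constructors stand in during tests.
#[cfg(test)]
mod localised_sketch {
    use super::*;

    #[test]
    fn equality_ignores_location() {
        let real = Localised {
            location: Location::Char { line: 4, col: 2 },
            item: "x".to_string(),
        };
        let dummy = Localised::dummy_location("x".to_string());
        assert_eq!(real, dummy);
        assert_eq!(real.len(), 1); // `Deref` forwards to the inner `String`
    }
}
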
#[derive(Debug, PartialEq, Clone)]
pub enum Token {
    LeftParen,
    RightParen,
    Symbol(String),
}

#[derive(Debug, PartialEq, Clone)]
pub enum Ast {
    Symbol(Localised<String>),
    List(Localised<Vec<Ast>>),
    Root(Vec<Ast>),
}

impl Ast {
    pub fn symbol(self) -> Option<String> {
        match self {
            Ast::Symbol(Localised { item, .. }) => Some(item),
            _ => None,
        }
    }

    pub fn list(self) -> Option<Vec<Ast>> {
        match self {
            Ast::List(Localised { item, .. }) => Some(item),
            _ => None,
        }
    }

    pub fn location(&self) -> &Location {
        match self {
            Ast::Symbol(Localised { location, .. }) => location,
            Ast::List(Localised { location, .. }) => location,
            Ast::Root(_) => &Location::All,
        }
    }
}

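// A minimal sketch (module name illustrative) of the `Ast` accessors: `symbol`
// and `list` each peel off one variant and return `None` for everything else.
#[cfg(test)]
mod ast_accessor_sketch {
    use super::*;

    #[test]
    fn accessors_match_their_variant() {
        let sym = Ast::Symbol(Localised::dummy_location("x".to_string()));
        assert_eq!(sym.clone().symbol(), Some("x".to_string()));
        assert_eq!(sym.list(), None);
    }
}
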
pub fn tokenize(input: &str) -> Vec<Localised<Token>> {
    let mut tokens = Vec::new();
    // Attach a 1-based line/column location to every character up front.
    let mut chars = (1..)
        .zip(input.split('\n'))
        .flat_map(|(l_num, l)| {
            (1..).zip(l.chars()).map(move |(c_num, c)| Localised {
                location: Location::Char {
                    line: l_num,
                    col: c_num,
                },
                item: c,
            })
        })
        .peekable();
    while let Some(Localised { location, item: c }) = chars.next() {
        match c {
            '(' => tokens.push(Localised {
                location,
                item: Token::LeftParen,
            }),
            ')' => tokens.push(Localised {
                location,
                item: Token::RightParen,
            }),
            _ if c.is_whitespace() => (),
            _ => {
                // Anything else starts a symbol: keep consuming until
                // whitespace or a parenthesis, tracking where it ends.
                let start = location.clone();
                let mut end = location;
                let mut symbol = c.to_string();
                while let Some(Localised { item: c, .. }) = chars.peek() {
                    if c.is_whitespace() || *c == '(' || *c == ')' {
                        break;
                    }
                    symbol.push(*c);
                    let Localised { location, .. } = chars.next().unwrap();
                    end = location;
                }
                tokens.push(Localised {
                    location: Location::range(&start, &end),
                    item: Token::Symbol(symbol),
                });
            }
        }
    }
    tokens
}

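// A minimal sketch (module name illustrative) of the tokenizer on a small
// input; the location wrappers are stripped with `into_inner` so only the
// token kinds are compared.
#[cfg(test)]
mod tokenize_sketch {
    use super::*;

    #[test]
    fn tokenizes_a_simple_list() {
        let tokens: Vec<Token> = tokenize("(foo bar)")
            .into_iter()
            .map(Localised::into_inner)
            .collect();
        assert_eq!(
            tokens,
            vec![
                Token::LeftParen,
                Token::Symbol("foo".to_string()),
                Token::Symbol("bar".to_string()),
                Token::RightParen,
            ]
        );
    }
}
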
fn parse_expr(tokens: &mut Peekable<IntoIter<Localised<Token>>>) -> Result<Ast, ParseError> {
    match tokens.next() {
        // '(' opens a list: collect expressions until the matching ')'.
        Some(Localised {
            location: start,
            item: Token::LeftParen,
        }) => {
            let mut list = Vec::new();
            while !matches!(
                tokens.peek(),
                Some(Localised {
                    item: Token::RightParen,
                    ..
                })
            ) {
                list.push(parse_expr(tokens)?);
            }
            // When the loop falls through, the next token is a ')', so this
            // `let ... else` can never fail.
            let Some(Localised {
                location: end,
                item: Token::RightParen,
            }) = tokens.next()
            else {
                unreachable!()
            };
            Ok(Ast::List(Localised {
                location: Location::range(&start, &end),
                item: list,
            }))
        }
        Some(Localised {
            location,
            item: Token::RightParen,
        }) => Err(ParseError::UnexpectedParenClose(location)),
        Some(Localised {
            location,
            item: Token::Symbol(s),
        }) => Ok(Ast::Symbol(Localised { location, item: s })),
        None => Err(ParseError::UnexpectedEof),
    }
}

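// A minimal sketch (module name illustrative) of the two error paths: a stray
// ')' is reported with its location, and an unclosed '(' runs into end of input.
#[cfg(test)]
mod parse_error_sketch {
    use super::*;

    #[test]
    fn reports_stray_and_unclosed_parens() {
        assert!(matches!(
            parse_string(")"),
            Err(ParseError::UnexpectedParenClose(_))
        ));
        assert!(matches!(parse_string("(a"), Err(ParseError::UnexpectedEof)));
    }
}
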
pub fn parse(tokens: Vec<Localised<Token>>) -> Result<Ast, ParseError> {
    let mut tokens = tokens.into_iter().peekable();
    let mut ast: Vec<Ast> = Vec::new();
    while tokens.peek().is_some() {
        ast.push(parse_expr(&mut tokens)?);
    }
    Ok(Ast::Root(ast))
}

pub fn parse_string(src: &str) -> Result<Ast, ParseError> {
    let tokens = tokenize(src);
    parse(tokens)
}

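// A minimal end-to-end sketch (module name illustrative): parsing "(add 1 2)"
// yields a root with a single list of three symbols. Since `Localised` equality
// ignores locations, dummy locations suffice on the expected side.
#[cfg(test)]
mod parse_string_sketch {
    use super::*;

    #[test]
    fn parses_a_flat_list() {
        let ast = parse_string("(add 1 2)").expect("input is well formed");
        let expected = Ast::Root(vec![Ast::List(Localised::dummy_location(vec![
            Ast::Symbol(Localised::dummy_location("add".to_string())),
            Ast::Symbol(Localised::dummy_location("1".to_string())),
            Ast::Symbol(Localised::dummy_location("2".to_string())),
        ]))]);
        assert_eq!(ast, expected);
    }
}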