Add error handling and location tracking to the tokenizer and parser

pull/3/head
Avery 3 weeks ago committed by itycodes
parent 5f041f491a
commit b4ab40f10e

@ -1,10 +1,10 @@
use std::fmt::Write; use std::fmt::Write;
pub mod parser;
pub mod compiler; pub mod compiler;
pub mod parser;
fn main() { fn main() {
let mut ops: Vec<(Option<String>, Vec<String>)> = Vec::new(); let mut ops: Vec<(Option<String>, Vec<String>)> = Vec::new();
// OpMemoryModel Logical GLSL450 // OpMemoryModel Logical GLSL450
// OpEntryPoint Fragment %main "main" // OpEntryPoint Fragment %main "main"
// OpExecutionMode %main OriginUpperLeft // OpExecutionMode %main OriginUpperLeft
@ -18,23 +18,88 @@ fn main() {
//%5 = OpLabel //%5 = OpLabel
// OpReturn // OpReturn
// OpFunctionEnd // OpFunctionEnd
ops.push((None, vec!["OpCapability".to_string(), "Shader".to_string()])); ops.push((None, vec!["OpCapability".to_string(), "Shader".to_string()]));
ops.push((Some("%1".to_string()), vec!["OpExtInstImport".to_string(), "\"GLSL.std.450\"".to_string()])); ops.push((
ops.push((None, vec!["OpMemoryModel".to_string(), "Logical".to_string(), "GLSL450".to_string()])); Some("%1".to_string()),
ops.push((None, vec!["OpEntryPoint".to_string(), "Fragment".to_string(), "%main".to_string(), "\"main\"".to_string()])); vec![
ops.push((None, vec!["OpExecutionMode".to_string(), "%main".to_string(), "OriginUpperLeft".to_string()])); "OpExtInstImport".to_string(),
ops.push((None, vec!["OpSource".to_string(), "GLSL".to_string(), "450".to_string()])); "\"GLSL.std.450\"".to_string(),
ops.push((None, vec!["OpSourceExtension".to_string(), "\"GL_GOOGLE_cpp_style_line_directive\"".to_string()])); ],
ops.push((None, vec!["OpSourceExtension".to_string(), "\"GL_GOOGLE_include_directive\"".to_string()])); ));
ops.push((None, vec!["OpName".to_string(), "%main".to_string(), "\"main\"".to_string()])); ops.push((
None,
vec![
"OpMemoryModel".to_string(),
"Logical".to_string(),
"GLSL450".to_string(),
],
));
ops.push((
None,
vec![
"OpEntryPoint".to_string(),
"Fragment".to_string(),
"%main".to_string(),
"\"main\"".to_string(),
],
));
ops.push((
None,
vec![
"OpExecutionMode".to_string(),
"%main".to_string(),
"OriginUpperLeft".to_string(),
],
));
ops.push((
None,
vec![
"OpSource".to_string(),
"GLSL".to_string(),
"450".to_string(),
],
));
ops.push((
None,
vec![
"OpSourceExtension".to_string(),
"\"GL_GOOGLE_cpp_style_line_directive\"".to_string(),
],
));
ops.push((
None,
vec![
"OpSourceExtension".to_string(),
"\"GL_GOOGLE_include_directive\"".to_string(),
],
));
ops.push((
None,
vec![
"OpName".to_string(),
"%main".to_string(),
"\"main\"".to_string(),
],
));
ops.push((Some("%void".to_string()), vec!["OpTypeVoid".to_string()])); ops.push((Some("%void".to_string()), vec!["OpTypeVoid".to_string()]));
ops.push((Some("%3".to_string()), vec!["OpTypeFunction".to_string(), "%void".to_string()])); ops.push((
ops.push((Some("%main".to_string()), vec!["OpFunction".to_string(), "%void".to_string(), "None".to_string(), "%3".to_string()])); Some("%3".to_string()),
vec!["OpTypeFunction".to_string(), "%void".to_string()],
));
ops.push((
Some("%main".to_string()),
vec![
"OpFunction".to_string(),
"%void".to_string(),
"None".to_string(),
"%3".to_string(),
],
));
ops.push((Some("%5".to_string()), vec!["OpLabel".to_string()])); ops.push((Some("%5".to_string()), vec!["OpLabel".to_string()]));
ops.push((None, vec!["OpReturn".to_string()])); ops.push((None, vec!["OpReturn".to_string()]));
ops.push((None, vec!["OpFunctionEnd".to_string()])); ops.push((None, vec!["OpFunctionEnd".to_string()]));
let mut out: String = String::new(); let mut out: String = String::new();
for op in ops { for op in ops {

@ -2,9 +2,94 @@
mod tests; mod tests;
use std::iter::Peekable; use std::iter::Peekable;
use std::ops::RangeInclusive;
use std::vec::IntoIter; use std::vec::IntoIter;
/// Position of a token or syntax node in the source text.
///
/// `tokenize` numbers both lines and columns starting from 1.
///
/// The type is public because it is carried by the public
/// `ParseError::UnexpectedParenClose` variant; keeping it private would leak a
/// private type through a public interface.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum Location {
    /// A single character position.
    Char {
        /// Line of the character.
        line: usize,
        /// Column of the character within its line.
        col: usize,
    },
    /// An inclusive span, e.g. a whole symbol or a parenthesised list.
    String {
        /// First and last line covered by the span.
        lines: RangeInclusive<usize>,
        /// Column of the span's first character and column of its last character.
        cols: RangeInclusive<usize>,
    },
}
/// Errors produced while turning a token stream into an `Ast`.
#[derive(Debug)]
pub enum ParseError {
    /// A `)` was found where an expression was expected; carries the location
    /// of the offending parenthesis.
    UnexpectedParenClose(Location),
    /// The token stream ended while an expression was still expected
    /// (for example inside an unclosed list).
    UnexpectedEof,
}

impl std::fmt::Display for ParseError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            ParseError::UnexpectedParenClose(location) => write!(
                f,
                "unexpected ) at line {}, column {}",
                location.line_start(),
                location.col_start()
            ),
            ParseError::UnexpectedEof => f.write_str("unexpected EOF"),
        }
    }
}

impl std::error::Error for ParseError {}
impl Location {
    /// Placeholder location for tests.
    ///
    /// [`Localised`] equality ignores locations, so tests can use this value
    /// wherever a location is required but irrelevant.
    #[cfg(test)]
    pub fn dummy() -> Location {
        Location::Char { line: 0, col: 0 }
    }

    /// Builds the span that starts where `start` starts and ends where `end` ends.
    ///
    /// Line and column bounds are combined independently: `cols` pairs the
    /// start column of `start` with the end column of `end`, even when the two
    /// lie on different lines.
    pub fn range(start: &Location, end: &Location) -> Location {
        let lines = start.line_start()..=end.line_end();
        let cols = start.col_start()..=end.col_end();
        Location::String { lines, cols }
    }

    /// First line covered by this location.
    pub fn line_start(&self) -> usize {
        match *self {
            Self::Char { line, .. } => line,
            Self::String { ref lines, .. } => *lines.start(),
        }
    }

    /// Column at which this location starts.
    pub fn col_start(&self) -> usize {
        match *self {
            Self::Char { col, .. } => col,
            Self::String { ref cols, .. } => *cols.start(),
        }
    }

    /// Last line covered by this location.
    pub fn line_end(&self) -> usize {
        match *self {
            Self::Char { line, .. } => line,
            Self::String { ref lines, .. } => *lines.end(),
        }
    }

    /// Column at which this location ends.
    pub fn col_end(&self) -> usize {
        match *self {
            Self::Char { col, .. } => col,
            Self::String { ref cols, .. } => *cols.end(),
        }
    }
}
/// A value paired with the source location it was read from.
///
/// The type and its fields are public because `Localised` appears in the
/// signatures of the public `tokenize` and `parse` functions and inside the
/// public `Ast` enum; callers outside this module need to be able to name it
/// and read the wrapped item.
#[derive(Debug, Clone)]
pub struct Localised<T: Clone> {
    /// Where `item` was found in the source.
    pub location: Location,
    /// The wrapped value.
    pub item: T,
}
impl<T: Clone> Localised<T> {
    /// Wraps `item` with the placeholder [`Location::dummy`] position.
    ///
    /// Only compiled for tests, where the location is irrelevant.
    #[cfg(test)]
    pub fn dummy_location(item: T) -> Self {
        let location = Location::dummy();
        Localised { location, item }
    }
}
/// Equality looks only at the wrapped items; the locations are ignored, so two
/// values read from different places in the source still compare equal.
impl<T: PartialEq + Clone> PartialEq for Localised<T> {
    fn eq(&self, rhs: &Self) -> bool {
        self.item == rhs.item
    }
}
#[derive(Debug, PartialEq, Clone)]
pub enum Token { pub enum Token {
LeftParen, LeftParen,
RightParen, RightParen,
@ -13,56 +98,109 @@ pub enum Token {
#[derive(Debug, PartialEq, Clone)] #[derive(Debug, PartialEq, Clone)]
pub enum Ast { pub enum Ast {
Symbol(String), Symbol(Localised<String>),
List(Vec<Ast>), List(Localised<Vec<Ast>>),
Root(Vec<Ast>), Root(Vec<Ast>),
} }
pub fn tokenize(input: &str) -> Vec<Token> { pub fn tokenize(input: &str) -> Vec<Localised<Token>> {
let mut tokens = Vec::new(); let mut tokens = Vec::new();
let mut chars = input.chars().peekable(); // let mut chars = input.chars().peekable();
while let Some(c) = chars.next() { let mut chars = (1..)
.zip(input.split('\n'))
.flat_map(|(l_num, l)| {
(1..).zip(l.chars()).map(move |(c_num, c)| Localised {
location: Location::Char {
line: l_num,
col: c_num,
},
item: c,
})
})
.peekable();
while let Some(Localised { location, item: c }) = chars.next() {
match c { match c {
'(' => tokens.push(Token::LeftParen), '(' => tokens.push(Localised {
')' => tokens.push(Token::RightParen), location,
item: Token::LeftParen,
}),
')' => tokens.push(Localised {
location,
item: Token::RightParen,
}),
_ if c.is_whitespace() => (), _ if c.is_whitespace() => (),
_ => { _ => {
let start = location.clone();
let mut end = location;
let mut symbol = c.to_string(); let mut symbol = c.to_string();
while let Some(&c) = chars.peek() { while let Some(Localised { item: c, .. }) = chars.peek() {
if c.is_whitespace() || c == '(' || c == ')' { if c.is_whitespace() || *c == '(' || *c == ')' {
break; break;
} }
symbol.push(c); symbol.push(*c);
chars.next(); let Localised { location, .. } = chars.next().unwrap();
end = location;
} }
tokens.push(Token::Symbol(symbol)); tokens.push(Localised {
location: Location::range(&start, &end),
item: Token::Symbol(symbol),
});
} }
} }
} }
tokens tokens
} }
fn parse_expr(tokens: &mut Peekable<IntoIter<Token>>) -> Ast { fn parse_expr(tokens: &mut Peekable<IntoIter<Localised<Token>>>) -> Result<Ast, ParseError> {
match tokens.next() { match tokens.next() {
Some(Token::LeftParen) => { Some(Localised {
location: start,
item: Token::LeftParen,
}) => {
let mut list = Vec::new(); let mut list = Vec::new();
while tokens.peek() != Some(&Token::RightParen) { while !matches!(
list.push(parse_expr(tokens)); tokens.peek(),
Some(Localised {
item: Token::RightParen,
..
})
) {
list.push(parse_expr(tokens)?);
} }
tokens.next(); let Some(Localised {
Ast::List(list) location: end,
item: Token::RightParen,
}) = tokens.next()
else {
unreachable!()
};
Ok(Ast::List(Localised {
location: Location::range(&start, &end),
item: list,
}))
} }
Some(Token::RightParen) => panic!("unexpected )"), Some(Localised {
Some(Token::Symbol(s)) => Ast::Symbol(s), location,
None => panic!("unexpected EOF"), item: Token::RightParen,
}) => Err(ParseError::UnexpectedParenClose(location)),
Some(Localised {
location,
item: Token::Symbol(s),
}) => Ok(Ast::Symbol(Localised { location, item: s })),
None => Err(ParseError::UnexpectedEof),
} }
} }
pub fn parse(tokens: Vec<Token>) -> Ast { pub fn parse(tokens: Vec<Localised<Token>>) -> Result<Ast, ParseError> {
let mut tokens = tokens.into_iter().peekable(); let mut tokens = tokens.into_iter().peekable();
let mut ast: Vec<Ast> = Vec::new(); let mut ast: Vec<Ast> = Vec::new();
while tokens.peek().is_some() { while tokens.peek().is_some() {
ast.push(parse_expr(&mut tokens)); ast.push(parse_expr(&mut tokens)?);
} }
Ast::Root(ast) Ok(Ast::Root(ast))
} }
/// Tokenizes `src` and parses the resulting tokens in one step.
///
/// # Errors
///
/// Propagates any [`ParseError`] returned by `parse`.
pub fn parse_string(src: &str) -> Result<Ast, ParseError> {
    parse(tokenize(src))
}

@ -1,22 +1,38 @@
use crate::parser::{tokenize, parse, Token, Ast}; use crate::parser::{Ast, Localised, Token, parse, tokenize};
/// Checks both the token sequence and the recorded source locations.
///
/// `Localised` equality ignores locations, so the `assert_eq!` on the token
/// vectors alone would pass even if location tracking were completely wrong;
/// the spans are therefore asserted separately.
#[test]
fn test_tokenize() {
    let input = "(+ (* 1:u8 2) 2)";
    let expected: Vec<Localised<Token>> = [
        Token::LeftParen,
        Token::Symbol("+".to_string()),
        Token::LeftParen,
        Token::Symbol("*".to_string()),
        Token::Symbol("1:u8".to_string()),
        Token::Symbol("2".to_string()),
        Token::RightParen,
        Token::Symbol("2".to_string()),
        Token::RightParen,
    ]
    .into_iter()
    .map(Localised::dummy_location)
    .collect();

    let tokens = tokenize(input);
    assert_eq!(tokens, expected);

    // (line, first column, last column) of every token, columns counted from 1.
    let spans: Vec<(usize, usize, usize)> = tokens
        .iter()
        .map(|t| {
            (
                t.location.line_start(),
                t.location.col_start(),
                t.location.col_end(),
            )
        })
        .collect();
    assert_eq!(
        spans,
        vec![
            (1, 1, 1),
            (1, 2, 2),
            (1, 4, 4),
            (1, 5, 5),
            (1, 7, 10),
            (1, 12, 12),
            (1, 13, 13),
            (1, 15, 15),
            (1, 16, 16),
        ]
    );
}
/// A stray `)` at top level must be reported as `UnexpectedParenClose`, with
/// the location of that parenthesis.
///
/// Asserting on the returned error (instead of `#[should_panic]` around
/// `unwrap`) ensures the test cannot pass because of an unrelated panic.
#[test]
fn test_unexpected_pclose() {
    let input = "())";
    let tokens = tokenize(input);
    match parse(tokens) {
        Err(crate::parser::ParseError::UnexpectedParenClose(location)) => {
            // The offending paren is the third character of the first line.
            assert_eq!((location.line_start(), location.col_start()), (1, 3));
        }
        other => panic!("expected UnexpectedParenClose, got {other:?}"),
    }
}
/// An unclosed list must be reported as `UnexpectedEof`.
///
/// Asserting on the returned error (instead of `#[should_panic]` around
/// `unwrap`) ensures the test cannot pass because of an unrelated panic.
#[test]
fn test_unexpected_eof() {
    let input = "(1 2 3 4";
    let tokens = tokenize(input);
    let result = parse(tokens);
    assert!(
        matches!(result, Err(crate::parser::ParseError::UnexpectedEof)),
        "expected UnexpectedEof, got {result:?}"
    );
}
#[test] #[test]
fn test_parse() { fn test_parse() {
let src = " let src = "
@ -34,92 +50,92 @@ fn test_parse() {
1.0 1.0
1.0))) 1.0)))
"; ";
let ast = parse(tokenize(src)); let ast = parse(tokenize(src)).unwrap();
println!("{:?}", ast); println!("{:?}", ast);
let test_ast: Ast = Ast::Root(vec![ let test_ast: Ast = Ast::Root(vec![
Ast::List(vec![ Ast::List(Localised::dummy_location(vec![
Ast::Symbol("module".to_string()), Ast::Symbol(Localised::dummy_location("module".to_string())),
Ast::Symbol("Shader".to_string()), Ast::Symbol(Localised::dummy_location("Shader".to_string())),
Ast::Symbol("Logical".to_string()), Ast::Symbol(Localised::dummy_location("Logical".to_string())),
Ast::Symbol("GLSL450".to_string()), Ast::Symbol(Localised::dummy_location("GLSL450".to_string())),
]), ])),
Ast::List(vec![ Ast::List(Localised::dummy_location(vec![
Ast::Symbol("import".to_string()), Ast::Symbol(Localised::dummy_location("import".to_string())),
Ast::Symbol(":std".to_string()), Ast::Symbol(Localised::dummy_location(":std".to_string())),
Ast::Symbol("GLSL.std.450".to_string()), Ast::Symbol(Localised::dummy_location("GLSL.std.450".to_string())),
]), ])),
Ast::List(vec![ Ast::List(Localised::dummy_location(vec![
Ast::Symbol("bind".to_string()), Ast::Symbol(Localised::dummy_location("bind".to_string())),
Ast::List(vec![ Ast::List(Localised::dummy_location(vec![Ast::Symbol(
Ast::Symbol("frag-coord:*v4f32i".to_string()), Localised::dummy_location("frag-coord:*v4f32i".to_string()),
]), )])),
Ast::List(vec![ Ast::List(Localised::dummy_location(vec![
Ast::Symbol("BuiltIn".to_string()), Ast::Symbol(Localised::dummy_location("BuiltIn".to_string())),
Ast::Symbol("FragCoord".to_string()), Ast::Symbol(Localised::dummy_location("FragCoord".to_string())),
]), ])),
]), ])),
Ast::List(vec![ Ast::List(Localised::dummy_location(vec![
Ast::Symbol("bind".to_string()), Ast::Symbol(Localised::dummy_location("bind".to_string())),
Ast::List(vec![ Ast::List(Localised::dummy_location(vec![Ast::Symbol(
Ast::Symbol("out-color:*v4f32o".to_string()), Localised::dummy_location("out-color:*v4f32o".to_string()),
]), )])),
Ast::List(vec![ Ast::List(Localised::dummy_location(vec![
Ast::Symbol("Location".to_string()), Ast::Symbol(Localised::dummy_location("Location".to_string())),
Ast::Symbol("0".to_string()), Ast::Symbol(Localised::dummy_location("0".to_string())),
]), ])),
]), ])),
Ast::List(vec![ Ast::List(Localised::dummy_location(vec![
Ast::Symbol("dec".to_string()), Ast::Symbol(Localised::dummy_location("dec".to_string())),
Ast::Symbol("frag-coord:*v4f32i".to_string()), Ast::Symbol(Localised::dummy_location("frag-coord:*v4f32i".to_string())),
Ast::Symbol("Input".to_string()), Ast::Symbol(Localised::dummy_location("Input".to_string())),
]), ])),
Ast::List(vec![ Ast::List(Localised::dummy_location(vec![
Ast::Symbol("dec".to_string()), Ast::Symbol(Localised::dummy_location("dec".to_string())),
Ast::Symbol("out-color:*v4f32o".to_string()), Ast::Symbol(Localised::dummy_location("out-color:*v4f32o".to_string())),
Ast::Symbol("Output".to_string()), Ast::Symbol(Localised::dummy_location("Output".to_string())),
]), ])),
Ast::List(vec![ Ast::List(Localised::dummy_location(vec![
Ast::Symbol("entry".to_string()), Ast::Symbol(Localised::dummy_location("entry".to_string())),
Ast::Symbol("main".to_string()), Ast::Symbol(Localised::dummy_location("main".to_string())),
Ast::Symbol("Fragment".to_string()), Ast::Symbol(Localised::dummy_location("Fragment".to_string())),
Ast::Symbol("OriginUpperLeft".to_string()), Ast::Symbol(Localised::dummy_location("OriginUpperLeft".to_string())),
Ast::List(vec![ Ast::List(Localised::dummy_location(vec![
Ast::Symbol(":frag-coord".to_string()), Ast::Symbol(Localised::dummy_location(":frag-coord".to_string())),
Ast::Symbol(":out-color".to_string()), Ast::Symbol(Localised::dummy_location(":out-color".to_string())),
]), ])),
]), ])),
Ast::List(vec![ Ast::List(Localised::dummy_location(vec![
Ast::Symbol("fun".to_string()), Ast::Symbol(Localised::dummy_location("fun".to_string())),
Ast::List(vec![ Ast::List(Localised::dummy_location(vec![Ast::Symbol(
Ast::Symbol("main".to_string()), Localised::dummy_location("main".to_string()),
]), )])),
Ast::List(vec![ Ast::List(Localised::dummy_location(vec![
Ast::Symbol("store-ptr".to_string()), Ast::Symbol(Localised::dummy_location("store-ptr".to_string())),
Ast::List(vec![ Ast::List(Localised::dummy_location(vec![Ast::Symbol(
Ast::Symbol("out-color".to_string()), Localised::dummy_location("out-color".to_string()),
]), )])),
Ast::List(vec![ Ast::List(Localised::dummy_location(vec![
Ast::Symbol("v4f32i".to_string()), Ast::Symbol(Localised::dummy_location("v4f32i".to_string())),
Ast::List(vec![ Ast::List(Localised::dummy_location(vec![
Ast::Symbol("/".to_string()), Ast::Symbol(Localised::dummy_location("/".to_string())),
Ast::List(vec![ Ast::List(Localised::dummy_location(vec![
Ast::Symbol(".xy".to_string()), Ast::Symbol(Localised::dummy_location(".xy".to_string())),
Ast::List(vec![ Ast::List(Localised::dummy_location(vec![
Ast::Symbol("load-ptr".to_string()), Ast::Symbol(Localised::dummy_location("load-ptr".to_string())),
Ast::Symbol("frag-coord".to_string()), Ast::Symbol(Localised::dummy_location("frag-coord".to_string())),
]), ])),
]), ])),
Ast::List(vec![ Ast::List(Localised::dummy_location(vec![
Ast::Symbol("v2f32".to_string()), Ast::Symbol(Localised::dummy_location("v2f32".to_string())),
Ast::Symbol("1920.0".to_string()), Ast::Symbol(Localised::dummy_location("1920.0".to_string())),
Ast::Symbol("1080.0".to_string()), Ast::Symbol(Localised::dummy_location("1080.0".to_string())),
]), ])),
]), ])),
Ast::Symbol("1.0".to_string()), Ast::Symbol(Localised::dummy_location("1.0".to_string())),
Ast::Symbol("1.0".to_string()), Ast::Symbol(Localised::dummy_location("1.0".to_string())),
]), ])),
]), ])),
]), ])),
]); ]);
assert_eq!(ast, test_ast); assert_eq!(ast, test_ast);
} }

Loading…
Cancel
Save