Add error handling and location tracking to the tokenizer and parser

master
Avery 10 months ago committed by Avery
parent 8de6ef1ec3
commit 38fc11590b
Signed by untrusted user who does not match committer: Avery
GPG Key ID: 4E53F4CB69B2CC8D

@ -1,10 +1,10 @@
use std::fmt::Write; use std::fmt::Write;
pub mod parser;
pub mod compiler; pub mod compiler;
pub mod parser;
fn main() { fn main() {
let mut ops: Vec<(Option<String>, Vec<String>)> = Vec::new(); let mut ops: Vec<(Option<String>, Vec<String>)> = Vec::new();
// OpMemoryModel Logical GLSL450 // OpMemoryModel Logical GLSL450
// OpEntryPoint Fragment %main "main" // OpEntryPoint Fragment %main "main"
// OpExecutionMode %main OriginUpperLeft // OpExecutionMode %main OriginUpperLeft
@ -18,23 +18,88 @@ fn main() {
//%5 = OpLabel //%5 = OpLabel
// OpReturn // OpReturn
// OpFunctionEnd // OpFunctionEnd
ops.push((None, vec!["OpCapability".to_string(), "Shader".to_string()])); ops.push((None, vec!["OpCapability".to_string(), "Shader".to_string()]));
ops.push((Some("%1".to_string()), vec!["OpExtInstImport".to_string(), "\"GLSL.std.450\"".to_string()])); ops.push((
ops.push((None, vec!["OpMemoryModel".to_string(), "Logical".to_string(), "GLSL450".to_string()])); Some("%1".to_string()),
ops.push((None, vec!["OpEntryPoint".to_string(), "Fragment".to_string(), "%main".to_string(), "\"main\"".to_string()])); vec![
ops.push((None, vec!["OpExecutionMode".to_string(), "%main".to_string(), "OriginUpperLeft".to_string()])); "OpExtInstImport".to_string(),
ops.push((None, vec!["OpSource".to_string(), "GLSL".to_string(), "450".to_string()])); "\"GLSL.std.450\"".to_string(),
ops.push((None, vec!["OpSourceExtension".to_string(), "\"GL_GOOGLE_cpp_style_line_directive\"".to_string()])); ],
ops.push((None, vec!["OpSourceExtension".to_string(), "\"GL_GOOGLE_include_directive\"".to_string()])); ));
ops.push((None, vec!["OpName".to_string(), "%main".to_string(), "\"main\"".to_string()])); ops.push((
None,
vec![
"OpMemoryModel".to_string(),
"Logical".to_string(),
"GLSL450".to_string(),
],
));
ops.push((
None,
vec![
"OpEntryPoint".to_string(),
"Fragment".to_string(),
"%main".to_string(),
"\"main\"".to_string(),
],
));
ops.push((
None,
vec![
"OpExecutionMode".to_string(),
"%main".to_string(),
"OriginUpperLeft".to_string(),
],
));
ops.push((
None,
vec![
"OpSource".to_string(),
"GLSL".to_string(),
"450".to_string(),
],
));
ops.push((
None,
vec![
"OpSourceExtension".to_string(),
"\"GL_GOOGLE_cpp_style_line_directive\"".to_string(),
],
));
ops.push((
None,
vec![
"OpSourceExtension".to_string(),
"\"GL_GOOGLE_include_directive\"".to_string(),
],
));
ops.push((
None,
vec![
"OpName".to_string(),
"%main".to_string(),
"\"main\"".to_string(),
],
));
ops.push((Some("%void".to_string()), vec!["OpTypeVoid".to_string()])); ops.push((Some("%void".to_string()), vec!["OpTypeVoid".to_string()]));
ops.push((Some("%3".to_string()), vec!["OpTypeFunction".to_string(), "%void".to_string()])); ops.push((
ops.push((Some("%main".to_string()), vec!["OpFunction".to_string(), "%void".to_string(), "None".to_string(), "%3".to_string()])); Some("%3".to_string()),
vec!["OpTypeFunction".to_string(), "%void".to_string()],
));
ops.push((
Some("%main".to_string()),
vec![
"OpFunction".to_string(),
"%void".to_string(),
"None".to_string(),
"%3".to_string(),
],
));
ops.push((Some("%5".to_string()), vec!["OpLabel".to_string()])); ops.push((Some("%5".to_string()), vec!["OpLabel".to_string()]));
ops.push((None, vec!["OpReturn".to_string()])); ops.push((None, vec!["OpReturn".to_string()]));
ops.push((None, vec!["OpFunctionEnd".to_string()])); ops.push((None, vec!["OpFunctionEnd".to_string()]));
let mut out: String = String::new(); let mut out: String = String::new();
for op in ops { for op in ops {

@ -2,9 +2,94 @@
mod tests; mod tests;
use std::iter::Peekable; use std::iter::Peekable;
use std::ops::RangeInclusive;
use std::vec::IntoIter; use std::vec::IntoIter;
#[derive(Debug, PartialEq)] #[derive(Clone, Debug)]
/// Position of a token or AST node in the source text.
///
/// Public because it is carried by the public [`ParseError`] type; keeping it private would
/// expose a private type through a public interface.
pub enum Location {
    /// A single character.
    Char {
        /// Line the character is on.
        line: usize,
        /// Column of the character within its line.
        col: usize,
    },
    /// A span of characters, such as a whole symbol or a parenthesised list.
    String {
        /// First line through last line touched by the span.
        lines: RangeInclusive<usize>,
        /// Column of the span's first character through the column of its last character.
        /// For a span that crosses lines the start column may be greater than the end column.
        cols: RangeInclusive<usize>,
    },
}
/// Errors reported while turning a token stream into an [`Ast`].
#[derive(Debug)]
pub enum ParseError {
    /// A `)` was found where an expression was expected; carries the location of that paren.
    UnexpectedParenClose(Location),
    /// The tokens ran out while an expression was still expected, e.g. a list that is
    /// never closed.
    UnexpectedEof,
}

impl std::fmt::Display for ParseError {
    /// Writes a short human-readable description of the error.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            ParseError::UnexpectedParenClose(location) => write!(
                f,
                "unexpected `)` at line {}, column {}",
                location.line_start(),
                location.col_start()
            ),
            ParseError::UnexpectedEof => write!(f, "unexpected end of input"),
        }
    }
}

// Lets callers box the error or propagate it with `?` alongside other error types.
impl std::error::Error for ParseError {}
impl Location {
    /// Placeholder location for tests. Equality on [`Localised`] ignores the location, so
    /// any value works when building expected results.
    #[cfg(test)]
    pub fn dummy() -> Location {
        Location::Char { line: 0, col: 0 }
    }

    /// Builds the span that runs from the start of `start` to the end of `end`.
    pub fn range(start: &Location, end: &Location) -> Location {
        let lines = start.line_start()..=end.line_end();
        let cols = start.col_start()..=end.col_end();
        Location::String { lines, cols }
    }

    /// First line covered by this location.
    pub fn line_start(&self) -> usize {
        match *self {
            Self::Char { line, .. } => line,
            Self::String { ref lines, .. } => *lines.start(),
        }
    }

    /// Column at which this location starts.
    pub fn col_start(&self) -> usize {
        match *self {
            Self::Char { col, .. } => col,
            Self::String { ref cols, .. } => *cols.start(),
        }
    }

    /// Last line covered by this location.
    pub fn line_end(&self) -> usize {
        match *self {
            Self::Char { line, .. } => line,
            Self::String { ref lines, .. } => *lines.end(),
        }
    }

    /// Column at which this location ends.
    pub fn col_end(&self) -> usize {
        match *self {
            Self::Char { col, .. } => col,
            Self::String { ref cols, .. } => *cols.end(),
        }
    }
}
/// A value paired with the source [`Location`] it was read from.
///
/// Public because it appears in the signatures of the public `tokenize` and `parse`
/// functions and inside the public `Ast` enum; keeping it private would expose a private
/// type through a public interface.
#[derive(Debug, Clone)]
pub struct Localised<T: Clone> {
    /// Where `item` appears in the source text.
    location: Location,
    /// The wrapped value.
    item: T,
}
impl<T: Clone> Localised<T> {
    /// Wraps `item` with the placeholder location from [`Location::dummy`]; test-only helper
    /// for building expected values.
    #[cfg(test)]
    pub fn dummy_location(item: T) -> Self {
        let location = Location::dummy();
        Self { location, item }
    }
}
impl<T: PartialEq + Clone> PartialEq for Localised<T> {
    /// Compares only the wrapped items; the locations are deliberately left out of the
    /// comparison.
    fn eq(&self, rhs: &Self) -> bool {
        let (lhs_item, rhs_item) = (&self.item, &rhs.item);
        lhs_item == rhs_item
    }
}
#[derive(Debug, PartialEq, Clone)]
pub enum Token { pub enum Token {
LeftParen, LeftParen,
RightParen, RightParen,
@ -13,56 +98,109 @@ pub enum Token {
#[derive(Debug, PartialEq, Clone)] #[derive(Debug, PartialEq, Clone)]
pub enum Ast { pub enum Ast {
Symbol(String), Symbol(Localised<String>),
List(Vec<Ast>), List(Localised<Vec<Ast>>),
Root(Vec<Ast>), Root(Vec<Ast>),
} }
pub fn tokenize(input: &str) -> Vec<Token> { pub fn tokenize(input: &str) -> Vec<Localised<Token>> {
let mut tokens = Vec::new(); let mut tokens = Vec::new();
let mut chars = input.chars().peekable(); // let mut chars = input.chars().peekable();
while let Some(c) = chars.next() { let mut chars = (1..)
.zip(input.split('\n'))
.flat_map(|(l_num, l)| {
(1..).zip(l.chars()).map(move |(c_num, c)| Localised {
location: Location::Char {
line: l_num,
col: c_num,
},
item: c,
})
})
.peekable();
while let Some(Localised { location, item: c }) = chars.next() {
match c { match c {
'(' => tokens.push(Token::LeftParen), '(' => tokens.push(Localised {
')' => tokens.push(Token::RightParen), location,
item: Token::LeftParen,
}),
')' => tokens.push(Localised {
location,
item: Token::RightParen,
}),
_ if c.is_whitespace() => (), _ if c.is_whitespace() => (),
_ => { _ => {
let start = location.clone();
let mut end = location;
let mut symbol = c.to_string(); let mut symbol = c.to_string();
while let Some(&c) = chars.peek() { while let Some(Localised { item: c, .. }) = chars.peek() {
if c.is_whitespace() || c == '(' || c == ')' { if c.is_whitespace() || *c == '(' || *c == ')' {
break; break;
} }
symbol.push(c); symbol.push(*c);
chars.next(); let Localised { location, .. } = chars.next().unwrap();
end = location;
} }
tokens.push(Token::Symbol(symbol)); tokens.push(Localised {
location: Location::range(&start, &end),
item: Token::Symbol(symbol),
});
} }
} }
} }
tokens tokens
} }
fn parse_expr(tokens: &mut Peekable<IntoIter<Token>>) -> Ast { fn parse_expr(tokens: &mut Peekable<IntoIter<Localised<Token>>>) -> Result<Ast, ParseError> {
match tokens.next() { match tokens.next() {
Some(Token::LeftParen) => { Some(Localised {
location: start,
item: Token::LeftParen,
}) => {
let mut list = Vec::new(); let mut list = Vec::new();
while tokens.peek() != Some(&Token::RightParen) { while !matches!(
list.push(parse_expr(tokens)); tokens.peek(),
Some(Localised {
item: Token::RightParen,
..
})
) {
list.push(parse_expr(tokens)?);
} }
tokens.next(); let Some(Localised {
Ast::List(list) location: end,
item: Token::RightParen,
}) = tokens.next()
else {
unreachable!()
};
Ok(Ast::List(Localised {
location: Location::range(&start, &end),
item: list,
}))
} }
Some(Token::RightParen) => panic!("unexpected )"), Some(Localised {
Some(Token::Symbol(s)) => Ast::Symbol(s), location,
None => panic!("unexpected EOF"), item: Token::RightParen,
}) => Err(ParseError::UnexpectedParenClose(location)),
Some(Localised {
location,
item: Token::Symbol(s),
}) => Ok(Ast::Symbol(Localised { location, item: s })),
None => Err(ParseError::UnexpectedEof),
} }
} }
pub fn parse(tokens: Vec<Token>) -> Ast { pub fn parse(tokens: Vec<Localised<Token>>) -> Result<Ast, ParseError> {
let mut tokens = tokens.into_iter().peekable(); let mut tokens = tokens.into_iter().peekable();
let mut ast: Vec<Ast> = Vec::new(); let mut ast: Vec<Ast> = Vec::new();
while tokens.peek().is_some() { while tokens.peek().is_some() {
ast.push(parse_expr(&mut tokens)); ast.push(parse_expr(&mut tokens)?);
} }
Ast::Root(ast) Ok(Ast::Root(ast))
} }
pub fn parse_string(src: &str) -> Result<Ast, ParseError> {
let tokens = tokenize(src);
parse(tokens)
}

@ -1,22 +1,38 @@
use crate::parser::{tokenize, parse, Token, Ast}; use crate::parser::{Ast, Localised, Token, parse, tokenize};
#[test] #[test]
fn test_tokenize() { fn test_tokenize() {
let input = "(+ (* 1:u8 2) 2)"; let input = "(+ (* 1:u8 2) 2)";
let expected = vec![ let expected = vec![
Token::LeftParen, Localised::dummy_location(Token::LeftParen),
Token::Symbol("+".to_string()), Localised::dummy_location(Token::Symbol("+".to_string())),
Token::LeftParen, Localised::dummy_location(Token::LeftParen),
Token::Symbol("*".to_string()), Localised::dummy_location(Token::Symbol("*".to_string())),
Token::Symbol("1:u8".to_string()), Localised::dummy_location(Token::Symbol("1:u8".to_string())),
Token::Symbol("2".to_string()), Localised::dummy_location(Token::Symbol("2".to_string())),
Token::RightParen, Localised::dummy_location(Token::RightParen),
Token::Symbol("2".to_string()), Localised::dummy_location(Token::Symbol("2".to_string())),
Token::RightParen, Localised::dummy_location(Token::RightParen),
]; ];
assert_eq!(tokenize(input), expected); assert_eq!(tokenize(input), expected);
} }
// A stray `)` must surface as `ParseError::UnexpectedParenClose` pointing at that paren.
// The `expected` substring pins the error variant and its location (the third character of
// line 1); a bare `#[should_panic]` would also pass on any unrelated panic.
#[test]
#[should_panic(expected = "UnexpectedParenClose(Char { line: 1, col: 3 })")]
fn test_unexpected_pclose() {
    let input = "())";
    let tokens = tokenize(input);
    // `unwrap` panics with the `Debug` form of the error, which `expected` matches against.
    let _ast = parse(tokens).unwrap();
}
// A list that is never closed must surface as `ParseError::UnexpectedEof`.
// The `expected` substring pins the error variant; a bare `#[should_panic]` would also pass
// on any unrelated panic.
#[test]
#[should_panic(expected = "UnexpectedEof")]
fn test_unexpected_eof() {
    let input = "(1 2 3 4";
    let tokens = tokenize(input);
    // `unwrap` panics with the `Debug` form of the error, which `expected` matches against.
    let _ast = parse(tokens).unwrap();
}
#[test] #[test]
fn test_parse() { fn test_parse() {
let src = " let src = "
@ -34,92 +50,92 @@ fn test_parse() {
1.0 1.0
1.0))) 1.0)))
"; ";
let ast = parse(tokenize(src)); let ast = parse(tokenize(src)).unwrap();
println!("{:?}", ast); println!("{:?}", ast);
let test_ast: Ast = Ast::Root(vec![ let test_ast: Ast = Ast::Root(vec![
Ast::List(vec![ Ast::List(Localised::dummy_location(vec![
Ast::Symbol("module".to_string()), Ast::Symbol(Localised::dummy_location("module".to_string())),
Ast::Symbol("Shader".to_string()), Ast::Symbol(Localised::dummy_location("Shader".to_string())),
Ast::Symbol("Logical".to_string()), Ast::Symbol(Localised::dummy_location("Logical".to_string())),
Ast::Symbol("GLSL450".to_string()), Ast::Symbol(Localised::dummy_location("GLSL450".to_string())),
]), ])),
Ast::List(vec![ Ast::List(Localised::dummy_location(vec![
Ast::Symbol("import".to_string()), Ast::Symbol(Localised::dummy_location("import".to_string())),
Ast::Symbol(":std".to_string()), Ast::Symbol(Localised::dummy_location(":std".to_string())),
Ast::Symbol("GLSL.std.450".to_string()), Ast::Symbol(Localised::dummy_location("GLSL.std.450".to_string())),
]), ])),
Ast::List(vec![ Ast::List(Localised::dummy_location(vec![
Ast::Symbol("bind".to_string()), Ast::Symbol(Localised::dummy_location("bind".to_string())),
Ast::List(vec![ Ast::List(Localised::dummy_location(vec![Ast::Symbol(
Ast::Symbol("frag-coord:*v4f32i".to_string()), Localised::dummy_location("frag-coord:*v4f32i".to_string()),
]), )])),
Ast::List(vec![ Ast::List(Localised::dummy_location(vec![
Ast::Symbol("BuiltIn".to_string()), Ast::Symbol(Localised::dummy_location("Builtin".to_string())),
Ast::Symbol("FragCoord".to_string()), Ast::Symbol(Localised::dummy_location("FragCoord".to_string())),
]), ])),
]), ])),
Ast::List(vec![ Ast::List(Localised::dummy_location(vec![
Ast::Symbol("bind".to_string()), Ast::Symbol(Localised::dummy_location("bind".to_string())),
Ast::List(vec![ Ast::List(Localised::dummy_location(vec![Ast::Symbol(
Ast::Symbol("out-color:*v4f32o".to_string()), Localised::dummy_location("out-color:*v4f32o".to_string()),
]), )])),
Ast::List(vec![ Ast::List(Localised::dummy_location(vec![
Ast::Symbol("Location".to_string()), Ast::Symbol(Localised::dummy_location("Location".to_string())),
Ast::Symbol("0".to_string()), Ast::Symbol(Localised::dummy_location("0".to_string())),
]), ])),
]), ])),
Ast::List(vec![ Ast::List(Localised::dummy_location(vec![
Ast::Symbol("dec".to_string()), Ast::Symbol(Localised::dummy_location("dec".to_string())),
Ast::Symbol("frag-coord:*v4f32i".to_string()), Ast::Symbol(Localised::dummy_location("frag-coord:*v4f32i".to_string())),
Ast::Symbol("Input".to_string()), Ast::Symbol(Localised::dummy_location("Input".to_string())),
]), ])),
Ast::List(vec![ Ast::List(Localised::dummy_location(vec![
Ast::Symbol("dec".to_string()), Ast::Symbol(Localised::dummy_location("dec".to_string())),
Ast::Symbol("out-color:*v4f32o".to_string()), Ast::Symbol(Localised::dummy_location("out-color:*v4f32o".to_string())),
Ast::Symbol("Output".to_string()), Ast::Symbol(Localised::dummy_location("Output".to_string())),
]), ])),
Ast::List(vec![ Ast::List(Localised::dummy_location(vec![
Ast::Symbol("entry".to_string()), Ast::Symbol(Localised::dummy_location("entry".to_string())),
Ast::Symbol("main".to_string()), Ast::Symbol(Localised::dummy_location("main".to_string())),
Ast::Symbol("Fragment".to_string()), Ast::Symbol(Localised::dummy_location("Fragment".to_string())),
Ast::Symbol("OriginUpperLeft".to_string()), Ast::Symbol(Localised::dummy_location("OriginUpperLeft".to_string())),
Ast::List(vec![ Ast::List(Localised::dummy_location(vec![
Ast::Symbol(":frag-coord".to_string()), Ast::Symbol(Localised::dummy_location(":frag-coord".to_string())),
Ast::Symbol(":out-color".to_string()), Ast::Symbol(Localised::dummy_location(":out-color".to_string())),
]), ])),
]), ])),
Ast::List(vec![ Ast::List(Localised::dummy_location(vec![
Ast::Symbol("fun".to_string()), Ast::Symbol(Localised::dummy_location("fun".to_string())),
Ast::List(vec![ Ast::List(Localised::dummy_location(vec![Ast::Symbol(
Ast::Symbol("main".to_string()), Localised::dummy_location("main".to_string()),
]), )])),
Ast::List(vec![ Ast::List(Localised::dummy_location(vec![
Ast::Symbol("store-ptr".to_string()), Ast::Symbol(Localised::dummy_location("store-ptr".to_string())),
Ast::List(vec![ Ast::List(Localised::dummy_location(vec![Ast::Symbol(
Ast::Symbol("out-color".to_string()), Localised::dummy_location("out-color".to_string()),
]), )])),
Ast::List(vec![ Ast::List(Localised::dummy_location(vec![
Ast::Symbol("v4f32i".to_string()), Ast::Symbol(Localised::dummy_location("v4f23i".to_string())),
Ast::List(vec![ Ast::List(Localised::dummy_location(vec![
Ast::Symbol("/".to_string()), Ast::Symbol(Localised::dummy_location("/".to_string())),
Ast::List(vec![ Ast::List(Localised::dummy_location(vec![
Ast::Symbol(".xy".to_string()), Ast::Symbol(Localised::dummy_location(".xy".to_string())),
Ast::List(vec![ Ast::List(Localised::dummy_location(vec![
Ast::Symbol("load-ptr".to_string()), Ast::Symbol(Localised::dummy_location("load-ptr".to_string())),
Ast::Symbol("frag-coord".to_string()), Ast::Symbol(Localised::dummy_location("frag-coord".to_string())),
]), ])),
]), ])),
Ast::List(vec![ Ast::List(Localised::dummy_location(vec![
Ast::Symbol("v2f32".to_string()), Ast::Symbol(Localised::dummy_location("v2f32".to_string())),
Ast::Symbol("1920.0".to_string()), Ast::Symbol(Localised::dummy_location("1920.0".to_string())),
Ast::Symbol("1080.0".to_string()), Ast::Symbol(Localised::dummy_location("1080.0".to_string())),
]), ])),
]), ])),
Ast::Symbol("1.0".to_string()), Ast::Symbol(Localised::dummy_location("1.0".to_string())),
Ast::Symbol("1.0".to_string()), Ast::Symbol(Localised::dummy_location("1.0".to_string())),
]), ])),
]), ])),
]), ])),
]); ]);
assert_eq!(ast, test_ast); assert_eq!(ast, test_ast);
} }

Loading…
Cancel
Save