Add error handling and location tracking to the tokenizer and parser

pull/3/head
Avery 3 weeks ago committed by itycodes
parent 5f041f491a
commit b4ab40f10e

@ -1,10 +1,10 @@
use std::fmt::Write;
pub mod parser;
pub mod compiler;
pub mod parser;
fn main() {
let mut ops: Vec<(Option<String>, Vec<String>)> = Vec::new();
// OpMemoryModel Logical GLSL450
// OpEntryPoint Fragment %main "main"
// OpExecutionMode %main OriginUpperLeft
@ -18,23 +18,88 @@ fn main() {
//%5 = OpLabel
// OpReturn
// OpFunctionEnd
ops.push((None, vec!["OpCapability".to_string(), "Shader".to_string()]));
ops.push((Some("%1".to_string()), vec!["OpExtInstImport".to_string(), "\"GLSL.std.450\"".to_string()]));
ops.push((None, vec!["OpMemoryModel".to_string(), "Logical".to_string(), "GLSL450".to_string()]));
ops.push((None, vec!["OpEntryPoint".to_string(), "Fragment".to_string(), "%main".to_string(), "\"main\"".to_string()]));
ops.push((None, vec!["OpExecutionMode".to_string(), "%main".to_string(), "OriginUpperLeft".to_string()]));
ops.push((None, vec!["OpSource".to_string(), "GLSL".to_string(), "450".to_string()]));
ops.push((None, vec!["OpSourceExtension".to_string(), "\"GL_GOOGLE_cpp_style_line_directive\"".to_string()]));
ops.push((None, vec!["OpSourceExtension".to_string(), "\"GL_GOOGLE_include_directive\"".to_string()]));
ops.push((None, vec!["OpName".to_string(), "%main".to_string(), "\"main\"".to_string()]));
ops.push((
Some("%1".to_string()),
vec![
"OpExtInstImport".to_string(),
"\"GLSL.std.450\"".to_string(),
],
));
ops.push((
None,
vec![
"OpMemoryModel".to_string(),
"Logical".to_string(),
"GLSL450".to_string(),
],
));
ops.push((
None,
vec![
"OpEntryPoint".to_string(),
"Fragment".to_string(),
"%main".to_string(),
"\"main\"".to_string(),
],
));
ops.push((
None,
vec![
"OpExecutionMode".to_string(),
"%main".to_string(),
"OriginUpperLeft".to_string(),
],
));
ops.push((
None,
vec![
"OpSource".to_string(),
"GLSL".to_string(),
"450".to_string(),
],
));
ops.push((
None,
vec![
"OpSourceExtension".to_string(),
"\"GL_GOOGLE_cpp_style_line_directive\"".to_string(),
],
));
ops.push((
None,
vec![
"OpSourceExtension".to_string(),
"\"GL_GOOGLE_include_directive\"".to_string(),
],
));
ops.push((
None,
vec![
"OpName".to_string(),
"%main".to_string(),
"\"main\"".to_string(),
],
));
ops.push((Some("%void".to_string()), vec!["OpTypeVoid".to_string()]));
ops.push((Some("%3".to_string()), vec!["OpTypeFunction".to_string(), "%void".to_string()]));
ops.push((Some("%main".to_string()), vec!["OpFunction".to_string(), "%void".to_string(), "None".to_string(), "%3".to_string()]));
ops.push((
Some("%3".to_string()),
vec!["OpTypeFunction".to_string(), "%void".to_string()],
));
ops.push((
Some("%main".to_string()),
vec![
"OpFunction".to_string(),
"%void".to_string(),
"None".to_string(),
"%3".to_string(),
],
));
ops.push((Some("%5".to_string()), vec!["OpLabel".to_string()]));
ops.push((None, vec!["OpReturn".to_string()]));
ops.push((None, vec!["OpFunctionEnd".to_string()]));
let mut out: String = String::new();
for op in ops {

@ -2,9 +2,94 @@
mod tests;
use std::iter::Peekable;
use std::ops::RangeInclusive;
use std::vec::IntoIter;
/// Position of a piece of source text.
///
/// The tokenizer numbers both lines and columns starting from 1.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum Location {
    /// A single character at `line`, `col`.
    Char { line: usize, col: usize },
    /// A span of text, described by its first/last line and first/last column.
    String {
        lines: RangeInclusive<usize>,
        cols: RangeInclusive<usize>,
    },
}
/// Errors reported while turning tokens into an [`Ast`].
#[derive(Debug)]
pub enum ParseError {
    /// A `)` appeared where an expression was expected; carries the location of that `)`.
    UnexpectedParenClose(Location),
    /// The tokens ran out while an expression was still expected.
    UnexpectedEof,
}

impl std::fmt::Display for ParseError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            ParseError::UnexpectedParenClose(location) => write!(
                f,
                "unexpected `)` at line {}, column {}",
                location.line_start(),
                location.col_start()
            ),
            ParseError::UnexpectedEof => f.write_str("unexpected end of input"),
        }
    }
}

impl std::error::Error for ParseError {}
impl Location {
/// Since [`Localised`] doesn't test for location match, allow for creating simple dummy
/// locations for testing
#[cfg(test)]
pub fn dummy() -> Location {
Self::Char { line: 0, col: 0 }
}
pub fn range(start: &Location, end: &Location) -> Location {
Location::String {
lines: start.line_start()..=end.line_end(),
cols: start.col_start()..=end.col_end(),
}
}
pub fn line_start(&self) -> usize {
match self {
Location::Char { line, .. } => *line,
Location::String { lines, .. } => *lines.start(),
}
}
pub fn col_start(&self) -> usize {
match self {
Location::Char { col, .. } => *col,
Location::String { cols, .. } => *cols.start(),
}
}
pub fn line_end(&self) -> usize {
match self {
Location::Char { line, .. } => *line,
Location::String { lines, .. } => *lines.end(),
}
}
pub fn col_end(&self) -> usize {
match self {
Location::Char { col, .. } => *col,
Location::String { cols, .. } => *cols.end(),
}
}
}
/// A value paired with the [`Location`] it was read from.
///
/// Public because it appears in the signatures of the public tokenizer and
/// parser functions and inside the public `Ast` variants.
#[derive(Debug, Clone)]
pub struct Localised<T: Clone> {
    location: Location,
    item: T,
}
impl<T: Clone> Localised<T> {
    /// Wraps `item` with the dummy location, for building expected values in tests.
    #[cfg(test)]
    pub fn dummy_location(item: T) -> Self {
        let location = Location::dummy();
        Localised { location, item }
    }
}
impl<T: PartialEq + Clone> PartialEq for Localised<T> {
    /// Compares only the wrapped items; the locations play no part in equality.
    fn eq(&self, rhs: &Self) -> bool {
        self.item == rhs.item
    }
}
#[derive(Debug, PartialEq, Clone)]
pub enum Token {
LeftParen,
RightParen,
@ -13,56 +98,109 @@ pub enum Token {
#[derive(Debug, PartialEq, Clone)]
pub enum Ast {
Symbol(String),
List(Vec<Ast>),
Symbol(Localised<String>),
List(Localised<Vec<Ast>>),
Root(Vec<Ast>),
}
pub fn tokenize(input: &str) -> Vec<Token> {
pub fn tokenize(input: &str) -> Vec<Localised<Token>> {
let mut tokens = Vec::new();
let mut chars = input.chars().peekable();
while let Some(c) = chars.next() {
// let mut chars = input.chars().peekable();
let mut chars = (1..)
.zip(input.split('\n'))
.flat_map(|(l_num, l)| {
(1..).zip(l.chars()).map(move |(c_num, c)| Localised {
location: Location::Char {
line: l_num,
col: c_num,
},
item: c,
})
})
.peekable();
while let Some(Localised { location, item: c }) = chars.next() {
match c {
'(' => tokens.push(Token::LeftParen),
')' => tokens.push(Token::RightParen),
'(' => tokens.push(Localised {
location,
item: Token::LeftParen,
}),
')' => tokens.push(Localised {
location,
item: Token::RightParen,
}),
_ if c.is_whitespace() => (),
_ => {
let start = location.clone();
let mut end = location;
let mut symbol = c.to_string();
while let Some(&c) = chars.peek() {
if c.is_whitespace() || c == '(' || c == ')' {
while let Some(Localised { item: c, .. }) = chars.peek() {
if c.is_whitespace() || *c == '(' || *c == ')' {
break;
}
symbol.push(c);
chars.next();
symbol.push(*c);
let Localised { location, .. } = chars.next().unwrap();
end = location;
}
tokens.push(Token::Symbol(symbol));
tokens.push(Localised {
location: Location::range(&start, &end),
item: Token::Symbol(symbol),
});
}
}
}
tokens
}
fn parse_expr(tokens: &mut Peekable<IntoIter<Token>>) -> Ast {
fn parse_expr(tokens: &mut Peekable<IntoIter<Localised<Token>>>) -> Result<Ast, ParseError> {
match tokens.next() {
Some(Token::LeftParen) => {
Some(Localised {
location: start,
item: Token::LeftParen,
}) => {
let mut list = Vec::new();
while tokens.peek() != Some(&Token::RightParen) {
list.push(parse_expr(tokens));
while !matches!(
tokens.peek(),
Some(Localised {
item: Token::RightParen,
..
})
) {
list.push(parse_expr(tokens)?);
}
tokens.next();
Ast::List(list)
let Some(Localised {
location: end,
item: Token::RightParen,
}) = tokens.next()
else {
unreachable!()
};
Ok(Ast::List(Localised {
location: Location::range(&start, &end),
item: list,
}))
}
Some(Token::RightParen) => panic!("unexpected )"),
Some(Token::Symbol(s)) => Ast::Symbol(s),
None => panic!("unexpected EOF"),
Some(Localised {
location,
item: Token::RightParen,
}) => Err(ParseError::UnexpectedParenClose(location)),
Some(Localised {
location,
item: Token::Symbol(s),
}) => Ok(Ast::Symbol(Localised { location, item: s })),
None => Err(ParseError::UnexpectedEof),
}
}
/// Parses a whole token stream into an [`Ast::Root`] holding every top-level expression.
///
/// # Errors
///
/// Returns the first [`ParseError`] reported while parsing an expression; the
/// expressions parsed before it are discarded.
pub fn parse(tokens: Vec<Localised<Token>>) -> Result<Ast, ParseError> {
    let mut tokens = tokens.into_iter().peekable();
    let mut ast: Vec<Ast> = Vec::new();
    // Keep reading top-level expressions until the stream is exhausted.
    while tokens.peek().is_some() {
        ast.push(parse_expr(&mut tokens)?);
    }
    Ok(Ast::Root(ast))
}
/// Tokenizes `src` and parses the resulting tokens in one step.
///
/// # Errors
///
/// Returns whatever [`ParseError`] `parse` reports for the tokens.
pub fn parse_string(src: &str) -> Result<Ast, ParseError> {
    parse(tokenize(src))
}

@ -1,22 +1,38 @@
use crate::parser::{tokenize, parse, Token, Ast};
use crate::parser::{Ast, Localised, Token, parse, tokenize};
#[test]
fn test_tokenize() {
    let input = "(+ (* 1:u8 2) 2)";
    let expected = vec![
        Localised::dummy_location(Token::LeftParen),
        Localised::dummy_location(Token::Symbol("+".to_string())),
        Localised::dummy_location(Token::LeftParen),
        Localised::dummy_location(Token::Symbol("*".to_string())),
        Localised::dummy_location(Token::Symbol("1:u8".to_string())),
        Localised::dummy_location(Token::Symbol("2".to_string())),
        Localised::dummy_location(Token::RightParen),
        Localised::dummy_location(Token::Symbol("2".to_string())),
        Localised::dummy_location(Token::RightParen),
    ];
    let tokens = tokenize(input);
    assert_eq!(tokens, expected);

    // `Localised` equality ignores locations, so the comparison above says nothing
    // about position tracking; spot-check a symbol span and the final paren.
    let symbol = &tokens[4].location; // "1:u8" occupies columns 7 through 10
    assert_eq!(symbol.line_start(), 1);
    assert_eq!(symbol.line_end(), 1);
    assert_eq!(symbol.col_start(), 7);
    assert_eq!(symbol.col_end(), 10);
    let last = &tokens[8].location; // closing paren is the 16th character
    assert_eq!(last.line_start(), 1);
    assert_eq!(last.col_start(), 16);
}
// Asserts the exact error variant instead of `#[should_panic]`, which would also
// pass if an unrelated panic occurred in the tokenizer or parser.
#[test]
fn test_unexpected_pclose() {
    let input = "())";
    let tokens = tokenize(input);
    assert!(matches!(
        parse(tokens),
        Err(crate::parser::ParseError::UnexpectedParenClose(_))
    ));
}
// Asserts the exact error variant instead of `#[should_panic]`, which would also
// pass if an unrelated panic occurred in the tokenizer or parser.
#[test]
fn test_unexpected_eof() {
    let input = "(1 2 3 4";
    let tokens = tokenize(input);
    assert!(matches!(
        parse(tokens),
        Err(crate::parser::ParseError::UnexpectedEof)
    ));
}
#[test]
fn test_parse() {
let src = "
@ -34,92 +50,92 @@ fn test_parse() {
1.0
1.0)))
";
let ast = parse(tokenize(src));
let ast = parse(tokenize(src)).unwrap();
println!("{:?}", ast);
let test_ast: Ast = Ast::Root(vec![
Ast::List(vec![
Ast::Symbol("module".to_string()),
Ast::Symbol("Shader".to_string()),
Ast::Symbol("Logical".to_string()),
Ast::Symbol("GLSL450".to_string()),
]),
Ast::List(vec![
Ast::Symbol("import".to_string()),
Ast::Symbol(":std".to_string()),
Ast::Symbol("GLSL.std.450".to_string()),
]),
Ast::List(vec![
Ast::Symbol("bind".to_string()),
Ast::List(vec![
Ast::Symbol("frag-coord:*v4f32i".to_string()),
]),
Ast::List(vec![
Ast::Symbol("BuiltIn".to_string()),
Ast::Symbol("FragCoord".to_string()),
]),
]),
Ast::List(vec![
Ast::Symbol("bind".to_string()),
Ast::List(vec![
Ast::Symbol("out-color:*v4f32o".to_string()),
]),
Ast::List(vec![
Ast::Symbol("Location".to_string()),
Ast::Symbol("0".to_string()),
]),
]),
Ast::List(vec![
Ast::Symbol("dec".to_string()),
Ast::Symbol("frag-coord:*v4f32i".to_string()),
Ast::Symbol("Input".to_string()),
]),
Ast::List(vec![
Ast::Symbol("dec".to_string()),
Ast::Symbol("out-color:*v4f32o".to_string()),
Ast::Symbol("Output".to_string()),
]),
Ast::List(vec![
Ast::Symbol("entry".to_string()),
Ast::Symbol("main".to_string()),
Ast::Symbol("Fragment".to_string()),
Ast::Symbol("OriginUpperLeft".to_string()),
Ast::List(vec![
Ast::Symbol(":frag-coord".to_string()),
Ast::Symbol(":out-color".to_string()),
]),
]),
Ast::List(vec![
Ast::Symbol("fun".to_string()),
Ast::List(vec![
Ast::Symbol("main".to_string()),
]),
Ast::List(vec![
Ast::Symbol("store-ptr".to_string()),
Ast::List(vec![
Ast::Symbol("out-color".to_string()),
]),
Ast::List(vec![
Ast::Symbol("v4f32i".to_string()),
Ast::List(vec![
Ast::Symbol("/".to_string()),
Ast::List(vec![
Ast::Symbol(".xy".to_string()),
Ast::List(vec![
Ast::Symbol("load-ptr".to_string()),
Ast::Symbol("frag-coord".to_string()),
]),
]),
Ast::List(vec![
Ast::Symbol("v2f32".to_string()),
Ast::Symbol("1920.0".to_string()),
Ast::Symbol("1080.0".to_string()),
]),
]),
Ast::Symbol("1.0".to_string()),
Ast::Symbol("1.0".to_string()),
]),
]),
]),
Ast::List(Localised::dummy_location(vec![
Ast::Symbol(Localised::dummy_location("module".to_string())),
Ast::Symbol(Localised::dummy_location("Shader".to_string())),
Ast::Symbol(Localised::dummy_location("Logical".to_string())),
Ast::Symbol(Localised::dummy_location("GLSL450".to_string())),
])),
Ast::List(Localised::dummy_location(vec![
Ast::Symbol(Localised::dummy_location("import".to_string())),
Ast::Symbol(Localised::dummy_location(":std".to_string())),
Ast::Symbol(Localised::dummy_location("GLSL.std.450".to_string())),
])),
Ast::List(Localised::dummy_location(vec![
Ast::Symbol(Localised::dummy_location("bind".to_string())),
Ast::List(Localised::dummy_location(vec![Ast::Symbol(
Localised::dummy_location("frag-coord:*v4f32i".to_string()),
)])),
Ast::List(Localised::dummy_location(vec![
Ast::Symbol(Localised::dummy_location("BuiltIn".to_string())),
Ast::Symbol(Localised::dummy_location("FragCoord".to_string())),
])),
])),
Ast::List(Localised::dummy_location(vec![
Ast::Symbol(Localised::dummy_location("bind".to_string())),
Ast::List(Localised::dummy_location(vec![Ast::Symbol(
Localised::dummy_location("out-color:*v4f32o".to_string()),
)])),
Ast::List(Localised::dummy_location(vec![
Ast::Symbol(Localised::dummy_location("Location".to_string())),
Ast::Symbol(Localised::dummy_location("0".to_string())),
])),
])),
Ast::List(Localised::dummy_location(vec![
Ast::Symbol(Localised::dummy_location("dec".to_string())),
Ast::Symbol(Localised::dummy_location("frag-coord:*v4f32i".to_string())),
Ast::Symbol(Localised::dummy_location("Input".to_string())),
])),
Ast::List(Localised::dummy_location(vec![
Ast::Symbol(Localised::dummy_location("dec".to_string())),
Ast::Symbol(Localised::dummy_location("out-color:*v4f32o".to_string())),
Ast::Symbol(Localised::dummy_location("Output".to_string())),
])),
Ast::List(Localised::dummy_location(vec![
Ast::Symbol(Localised::dummy_location("entry".to_string())),
Ast::Symbol(Localised::dummy_location("main".to_string())),
Ast::Symbol(Localised::dummy_location("Fragment".to_string())),
Ast::Symbol(Localised::dummy_location("OriginUpperLeft".to_string())),
Ast::List(Localised::dummy_location(vec![
Ast::Symbol(Localised::dummy_location(":frag-coord".to_string())),
Ast::Symbol(Localised::dummy_location(":out-color".to_string())),
])),
])),
Ast::List(Localised::dummy_location(vec![
Ast::Symbol(Localised::dummy_location("fun".to_string())),
Ast::List(Localised::dummy_location(vec![Ast::Symbol(
Localised::dummy_location("main".to_string()),
)])),
Ast::List(Localised::dummy_location(vec![
Ast::Symbol(Localised::dummy_location("store-ptr".to_string())),
Ast::List(Localised::dummy_location(vec![Ast::Symbol(
Localised::dummy_location("out-color".to_string()),
)])),
Ast::List(Localised::dummy_location(vec![
Ast::Symbol(Localised::dummy_location("v4f32i".to_string())),
Ast::List(Localised::dummy_location(vec![
Ast::Symbol(Localised::dummy_location("/".to_string())),
Ast::List(Localised::dummy_location(vec![
Ast::Symbol(Localised::dummy_location(".xy".to_string())),
Ast::List(Localised::dummy_location(vec![
Ast::Symbol(Localised::dummy_location("load-ptr".to_string())),
Ast::Symbol(Localised::dummy_location("frag-coord".to_string())),
])),
])),
Ast::List(Localised::dummy_location(vec![
Ast::Symbol(Localised::dummy_location("v2f32".to_string())),
Ast::Symbol(Localised::dummy_location("1920.0".to_string())),
Ast::Symbol(Localised::dummy_location("1080.0".to_string())),
])),
])),
Ast::Symbol(Localised::dummy_location("1.0".to_string())),
Ast::Symbol(Localised::dummy_location("1.0".to_string())),
])),
])),
])),
]);
assert_eq!(ast, test_ast);
}
}

Loading…
Cancel
Save