From 305e95846d61c3cab0c39e361413d44d2f69e2bc Mon Sep 17 00:00:00 2001 From: Avery Date: Sun, 16 Mar 2025 12:54:29 +0100 Subject: [PATCH] New parser and grammar --- Cargo.lock | 618 ++++++++++++++++++++++++++++++++++++++++++ Cargo.toml | 5 + README.md | 4 +- build.rs | 3 + src/exec/builtins.rs | 8 +- src/exec/mod.rs | 12 +- src/exec/test.rs | 14 +- src/inference/mod.rs | 18 +- src/inference/test.rs | 4 +- src/lib.rs | 8 +- src/main.rs | 23 +- src/parse/mod.rs | 251 +++++------------ src/parse/sexpr.rs | 92 ------- src/parse/test.rs | 276 +++++++++++++++---- src/parse/tokenize.rs | 141 ++++++++++ src/parser.lalrpop | 86 ++++++ src/types/mod.rs | 9 + 17 files changed, 1188 insertions(+), 384 deletions(-) create mode 100644 build.rs delete mode 100644 src/parse/sexpr.rs create mode 100644 src/parse/tokenize.rs create mode 100644 src/parser.lalrpop diff --git a/Cargo.lock b/Cargo.lock index f4b76a5..fca689d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,6 +2,624 @@ # It is not intended for manual editing. version = 4 +[[package]] +name = "aho-corasick" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" +dependencies = [ + "memchr", +] + +[[package]] +name = "ascii-canvas" +version = "4.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef1e3e699d84ab1b0911a1010c5c106aa34ae89aeac103be5ce0c3859db1e891" +dependencies = [ + "term", +] + +[[package]] +name = "autocfg" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26" + +[[package]] +name = "beef" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3a8241f3ebb85c056b509d4327ad0358fbbba6ffb340bf388f26350aeda225b1" + +[[package]] +name = "bit-set" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08807e080ed7f9d5433fa9b275196cfc35414f66a0c79d864dc51a0d825231a3" +dependencies = [ + "bit-vec", +] + +[[package]] +name = "bit-vec" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e764a1d40d510daf35e07be9eb06e75770908c27d411ee6c92109c9840eaaf7" + +[[package]] +name = "bitflags" +version = "2.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c8214115b7bf84099f1309324e63141d4c5d7cc26862f97a0a857dbefe165bd" + +[[package]] +name = "block-buffer" +version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" +dependencies = [ + "generic-array", +] + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "cpufeatures" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "59ed5838eebb26a2bb2e58f6d5b5316989ae9d08bab10e0e6d103e656d1b0280" +dependencies = [ + "libc", +] + +[[package]] +name = "crypto-common" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3" +dependencies = [ + "generic-array", + "typenum", +] + +[[package]] +name = "digest" +version = "0.10.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" +dependencies = [ + "block-buffer", + "crypto-common", +] + +[[package]] +name = "either" +version = "1.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" + +[[package]] +name = "ena" +version = "0.14.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d248bdd43ce613d87415282f69b9bb99d947d290b10962dd6c56233312c2ad5" +dependencies = [ + "log", +] + +[[package]] +name = "equivalent" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" + +[[package]] +name = "fixedbitset" +version = "0.5.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d674e81391d1e1ab681a28d99df07927c6d4aa5b027d7da16ba32d1d21ecd99" + +[[package]] +name = "fnv" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" + +[[package]] +name = "generic-array" +version = "0.14.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" +dependencies = [ + "typenum", + "version_check", +] + +[[package]] +name = "hashbrown" +version = "0.15.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bf151400ff0baff5465007dd2f3e717f3fe502074ca563069ce3a6629d07b289" + +[[package]] +name = "home" +version = "0.5.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "589533453244b0995c858700322199b2becb13b627df2851f64a2775d024abcf" +dependencies = [ + "windows-sys 0.59.0", +] + +[[package]] +name = "indexmap" +version = "2.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3954d50fe15b02142bf25d3b8bdadb634ec3948f103d04ffe3031bc8fe9d7058" +dependencies = [ + "equivalent", + "hashbrown", +] + +[[package]] +name = "itertools" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b192c782037fadd9cfa75548310488aabdbf3d2da73885b31bd0abd03351285" +dependencies = [ + "either", +] + +[[package]] +name = "keccak" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ecc2af9a1119c51f12a14607e783cb977bde58bc069ff0c3da1095e635d70654" +dependencies = [ + "cpufeatures", +] + +[[package]] +name = "lalrpop" +version = "0.22.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7047a26de42016abf8f181b46b398aef0b77ad46711df41847f6ed869a2a1d5b" +dependencies = [ + "ascii-canvas", + "bit-set", + "ena", + "itertools", + "lalrpop-util", + "petgraph", + "pico-args", + "regex", + "regex-syntax", + "sha3", + "string_cache", + "term", + "unicode-xid", + "walkdir", +] + +[[package]] +name = "lalrpop-util" +version = "0.22.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e8d05b3fe34b8bd562c338db725dfa9beb9451a48f65f129ccb9538b48d2c93b" +dependencies = [ + "regex-automata", + "rustversion", +] + +[[package]] +name = "lazy_static" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" + +[[package]] +name = "libc" +version = "0.2.171" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c19937216e9d3aa9956d9bb8dfc0b0c8beb6058fc4f7a4dc4d850edf86a237d6" + +[[package]] +name = "lock_api" +version = "0.4.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07af8b9cdd281b7915f413fa73f29ebd5d55d0d3f0155584dade1ff18cea1b17" +dependencies = [ + "autocfg", + "scopeguard", +] + +[[package]] +name = "log" +version = "0.4.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "30bde2b3dc3671ae49d8e2e9f044c7c005836e7a023ee57cffa25ab82764bb9e" + +[[package]] +name = "logos" +version = "0.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ab6f536c1af4c7cc81edf73da1f8029896e7e1e16a219ef09b184e76a296f3db" +dependencies = [ + "logos-derive", +] + +[[package]] +name = "logos-codegen" +version = "0.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "189bbfd0b61330abea797e5e9276408f2edbe4f822d7ad08685d67419aafb34e" +dependencies = [ + "beef", + "fnv", + "lazy_static", + "proc-macro2", + "quote", + "regex-syntax", + "rustc_version", + "syn", +] + +[[package]] +name = "logos-derive" +version = "0.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebfe8e1a19049ddbfccbd14ac834b215e11b85b90bab0c2dba7c7b92fb5d5cba" +dependencies = [ + "logos-codegen", +] + +[[package]] +name = "memchr" +version = "2.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" + +[[package]] +name = "new_debug_unreachable" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "650eef8c711430f1a879fdd01d4745a7deea475becfb90269c06775983bbf086" + +[[package]] +name = "parking_lot" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f1bf18183cf54e8d6059647fc3063646a1801cf30896933ec2311622cc4b9a27" +dependencies = [ + "lock_api", + "parking_lot_core", +] + +[[package]] +name = "parking_lot_core" +version = "0.9.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e401f977ab385c9e4e3ab30627d6f26d00e2c73eef317493c4ec6d468726cf8" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall", + "smallvec", + "windows-targets", +] + +[[package]] +name = "petgraph" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3672b37090dbd86368a4145bc067582552b29c27377cad4e0a306c97f9bd7772" +dependencies = [ + "fixedbitset", + "indexmap", +] + +[[package]] +name = "phf_shared" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67eabc2ef2a60eb7faa00097bd1ffdb5bd28e62bf39990626a582201b7a754e5" +dependencies = [ + "siphasher", +] + +[[package]] +name = "pico-args" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5be167a7af36ee22fe3115051bc51f6e6c7054c9348e28deb4f49bd6f705a315" + +[[package]] +name = "precomputed-hash" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c" + +[[package]] +name = "proc-macro2" +version = "1.0.94" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a31971752e70b8b2686d7e46ec17fb38dad4051d94024c88df49b667caea9c84" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "redox_syscall" +version = "0.5.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b8c0c260b63a8219631167be35e6a988e9554dbd323f8bd08439c8ed1302bd1" +dependencies = [ + "bitflags", +] + +[[package]] +name = "regex" +version = "1.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" + +[[package]] +name = "rustc_version" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cfcb3a22ef46e85b45de6ee7e79d063319ebb6594faafcf1c225ea92ab6e9b92" +dependencies = [ + "semver", +] + +[[package]] +name = "rustversion" +version = "1.0.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eded382c5f5f786b989652c49544c4877d9f015cc22e145a5ea8ea66c2921cd2" + +[[package]] +name = "same-file" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" +dependencies = [ + "winapi-util", +] + +[[package]] +name = "scopeguard" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" + +[[package]] +name = "semver" +version = "1.0.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56e6fa9c48d24d85fb3de5ad847117517440f6beceb7798af16b4a87d616b8d0" + +[[package]] +name = "sha3" +version = "0.10.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75872d278a8f37ef87fa0ddbda7802605cb18344497949862c0d4dcb291eba60" +dependencies = [ + "digest", + "keccak", +] + +[[package]] +name = "siphasher" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56199f7ddabf13fe5074ce809e7d3f42b42ae711800501b5b16ea82ad029c39d" + +[[package]] +name = "smallvec" +version = "1.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7fcf8323ef1faaee30a44a340193b1ac6814fd9b7b4e88e9d4519a3e4abe1cfd" + [[package]] name = "stlc_type_inference" version = "0.1.0" +dependencies = [ + "lalrpop", + "lalrpop-util", + "logos", +] + +[[package]] +name = "string_cache" +version = "0.8.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "938d512196766101d333398efde81bc1f37b00cb42c2f8350e5df639f040bbbe" +dependencies = [ + "new_debug_unreachable", + "parking_lot", + "phf_shared", + "precomputed-hash", +] + +[[package]] +name = "syn" +version = "2.0.100" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b09a44accad81e1ba1cd74a32461ba89dee89095ba17b32f5d03683b1b1fc2a0" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "term" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a3bb6001afcea98122260987f8b7b5da969ecad46dbf0b5453702f776b491a41" +dependencies = [ + "home", + "windows-sys 0.52.0", +] + +[[package]] +name = "typenum" +version = "1.18.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1dccffe3ce07af9386bfd29e80c0ab1a8205a2fc34e4bcd40364df902cfa8f3f" + +[[package]] +name = "unicode-ident" +version = "1.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" + +[[package]] +name = "unicode-xid" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853" + +[[package]] +name = "version_check" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" + +[[package]] +name = "walkdir" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" +dependencies = [ + "same-file", + "winapi-util", +] + +[[package]] +name = "winapi-util" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb" +dependencies = [ + "windows-sys 0.59.0", +] + +[[package]] +name = "windows-sys" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-sys" +version = "0.59.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-targets" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_gnullvm", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" diff --git a/Cargo.toml b/Cargo.toml index 6e03258..c214c82 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -4,3 +4,8 @@ version = "0.1.0" edition = "2024" [dependencies] +lalrpop-util = "0.22.1" +logos = "0.15.0" + +[build-dependencies] +lalrpop = "0.22.1" diff --git a/README.md b/README.md index 18f3806..aac85be 100644 --- a/README.md +++ b/README.md @@ -2,13 +2,13 @@ ### Abstractions: ``` -(\ (x T) (e)) +\x:T.e ``` Where `x` is an identifier, `T` a type, and `e` an expression ### Aplication: ``` -(lhs rhs) +lhs rhs ``` Where `lhs` and `rhs` are expressions diff --git a/build.rs b/build.rs new file mode 100644 index 0000000..ca5c283 --- /dev/null +++ b/build.rs @@ -0,0 +1,3 @@ +fn main() { + lalrpop::process_root().unwrap(); +} diff --git a/src/exec/builtins.rs b/src/exec/builtins.rs index 08400e7..419d842 100644 --- a/src/exec/builtins.rs +++ b/src/exec/builtins.rs @@ -9,11 +9,11 @@ use super::DeBrujinAst; #[derive(Clone)] pub enum DeBrujinBuiltInAst { - Abstraction(Ident, Type, Box), // \:1.2 + Abstraction(Ident, TaggedType, Box), // \:1.2 Application(Box, Box), // 0 1 - FreeVariable(String), // x - BoundVariable(usize), // 1 - Constant(Constant), // true | false | n + FreeVariable(String), // x + BoundVariable(usize), // 1 + Constant(Constant), // true | false | n Builtin(Rc), } diff --git a/src/exec/mod.rs b/src/exec/mod.rs index b7d6c9a..c8a746e 100644 --- a/src/exec/mod.rs +++ b/src/exec/mod.rs @@ -9,8 +9,8 @@ pub use builtins::Builtin; use std::{collections::HashMap, rc::Rc}; use crate::{ - Ident, Type, parse::{Ast, Constant}, + types::{Ident, TaggedType}, vec_map::VecMap, }; @@ -48,11 +48,11 @@ impl Ast { #[derive(Debug, Clone, PartialEq)] pub enum DeBrujinAst { - Abstraction(Ident, Type, Box), // \:1.2 - Application(Box, Box), // 0 1 - FreeVariable(String), // x - BoundVariable(usize), // 1 - Constant(Constant), // true | false | n + Abstraction(Ident, TaggedType, Box), // \:1.2 + Application(Box, Box), // 0 1 + FreeVariable(String), // x + BoundVariable(usize), // 1 + Constant(Constant), // true | false | n } impl Into for DeBrujinAst { diff --git a/src/exec/test.rs b/src/exec/test.rs index f11a310..7552893 100644 --- a/src/exec/test.rs +++ b/src/exec/test.rs @@ -1,9 +1,9 @@ use std::{collections::HashMap, rc::Rc}; use crate::{ - PrimitiveType, Type, exec::{DeBrujinAst as DBAst, builtin_definitions::AddOp}, parse::{Ast, Constant}, + types::{PrimitiveType, Type}, }; use super::builtins::Builtin; @@ -12,10 +12,10 @@ use super::builtins::Builtin; fn to_de_brujin_ast_simple() { let input = Ast::Abstraction( "x".to_string(), - PrimitiveType::Nat.into(), + Type::Primitive(PrimitiveType::Nat).into(), Box::new(Ast::Abstraction( "x".to_string(), - PrimitiveType::Nat.into(), + Type::Primitive(PrimitiveType::Nat).into(), Box::new(Ast::Variable("x".to_string())), )), ); @@ -24,10 +24,10 @@ fn to_de_brujin_ast_simple() { de_brujin, DBAst::Abstraction( "x".to_string(), - PrimitiveType::Nat.into(), + Type::Primitive(PrimitiveType::Nat).into(), Box::new(DBAst::Abstraction( "x".to_string(), - PrimitiveType::Nat.into(), + Type::Primitive(PrimitiveType::Nat).into(), Box::new(DBAst::BoundVariable(1)) )) ) @@ -39,7 +39,7 @@ fn de_brujin_beta_reduce() { let input = Ast::Application( Box::new(Ast::Abstraction( "x".to_string(), - Type::arrow(PrimitiveType::Nat, PrimitiveType::Nat), + Type::arrow(PrimitiveType::Nat, PrimitiveType::Nat).into(), Box::new(Ast::Application( Box::new(Ast::Variable("x".to_string())), Box::new(Ast::Constant(Constant::Nat(5))), @@ -63,7 +63,7 @@ fn to_and_from_de_brujin_is_id() { let input = Ast::Application( Box::new(Ast::Abstraction( "x".to_string(), - Type::arrow(PrimitiveType::Nat, PrimitiveType::Nat), + Type::arrow(PrimitiveType::Nat, PrimitiveType::Nat).into(), Box::new(Ast::Application( Box::new(Ast::Variable("x".to_string())), Box::new(Ast::Constant(Constant::Nat(5))), diff --git a/src/inference/mod.rs b/src/inference/mod.rs index 204c348..a005a5e 100644 --- a/src/inference/mod.rs +++ b/src/inference/mod.rs @@ -1,9 +1,9 @@ use std::{collections::HashMap, convert::Infallible, error::Error, rc::Rc}; use crate::{ - DeBrujinAst, Ident, PrimitiveType, Type, + DeBrujinAst, parse::{Ast, Constant}, - types::{TaggedType, TypeTag}, + types::{Ident, PrimitiveType, TaggedType, Type, TypeTag}, vec_map::VecMap, }; @@ -38,15 +38,13 @@ fn infer_type_debrujin_int( gamma_ref.insert(1, arg_type.clone().into()); let out_type = infer_type_debrujin_int(gamma_free, gamma_bound, *ast)?; - let typ = Type::Arrow( - Box::new(arg_type), - Box::new( - out_type - .to_concrete() - .ok_or(InferError::ExpectedConreteType)?, - ), + // TODO: Fix this hack + let typ = arg_type.make_arrow( + out_type + .to_concrete() + .ok_or(InferError::ExpectedConreteType)?, ); - Ok(typ.into()) + Ok(typ) } DeBrujinAst::Application(lhs, rhs) => { let left_type = infer_type_debrujin_int(gamma_free, gamma_bound.clone(), *lhs)?; diff --git a/src/inference/test.rs b/src/inference/test.rs index ad50c5e..130ad96 100644 --- a/src/inference/test.rs +++ b/src/inference/test.rs @@ -1,10 +1,10 @@ use std::{collections::HashMap, rc::Rc}; use crate::{ - DeBrujinAst, PrimitiveType, Type, + DeBrujinAst, inference::infer_type, parse::{Ast, Constant}, - types::{TaggedType, TypeTag}, + types::{PrimitiveType, TaggedType, Type, TypeTag}, }; #[test] diff --git a/src/lib.rs b/src/lib.rs index f1dc7e8..6b18129 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -8,9 +8,7 @@ mod parse; mod types; mod vec_map; -pub use exec::Builtin; -pub use exec::DeBrujinAst; -pub use exec::builtin; +pub use exec::{Builtin, DeBrujinAst, builtin}; pub use inference::infer_type; -pub use parse::{Ast, ParseError, is_ident, parse, parse_type, sexpr::parse_string}; -use types::{Ident, PrimitiveType, Type}; +use lalrpop_util::lalrpop_mod; +pub use parse::{Ast, parse_ast_str, parse_type_str}; diff --git a/src/main.rs b/src/main.rs index eb3ddb1..fe364d7 100644 --- a/src/main.rs +++ b/src/main.rs @@ -5,7 +5,7 @@ use std::{ }; use stlc_type_inference::{ - Ast, Builtin, DeBrujinAst, builtin, infer_type, is_ident, parse, parse_string, parse_type, + Ast, Builtin, DeBrujinAst, builtin, infer_type, parse_ast_str, parse_type_str, }; macro_rules! repl_err { @@ -34,12 +34,12 @@ fn main() { if let Some(tl) = line.strip_prefix(':') { if let Some((cmd, expr)) = tl.split_once(' ') { match cmd { - "t" => match parse(expr) { + "t" => match parse_ast_str(expr) { Ok(a) => match infer_type(&gamma, a.into()) { Ok(t) => println!("{t}"), Err(e) => repl_err!("Could not infer type {e:?}"), }, - Err(e) => repl_err!("Parse error {e:?}"), + Err(e) => repl_err!("Parse error {e}"), }, "ctx" => { if let Some((ident, typ)) = expr.split_once(':') { @@ -48,14 +48,9 @@ fn main() { if !is_ident(&ident) { repl_err!("{ident} is not a valid identifer"); } - let typ_ast = match parse_string(typ) { + let typ = match parse_type_str(&typ) { Ok(t) => t, - Err(e) => repl_err!("Could not parse the type: {e:?}"), - }; - - let typ = match parse_type(&typ_ast) { - Ok(t) => t, - Err(e) => repl_err!("type could not be parsed {e:?}"), + Err(e) => repl_err!("type could not be parsed {e}"), }; if !gamma.contains_key(&ident) { @@ -76,9 +71,9 @@ fn main() { } } } else { - let ast = match parse(&line) { + let ast = match parse_ast_str(&line) { Ok(a) => a, - Err(e) => repl_err!("Parse error {e:?}"), + Err(e) => repl_err!("Parse error {e}"), }; let ast: DeBrujinAst = ast.into(); @@ -95,3 +90,7 @@ fn main() { stdout().flush().unwrap(); } } + +fn is_ident(s: &str) -> bool { + s.starts_with(|c: char| c.is_alphabetic()) && s.chars().all(|c| c.is_alphanumeric()) +} diff --git a/src/parse/mod.rs b/src/parse/mod.rs index 6141b0b..9c3eee5 100644 --- a/src/parse/mod.rs +++ b/src/parse/mod.rs @@ -1,32 +1,34 @@ -use std::fmt::Display; +use std::{ + fmt::Display, + num::{ParseFloatError, ParseIntError}, + str::ParseBoolError, +}; -use sexpr::{Sexpr, parse_string}; +use crate::types::{Ident, PrimitiveType, TaggedType, Type, TypeTag}; +use lalrpop_util::{ParseError as LALRPopError, lalrpop_mod}; +use tokenize::Lexer; -use crate::{PrimitiveType, Type, types::Ident}; - -pub mod sexpr; #[cfg(test)] mod test; -#[derive(Debug)] +mod tokenize; + +lalrpop_mod!(parser); + +#[derive(Default, Debug, Clone, PartialEq)] pub enum ParseError { - UnexpectedParenClose, - UnexpectedEof, - TrailingTokens, - TrailingExpr, - ToplevelSymbol, - InvalidSymbol, - UnexpectedEndOfList, - UnknownType, - ExpectedList, - ExpectedSymbol, - ExpectedLambda, - ExpectedIdent, - ExpectedType, - NotAType, - ExpectedBody, - ExpectedArrow, - ExpectedOneOf(Vec, String), + UnknownTypeTag(String), + InvalidInteger(ParseIntError), + InvalidFloat(ParseFloatError), + InvalidBool(ParseBoolError), + #[default] + InvalidToken, +} + +impl Display for ParseError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{self:?}") //TODO better impl + } } #[derive(Debug, Clone, PartialEq)] @@ -38,185 +40,64 @@ pub enum Constant { #[derive(Debug, Clone, PartialEq)] pub enum Ast { - Abstraction(Ident, Type, Box), // \0:1.2 - Application(Box, Box), // 0 1 - Variable(Ident), // x - Constant(Constant), // true | false | n -} - -impl Display for Ast { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - Ast::Abstraction(var, typ, ast) => write!(f, "(\\ ({var} {typ}) ({ast}))"), - Ast::Application(lhs, rhs) => write!(f, "({lhs} {rhs})"), - Ast::Variable(v) => write!(f, "{v}"), - Ast::Constant(constant) => write!(f, "{constant}"), - } - } -} - -impl Display for Constant { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - Constant::Nat(n) => write!(f, "{n}"), - Constant::Bool(b) => write!(f, "{b}"), - Constant::Float(fl) => write!(f, "{fl}"), - } - } -} - -fn expect_symbol(ast: Option) -> Result { - match ast { - Some(Sexpr::Symbol(s)) => Ok(s), - Some(l) => Err(ParseError::ExpectedSymbol), - None => Err(ParseError::ExpectedSymbol), - } + Abstraction(Ident, TaggedType, Box), // \0:1.2 + Application(Box, Box), // 0 1 + Variable(Ident), // x + Constant(Constant), // true | false | n } -fn expect_ident(ast: Option) -> Result { - let sym = expect_symbol(ast)?; - if is_ident(&sym) { - Ok(sym) - } else { - Err(ParseError::ExpectedIdent) - } -} - -fn expect_list(ast: Option) -> Result, ParseError> { - match ast { - Some(Sexpr::List(l)) => Ok(l), - Some(l) => Err(ParseError::ExpectedList), - None => Err(ParseError::ExpectedList), - } +pub fn parse_ast_str(src: &str) -> Result> { + let parser = parser::AstParser::new(); + parser.parse(Lexer::new(src)) } -fn expect_one_of(options: &[T], item: String) -> Result -where - T: PartialEq + Into + Clone, -{ - if options.iter().find(|e| **e == item).is_some() { - Ok(item) - } else { - Err(ParseError::ExpectedOneOf( - options - .iter() - .map(|t| Into::::into(t.clone())) - .collect(), - item, - )) - } +pub fn parse_type_str( + src: &str, +) -> Result> { + let parser = parser::TaggedTypeParser::new(); + parser.parse(Lexer::new(src)) } -fn expect_empty>(mut iter: I) -> Result<(), ParseError> { - match iter.next() { - Some(_) => Err(ParseError::TrailingTokens), - None => Ok(()), - } -} +impl TryFrom for TypeTag { + type Error = ParseError; -pub fn parse(input: &str) -> Result { - let ast = parse_string(input)?; - match ast { - Sexpr::Symbol(s) => parse_symbol(s), - list => parse_intern(list), - } -} - -fn parse_intern(ast: Sexpr) -> Result { - match ast { - Sexpr::Symbol(s) => parse_symbol(s), - Sexpr::List(sexprs) => { - let mut iter = sexprs.into_iter(); - match iter.next() { - Some(Sexpr::Symbol(sym)) => { - if sym == "\\" { - let bind = expect_list(iter.next())?; - let mut bind = bind.into_iter(); - let ident = expect_ident(bind.next())?; - let typ = parse_type(&bind.next().ok_or(ParseError::ExpectedType)?)?; - expect_empty(bind)?; - let ast = Ast::Abstraction( - ident, - typ, - Box::new(parse_intern(iter.next().ok_or(ParseError::ExpectedBody)?)?), - ); - expect_empty(iter)?; - Ok(ast) - } else { - let ast = parse_symbol(sym)?; - if let Some(e) = iter.next() { - let rhs = parse_intern(e)?; - expect_empty(iter)?; - Ok(Ast::Application(Box::new(ast), Box::new(rhs))) - } else { - Ok(ast) - } - } - } - Some(app_left) => { - if let Some(app_right) = iter.next() { - expect_empty(iter)?; - Ok(Ast::Application( - Box::new(parse_intern(app_left)?), - // Make it back into an Sexpr so we can feed it to parse intern - Box::new(parse_intern(app_right)?), - )) - } else { - Err(ParseError::UnexpectedEndOfList) - } - } - None => Err(ParseError::UnexpectedEndOfList), - } + fn try_from(value: String) -> Result { + match value.as_str() { + "Num" => Ok(TypeTag::Num), + "Any" => Ok(TypeTag::Any), + _ => Err(ParseError::UnknownTypeTag(value)), } } } -fn parse_symbol(s: String) -> Result { - if let Ok(n) = s.parse::() { - Ok(Ast::Constant(Constant::Nat(n))) - } else if let Ok(f) = s.parse::() { - Ok(Ast::Constant(Constant::Float(f))) - } else if let Ok(b) = s.parse::() { - Ok(Ast::Constant(Constant::Bool(b))) - } else if is_ident(&s) { - Ok(Ast::Variable(s)) - } else { - Err(ParseError::InvalidSymbol) - } -} - -pub fn is_ident(s: &str) -> bool { - s.starts_with(|c: char| c.is_alphabetic()) && s.chars().all(|c| c.is_alphanumeric()) -} - -pub fn parse_type(ast: &Sexpr) -> Result { - match ast { - Sexpr::Symbol(s) => parse_prim_type(s), - Sexpr::List(sexprs) => parse_type_list(sexprs), +impl Type { + pub fn from_name(name: String) -> Self { + match name.as_str() { + "Nat" => Self::Primitive(PrimitiveType::Nat), + "Float" => Self::Primitive(PrimitiveType::Float), + "Bool" => Self::Primitive(PrimitiveType::Bool), + a => a.into(), + } } } -fn parse_type_list(typ: &[Sexpr]) -> Result { - let Some(t) = typ.get(0) else { todo!() }; - - if typ.get(1).is_some() { - let arr = expect_symbol(typ.get(1).cloned())?; - if arr != "->" { - return Err(ParseError::ExpectedArrow); +impl Display for Ast { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Ast::Abstraction(var, typ, ast) => write!(f, "(\\{var}:{typ}.{ast})"), + Ast::Application(lhs, rhs) => write!(f, "{lhs} {rhs}"), + Ast::Variable(v) => write!(f, "{v}"), + Ast::Constant(constant) => write!(f, "{constant}"), } - Ok(Type::Arrow( - Box::new(parse_type(t)?), - Box::new(parse_type_list(&typ[2..])?), - )) - } else { - parse_type(t) } } -fn parse_prim_type(typ: &str) -> Result { - match typ { - "Bool" => Ok(Type::Primitive(PrimitiveType::Bool)), - "Nat" => Ok(Type::Primitive(PrimitiveType::Nat)), - _ => Err(ParseError::UnknownType), +impl Display for Constant { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Constant::Nat(n) => write!(f, "{n}"), + Constant::Bool(b) => write!(f, "{b}"), + Constant::Float(fl) => write!(f, "{fl}"), + } } } diff --git a/src/parse/sexpr.rs b/src/parse/sexpr.rs deleted file mode 100644 index 60d0110..0000000 --- a/src/parse/sexpr.rs +++ /dev/null @@ -1,92 +0,0 @@ -use std::iter::Peekable; -use std::ops::{Deref, RangeInclusive}; -use std::usize; -use std::vec::IntoIter; - -use super::ParseError; - -#[derive(Debug, PartialEq, Clone)] -pub enum Token { - LeftParen, - RightParen, - Symbol(String), -} - -#[derive(Debug, PartialEq, Clone)] -pub enum Sexpr { - Symbol(String), - List(Vec), -} - -impl Sexpr { - pub fn symbol(self) -> Option { - match self { - Sexpr::Symbol(item) => Some(item), - _ => None, - } - } - - pub fn list(self) -> Option> { - match self { - Sexpr::List(item) => Some(item), - _ => None, - } - } -} - -fn tokenize(input: &str) -> Vec { - let mut tokens = Vec::new(); - // let mut chars = input.chars().peekable(); - let mut chars = input.chars().peekable(); - while let Some(c) = chars.next() { - match c { - '(' => tokens.push(Token::LeftParen), - ')' => tokens.push(Token::RightParen), - _ if c.is_whitespace() => (), - _ => { - let mut symbol = c.to_string(); - while let Some(c) = chars.peek() { - if c.is_whitespace() || *c == '(' || *c == ')' { - break; - } - symbol.push(*c); - chars.next(); - } - tokens.push(Token::Symbol(symbol)); - } - } - } - tokens -} - -fn parse_expr(tokens: &mut Peekable>) -> Result { - match tokens.next() { - Some(Token::LeftParen) => { - let mut list = Vec::new(); - while !matches!(tokens.peek(), Some(Token::RightParen,)) { - list.push(parse_expr(tokens)?); - } - let Some(Token::RightParen) = tokens.next() else { - unreachable!() - }; - Ok(Sexpr::List(list)) - } - Some(Token::RightParen) => Err(ParseError::UnexpectedParenClose), - Some(Token::Symbol(s)) => Ok(Sexpr::Symbol(s)), - None => Err(ParseError::UnexpectedEof), - } -} - -fn parse(tokens: Vec) -> Result { - let mut tokens = tokens.into_iter().peekable(); - let ast = parse_expr(&mut tokens)?; - if tokens.peek().is_some() { - return Err(ParseError::TrailingTokens); - }; - Ok(ast) -} - -pub fn parse_string(src: &str) -> Result { - let tokens = tokenize(src); - parse(tokens) -} diff --git a/src/parse/test.rs b/src/parse/test.rs index a250a00..a096ec2 100644 --- a/src/parse/test.rs +++ b/src/parse/test.rs @@ -1,96 +1,254 @@ use crate::{ - PrimitiveType, Type, parse::{ Ast, Constant, - sexpr::{Sexpr, parse_string}, + tokenize::{self, Lexer}, }, + types::{PrimitiveType, TaggedType, Type, TypeTag}, }; -use super::{parse, parse_type}; +use super::parser; #[test] -fn parse_to_sexpr() { - let input = "((\\x:Nat.x) (5))"; - let parsed = parse_string(input).unwrap(); - assert_eq!( - parsed, - Sexpr::List(vec![ - Sexpr::List(vec![Sexpr::Symbol("\\x:Nat.x".to_string())]), - Sexpr::List(vec![Sexpr::Symbol("5".to_string())]) - ]) - ); -} +fn parse_constant() { + let parser = parser::AstParser::new(); + let ast = parser.parse(Lexer::new("5")).unwrap(); + assert_eq!(ast, Ast::Constant(Constant::Nat(5))); -#[test] -fn parse_prim_type() { - let input = Sexpr::Symbol("Nat".to_string()); - let parsed = parse_type(&input).unwrap(); - assert_eq!(parsed, Type::Primitive(PrimitiveType::Nat)) + let ast = parser.parse(Lexer::new("3.14")).unwrap(); + assert_eq!(ast, Ast::Constant(Constant::Float(3.14))); + + let ast = parser.parse(Lexer::new("true")).unwrap(); + assert_eq!(ast, Ast::Constant(Constant::Bool(true))); } #[test] -fn parse_simpl_arr_type() { - let input = Sexpr::List(vec![ - Sexpr::Symbol("Nat".to_string()), - Sexpr::Symbol("->".to_string()), - Sexpr::Symbol("Nat".to_string()), - ]); - let parsed = parse_type(&input).unwrap(); - assert_eq!(parsed, Type::arrow(PrimitiveType::Nat, PrimitiveType::Nat)) +fn parse_variable() { + let parser = parser::AstParser::new(); + let ast = parser.parse(Lexer::new("x")).unwrap(); + assert_eq!(ast, Ast::Variable("x".to_string())); } #[test] -fn parse_apply_arr_type() { - let input = Sexpr::List(vec![ - Sexpr::List(vec![ - Sexpr::Symbol("Nat".to_string()), - Sexpr::Symbol("->".to_string()), - Sexpr::Symbol("Nat".to_string()), - ]), - Sexpr::Symbol("->".to_string()), - Sexpr::Symbol("Nat".to_string()), - Sexpr::Symbol("->".to_string()), - Sexpr::Symbol("Nat".to_string()), - ]); - let parsed = parse_type(&input).unwrap(); +fn parse_type() { + let parser = parser::TaggedTypeParser::new(); + let ast = parser.parse(Lexer::new("Num a => a -> a -> a")).unwrap(); + assert_eq!( + ast, + TaggedType::Tagged( + TypeTag::Num, + "a".to_string(), + Box::new(TaggedType::Concrete(Type::arrow( + "a", + Type::arrow("a", "a") + ))) + ) + ); + + let ast = parser + .parse(Lexer::new("Any a => (a -> a) -> a -> a")) + .unwrap(); assert_eq!( - parsed, - Type::arrow( - Type::arrow(PrimitiveType::Nat, PrimitiveType::Nat), - Type::arrow(PrimitiveType::Nat, PrimitiveType::Nat) + ast, + TaggedType::Tagged( + TypeTag::Any, + "a".to_string(), + Box::new(TaggedType::Concrete(Type::arrow( + Type::arrow("a", "a"), + Type::arrow("a", "a") + ))) ) - ) + ); + + let ast = parser + .parse(Lexer::new("Any a => Any b => (a -> b) -> a -> b")) + .unwrap(); + assert_eq!( + ast, + TaggedType::Tagged( + TypeTag::Any, + "a".to_string(), + Box::new(TaggedType::Tagged( + TypeTag::Any, + "b".to_string(), + Box::new(TaggedType::Concrete(Type::arrow( + Type::arrow("a", "b"), + Type::arrow("a", "b") + ))) + )) + ) + ); + + let ast = parser.parse(Lexer::new("Nat -> Nat")).unwrap(); + assert_eq!( + ast, + TaggedType::Concrete(Type::arrow(PrimitiveType::Nat, PrimitiveType::Nat)) + ); } #[test] fn parse_abstraction() { - let input = "(\\ (x (Nat -> Nat)) (x 5))"; - let parsed = parse(input).unwrap(); + let parser = parser::AstParser::new(); + let ast = parser.parse(Lexer::new(r"\x:Nat.x")).unwrap(); assert_eq!( - parsed, + ast, Ast::Abstraction( "x".to_string(), - Type::arrow(PrimitiveType::Nat, PrimitiveType::Nat), - Box::new(Ast::Application( - Box::new(Ast::Variable("x".to_string())), - Box::new(Ast::Constant(Constant::Nat(5))) + Type::Primitive(PrimitiveType::Nat).into(), + Box::new(Ast::Variable("x".to_string())) + ) + ); + + let ast = parser.parse(Lexer::new(r"\x:Nat.\y:Nat.x")).unwrap(); + assert_eq!( + ast, + Ast::Abstraction( + "x".to_string(), + Type::Primitive(PrimitiveType::Nat).into(), + Box::new(Ast::Abstraction( + "y".to_string(), + Type::Primitive(PrimitiveType::Nat).into(), + Box::new(Ast::Variable("x".to_string())) )) ) - ) + ); + + let ast = parser + .parse(Lexer::new(r"\x:Any a => a.\y:Any b => b.x")) + .unwrap(); + assert_eq!( + ast, + Ast::Abstraction( + "x".to_string(), + TaggedType::Tagged( + TypeTag::Any, + "a".to_string(), + Box::new(TaggedType::Concrete("a".into())) + ), + Box::new(Ast::Abstraction( + "y".to_string(), + TaggedType::Tagged( + TypeTag::Any, + "b".to_string(), + Box::new(TaggedType::Concrete("b".into())) + ), + Box::new(Ast::Variable("x".to_string())) + )) + ) + ); } #[test] fn parse_application() { - let input = "((add 5) 6)"; - let parsed = parse(input).unwrap(); + let parser = parser::AstParser::new(); + let ast = parser.parse(Lexer::new(r"(\x:Nat.x) 5")).unwrap(); assert_eq!( - parsed, + ast, + Ast::Application( + Box::new(Ast::Abstraction( + "x".to_string(), + Type::Primitive(PrimitiveType::Nat).into(), + Box::new(Ast::Variable("x".to_string())) + )), + Box::new(Ast::Constant(Constant::Nat(5))) + ) + ); + + let ast = parser.parse(Lexer::new(r"add 1")).unwrap(); + assert_eq!( + ast, + Ast::Application( + Box::new(Ast::Variable("add".to_string())), + Box::new(Ast::Constant(Constant::Nat(1))) + ), + ); + + let ast = parser.parse(Lexer::new(r"(add 1) 2")).unwrap(); + assert_eq!( + ast, + Ast::Application( + Box::new(Ast::Application( + Box::new(Ast::Variable("add".to_string())), + Box::new(Ast::Constant(Constant::Nat(1))) + )), + Box::new(Ast::Constant(Constant::Nat(2))) + ), + ); + + let ast = parser.parse(Lexer::new(r"add 1 2")).unwrap(); + assert_eq!( + ast, + Ast::Application( + Box::new(Ast::Application( + Box::new(Ast::Variable("add".to_string())), + Box::new(Ast::Constant(Constant::Nat(1))) + )), + Box::new(Ast::Constant(Constant::Nat(2))) + ), + ); + + let ast = parser.parse(Lexer::new(r"add 1 (x y)")).unwrap(); + assert_eq!( + ast, Ast::Application( Box::new(Ast::Application( Box::new(Ast::Variable("add".to_string())), - Box::new(Ast::Constant(Constant::Nat(5))) + Box::new(Ast::Constant(Constant::Nat(1))) + )), + Box::new(Ast::Application( + Box::new(Ast::Variable("x".to_string())), + Box::new(Ast::Variable("y".to_string())), )), - Box::new(Ast::Constant(Constant::Nat(6))) + ), + ); + + let ast = parser + .parse(Lexer::new( + r"(\x:Any a => a -> a.\y:Num a => a. add 1 (x y)) (\x:Any a => a.x) 2", + )) + .unwrap(); + assert_eq!( + ast, + Ast::Application( + Box::new(Ast::Application( + Box::new(Ast::Abstraction( + "x".to_string(), + TaggedType::Tagged( + TypeTag::Any, + "a".to_string(), + Box::new(TaggedType::Concrete(Type::arrow("a", "a"))) + ), + Box::new(Ast::Abstraction( + "y".to_string(), + TaggedType::Tagged( + TypeTag::Num, + "a".to_string(), + Box::new(TaggedType::Concrete("a".into())) + ), + Box::new(Ast::Application( + Box::new(Ast::Application( + Box::new(Ast::Variable("add".to_string())), + Box::new(Ast::Constant(Constant::Nat(1))) + )), + Box::new(Ast::Application( + Box::new(Ast::Variable("x".to_string())), + Box::new(Ast::Variable("y".to_string())), + )) + )) + )), + )), + Box::new(Ast::Abstraction( + "x".to_string(), + TaggedType::Tagged( + TypeTag::Any, + "a".to_string(), + Box::new(TaggedType::Concrete("a".into())) + ), + Box::new(Ast::Variable("x".to_string())) + )) + )), + Box::new(Ast::Constant(Constant::Nat(2))) ) - ) + ); } + +// (\x:Any a => a -> a.\y:Num a => a. + 1 (x y)) (\x:Any a => a.x) 2 diff --git a/src/parse/tokenize.rs b/src/parse/tokenize.rs new file mode 100644 index 0000000..a79093f --- /dev/null +++ b/src/parse/tokenize.rs @@ -0,0 +1,141 @@ +use std::{ + fmt::{self, Display, Formatter}, + num::{ParseFloatError, ParseIntError}, + str::ParseBoolError, +}; + +use logos::{Logos, SpannedIter}; + +use super::ParseError; + +impl From for ParseError { + fn from(err: ParseIntError) -> Self { + Self::InvalidInteger(err) + } +} + +impl From for ParseError { + fn from(err: ParseFloatError) -> Self { + Self::InvalidFloat(err) + } +} + +impl From for ParseError { + fn from(err: ParseBoolError) -> Self { + Self::InvalidBool(err) + } +} + +#[derive(Logos, Debug, Clone)] +#[logos(skip r"[ \t\n\r\f]+", error = ParseError)] +pub enum Token { + // #[regex(r#""([^\"\\]|\\.)*""#, |lex| {let slice = lex.slice(); slice[1..slice.len() - 1].to_string()})] + // String(String), + #[regex(r#"\d+"#, |lex| lex.slice().parse::())] + Nat(usize), + #[regex(r#"-?\d+\.\d+"#, |lex| lex.slice().parse::())] + Float(f64), + #[regex(r#"(true|false)"#, |lex| lex.slice().parse::())] + Bool(bool), + + #[regex(r#"[_a-zA-Z][_a-zA-Z0-9]*'*"#, |lex| lex.slice().to_string())] + Ident(String), + + #[token(r"\")] + Lambda, + #[token("=>")] + FatArrow, + #[token("->")] + ThinArrow, + #[token(":")] + Colon, + #[token(".")] + Period, + + #[token("=")] + Equals, + #[token("<>")] + NotEquals, + #[token("+")] + Plus, + #[token("-")] + Minus, + #[token("*")] + Star, + #[token("/")] + Slash, + #[token("^")] + Hat, + #[token("<")] + Less, + #[token(">")] + Greater, + #[token("<=")] + LessEq, + #[token(">=")] + GreaterEq, + #[token(r"/\")] + Conjunction, + #[token(r"\/")] + Disjunction, + + #[token("(")] + ParenOpen, + #[token(")")] + ParenClose, +} + +pub type Spanned = Result<(Loc, Tok, Loc), Error>; + +pub struct Lexer<'input> { + // instead of an iterator over characters, we have a token iterator + token_stream: SpannedIter<'input, Token>, +} +impl<'input> Lexer<'input> { + pub fn new(input: &'input str) -> Self { + // the Token::lexer() method is provided by the Logos trait + Self { + token_stream: Token::lexer(input).spanned(), + } + } +} +impl<'input> Iterator for Lexer<'input> { + type Item = Spanned; + + fn next(&mut self) -> Option { + self.token_stream + .next() + .map(|(token, span)| Ok((span.start, token?, span.end))) + } +} + +impl Display for Token { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + match self { + Token::Lambda => write!(f, r"\"), + Token::FatArrow => write!(f, "=>"), + Token::ThinArrow => write!(f, "->"), + Token::Colon => write!(f, ":"), + Token::Period => write!(f, "."), + Token::Equals => write!(f, "="), + Token::NotEquals => write!(f, "<>"), + Token::Plus => write!(f, "+"), + Token::Minus => write!(f, "-"), + Token::Star => write!(f, "*"), + Token::Slash => write!(f, "/"), + Token::Hat => write!(f, "^"), + Token::Less => write!(f, "<"), + Token::Greater => write!(f, ">"), + Token::LessEq => write!(f, "<="), + Token::GreaterEq => write!(f, ">="), + Token::Conjunction => write!(f, r"/\"), + Token::Disjunction => write!(f, r"\/"), + Token::ParenOpen => write!(f, "("), + Token::ParenClose => write!(f, ")"), + Token::Nat(n) => write!(f, "{n}"), + Token::Float(n) => write!(f, "{n}"), + Token::Bool(b) => write!(f, "{b}"), + Token::Ident(i) => write!(f, "{i}"), + } + } +} diff --git a/src/parser.lalrpop b/src/parser.lalrpop new file mode 100644 index 0000000..e66b50c --- /dev/null +++ b/src/parser.lalrpop @@ -0,0 +1,86 @@ +use crate::{ + parse::{Ast, Constant, tokenize as lexer, ParseError as PError}, + types::{TaggedType, TypeTag, Type, PrimitiveType}, +}; +use lalrpop_util::ParseError; + +grammar; + +extern { + type Location = usize; + type Error = PError; + + enum lexer::Token { + Nat => lexer::Token::Nat(), + Float => lexer::Token::Float(), + Bool => lexer::Token::Bool(), + + Ident => lexer::Token::Ident(), + + r"\" => lexer::Token::Lambda, + "=>" => lexer::Token::FatArrow, + "->" => lexer::Token::ThinArrow, + ":" => lexer::Token::Colon, + "." => lexer::Token::Period, + + "=" => lexer::Token::Equals, + "<>" => lexer::Token::NotEquals, + "+" => lexer::Token::Plus, + "-" => lexer::Token::Minus, + "*" => lexer::Token::Star, + "/" => lexer::Token::Slash, + "^" => lexer::Token::Hat, + "<" => lexer::Token::Less, + ">" => lexer::Token::Greater, + "<=" => lexer::Token::LessEq, + ">=" => lexer::Token::GreaterEq, + r"/\" => lexer::Token::Conjunction, + r"\/" => lexer::Token::Disjunction, + + "(" => lexer::Token::ParenOpen, + ")" => lexer::Token::ParenClose, + } + +} + +pub Ast: Ast = { + Term => <>, + r"\" ":" "." => Ast::Abstraction(x, t, Box::new(ast)), +}; + + +Term: Ast = { + #[precedence(level="0")] + Ident => Ast::Variable(<>), + #[precedence(level="0")] + Constant => Ast::Constant(<>), + #[precedence(level="1")] + "(" ")" => <>, + #[precedence(level="2")] #[assoc(side = "left")] + => Ast::Application(Box::new(lhs), Box::new(rhs)), +}; + +Constant: Constant = { + Nat => Constant::Nat(<>), + Float => Constant::Float(<>), + Bool => Constant::Bool(<>), +}; + +pub TaggedType: TaggedType = { + "=>" => TaggedType::Tagged(t, i, Box::new(tt)), + => TaggedType::Concrete(t), +}; + +TypeTag: TypeTag = Ident =>? TypeTag::try_from(<>).map_err(|e| ParseError::User{ error: e }); + +Type: Type = { BasicType, ArrowType }; + +BasicType: Type = Ident => Type::from_name(<>); + +ArrowType: Type = { + #[precedence(level="0")] + "(" ")" "->" => Type::Arrow(Box::new(l), Box::new(r)), + #[precedence(level="1")] #[assoc(side = "right")] + "->" => Type::Arrow(Box::new(l), Box::new(r)), +}; + diff --git a/src/types/mod.rs b/src/types/mod.rs index a4aaafc..4a5766f 100644 --- a/src/types/mod.rs +++ b/src/types/mod.rs @@ -92,6 +92,15 @@ impl TaggedType { } } + pub fn make_arrow(self, rhs: Type) -> Self { + match self { + TaggedType::Tagged(type_tag, ident, tagged_type) => { + TaggedType::Tagged(type_tag, ident, Box::new(tagged_type.make_arrow(rhs))) + } + TaggedType::Concrete(t) => Type::arrow(t, rhs).into(), + } + } + pub fn specialize(self, ident: &str, typ: &Type) -> TaggedType { match self { TaggedType::Tagged(_, i, tagged_type) if i == ident => {