My research is in natural language processing, the subfield of computer science that aims to enable computers to understand and produce human language. I focus mainly on language translation, and am interested in syntactic parsing and other areas as well.
Teaching
Recent and selected publications
Andy Yang and David Chiang.
Counting like transformers: compiling temporal counting logic into softmax transformers.
2024.
arXiv:2404.04393.
PDF
BibTeX
@misc{yang-chiang-2024-counting,
author = "Yang, Andy and Chiang, David",
title = "Counting Like Transformers: Compiling Temporal Counting Logic Into Softmax Transformers",
year = "2024",
note = "{arXiv}:2404.04393"
}
Lena Strobl, Dana Angluin, David Chiang, Jonathan Rawski, and Ashish Sabharwal.
Transformers as transducers.
2024.
arXiv:2404.02040.
PDF
BibTeX
@misc{strobl-etal-2024-transducers,
  author = {Strobl, Lena and Angluin, Dana and Chiang, David and Rawski, Jonathan and Sabharwal, Ashish},
  title  = {Transformers as Transducers},
  note   = {{arXiv}:2404.02040},
  year   = {2024}
}
Fahim Faisal, Orevaoghene Ahia, Aarohi Srivastava, Kabir Ahuja, David Chiang, Yulia Tsvetkov, and Antonios Anastasopoulos.
DIALECTBENCH: a NLP benchmark for dialects, varieties, and closely-related languages.
2024.
arXiv:2403.11009.
PDF
BibTeX
@misc{faisal+:2023,
  author = {Faisal, Fahim and Ahia, Orevaoghene and Srivastava, Aarohi and Ahuja, Kabir and Chiang, David and Tsvetkov, Yulia and Anastasopoulos, Antonios},
  title  = {{DIALECTBENCH}: A {NLP} Benchmark for Dialects, Varieties, and Closely-Related Languages},
  note   = {{arXiv}:2403.11009},
  year   = {2024}
}
Stephen Bothwell, Brian DuSell, David Chiang, and Brian Krostenko.
PILA: a historical-linguistic dataset of Proto-Italic and Latin.
In Proc. LREC-COLING. 2024.
To appear.
BibTeX
@inproceedings{bothwell+:2024,
  author    = {Bothwell, Stephen and DuSell, Brian and Chiang, David and Krostenko, Brian},
  title     = {{PILA}: A Historical-Linguistic Dataset of {P}roto-{I}talic and {L}atin},
  year      = {2024},
  booktitle = {Proc. LREC-COLING},
  note      = {To appear}
}
Chihiro Taguchi, Jefferson Saransig, Dayana Velásquez, and David Chiang.
KILLKAN: the automatic speech recognition dataset for Kichwa with morphosyntactic information.
In Proc. LREC-COLING. 2024.
To appear.
BibTeX
@inproceedings{taguchi+:2024,
  author    = {Taguchi, Chihiro and Saransig, Jefferson and Vel{\'a}squez, Dayana and Chiang, David},
  title     = {{KILLKAN}: The Automatic Speech Recognition Dataset for {K}ichwa with Morphosyntactic Information},
  year      = {2024},
  booktitle = {Proc. LREC-COLING},
  note      = {To appear}
}
Dana Angluin, David Chiang, and Andy Yang.
Masked hard-attention transformers and Boolean RASP recognize exactly the star-free languages.
2023.
arXiv:2310.13897.
PDF
BibTeX
@misc{angluin+:2023,
  author = {Angluin, Dana and Chiang, David and Yang, Andy},
  title  = {Masked Hard-Attention Transformers and {B}oolean {RASP} Recognize Exactly the Star-Free Languages},
  note   = {{arXiv}:2310.13897},
  year   = {2023}
}
Lena Strobl, William Merrill, Gail Weiss, David Chiang, and Dana Angluin.
What formal languages can transformers express? A survey.
Transactions of the Association for Computational Linguistics, 2024.
To appear.
PDF
BibTeX
@article{strobl-etal-2024-survey,
  author  = {Strobl, Lena and Merrill, William and Weiss, Gail and Chiang, David and Angluin, Dana},
  title   = {What Formal Languages Can Transformers Express? {A} Survey},
  journal = {Transactions of the Association for Computational Linguistics},
  year    = {2024},
  note    = {To appear}
}
Brian DuSell and David Chiang.
Stack attention: improving the ability of transformers to model hierarchical patterns.
In Proc. ICLR. 2024.
PDF
BibTeX
@inproceedings{dusell+chiang:2024attention,
  author    = {DuSell, Brian and Chiang, David},
  title     = {Stack Attention: Improving the Ability of Transformers to Model Hierarchical Patterns},
  booktitle = {Proc. ICLR},
  year      = {2024},
  arxiv_url = {https://arxiv.org/abs/2310.01749}
}
Stephen Bothwell, Justin DeBenedetto, Theresa Crnkovich, Hildegund Müller, and David Chiang.
Introducing rhetorical parallelism detection: a new task with datasets, metrics, and baselines.
In Proc. EMNLP, 5007–5039. 2023.
doi:10.18653/v1/2023.emnlp-main.305.
PDF
BibTeX
@inproceedings{bothwell+:2023,
  author    = {Bothwell, Stephen and DeBenedetto, Justin and Crnkovich, Theresa and M{\"u}ller, Hildegund and Chiang, David},
  title     = {Introducing Rhetorical Parallelism Detection: A New Task with Datasets, Metrics, and Baselines},
  booktitle = {Proc. EMNLP},
  year      = {2023},
  pages     = {5007--5039},
  doi       = {10.18653/v1/2023.emnlp-main.305},
  arxiv_url = {https://arxiv.org/abs/2312.00100}
}
Alexandra Butoi, Tim Vieira, Ryan Cotterell, and David Chiang.
Efficient algorithms for recognizing weighted tree-adjoining languages.
In Proc. EMNLP. 2023.
PDF
BibTeX
@inproceedings{butoi+:2023efficient,
  author    = {Butoi, Alexandra and Vieira, Tim and Cotterell, Ryan and Chiang, David},
  title     = {Efficient Algorithms for Recognizing Weighted Tree-Adjoining Languages},
  year      = {2023},
  booktitle = {Proc. EMNLP}
}
Aarohi Srivastava and David Chiang.
BERTwich: extending BERT's capabilities to model dialectal and noisy text.
In Findings of ACL: EMNLP. 2023.
PDF
BibTeX
@inproceedings{srivastava+chiang:2023,
  author    = {Srivastava, Aarohi and Chiang, David},
  title     = {{BERTwich}: Extending {BERT}'s Capabilities to Model Dialectal and Noisy Text},
  year      = {2023},
  booktitle = {Findings of ACL: EMNLP}
}
Chihiro Taguchi, Yusuke Sakai, Parisa Haghani, and David Chiang.
Universal automatic phonetic transcription into the International Phonetic Alphabet.
In Proc. INTERSPEECH. 2023.
doi:10.21437/Interspeech.2023-2584.
PDF
BibTeX
@inproceedings{taguchi+:2023,
  author    = {Taguchi, Chihiro and Sakai, Yusuke and Haghani, Parisa and Chiang, David},
  title     = {Universal Automatic Phonetic Transcription into the {I}nternational {P}honetic {A}lphabet},
  year      = {2023},
  booktitle = {Proc. INTERSPEECH},
  doi       = {10.21437/Interspeech.2023-2584}
}
Alexandra Butoi, Ryan Cotterell, and David Chiang.
Convergence and diversity in the control hierarchy.
In Proc. ACL. 2023.
PDF
BibTeX
@inproceedings{butoi+:2023convergence,
  author    = {Butoi, Alexandra and Cotterell, Ryan and Chiang, David},
  title     = {Convergence and Diversity in the Control Hierarchy},
  year      = {2023},
  booktitle = {Proc. ACL}
}
David Chiang, Peter Cholak, and Anand Pillay.
Tighter bounds on the expressivity of transformer encoders.
In Proc. ICML, 5544–5562. 2023.
PDF
BibTeX
@inproceedings{chiang+cholak+pillay:2023,
  author    = {Chiang, David and Cholak, Peter and Pillay, Anand},
  title     = {Tighter Bounds on the Expressivity of Transformer Encoders},
  year      = {2023},
  booktitle = {Proc. ICML},
  pages     = {5544--5562}
}
Aarohi Srivastava and David Chiang.
Fine-tuning BERT with character-level noise for zero-shot transfer to dialects and closely-related languages.
In Proc. Workshop on NLP for Similar Languages, Varieties and Dialects. 2023.
PDF
BibTeX
@inproceedings{srivastava+chiang:2023fine,
  author    = {Srivastava, Aarohi and Chiang, David},
  title     = {Fine-Tuning {BERT} with Character-Level Noise for Zero-Shot Transfer to Dialects and Closely-Related Languages},
  booktitle = {Proc. Workshop on NLP for Similar Languages, Varieties and Dialects},
  year      = {2023}
}
Patrick Soga and David Chiang.
Bridging graph position encodings for transformers with weighted graph-walking automata.
Transactions on Machine Learning Research, 2023.
PDF
BibTeX
@article{soga+chiang:2023,
  author  = {Soga, Patrick and Chiang, David},
  title   = {Bridging Graph Position Encodings for Transformers with Weighted Graph-Walking Automata},
  journal = {Transactions on Machine Learning Research},
  year    = {2023}
}
Brian DuSell and David Chiang.
The surprising computational power of nondeterministic stack RNNs.
In Proc. ICLR. 2023.
PDF
BibTeX
@inproceedings{dusell+chiang:2023surprising,
  author    = {DuSell, Brian and Chiang, David},
  title     = {The Surprising Computational Power of Nondeterministic Stack {RNN}s},
  year      = {2023},
  booktitle = {Proc. ICLR}
}
David Chiang, Colin McDonald, and Chung-chieh Shan.
Exact recursive probabilistic programming.
PACMPL, 2023.
doi:10.1145/3586050.
PDF
BibTeX
@article{chiang+mcdonald+shan:2023,
author = "Chiang, David and McDonald, Colin and Shan, Chung-chieh",
title = "Exact Recursive Probabilistic Programming",
journal = "PACMPL",
volume = "7",
number = "OOPSLA1",
article = "98",
articleno = "98",
xmonth = "April",
year = "2023",
doi = "10.1145/3586050"
}
Chihiro Taguchi and David Chiang.
Introducing morphology in Universal Dependencies Japanese.
In Proc. Workshop on Universal Dependencies, 65–72. 2023.
PDF
BibTeX
@inproceedings{taguchi+chiang:2023,
  author    = {Taguchi, Chihiro and Chiang, David},
  title     = {Introducing Morphology in {U}niversal {D}ependencies {J}apanese},
  booktitle = {Proc. Workshop on Universal Dependencies},
  pages     = {65--72},
  year      = {2023}
}
David Chiang, Alexander M. Rush, and Boaz Barak.
Named tensor notation.
Transactions on Machine Learning Research, 2023.
PDF
BibTeX
@article{chiang+rush+barak:2023,
  author  = {Chiang, David and Rush, Alexander M. and Barak, Boaz},
  title   = {Named Tensor Notation},
  journal = {Transactions on Machine Learning Research},
  xmonth  = {January},
  year    = {2023}
}
Darcey Riley and David Chiang.
A continuum of generation tasks for investigating length bias and degenerate repetition.
In Proc. BlackboxNLP. 2022.
PDF
BibTeX
@inproceedings{riley+chiang:2022,
  author    = {Riley, Darcey and Chiang, David},
  title     = {A Continuum of Generation Tasks for Investigating Length Bias and Degenerate Repetition},
  year      = {2022},
  booktitle = {Proc. BlackboxNLP}
}
Alexandra Butoi, Brian DuSell, Tim Vieira, Ryan Cotterell, and David Chiang.
Algorithms for weighted pushdown automata.
In Proc. EMNLP. 2022.
PDF
BibTeX
@inproceedings{butoi+:2022,
  author    = {Butoi, Alexandra and DuSell, Brian and Vieira, Tim and Cotterell, Ryan and Chiang, David},
  title     = {Algorithms for Weighted Pushdown Automata},
  booktitle = {Proc. EMNLP},
  year      = {2022}
}
David Chiang and Peter Cholak.
Overcoming a theoretical limitation of self-attention.
In Proc. ACL. 2022.
PDF
BibTeX
@inproceedings{chiang+cholak:2022,
  author    = {Chiang, David and Cholak, Peter},
  title     = {Overcoming a Theoretical Limitation of Self-Attention},
  year      = {2022},
  booktitle = {Proc. ACL}
}
Brian DuSell and David Chiang.
Learning hierarchical structures with differentiable nondeterministic stacks.
In Proc. ICLR. 2022.
PDF
BibTeX
@inproceedings{dusell+chiang:iclr2022,
  author    = {DuSell, Brian and Chiang, David},
  title     = {Learning Hierarchical Structures with Differentiable Nondeterministic Stacks},
  year      = {2022},
  booktitle = {Proc. ICLR}
}
full list