% Hahn et al. (2022), Molecular Ecology Resources 22(6): journal article on
% whole-genome sequencing of formalin-preserved museum specimens.
% The `note` field carries the funding statement once, followed by the
% publisher copyright line; `&` is escaped there because styles typeset `note`.
@article{c8ebbe2396e24a21bf67148770682379,
  author    = {Hahn, Erin E. and Alexander, Marina R. and Grealy, Alicia and Stiller, Jiri and Gardiner, Donald M. and Holleley, Clare E.},
  title     = {Unlocking inaccessible historical genomes preserved in formalin},
  journal   = {Molecular Ecology Resources},
  year      = {2022},
  month     = aug,
  volume    = {22},
  number    = {6},
  pages     = {2130--2147},
  doi       = {10.1111/1755-0998.13505},
  issn      = {1755-098X},
  publisher = {Wiley-Blackwell},
  language  = {English},
  keywords  = {formalin, genome sequencing, hot alkaline lysis, museum, fixed},
  abstract  = {Museum specimens represent an unparalleled record of historical genomic data. However, the widespread practice of formalin preservation has thus far impeded genomic analysis of a large proportion of specimens. Limited DNA sequencing from formalin-preserved specimens has yielded low genomic coverage with unpredictable success. We set out to refine sample processing methods and to identify specimen characteristics predictive of sequencing success. With a set of taxonomically diverse specimens collected between 1962 and 2006 and ranging in preservation quality, we compared the efficacy of several end-to-end whole genome sequencing workflows alongside a k-mer-based trimming-free read alignment approach to maximize mapping of endogenous sequence. We recovered complete mitochondrial genomes and up to 3$\times$ nuclear genome coverage from formalin-preserved tissues. Hot alkaline lysis coupled with phenol-chloroform extraction out-performed proteinase K digestion in recovering DNA, while library preparation method had little impact on sequencing success. The strongest predictor of DNA yield was overall specimen condition, which additively interacts with preservation conditions to accelerate DNA degradation. Here, we demonstrate a significant advance in capability beyond limited recovery of a small number of loci via PCR or target-capture sequencing. To facilitate strategic selection of suitable specimens for genomic sequencing, we present a decision-making framework that utilizes independent and nondestructive assessment criteria. Sequencing of formalin-preserved specimens will contribute to a greater understanding of temporal trends in genetic adaptation, including those associated with a changing climate. Our work enhances the value of museum collections worldwide by unlocking genomes of specimens that have been disregarded as a valid molecular resource.},
  note      = {Funding Information: We thank Olly Berry and Andrew Young for their leadership within the Environomics Future Science Platform. We thank the director of the Australian National Wildlife Collection, Leo Joseph, and the ANWC staff (specifically, Margaret Cawsey, Alex Drew, Tonya Haff, Dave Spratt and Chris Wilson) for their contributions of curatorial expertise, metadata management and sampling assistance. We thank Kerensa McElroy for her assistance and guidance in data management. We thank Ondrej Hlinka and CSIRO IM\&T Client Services for their assistance in utilising the CSIRO Pearcey supercomputing system. We thank Niccy Aitkin for her guidance in utilising the Australian National University{\textquoteright}s Ecogenomics and Bioinformatics Laboratory for library preparation. We thank the Australian Genome Research Facility for their conversations around sequencing. We thank Sharon Appleyard, Meghan Castelli, Andrew George, Peter Grewe, Michael Hope, Safia Maher, Annette McGrath, Corinna Paeper, Cheng Soon-Ong, Andrew Spriggs, Jen Taylor and Christfried Webers for their valuable comments on the study design and implementation. We would like to acknowledge the contribution of Bioplatforms Australia in the generation of data used in this publication. Bioplatforms Australia is enabled by NCRIS. We kindly thank editors Sebastien Calvignac-Spencer and Benjamin Sibbett as well as Jim McGuire and two anonymous reviewers for their very helpful comments contributing to revision of the manuscript for publication. Funding for this study was provided by the Environomics CSIRO Future Science Platform (grants R-10011 and R-14486) awarded to CEH. Publisher Copyright: {\textcopyright} 2021 The Authors. Molecular Ecology Resources published by John Wiley \& Sons Ltd.},
}