BEGIN:VCALENDAR
VERSION:2.0
PRODID:-//Dana-Farber Cancer Institute - ECPv6.15.20//NONSGML v1.0//EN
CALSCALE:GREGORIAN
METHOD:PUBLISH
X-WR-CALNAME:Dana-Farber Cancer Institute
X-ORIGINAL-URL:https://ds.dfci.harvard.edu
X-WR-CALDESC:Events for Dana-Farber Cancer Institute
REFRESH-INTERVAL;VALUE=DURATION:PT1H
X-Robots-Tag:noindex
X-PUBLISHED-TTL:PT1H
BEGIN:VTIMEZONE
TZID:America/New_York
BEGIN:DAYLIGHT
TZOFFSETFROM:-0500
TZOFFSETTO:-0400
TZNAME:EDT
DTSTART:20230312T070000
END:DAYLIGHT
BEGIN:STANDARD
TZOFFSETFROM:-0400
TZOFFSETTO:-0500
TZNAME:EST
DTSTART:20231105T060000
END:STANDARD
BEGIN:DAYLIGHT
TZOFFSETFROM:-0500
TZOFFSETTO:-0400
TZNAME:EDT
DTSTART:20240310T070000
END:DAYLIGHT
BEGIN:STANDARD
TZOFFSETFROM:-0400
TZOFFSETTO:-0500
TZNAME:EST
DTSTART:20241103T060000
END:STANDARD
BEGIN:DAYLIGHT
TZOFFSETFROM:-0500
TZOFFSETTO:-0400
TZNAME:EDT
DTSTART:20250309T070000
END:DAYLIGHT
BEGIN:STANDARD
TZOFFSETFROM:-0400
TZOFFSETTO:-0500
TZNAME:EST
DTSTART:20251102T060000
END:STANDARD
BEGIN:DAYLIGHT
TZOFFSETFROM:-0500
TZOFFSETTO:-0400
TZNAME:EDT
DTSTART:20260308T070000
END:DAYLIGHT
BEGIN:STANDARD
TZOFFSETFROM:-0400
TZOFFSETTO:-0500
TZNAME:EST
DTSTART:20261101T060000
END:STANDARD
BEGIN:DAYLIGHT
TZOFFSETFROM:-0500
TZOFFSETTO:-0400
TZNAME:EDT
DTSTART:20270314T070000
END:DAYLIGHT
BEGIN:STANDARD
TZOFFSETFROM:-0400
TZOFFSETTO:-0500
TZNAME:EST
DTSTART:20271107T060000
END:STANDARD
END:VTIMEZONE
BEGIN:VEVENT
DTSTART;TZID=America/New_York:20241219T120000
DTEND;TZID=America/New_York:20241219T130000
DTSTAMP:20260606T230347
CREATED:20241209T183348Z
LAST-MODIFIED:20250114T202112Z
UID:5733-1734609600-1734613200@ds.dfci.harvard.edu
SUMMARY:Interpretable and Data-driven Machine Learning Models for Analyzing High-dimensional Biological Data
DESCRIPTION:Data Science Seminar \nThursday December 19\, 2024 – 12pm-1pm\nCenter for Life Sciences Building\, room 11081 \nJunchen Yang\nDepartment of Computational Biology and Bioinformatics\, Yale University
URL:https://ds.dfci.harvard.edu/event/interpretable-and-data-driven-machine-learning-models-for-analyzing-high-dimensional-biological-data/
CATEGORIES:Seminar
ATTACH;FMTTYPE=image/jpeg:https://ds.dfci.harvard.edu/wp-content/uploads/2024/12/junchen-scaled-e1733769209863.jpeg
END:VEVENT
BEGIN:VEVENT
DTSTART;TZID=America/New_York:20250205T120000
DTEND;TZID=America/New_York:20250205T130000
DTSTAMP:20260606T230347
CREATED:20250128T174809Z
LAST-MODIFIED:20250205T180933Z
UID:5793-1738756800-1738760400@ds.dfci.harvard.edu
SUMMARY:Niche-Driven Cell Identities in the Self-Renewing Stomach Corpus Epithelium
DESCRIPTION:CompBio Connections Seminar\nWednesday February 5 at 12:00pm\nCenter for Life Sciences Building\, Zelen Commons\nKe Li\, PhD\, Research Fellow\, Dana-Farber Cancer Institute and Harvard Medical School \nLunch is provided.
URL:https://ds.dfci.harvard.edu/event/nice-driven-cell-identities-in-the-self-renewing-stomach-corpus-epithelium/
CATEGORIES:Seminar
ATTACH;FMTTYPE=image/png:https://ds.dfci.harvard.edu/wp-content/uploads/2025/01/Ke_headshot_square.png
END:VEVENT
BEGIN:VEVENT
DTSTART;TZID=America/New_York:20250206T160000
DTEND;TZID=America/New_York:20250206T170000
DTSTAMP:20260606T230347
CREATED:20250114T202055Z
LAST-MODIFIED:20250207T140322Z
UID:5776-1738857600-1738861200@ds.dfci.harvard.edu
SUMMARY:Choosing Good Subsamples for Regression Modelling: Nearly-True Models?
DESCRIPTION:Harvard Biostatistics Colloquium Series\nThursday February 6th\n4:00-5:00PM\nHarvard TH Chan School of Public Health\, FXB G12 \nThomas Lumley\, PhD\, Chair in Biostatistics\, University of Auckland\, New Zealand\; Affiliate Professor\, University of Washington\, Department of Biostatistics
URL:https://ds.dfci.harvard.edu/event/choosing-good-subsamples-for-regression-modelling-nearly-true-models/
CATEGORIES:Seminar
ATTACH;FMTTYPE=image/png:https://ds.dfci.harvard.edu/wp-content/uploads/2025/01/lumley.png
END:VEVENT
BEGIN:VEVENT
DTSTART;TZID=America/New_York:20250213T110000
DTEND;TZID=America/New_York:20250213T120000
DTSTAMP:20260606T230347
CREATED:20250207T182809Z
LAST-MODIFIED:20250211T172627Z
UID:5831-1739444400-1739448000@ds.dfci.harvard.edu
SUMMARY:Analysis and Design of RNA sequences with Deep Learning
DESCRIPTION:Data Science Seminar\nThursday February 13th at 11am\nCenter for Life Sciences Building\, room 11081 \nJoseph Valencia\, Oregon State University
URL:https://ds.dfci.harvard.edu/event/analysis-and-design-of-rna-sequences-with-deep-learning/
CATEGORIES:Seminar
ATTACH;FMTTYPE=image/jpeg:https://ds.dfci.harvard.edu/wp-content/uploads/2025/02/joseph-e1738952849381.jpg
END:VEVENT
BEGIN:VEVENT
DTSTART;TZID=America/New_York:20250219T120000
DTEND;TZID=America/New_York:20250219T130000
DTSTAMP:20260606T230347
CREATED:20250211T172609Z
LAST-MODIFIED:20250219T130227Z
UID:5846-1739966400-1739970000@ds.dfci.harvard.edu
SUMMARY:Current Methods in Single Cell FFPE Analysis
DESCRIPTION:CompBio Connections\nFebruary 19\, 2025 at 12pm\nDFCI Center for Life Sciences Building\, Zelen Commons \nAnthony Anselmo\nLead Bioinformatician\nCenter for Cancer Genomics\, DFCI \nLunch is provided.
URL:https://ds.dfci.harvard.edu/event/current-methods-in-single-cell-ffpe-analysis/
CATEGORIES:Seminar
ATTACH;FMTTYPE=image/jpeg:https://ds.dfci.harvard.edu/wp-content/uploads/2025/02/anthony_anselmo_phsweb-e1739294749828.jpeg
END:VEVENT
BEGIN:VEVENT
DTSTART;TZID=America/New_York:20250227T160000
DTEND;TZID=America/New_York:20250227T170000
DTSTAMP:20260606T230347
CREATED:20250219T130212Z
LAST-MODIFIED:20250228T180912Z
UID:5867-1740672000-1740675600@ds.dfci.harvard.edu
SUMMARY:Single-cell Multi-sample Multi-condition Data Integration to Uncover Disease Signatures
DESCRIPTION:HSPH Biostatistics and DFCI Data Science Colloquium\nThursday February 27th at 4pm\nHSPH FXB Room G13 \nYingxin Lin\, PhD\nPostdoctoral Associate in the Department of Biostatistics at the Yale School of Public Health \nThe recent emergence of multi-sample multi-condition single-cell multi cohort studies allows researchers to investigate different cell states. The effective integration of multiple large-cohort studies promises biological insights into cells under different conditions that individual studies cannot provide. In this talk\, I will present scMerge2\, a scalable algorithm that allows data integration of atlas-scale multi-sample multi-condition single-cell studies. scMerge2 is generalized to enable the merging of millions of cells from single-cell studies generated by various single-cell technologies. Using a large data collection with over five million cells from 1000+ individuals\, we demonstrate that the integration of multi-sample multi-condition scRNAseq from multiple cohorts reveals signatures derived from cell-type expression that are more accurate in discriminating disease progression.
URL:https://ds.dfci.harvard.edu/event/single-cell-multi-sample-multi-condition-data-integration-to-uncover-disease-signatures/
LOCATION:Harvard TH Chan School of Public Health\, FXB G13\, 677 Huntington Ave\, Boston\, MA\, United States
CATEGORIES:Seminar
ATTACH;FMTTYPE=image/jpeg:https://ds.dfci.harvard.edu/wp-content/uploads/2025/02/headshot-scaled-e1739970101940.jpg
END:VEVENT
BEGIN:VEVENT
DTSTART;TZID=America/New_York:20250303T160000
DTEND;TZID=America/New_York:20250303T170000
DTSTAMP:20260606T230347
CREATED:20250221T180945Z
LAST-MODIFIED:20250304T155554Z
UID:5888-1741017600-1741021200@ds.dfci.harvard.edu
SUMMARY:How Do Neural Networks Learn Features From Data?
DESCRIPTION:HSPH Biostatistics and DFCI Data Science Colloquium\nMonday March 3rd at 4:00pm\nHSPH Kresge G2 \nAdityanarayanan Radhakrishnan\nEric and Wendy Schmidt Center Postdoctoral Fellow\, Broad Institute of MIT and Harvard\, Harvard School of Engineering and Applied Sciences \nAbstract: Understanding how neural networks learn features\, or relevant patterns in data\, is key to accelerating scientific discovery. In this talk\, I will present a unifying mechanism that characterizes feature learning in neural network architectures. Namely\, features learned by neural networks are captured by a statistical operator known as the average gradient outer product (AGOP). More generally\, the AGOP enables feature learning in machine learning models that have no built-in feature learning mechanism (e.g.\, kernel methods). I will present two applications of this line of work. First\, I will show how AGOP can be used to steer LLMs and vision-language models\, guiding them towards specified concepts and shedding light on vulnerabilities in these models. I will then discuss how AGOP can be used to discover cellular programs (sets of genes whose expressions exhibit dependencies across cell subpopulations) from millions of sequenced cells. I will show how AGOP identified programs that reflect the heterogeneity found in various cell types\, subtypes\, and states in this data. Overall\, this line of work advances our fundamental understanding of how neural networks extract features from data\, leading to the development of novel\, interpretable\, and effective methods for use in scientific applications.
URL:https://ds.dfci.harvard.edu/event/how-do-neural-networks-learn-features-from-data/
LOCATION:Harvard TH Chan School of Public Health\, 677 Huntington Ave\, Boston\, MA\, 02115
CATEGORIES:Seminar
ATTACH;FMTTYPE=image/jpeg:https://ds.dfci.harvard.edu/wp-content/uploads/2025/02/headshot-e1740161254828.jpeg
END:VEVENT
BEGIN:VEVENT
DTSTART;TZID=America/New_York:20250306T160000
DTEND;TZID=America/New_York:20250306T170000
DTSTAMP:20260606T230347
CREATED:20250207T140313Z
LAST-MODIFIED:20250307T160147Z
UID:5819-1741276800-1741280400@ds.dfci.harvard.edu
SUMMARY:Universal Prediction of Cell-cycle Position Using Transfer Learning
DESCRIPTION:HSPH Biostatistics and DFCI Data Science Colloquium\nThursday\, March 6\, 2025\n4:00pm\nHarvard TH Chan School of Public Health\, FXB G13 \nKasper Hansen\, PhD\nAssociate Professor\, McKusick-Nathans Institute of Genetic Medicine\, Department of Biostatistics\, Johns Hopkins University \nA significant barrier to progress in biomedical data science is the development of prediction models that work across contexts such as different instruments\, facilities or hospitals. This is particularly difficult for predictions based on genomics data. Here\, we present an example of a generalizable prediction model. \nThe cell cycle is a highly conserved\, continuous process which controls faithful replication and division of cells. Single-cell technologies have enabled increasingly precise measurements of the cell cycle both as a biological process of interest and as a possible confounding factor. Despite its importance and conservation\, there is no universally applicable approach to infer position in the cell cycle with high-resolution from single-cell RNA-seq data. \nHere\, we present tricycle\, an R/Bioconductor package\, which addresses this challenge by leveraging key features of the biology of the cell cycle\, the mathematical properties of principal component analysis of periodic functions\, and the use of transfer learning. We estimate a cell-cycle embedding using a fixed reference dataset and project new data into this reference embedding\, an approach that overcomes key limitations of learning a dataset-dependent embedding. Tricycle then predicts a cell-specific position in the cell cycle based on the data projection. The accuracy of tricycle compares favorably to gold-standard experimental assays\, which generally require specialized measurements in specifically constructed in vitro systems. 
 \nUsing internal controls which are available for any dataset\, we show that tricycle predictions generalize to datasets with multiple cell types\, across tissues\, species\, and even sequencing assays.
URL:https://ds.dfci.harvard.edu/event/universal-prediction-of-cell-cycle-position-using-transfer-learning/
LOCATION:Harvard TH Chan School of Public Health\, FXB G13\, 677 Huntington Ave\, Boston\, MA\, United States
CATEGORIES:Seminar
ATTACH;FMTTYPE=image/jpeg:https://ds.dfci.harvard.edu/wp-content/uploads/2025/02/khansen.jpg
END:VEVENT
BEGIN:VEVENT
DTSTART;TZID=America/New_York:20250310T160000
DTEND;TZID=America/New_York:20250310T170000
DTSTAMP:20260606T230347
CREATED:20250226T134218Z
LAST-MODIFIED:20250311T161944Z
UID:5905-1741622400-1741626000@ds.dfci.harvard.edu
SUMMARY:Decoding Aging at Spatial and Single-cell Resolution with Machine Learning
DESCRIPTION:HSPH Biostatistics and DFCI Data Science Colloquium\nMonday March 10th at 4:00pm\nHSPH Kresge G2 \nEric Sun\nPhD Candidate\, Department of Biomedical Informatics\nStanford University \nAging is a highly complex process and the greatest risk factor for many chronic diseases including cardiovascular disease\, dementia\, stroke\, diabetes\, and cancer. Recent spatial and single-cell omics technologies have enabled the high-dimensional profiling of complex biology including that underlying aging. As such\, new machine learning and computational methods are needed to unlock important insights from spatial and single-cell omics datasets. First\, I present the development of high-resolution machine learning models (‘spatial aging clocks’) that can measure the aging of individual cells in the brain. Using these spatial aging clocks\, I discovered that some cell types can dramatically influence the aging of nearby cells. Next\, I present new computational and statistical methods for overcoming the gene coverage limitations of existing spatially resolved single-cell omics technologies\, which have enabled the discovery of gene pathways underlying the spatial effects of brain aging. \n 
URL:https://ds.dfci.harvard.edu/event/decoding-aging-at-spatial-and-single-cell-resolution-with-machine-learning/
LOCATION:Harvard TH Chan School of Public Health\, 677 Huntington Ave\, Boston\, MA\, 02115
CATEGORIES:Seminar
ATTACH;FMTTYPE=image/jpeg:https://ds.dfci.harvard.edu/wp-content/uploads/2025/02/Eric_Sun-scaled-e1740577294700.jpg
END:VEVENT
BEGIN:VEVENT
DTSTART;TZID=America/New_York:20250311T160000
DTEND;TZID=America/New_York:20250311T170000
DTSTAMP:20260606T230347
CREATED:20250226T134543Z
LAST-MODIFIED:20250312T130019Z
UID:5910-1741708800-1741712400@ds.dfci.harvard.edu
SUMMARY:Dissecting Tumor Transcriptional Heterogeneity from Single-cell RNA-seq Data by Generalized Binary Covariance Decomposition
DESCRIPTION:HSPH Biostatistics and DFCI Data Science Colloquium\nTuesday March 11th at 4:00pm\nHSPH FXB G12 \nYusha Liu\, PhD\nResearch Assistant Professor\nDepartment of Biostatistics\nThe University of North Carolina at Chapel Hill \nProfiling tumors with single-cell RNA sequencing has the potential to identify recurrent patterns of transcription variation related to cancer progression\, and to produce therapeutically relevant insights. However\, strong inter-tumor heterogeneity can obscure more subtle patterns that are shared across tumors. In this talk\, I will introduce a novel statistical method\, generalized binary covariance decomposition (GBCD)\, to address this problem. GBCD can decompose transcriptional heterogeneity into interpretable components — including patient-specific\, dataset-specific and shared components relevant to disease subtypes — and\, in the presence of strong inter-tumor heterogeneity\, it can produce more interpretable results than existing methods. Applied to data on pancreatic ductal adenocarcinoma\, GBCD produced a refined characterization of existing tumor subtypes\, and identified a gene expression program prognostic of poor survival independent of tumor stage and subtype. The gene expression program is enriched for genes involved in stress responses\, and suggests a role for the integrated stress response in pancreatic ductal adenocarcinoma.
URL:https://ds.dfci.harvard.edu/event/dissecting-tumor-transcriptional-heterogeneity-from-single-cell-rna-seq-data-by-generalized-binary-covariance-decomposition/
LOCATION:Harvard TH Chan School of Public Health\, 677 Huntington Ave\, Boston\, MA\, 02115
CATEGORIES:Seminar
ATTACH;FMTTYPE=image/png:https://ds.dfci.harvard.edu/wp-content/uploads/2025/02/headshot-copy-e1740577495994.png
END:VEVENT
BEGIN:VEVENT
DTSTART;TZID=America/New_York:20250327T160000
DTEND;TZID=America/New_York:20250327T170000
DTSTAMP:20260606T230347
CREATED:20250314T163931Z
LAST-MODIFIED:20250328T121320Z
UID:5985-1743091200-1743094800@ds.dfci.harvard.edu
SUMMARY:Data Integration in Spatial and Single Cell Omics: What is Erased\, and Can you Recover it?
DESCRIPTION:HSPH Biostatistics and DFCI Data Science Colloquium\nThursday\, March 27\, 2025\n4:00pm\nHarvard TH Chan School of Public Health\, FXB G13 \n\nNancy Zhang\, PhD\nGe Li and Ning Zhao Professor\, Professor of Statistics and Data Science\, Vice Dean of Wharton Doctoral Programs\, The Wharton School\, University of Pennsylvania \nIn single-cell and spatial biology\, data integration refers to the alignment of cells across samples and modalities\, and is a ubiquitous challenge affecting all downstream analyses. The goal in cell integration is to find cells across data sets that share the same biological state that may be obscured by technical differences. \nIn this talk\, I will cast the cell integration problem on a continuum of weak to strong linkage\, depending on the strength of feature sharing between experiments. First\, I will examine integration across data modalities of weak linkage. This arises when there are few shared features between the data being integrated\, for example\, between single-cell RNA sequencing data and spatial proteomics data. For this\, I will present MaxFuse\, a method that leverages higher order relationships between all features\, including unshared features\, to achieve accurate integration. Next\, we consider the scenario of data alignment across the same modality in clinical scale studies. For this setting\, I will show that existing paradigms are overly aggressive\, erasing disease and treatment effects and introducing severe data distortion. I will introduce a “pool-of-controls” experimental design concept to disentangle biological variation from unwanted variation. Based on this\, I will describe CellANOVA\, a novel statistical model and scalable algorithm that recovers biological signals lost during batch integration and corrects integration related data distortion. Through these two contrasting paradigms\, I will share the key lessons learned and the remaining challenges in this field.
URL:https://ds.dfci.harvard.edu/event/data-integration-in-spatial-and-single-cell-omics-what-is-erased-and-can-you-recover-it/
LOCATION:Harvard TH Chan School of Public Health\, 677 Huntington Ave\, Boston\, MA\, 02115
CATEGORIES:Seminar
ATTACH;FMTTYPE=image/jpeg:https://ds.dfci.harvard.edu/wp-content/uploads/2025/03/zhang-crop-e1741970356597.jpg
END:VEVENT
BEGIN:VEVENT
DTSTART;TZID=America/New_York:20250403T160000
DTEND;TZID=America/New_York:20250403T170000
DTSTAMP:20260606T230347
CREATED:20250314T164130Z
LAST-MODIFIED:20250328T121330Z
UID:5991-1743696000-1743699600@ds.dfci.harvard.edu
SUMMARY:Fréchet Regression of Random Objects on Vector Covariates and Its Applications for Single Cell RNA-seq Data Analysis
DESCRIPTION:HSPH Biostatistics and DFCI Data Science Colloquium\nThursday\, April 3\, 2025\n4:00pm\nHarvard TH Chan School of Public Health\, FXB G13 \nHongzhe Li\, PhD\nPerelman Professor of Biostatistics\, Epidemiology and Informatics\nDirector\, Center for Statistics in Big Data Vice Chair for Research Integration\, Department of Biostatistics\, Epidemiology and Informatics\, University of Pennsylvania \nPopulation-level single-cell RNA-seq data captures gene expression profiles across thousands of cells from each individual in a sizable cohort. This data facilitates the construction of cell-type- and individual-specific gene co-expression networks by estimating covariance matrices. Investigating how these co-expression networks relate to individual-level covariates provides critical insights into the interplay between molecular processes and biological or clinical traits. This talk introduces Fréchet regression\, modeling covariance matrices as outcomes and vector covariates as predictors\, using the Wasserstein distance between covariance matrices as a metric instead of the Euclidean distance. A test statistic is proposed based on the Fréchet mean and covariate-weighted Fréchet mean\, with its asymptotic null distribution derived. Analysis of large-scale single-cell RNA-seq data reveals an association between the co-expression network of genes in the nutrient-sensing pathway and age\, highlighting perturbations in gene co-expression networks with aging. Additionally\, a robust local Fréchet regression approach\, leveraging neural unbalanced optimal transport\, is briefly discussed to explore how cells are temporally organized during the differentiation of human embryonic stem cells into embryoid bodies.
URL:https://ds.dfci.harvard.edu/event/frechet-regression-of-random-objects-on-vector-covariates-and-its-applications-for-single-cell-rna-seq-data-analysis/
LOCATION:Harvard TH Chan School of Public Health\, 677 Huntington Ave\, Boston\, MA\, 02115
CATEGORIES:Seminar
ATTACH;FMTTYPE=image/jpeg:https://ds.dfci.harvard.edu/wp-content/uploads/2025/03/li-crop.jpg
END:VEVENT
BEGIN:VEVENT
DTSTART;TZID=America/New_York:20250415T160000
DTEND;TZID=America/New_York:20250415T170000
DTSTAMP:20260606T230347
CREATED:20250409T114825Z
LAST-MODIFIED:20250411T114358Z
UID:6027-1744732800-1744736400@ds.dfci.harvard.edu
SUMMARY:Modeling Multiscale Genome and Cellular Organization
DESCRIPTION:HSPH Biostatistics and DFCI Data Science Seminar\nTuesday April 15 at 4:00pm\nDana-Farber Cancer Institute\nCenter for Life Sciences Building\, 11th Floor\, Room 11081 \nJian Ma\, PhD\nRay and Stephanie Lane Professor of Computational Biology\nCarnegie Mellon University \n  \nThe intersection of AI/ML and biomedicine is entering a transformative era\, with growing potential to impact both basic research and translational medicine. Yet\, despite remarkable advances in high-throughput technologies across genomics and cell biology\, our understanding of the diverse cell types in the human body and the underlying principles of intracellular molecular organization and intercellular spatial interactions remains incomplete. A central challenge lies in developing computational frameworks that can integrate molecular\, cellular\, and tissue-level data to advance cell biology at an unprecedented scale. In this talk\, I will present our recent work on machine learning approaches for regulatory genomics\, with a focus on single-cell 3D epigenomics. We introduce methods that connect different layers of 3D genome architecture and cellular function at single-cell resolution\, including graph- and hypergraph-based models that capture spatial genome organization. I will also highlight our latest efforts in developing self-supervised learning frameworks to delineate multiscale cellular interactions within complex tissues\, enabling the discovery of previously unrecognized spatially organized patterns. Together\, these AI-driven models provide a foundation for integrative\, multiscale representations of cellular systems\, offering new insights into genome structure\, gene regulation\, and cell-cell communication. This line of work opens new opportunities toward building cohesive multiscale cellular models applicable across a broad range of contexts in health and disease.
URL:https://ds.dfci.harvard.edu/event/modeling-multiscale-genome-and-cellular-organization/
CATEGORIES:Seminar
ATTACH;FMTTYPE=image/png:https://ds.dfci.harvard.edu/wp-content/uploads/2025/04/jian-ma-copy-e1744199268316.png
END:VEVENT
BEGIN:VEVENT
DTSTART;TZID=America/New_York:20250429T110000
DTEND;TZID=America/New_York:20250429T120000
DTSTAMP:20260606T230347
CREATED:20250417T114534Z
LAST-MODIFIED:20250417T114534Z
UID:6047-1745924400-1745928000@ds.dfci.harvard.edu
SUMMARY:Complex Disease Modeling And Efficient Drug Discovery With Large Language Models
DESCRIPTION:HSPH Biostatistics and DFCI Data Science Seminar\nTuesday April 29 from 11:00-12:00pm\nZoom only (Link to be posted shortly) \nYu Li\, PhD\nAssistant Professor\, CSE\nThe Chinese University of Hong Kong \nLarge language models\, which can integrate and process large amounts of data in biomedicine\, have great potential in modeling complex diseases and discovering functional biomolecules for potential therapeutics. To model complex diseases and identify the potential drug targets for such diseases\, we built a language model trained on the insurance claims of around 123 million US people. With the model\, we can give a unified representation of all the common complex diseases\, which enables us to predict the genetic parameters of the diseases and discover unique genetic loci related to them efficiently. Then\, we developed models based on protein language models to efficiently discover remote homologs and functional biomolecules from nature\, such as signal peptides and antimicrobial peptides. With the model\, we can identify remote homologs 22 times faster than PSI-BLAST and discover diverse functional peptides with sequence similarity lower than 20% against the known ones. Finally\, we developed an RNA language model to model the RNA sequence and structure relation\, which enables us to perform RNA structure prediction and reverse design effectively. Within two months\, we designed and experimentally validated 19 RNA aptamers that are structurally similar\, yet sequence dissimilar\, to known light-up aptamers. More importantly\, 10 designed aptamers show higher fluorescence than the native Mango-I. The above projects demonstrate the great potential of large language models in promoting fundamental computational biological research and potential transformational development.
URL:https://ds.dfci.harvard.edu/event/complex-disease-modeling-and-efficient-drug-discovery-with-large-language-models/
CATEGORIES:Seminar
ATTACH;FMTTYPE=image/jpeg:https://ds.dfci.harvard.edu/wp-content/uploads/2025/04/headshot.jpg
END:VEVENT
BEGIN:VEVENT
DTSTART;TZID=America/New_York:20250911T160000
DTEND;TZID=America/New_York:20250911T170000
DTSTAMP:20260606T230347
CREATED:20250903T113817Z
LAST-MODIFIED:20250912T120312Z
UID:6428-1757606400-1757610000@ds.dfci.harvard.edu
SUMMARY:Preference Inference for Language Models Debiased by Fisher Random Walk Models
DESCRIPTION:HSPH Biostatistics & DFCI Data Science Colloquium Series\nSeptember 11 at 4:00PM\nHarvard TH Chan School of Public Health\, FXB-301 \nJunwei Lu\, PhD\nAssociate Professor of Biostatistics\, Harvard TH Chan School of Public Health \nHuman preference alignment has been shown to be effective in training the large language models (LLMs). It allows the LLM to understand human feedback and preferences. Despite the extensive literature dealing with algorithms aligning the rank of human preference\, uncertainty quantification for the ranking estimation still needs to be explored and is of great practical significance. For example\, it is important to overcome the problem of hallucination for LLM in the medical domain\, and an inferential method for the ranking of LM answers becomes necessary. In this talk\, we will present a novel framework called “Fisher random walk” to conduct semi-parametric efficient preference inference for language models and illustrate its application in the language models for medical knowledge.
URL:https://ds.dfci.harvard.edu/event/preference-inference-for-language-models-debiased-by-fisher-random-walk-models/
LOCATION:Harvard TH Chan School of Public Health\, 677 Huntington Ave\, Boston\, MA\, 02115
CATEGORIES:Seminar
ATTACH;FMTTYPE=image/jpeg:https://ds.dfci.harvard.edu/wp-content/uploads/2025/09/junweilarger.jpeg
END:VEVENT
BEGIN:VEVENT
DTSTART;TZID=America/New_York:20250918T160000
DTEND;TZID=America/New_York:20250918T170000
DTSTAMP:20260606T230347
CREATED:20250912T174007Z
LAST-MODIFIED:20250918T235342Z
UID:6513-1758211200-1758214800@ds.dfci.harvard.edu
SUMMARY:Reproducible Research - Tools and a case study with NHANES
DESCRIPTION:HSPH Biostatistics & DFCI Data Science Colloquium Series \nSeptember 18\, 2025\n4:00 PM\nHSPH FXB-301 \nRobert Gentleman\, PhD\nPrincipal Research Scientist\nHarvard T.H. Chan School of Public Health and Dana-Farber Cancer Institute \nI will discuss how new technologies and statistical methodologies can help enhance our ability to perform reproducible research. I will demonstrate how these could be used in a real world setting by examining questions\, primarily of an epidemiological nature\, using data from the NHANES surveys. I will describe one version of an Environment Wide Association Study (EnWAS) and show how this methodology can potentially be employed to interrogate large complex data resources. \n 
URL:https://ds.dfci.harvard.edu/event/reproducible-research-tools-and-a-case-study-with-nhanes/
LOCATION:Harvard TH Chan School of Public Health\, 677 Huntington Ave\, Boston\, MA\, 02115
CATEGORIES:Seminar
ATTACH;FMTTYPE=image/jpeg:https://ds.dfci.harvard.edu/wp-content/uploads/2025/09/Robert-Gentlemen-850x430-2-e1757698738137.jpg
END:VEVENT
BEGIN:VEVENT
DTSTART;TZID=America/New_York:20251002T160000
DTEND;TZID=America/New_York:20251002T170000
DTSTAMP:20260606T230347
CREATED:20251001T170551Z
LAST-MODIFIED:20251003T124253Z
UID:6556-1759420800-1759424400@ds.dfci.harvard.edu
SUMMARY:Navigate the Crossroad of Statistics\, Generative AI and Genomic Health
DESCRIPTION:HSPH Biostatistics & DFCI Data Science Colloquium Series \nThursday October 2\, 2025\n4:00pm ET\nHSPH FXB-301 \nXihong Lin\, PhD\, Department of Biostatistics and Department of Statistics\, Harvard University \nIntegrating statistics with generative AI provides unprecedented opportunities to empower statistical science and accelerate trustworthy scientific discovery by leveraging the potential of generative AI models alongside rigorous statistical principles that account for uncertainty and enhance interpretability. In this talk\, I will discuss the challenges and opportunities as we navigate the crossroad of statistics\, generative AI\, and genomic health science. I will highlight how synthetic data from generative models\, such as diffusion models and transformers\, can be used to enable robust and powerful statistical analyses\, while ensuring valid inference even when generative AI models are misspecified and treated as black-box tools. I will illustrate such synthetic data powered statistical inference with generative ML/AI through large scale analyses of the UK biobank in the presence of missing data\, and discuss its connection with prediction powered inference (PPI). I will also discuss how to build an end-to-end autonomous\, scalable and interpretable large-scale whole genome sequencing (WGS) analysis ecosystem. These efforts will be illustrated using the analysis of the TOPMed WGS samples of 200\,000 samples\, the UK biobank of 500\,000 subjects on the cloud platform RAP\, as well as the All of Us data of 400\,000 subjects in the NIH cloud platform AnVIL. \n 
URL:https://ds.dfci.harvard.edu/event/navigate-the-crossroad-of-statistics-generative-ai-and-genomic-health/
CATEGORIES:Seminar
ATTACH;FMTTYPE=image/png:https://ds.dfci.harvard.edu/wp-content/uploads/2025/10/xihong_lin_crop.png
END:VEVENT
BEGIN:VEVENT
DTSTART;TZID=America/New_York:20251009T160000
DTEND;TZID=America/New_York:20251009T170000
DTSTAMP:20260606T230347
CREATED:20251001T170830Z
LAST-MODIFIED:20251014T115427Z
UID:6561-1760025600-1760029200@ds.dfci.harvard.edu
SUMMARY:Flexible Adaptive Procedures for Testing Multiple Treatments\, Endpoints or Populations in Confirmatory Clinical Trials
DESCRIPTION:HSPH Biostatistics & DFCI Data Science Colloquium Series \nThursday October 9\, 2025\n4:00pm ET\nHSPH FXB-301 \nCyrus Mehta\, President and Co-Founder of Cytel\, Inc\, Adjunct Professor\, Department of Biostatistics\, Harvard TH Chan School of Public Health \nThe statistical methodology for the classical two-arm group sequential design has advanced vastly over the past three decades to incorporate adaptive design changes\, multiple treatments and multiple endpoints\, while nevertheless preserving strong control of the family wise error rate. The graph based approach to multiple testing is an intuitive method that enables a clinical trial study team to represent clearly\, through a directed graph\, its priorities for hierarchical testing of multiple hypotheses\, and for propagating the available type-1 error from rejected or dropped hypotheses to hypotheses yet to be tested. Although originally developed for single stage non-adaptive designs\, we show how it may be extended to two-stage designs that permit early identification of efficacious treatments\, adaptive sample size re-estimation\, dropping of hypotheses\, and changes in the hierarchical testing strategy at the end of stage one. We will present the statistical methodology for controlling the family wise error rate in the presence of these adaptive changes\, and will generate the operating characteristics of different underlying scenarios and adaptive decision rules through a large simulation experiment.
URL:https://ds.dfci.harvard.edu/event/flexible-adaptive-procedures-for-testing-multiple-treatments-endpoints-or-populations-in-confirmatory-clinical-trials/
CATEGORIES:Seminar
ATTACH;FMTTYPE=image/jpeg:https://ds.dfci.harvard.edu/wp-content/uploads/2025/10/cyrus-square-e1759338489996.jpg
END:VEVENT
BEGIN:VEVENT
DTSTART;TZID=America/New_York:20251016T160000
DTEND;TZID=America/New_York:20251016T170000
DTSTAMP:20260606T230347
CREATED:20251014T115412Z
LAST-MODIFIED:20251017T111716Z
UID:6602-1760630400-1760634000@ds.dfci.harvard.edu
SUMMARY:Estimation and Inference of Two Doubly Robust Functionals in High Dimensions
DESCRIPTION:HSPH Biostatistics & DFCI Data Science Colloquium Series \nThursday October 16\, 2025\n4:00pm ET\nHSPH FXB-301 \nRajarshi Mukherjee\, Associate Professor of Biostatistics\, Harvard T.H. Chan School of Public Health\nWebsite
URL:https://ds.dfci.harvard.edu/event/estimation-and-inference-of-two-doubly-robust-functionals-in-high-dimensions/
CATEGORIES:Seminar
ATTACH;FMTTYPE=image/png:https://ds.dfci.harvard.edu/wp-content/uploads/2025/10/Rajarshi.png
END:VEVENT
BEGIN:VEVENT
DTSTART;TZID=America/New_York:20251113T160000
DTEND;TZID=America/New_York:20251113T170000
DTSTAMP:20260606T230347
CREATED:20251107T191711Z
LAST-MODIFIED:20251117T185408Z
UID:6649-1763049600-1763053200@ds.dfci.harvard.edu
SUMMARY:Addressing Statistical Challenges in Long COVID Research: Auxiliary Variable-Dependent Sampling Designs and Clustering of Complex Data Types
DESCRIPTION:HSPH Biostatistics & DFCI Data Science Colloquium Seminar Series\nNovember 13\, 2025 at 4:00pm\nHSPH\, FXB 301 \nSpeakers: Joint presentation by Tony Harrison & Thaweethai Reeder
URL:https://ds.dfci.harvard.edu/event/addressing-statistical-challenges-in-long-covid-research-auxiliary-variable-dependent-sampling-designs-and-clustering-of-complex-data-types/
CATEGORIES:Seminar
ATTACH;FMTTYPE=image/jpeg:https://ds.dfci.harvard.edu/wp-content/uploads/2025/11/hsph.jpeg
END:VEVENT
BEGIN:VEVENT
DTSTART;TZID=America/New_York:20251120T160000
DTEND;TZID=America/New_York:20251120T170000
DTSTAMP:20260606T230348
CREATED:20251117T185354Z
LAST-MODIFIED:20251117T185503Z
UID:6669-1763654400-1763658000@ds.dfci.harvard.edu
SUMMARY:The Single Arm Changing to Randomized Design (SACRED)
DESCRIPTION:HSPH Biostatistics & DFCI Data Science Colloquium Seminar Series\nHarvard TH Chan School of Public Health\, FXB 301\nNovember 20th\, 4:00-5:00pm \nGlen Laird\, Head of Biostatistics\, Methodology and Innovation\, Vertex Pharmaceuticals
URL:https://ds.dfci.harvard.edu/event/the-single-arm-changing-to-randomized-design-sacred/
CATEGORIES:Seminar
ATTACH;FMTTYPE=image/jpeg:https://ds.dfci.harvard.edu/wp-content/uploads/2025/11/Glen_Laird-1-e1763405616809.jpg
END:VEVENT
BEGIN:VEVENT
DTSTART;TZID=America/New_York:20260130T080000
DTEND;TZID=America/New_York:20260130T170000
DTSTAMP:20260606T230348
CREATED:20251222T190751Z
LAST-MODIFIED:20260129T193514Z
UID:6752-1769760000-1769792400@ds.dfci.harvard.edu
SUMMARY:Stay tuned for 2026 events!
DESCRIPTION:Please watch our Events page for the schedule of seminars and workshops starting in February 2026!
URL:https://ds.dfci.harvard.edu/event/stay-tuned-for-2026-events/
CATEGORIES:Seminar
ATTACH;FMTTYPE=image/png:https://ds.dfci.harvard.edu/wp-content/uploads/2020/09/10221_Facebook_360x360.png
END:VEVENT
BEGIN:VEVENT
DTSTART;TZID=America/New_York:20260205T160000
DTEND;TZID=America/New_York:20260205T170000
DTSTAMP:20260606T230348
CREATED:20260129T193457Z
LAST-MODIFIED:20260129T193457Z
UID:6823-1770307200-1770310800@ds.dfci.harvard.edu
SUMMARY:Data Integration and Time-informed Methods for the Electronic Health Record
DESCRIPTION:HSPH Biostatistics and DFCI Data Science Colloquium \nThursday February 5 at 4PM\nHSPH\, FXB 301 \nSpeaker: Parker Knight\, PhD Candidate\, Harvard TH Chan School of Public Health \nSeminar Website.
URL:https://ds.dfci.harvard.edu/event/data-integration-and-time-informed-methods-for-the-electronic-health-record/
CATEGORIES:Seminar
ATTACH;FMTTYPE=image/jpeg:https://ds.dfci.harvard.edu/wp-content/uploads/2026/01/feb5-colloquium.jpg
END:VEVENT
BEGIN:VEVENT
DTSTART;TZID=America/New_York:20260212T160000
DTEND;TZID=America/New_York:20260212T170000
DTSTAMP:20260606T230348
CREATED:20260206T123005Z
LAST-MODIFIED:20260206T123005Z
UID:6833-1770912000-1770915600@ds.dfci.harvard.edu
SUMMARY:Efficient Estimation of Causal Effects Under Two-Phase Sampling with Error-Prone Outcome and Treatment Measurements
DESCRIPTION:HSPH Biostatistics and DFCI Data Science Colloquium \nHSPH\, FXB 301\nSpeaker: Keith Barnatchez\, Harvard TH Chan School of Public Health \nhttps://hsph.harvard.edu/department/biostatistics/seminars-events/colloquium-seminar-series/
URL:https://ds.dfci.harvard.edu/event/efficient-estimation-of-causal-effects-under-two-phase-sampling-with-error-prone-outcome-and-treatment-measurements/
CATEGORIES:Seminar
ATTACH;FMTTYPE=image/jpeg:https://ds.dfci.harvard.edu/wp-content/uploads/2026/02/keith-e1770380977292.jpeg
END:VEVENT
BEGIN:VEVENT
DTSTART;TZID=America/New_York:20260219T160000
DTEND;TZID=America/New_York:20260219T170000
DTSTAMP:20260606T230348
CREATED:20260213T170557Z
LAST-MODIFIED:20260213T170557Z
UID:6849-1771516800-1771520400@ds.dfci.harvard.edu
SUMMARY:Chiseling: Powerful and Valid Subgroup Selection via Interactive Machine Learning
DESCRIPTION:HSPH Biostatistics and DFCI Data Science Colloquium\nHSPH\, FXB 301 \nNathan Cheng\, PhD Student\, Harvard TH Chan School of Public Health\nhttps://hsph.harvard.edu/department/biostatistics/seminars-events/colloquium-seminar-series/
URL:https://ds.dfci.harvard.edu/event/chiseling-powerful-and-valid-subgroup-selection-via-interactive-machine-learning/
CATEGORIES:Seminar
ATTACH;FMTTYPE=image/jpeg:https://ds.dfci.harvard.edu/wp-content/uploads/2026/02/nathancheng-e1771002303814.jpeg
END:VEVENT
BEGIN:VEVENT
DTSTART;TZID=America/New_York:20260226T160000
DTEND;TZID=America/New_York:20260226T170000
DTSTAMP:20260606T230348
CREATED:20260220T161310Z
LAST-MODIFIED:20260220T161310Z
UID:6862-1772121600-1772125200@ds.dfci.harvard.edu
SUMMARY:Spectral Methods for Spatial and Multi-omics data
DESCRIPTION:HSPH Biostatistics and DFCI Data Science Colloquium \nThursday February 26 at 4:00pm\nHSPH\, FXB 301 \nPhillip Nicol\, PhD Student\, Harvard TH Chan School of Public Health\nhttps://hsph.harvard.edu/department/biostatistics/seminars-events/colloquium-seminar-series/
URL:https://ds.dfci.harvard.edu/event/spectral-methods-for-spatial-and-multi-omics-data/
CATEGORIES:Seminar
ATTACH;FMTTYPE=image/jpeg:https://ds.dfci.harvard.edu/wp-content/uploads/2026/02/phillip.jpg
END:VEVENT
BEGIN:VEVENT
DTSTART;TZID=America/New_York:20260305T160000
DTEND;TZID=America/New_York:20260305T170000
DTSTAMP:20260606T230348
CREATED:20260227T154510Z
LAST-MODIFIED:20260227T154510Z
UID:6868-1772726400-1772730000@ds.dfci.harvard.edu
SUMMARY:Integrating Pre-Trained Language Models into Topic Modeling
DESCRIPTION:HSPH Biostatistics and DFCI Data Science Colloquium\nThursday March 5 at 4:00pm\nHSPH\, FXB 301 \nTracy Ke\, PhD\, Associate Professor of Statistics\, Harvard University\nhttps://hsph.harvard.edu/department/biostatistics/seminars-events/colloquium-seminar-series/
URL:https://ds.dfci.harvard.edu/event/integrating-pre-trained-language-models-into-topic-modeling/
CATEGORIES:Seminar
ATTACH;FMTTYPE=image/jpeg:https://ds.dfci.harvard.edu/wp-content/uploads/2026/02/ke-tracy-profile-resized-e1772207070866.jpg
END:VEVENT
BEGIN:VEVENT
DTSTART;TZID=America/New_York:20260312T160000
DTEND;TZID=America/New_York:20260312T170000
DTSTAMP:20260606T230348
CREATED:20260306T145513Z
LAST-MODIFIED:20260306T145513Z
UID:6892-1773331200-1773334800@ds.dfci.harvard.edu
SUMMARY:Inference of Tissue Architecture across Space\, Time\, and Modality
DESCRIPTION:HSPH Biostatistics and DFCI Data Science Colloquium\nThursday March 12 at 4:00pm\nHSPH\, FXB 301 \nBenjamin Raphael\, PhD\, Professor of Computer Science at Princeton University \n\nColloquium Seminar Series
URL:https://ds.dfci.harvard.edu/event/inference-of-tissue-architecture-across-space-time-and-modality/
CATEGORIES:Seminar
ATTACH;FMTTYPE=image/jpeg:https://ds.dfci.harvard.edu/wp-content/uploads/2026/03/Ben-Raphael.jpg
END:VEVENT
BEGIN:VEVENT
DTSTART;TZID=America/New_York:20260326T160000
DTEND;TZID=America/New_York:20260326T170000
DTSTAMP:20260606T230348
CREATED:20260313T130013Z
LAST-MODIFIED:20260327T170931Z
UID:6918-1774540800-1774544400@ds.dfci.harvard.edu
SUMMARY:An Example to Illustrate Randomized Trial Estimands and Estimators
DESCRIPTION:HSPH Biostatistics and DFCI Data Science Colloquium\nThursday March 26 at 4:00pm\nHSPH\, FXB 301 \nLinda Harrison\, PhD\, Research Scientist\, Department of Biostatistics\, Harvard T.H. Chan School of Public Health \n\nColloquium Seminar Series
URL:https://ds.dfci.harvard.edu/event/an-example-to-illustrate-randomized-trial-estimands-and-estimators/
CATEGORIES:Seminar
ATTACH;FMTTYPE=image/jpeg:https://ds.dfci.harvard.edu/wp-content/uploads/2026/03/Linda_Harrison_photo-e1773406777794.jpg
END:VEVENT
BEGIN:VEVENT
DTSTART;TZID=America/New_York:20260327T130000
DTEND;TZID=America/New_York:20260327T140000
DTSTAMP:20260606T230348
CREATED:20260319T132146Z
LAST-MODIFIED:20260320T112114Z
UID:6928-1774616400-1774620000@ds.dfci.harvard.edu
SUMMARY:An Alternative Estimator to the Cox Hazard Ratio
DESCRIPTION:Data Science Seminar \nFriday\, March 27\, 1:00 PM ET\nCenter for Life Sciences Building\, 11th floor\, room 11081\nAlso will be streamed on Zoom \nStella Karuri\, PhD\nConsulting Statistician \nZoom link: https://bit.ly/DSSeminarMar27
URL:https://ds.dfci.harvard.edu/event/an-alternative-estimator-to-the-cox-hazard-ratio/
CATEGORIES:Seminar
ATTACH;FMTTYPE=image/png:https://ds.dfci.harvard.edu/wp-content/uploads/2020/09/10221_Facebook_360x360.png
END:VEVENT
END:VCALENDAR