% Yuan et al. (2018), Frontiers in Plant Science 9: GWAS of seed oil composition
% and protein content in upland cotton.
% Notes for maintainers:
%   - Only the words that must keep their capitals under sentence-casing styles
%     are brace-protected in the title (genus name and the authority "L.").
%   - LaTeX special characters in the abstract (percent sign, underscores in
%     gene identifiers) are escaped, and numeric ranges use "--", so the field
%     can be typeset safely by styles that print abstracts.
@article{10.3389/fpls.2018.01359,
  author   = {Yuan, Yanchao and Wang, Xianlin and Wang, Liyuan and Xing, Huixian
              and Wang, Qingkang and Saeed, Muhammad and Tao, Jincai and Feng, Wei
              and Zhang, Guihua and Song, Xian-Liang and Sun, Xue-Zhen},
  title    = {Genome-Wide Association Study Identifies Candidate Genes Related to
              Seed Oil Composition and Protein Content in {Gossypium} hirsutum {L.}},
  journal  = {Frontiers in Plant Science},
  volume   = {9},
  year     = {2018},
  doi      = {10.3389/fpls.2018.01359},
  url      = {https://www.frontiersin.org/articles/10.3389/fpls.2018.01359},
  issn     = {1664-462X},
  abstract = {Cotton (Gossypium spp.) is a leading natural fiber crop and an
              important source of vegetable protein and oil for humans and
              livestock. To investigate the genetic architecture of seed nutrients
              in upland cotton, a genome-wide association study (GWAS) was
              conducted in a panel of 196 germplasm resources under three
              environments using a CottonSNP80K chip of 77,774 loci. Relatively
              high genetic diversity (average gene diversity being 0.331) and
              phenotypic variation (coefficient of variation, CV, exceeding
              3.9\%) were detected in this panel. Correlation analysis revealed
              that the well-documented negative association between seed protein
              (PR) and oil may be to some extent attributable to the negative
              correlation between oleic acid (OA) and PR. Linkage disequilibrium
              (LD) was unevenly distributed among chromosomes and subgenomes. It
              ranged from 0.10--0.20 Mb (Chr19) to 5.65--5.75 Mb (Chr25) among the
              chromosomes and the range of Dt-subgenomes LD decay distances was
              smaller than At-subgenomes. This panel was divided into two
              subpopulations based on the information of 41,815 polymorphic
              single-nucleotide polymorphism (SNP) markers. The mixed linear model
              considering both Q-matrix and K-matrix [MLM(Q+K)] was employed to
              estimate the association between the SNP markers and the seed
              nutrients, considering the false positives caused by population
              structure and the kinship. A total of 47 SNP markers and 28
              candidate quantitative trait loci (QTLs) regions were found to be
              significantly associated with seven cottonseed nutrients, including
              protein, total fatty acid, and five main fatty acid compositions. In
              addition, the candidate genes in these regions were analyzed, which
              included three genes, Gh\_D12G1161, Gh\_D12G1162, and Gh\_D12G1165
              that were most likely involved in the control of cottonseed protein
              concentration. These results improved our understanding of the
              genetic control of cottonseed nutrients and provided potential
              molecular tools to develop cultivars with high protein and improved
              fatty acid compositions in cotton breeding programs through
              marker-assisted selection.},
}