elf_info.hpp 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345
  1. // Copyright 2014 Renato Tegon Forti, Antony Polukhin.
  2. // Copyright Antony Polukhin, 2015-2025.
  3. //
  4. // Distributed under the Boost Software License, Version 1.0.
  5. // (See accompanying file LICENSE_1_0.txt
  6. // or copy at http://www.boost.org/LICENSE_1_0.txt)
  7. #ifndef BOOST_DLL_DETAIL_POSIX_ELF_INFO_HPP
  8. #define BOOST_DLL_DETAIL_POSIX_ELF_INFO_HPP
  9. #include <boost/dll/config.hpp>
  10. #ifdef BOOST_HAS_PRAGMA_ONCE
  11. # pragma once
  12. #endif
  13. #include <cstdint>
  14. #include <cstring>
  15. #include <fstream>
  16. #include <vector>
  17. #include <boost/throw_exception.hpp>
  18. namespace boost { namespace dll { namespace detail {
  // In-file layout of the ELF header. Instances are filled by reading raw bytes
  // from the start of the file, so member order and types must not be changed.
  //
  // OffsetT is the integer type used for addresses and file offsets:
  // std::uint32_t for the 32-bit flavour, std::uint64_t for the 64-bit one.
  template <class OffsetT>
  struct Elf_Ehdr_template {
      // Magic number and other identification info.
      unsigned char e_ident[16];
      // Object file type.
      std::uint16_t e_type;
      // Architecture.
      std::uint16_t e_machine;
      // Object file version.
      std::uint32_t e_version;
      // Virtual address of the entry point.
      OffsetT e_entry;
      // File offset of the program header table.
      OffsetT e_phoff;
      // File offset of the section header table.
      OffsetT e_shoff;
      // Processor-specific flags.
      std::uint32_t e_flags;
      // Size of this ELF header, in bytes.
      std::uint16_t e_ehsize;
      // Size of one program header table entry.
      std::uint16_t e_phentsize;
      // Number of program header table entries.
      std::uint16_t e_phnum;
      // Size of one section header table entry.
      std::uint16_t e_shentsize;
      // Number of section header table entries.
      std::uint16_t e_shnum;
      // Index of the section holding the section name strings.
      std::uint16_t e_shstrndx;
  };

  using Elf32_Ehdr_ = Elf_Ehdr_template<std::uint32_t>;
  using Elf64_Ehdr_ = Elf_Ehdr_template<std::uint64_t>;
  // In-file layout of one section header table entry. Instances are filled by
  // reading raw bytes from the file, so member order and types must not be changed.
  //
  // OffsetT is the integer type used for addresses, offsets and sizes:
  // std::uint32_t for the 32-bit flavour, std::uint64_t for the 64-bit one.
  template <class OffsetT>
  struct Elf_Shdr_template {
      // Section name, as an index into the section name string table.
      std::uint32_t sh_name;
      // Section type.
      std::uint32_t sh_type;
      // Section flags.
      OffsetT sh_flags;
      // Virtual address of the section at execution.
      OffsetT sh_addr;
      // File offset of the section contents.
      OffsetT sh_offset;
      // Size of the section, in bytes.
      OffsetT sh_size;
      // Link to another section.
      std::uint32_t sh_link;
      // Additional section information.
      std::uint32_t sh_info;
      // Section alignment.
      OffsetT sh_addralign;
      // Size of one entry if the section holds a table.
      OffsetT sh_entsize;
  };

  using Elf32_Shdr_ = Elf_Shdr_template<std::uint32_t>;
  using Elf64_Shdr_ = Elf_Shdr_template<std::uint64_t>;
  // In-file layout of one symbol table entry.
  //
  // Only the two explicit specializations below are defined, because the
  // 32-bit and 64-bit flavours order their fields differently. Instances are
  // filled by reading raw bytes from the file, so member order and types must
  // not be changed.
  template <class AddressOffsetT>
  struct Elf_Sym_template;

  // 32-bit flavour: value and size come right after the name index.
  template <>
  struct Elf_Sym_template<std::uint32_t> {
      using AddressOffsetT = std::uint32_t;

      // Symbol name, as an index into the string table.
      std::uint32_t st_name;
      // Symbol value.
      AddressOffsetT st_value;
      // Symbol size.
      AddressOffsetT st_size;
      // Symbol type and binding.
      unsigned char st_info;
      // Symbol visibility.
      unsigned char st_other;
      // Index of the section the symbol belongs to.
      std::uint16_t st_shndx;
  };

  // 64-bit flavour: the small fields come first, value and size go last.
  template <>
  struct Elf_Sym_template<std::uint64_t> {
      using AddressOffsetT = std::uint64_t;

      // Symbol name, as an index into the string table.
      std::uint32_t st_name;
      // Symbol type and binding.
      unsigned char st_info;
      // Symbol visibility.
      unsigned char st_other;
      // Index of the section the symbol belongs to.
      std::uint16_t st_shndx;
      // Symbol value.
      AddressOffsetT st_value;
      // Symbol size.
      AddressOffsetT st_size;
  };

  using Elf32_Sym_ = Elf_Sym_template<std::uint32_t>;
  using Elf64_Sym_ = Elf_Sym_template<std::uint64_t>;
  77. template <class AddressOffsetT>
  78. class elf_info {
  79. using header_t = boost::dll::detail::Elf_Ehdr_template<AddressOffsetT>;
  80. using section_t= boost::dll::detail::Elf_Shdr_template<AddressOffsetT>;
  81. using symbol_t = boost::dll::detail::Elf_Sym_template<AddressOffsetT>;
  82. static constexpr std::uint32_t SHT_SYMTAB_ = 2;
  83. static constexpr std::uint32_t SHT_STRTAB_ = 3;
  84. static constexpr std::uint32_t SHT_DYNSYM_ = 11;
  85. static constexpr unsigned char STB_LOCAL_ = 0; /* Local symbol */
  86. static constexpr unsigned char STB_GLOBAL_ = 1; /* Global symbol */
  87. static constexpr unsigned char STB_WEAK_ = 2; /* Weak symbol */
  88. /* Symbol visibility specification encoded in the st_other field. */
  89. static constexpr unsigned char STV_DEFAULT_ = 0; /* Default symbol visibility rules */
  90. static constexpr unsigned char STV_INTERNAL_ = 1; /* Processor specific hidden class */
  91. static constexpr unsigned char STV_HIDDEN_ = 2; /* Sym unavailable in other modules */
  92. static constexpr unsigned char STV_PROTECTED_ = 3; /* Not preemptible, not exported */
  93. public:
  94. static bool parsing_supported(std::ifstream& fs) {
  95. const unsigned char magic_bytes[5] = {
  96. 0x7f, 'E', 'L', 'F', sizeof(std::uint32_t) == sizeof(AddressOffsetT) ? 1 : 2
  97. };
  98. unsigned char ch;
  99. fs.seekg(0);
  100. for (std::size_t i = 0; i < sizeof(magic_bytes); ++i) {
  101. fs >> ch;
  102. if (ch != magic_bytes[i]) {
  103. return false;
  104. }
  105. }
  106. return true;
  107. }
  108. static std::vector<std::string> sections(std::ifstream& fs) {
  109. std::vector<std::string> ret;
  110. std::vector<char> names;
  111. sections_names_raw(fs, names);
  112. const char* name_begin = &names[0];
  113. const char* const name_end = name_begin + names.size();
  114. ret.reserve(header(fs).e_shnum);
  115. do {
  116. if (*name_begin) {
  117. ret.push_back(name_begin);
  118. name_begin += ret.back().size() + 1;
  119. } else {
  120. ++name_begin;
  121. }
  122. } while (name_begin != name_end);
  123. return ret;
  124. }
  125. private:
  126. template <class Integer>
  127. static void checked_seekg(std::ifstream& fs, Integer pos) {
  128. if (pos < 0) {
  129. boost::throw_exception(std::runtime_error("Integral underflow while getting info from ELF file"));
  130. }
  131. if (static_cast<std::streamoff>(pos) < 0) {
  132. boost::throw_exception(std::runtime_error("Integral overflow while getting info from ELF file"));
  133. }
  134. // `seekg` will throw exceptions on an attempt to get outsize of the
  135. // file size.
  136. fs.seekg(static_cast<std::streamoff>(pos));
  137. }
  138. template <class T>
  139. static void read_raw(std::ifstream& fs, T& value, std::size_t size = sizeof(T)) {
  140. fs.read(reinterpret_cast<char*>(&value), size);
  141. }
  142. static header_t header(std::ifstream& fs) {
  143. header_t elf;
  144. fs.seekg(0);
  145. read_raw(fs, elf);
  146. return elf;
  147. }
  148. static void sections_names_raw(std::ifstream& fs, std::vector<char>& sections) {
  149. const header_t elf = header(fs);
  150. section_t section_names_section;
  151. checked_seekg(fs, elf.e_shoff + elf.e_shstrndx * sizeof(section_t));
  152. read_raw(fs, section_names_section);
  153. sections.resize(static_cast<std::size_t>(section_names_section.sh_size) + 1, '\0');
  154. checked_seekg(fs, section_names_section.sh_offset);
  155. read_raw(fs, sections[0], static_cast<std::size_t>(section_names_section.sh_size));
  156. }
  157. static void symbols_text(std::ifstream& fs, std::vector<symbol_t>& symbols, std::vector<char>& text) {
  158. std::vector<char> names;
  159. sections_names_raw(fs, names);
  160. symbols_text(fs, symbols, text, names);
  161. }
  162. static void symbols_text(std::ifstream& fs, std::vector<symbol_t>& symbols, std::vector<char>& text, const std::vector<char>& names) {
  163. const header_t elf = header(fs);
  164. checked_seekg(fs, elf.e_shoff);
  165. // ".dynsym" section may not have info on symbols that could be used while self loading an executable,
  166. // so we prefer ".symtab" section.
  167. AddressOffsetT symtab_size = 0;
  168. AddressOffsetT symtab_offset = 0;
  169. AddressOffsetT strtab_size = 0;
  170. AddressOffsetT strtab_offset = 0;
  171. AddressOffsetT dynsym_size = 0;
  172. AddressOffsetT dynsym_offset = 0;
  173. AddressOffsetT dynstr_size = 0;
  174. AddressOffsetT dynstr_offset = 0;
  175. for (std::size_t i = 0; i < elf.e_shnum; ++i) {
  176. section_t section;
  177. read_raw(fs, section);
  178. if (section.sh_name >= names.size()) {
  179. continue;
  180. }
  181. const char* name = &names[section.sh_name];
  182. if (section.sh_type == SHT_SYMTAB_ && !std::strcmp(name, ".symtab")) {
  183. symtab_size = section.sh_size;
  184. symtab_offset = section.sh_offset;
  185. } else if (section.sh_type == SHT_STRTAB_) {
  186. if (!std::strcmp(name, ".dynstr")) {
  187. dynstr_size = section.sh_size;
  188. dynstr_offset = section.sh_offset;
  189. } else if (!std::strcmp(name, ".strtab")) {
  190. strtab_size = section.sh_size;
  191. strtab_offset = section.sh_offset;
  192. }
  193. } else if (section.sh_type == SHT_DYNSYM_ && !std::strcmp(name, ".dynsym")) {
  194. dynsym_size = section.sh_size;
  195. dynsym_offset = section.sh_offset;
  196. }
  197. }
  198. if (!symtab_size || !strtab_size) {
  199. // ".symtab" stripped from the binary and we have to fallback to ".dynsym"
  200. symtab_size = dynsym_size;
  201. symtab_offset = dynsym_offset;
  202. strtab_size = dynstr_size;
  203. strtab_offset = dynstr_offset;
  204. }
  205. if (!symtab_size || !strtab_size) {
  206. return;
  207. }
  208. text.resize(static_cast<std::size_t>(strtab_size) + 1, '\0');
  209. checked_seekg(fs, strtab_offset);
  210. read_raw(fs, text[0], static_cast<std::size_t>(strtab_size));
  211. symbols.resize(static_cast<std::size_t>(symtab_size / sizeof(symbol_t)));
  212. checked_seekg(fs, symtab_offset);
  213. read_raw(fs, symbols[0], static_cast<std::size_t>(symtab_size - (symtab_size % sizeof(symbol_t))) );
  214. }
  215. static bool is_visible(const symbol_t& sym) noexcept {
  216. const unsigned char visibility = (sym.st_other & 0x03);
  217. // `(sym.st_info >> 4) != STB_LOCAL_ && !!sym.st_size` check also workarounds the
  218. // GCC's issue https://sourceware.org/bugzilla/show_bug.cgi?id=13621
  219. return (visibility == STV_DEFAULT_ || visibility == STV_PROTECTED_)
  220. && (sym.st_info >> 4) != STB_LOCAL_ && !!sym.st_size;
  221. }
  222. public:
  223. static std::vector<std::string> symbols(std::ifstream& fs) {
  224. std::vector<std::string> ret;
  225. std::vector<symbol_t> symbols;
  226. std::vector<char> text;
  227. symbols_text(fs, symbols, text);
  228. ret.reserve(symbols.size());
  229. for (std::size_t i = 0; i < symbols.size(); ++i) {
  230. if (is_visible(symbols[i]) && symbols[i].st_name < text.size()) {
  231. ret.push_back(&text[symbols[i].st_name]);
  232. if (ret.back().empty()) {
  233. ret.pop_back(); // Do not show empty names
  234. }
  235. }
  236. }
  237. return ret;
  238. }
  239. static std::vector<std::string> symbols(std::ifstream& fs, const char* section_name) {
  240. std::vector<std::string> ret;
  241. std::size_t index = 0;
  242. std::size_t ptrs_in_section_count = 0;
  243. std::vector<char> names;
  244. sections_names_raw(fs, names);
  245. const header_t elf = header(fs);
  246. for (; index < elf.e_shnum; ++index) {
  247. section_t section;
  248. checked_seekg(fs, elf.e_shoff + index * sizeof(section_t));
  249. read_raw(fs, section);
  250. if (!std::strcmp(&names.at(section.sh_name), section_name)) {
  251. if (!section.sh_entsize) {
  252. section.sh_entsize = 1;
  253. }
  254. ptrs_in_section_count = static_cast<std::size_t>(section.sh_size / section.sh_entsize);
  255. break;
  256. }
  257. }
  258. std::vector<symbol_t> symbols;
  259. std::vector<char> text;
  260. symbols_text(fs, symbols, text, names);
  261. if (ptrs_in_section_count < symbols.size()) {
  262. ret.reserve(ptrs_in_section_count);
  263. } else {
  264. ret.reserve(symbols.size());
  265. }
  266. for (std::size_t i = 0; i < symbols.size(); ++i) {
  267. if (symbols[i].st_shndx == index && is_visible(symbols[i]) && symbols[i].st_name < text.size()) {
  268. ret.push_back(&text[symbols[i].st_name]);
  269. if (ret.back().empty()) {
  270. ret.pop_back(); // Do not show empty names
  271. }
  272. }
  273. }
  274. return ret;
  275. }
  276. };
  277. using elf_info32 = elf_info<std::uint32_t> ;
  278. using elf_info64 = elf_info<std::uint64_t>;
  279. }}} // namespace boost::dll::detail
  280. #endif // BOOST_DLL_DETAIL_POSIX_ELF_INFO_HPP