macho_info.hpp 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319
  1. // Copyright 2014 Renato Tegon Forti, Antony Polukhin.
  2. // Copyright Antony Polukhin, 2015-2025.
  3. //
  4. // Distributed under the Boost Software License, Version 1.0.
  5. // (See accompanying file LICENSE_1_0.txt
  6. // or copy at http://www.boost.org/LICENSE_1_0.txt)
  7. #ifndef BOOST_DLL_DETAIL_MACHO_INFO_HPP
  8. #define BOOST_DLL_DETAIL_MACHO_INFO_HPP
  9. #include <boost/dll/config.hpp>
  10. #ifdef BOOST_HAS_PRAGMA_ONCE
  11. # pragma once
  12. #endif
  13. #include <algorithm>
  14. #include <cstdint>
  15. #include <fstream>
  16. #include <string> // for std::getline
  17. #include <vector>
  18. namespace boost { namespace dll { namespace detail {
  // Scalar aliases named after the types used by the Mach-O structures below.
  // The structures in this file are filled by copying bytes straight from the file,
  // so every field must have an exact width. Plain `int` is only guaranteed to be at
  // least 16 bits wide, hence the fixed-width 32-bit signed type.
  using integer_t = std::int32_t;
  using vm_prot_t = std::int32_t;      // type of the VM protection fields (maxprot/initprot)
  using cpu_type_t = integer_t;        // type of the header's `cputype` field
  using cpu_subtype_t = integer_t;     // type of the header's `cpusubtype` field
  23. template <class AddressOffsetT>
  24. struct mach_header_template {
  25. std::uint32_t magic;
  26. cpu_type_t cputype;
  27. cpu_subtype_t cpusubtype;
  28. std::uint32_t filetype;
  29. std::uint32_t ncmds;
  30. std::uint32_t sizeofcmds;
  31. std::uint32_t flags[sizeof(AddressOffsetT) / sizeof(std::uint32_t)]; // Flags and reserved
  32. };
  33. using mach_header_32_ = mach_header_template<std::uint32_t>;
  34. using mach_header_64_ = mach_header_template<std::uint64_t>;
  // Leading part shared by all load commands. macho_info::command_finder() reads just
  // this prefix to decide whether a command is interesting and how far to skip.
  struct load_command_ {
      // Type of command; compared against the load_command_types constants.
      std::uint32_t cmd;
      // Byte distance from the start of this command to the start of the next one.
      std::uint32_t cmdsize;
  };
  // Values that may appear in the `cmd` field of a load command.
  struct load_command_types {
      /*
       * After MacOS X 10.1, whenever a new load command is added that the dynamic
       * linker must understand for the image to execute properly, the LC_REQ_DYLD
       * bit is or'ed into that load command's constant. If the dynamic linker sees
       * such a load command that it does not understand, it issues an
       * "unknown load command required for execution" error and refuses to use the
       * image. Other load commands without this bit that are not understood are
       * simply ignored.
       */
      static constexpr std::uint32_t LC_REQ_DYLD_ = 0x80000000;

      static constexpr std::uint32_t LC_SEGMENT_        = 0x01; // segment of this file to be mapped
      static constexpr std::uint32_t LC_SYMTAB_         = 0x02; // link-edit stab symbol table info
      static constexpr std::uint32_t LC_SYMSEG_         = 0x03; // link-edit gdb symbol table info (obsolete)
      static constexpr std::uint32_t LC_THREAD_         = 0x04; // thread
      static constexpr std::uint32_t LC_UNIXTHREAD_     = 0x05; // unix thread (includes a stack)
      static constexpr std::uint32_t LC_LOADFVMLIB_     = 0x06; // load a specified fixed VM shared library
      static constexpr std::uint32_t LC_IDFVMLIB_       = 0x07; // fixed VM shared library identification
      static constexpr std::uint32_t LC_IDENT_          = 0x08; // object identification info (obsolete)
      static constexpr std::uint32_t LC_FVMFILE_        = 0x09; // fixed VM file inclusion (internal use)
      static constexpr std::uint32_t LC_PREPAGE_        = 0x0a; // prepage command (internal use)
      static constexpr std::uint32_t LC_DYSYMTAB_       = 0x0b; // dynamic link-edit symbol table info
      static constexpr std::uint32_t LC_LOAD_DYLIB_     = 0x0c; // load a dynamically linked shared library
      static constexpr std::uint32_t LC_ID_DYLIB_       = 0x0d; // dynamically linked shared lib ident
      static constexpr std::uint32_t LC_LOAD_DYLINKER_  = 0x0e; // load a dynamic linker
      static constexpr std::uint32_t LC_ID_DYLINKER_    = 0x0f; // dynamic linker identification
      static constexpr std::uint32_t LC_PREBOUND_DYLIB_ = 0x10; // modules prebound for a dynamically linked shared library
      static constexpr std::uint32_t LC_ROUTINES_       = 0x11; // image routines
      static constexpr std::uint32_t LC_SUB_FRAMEWORK_  = 0x12; // sub framework
      static constexpr std::uint32_t LC_SUB_UMBRELLA_   = 0x13; // sub umbrella
      static constexpr std::uint32_t LC_SUB_CLIENT_     = 0x14; // sub client
      static constexpr std::uint32_t LC_SUB_LIBRARY_    = 0x15; // sub library
      static constexpr std::uint32_t LC_TWOLEVEL_HINTS_ = 0x16; // two-level namespace lookup hints
      static constexpr std::uint32_t LC_PREBIND_CKSUM_  = 0x17; // prebind checksum

      // Load a dynamically linked shared library that is allowed to be missing
      // (all symbols are weak imported).
      static constexpr std::uint32_t LC_LOAD_WEAK_DYLIB_ = (0x18 | LC_REQ_DYLD_);

      static constexpr std::uint32_t LC_SEGMENT_64_         = 0x19;                  // 64-bit segment of this file to be mapped
      static constexpr std::uint32_t LC_ROUTINES_64_        = 0x1a;                  // 64-bit image routines
      static constexpr std::uint32_t LC_UUID_               = 0x1b;                  // the uuid
      static constexpr std::uint32_t LC_RPATH_              = (0x1c | LC_REQ_DYLD_); // runpath additions
      static constexpr std::uint32_t LC_CODE_SIGNATURE_     = 0x1d;                  // location of code signature
      static constexpr std::uint32_t LC_SEGMENT_SPLIT_INFO_ = 0x1e;                  // location of info to split segments
      static constexpr std::uint32_t LC_REEXPORT_DYLIB_     = (0x1f | LC_REQ_DYLD_); // load and re-export dylib
      static constexpr std::uint32_t LC_LAZY_LOAD_DYLIB_    = 0x20;                  // delay load of dylib until first use
      static constexpr std::uint32_t LC_ENCRYPTION_INFO_    = 0x21;                  // encrypted segment information
      static constexpr std::uint32_t LC_DYLD_INFO_          = 0x22;                  // compressed dyld information
      static constexpr std::uint32_t LC_DYLD_INFO_ONLY_     = (0x22 | LC_REQ_DYLD_); // compressed dyld information only
  };
  90. template <class AddressOffsetT>
  91. struct segment_command_template {
  92. std::uint32_t cmd; /* LC_SEGMENT_ */
  93. std::uint32_t cmdsize; /* includes sizeof section structs */
  94. char segname[16]; /* segment name */
  95. AddressOffsetT vmaddr; /* memory address of this segment */
  96. AddressOffsetT vmsize; /* memory size of this segment */
  97. AddressOffsetT fileoff; /* file offset of this segment */
  98. AddressOffsetT filesize; /* amount to map from the file */
  99. vm_prot_t maxprot; /* maximum VM protection */
  100. vm_prot_t initprot; /* initial VM protection */
  101. std::uint32_t nsects; /* number of sections in segment */
  102. std::uint32_t flags; /* flags */
  103. };
  104. using segment_command_32_ = segment_command_template<std::uint32_t>;
  105. using segment_command_64_ = segment_command_template<std::uint64_t>;
  // Section record; `nsects` of these follow each segment command in the file.
  //
  // `WordT` is std::uint32_t for the 32-bit layout and std::uint64_t for the 64-bit
  // one. It sets the width of `addr`/`size` and the length of the trailing `reserved`
  // array (two words for 32-bit, three for 64-bit).
  //
  // Note: `segname` is laid out directly after `sectname`. The section-name reader in
  // macho_info depends on that adjacency to terminate a full 16-character `sectname`.
  template <typename WordT>
  struct section_template {
      char sectname[16];       // name of this section
      char segname[16];        // segment this section goes in
      WordT addr;              // memory address of this section
      WordT size;              // size in bytes of this section
      std::uint32_t offset;    // file offset of this section
      std::uint32_t align;     // section alignment (power of 2)
      std::uint32_t reloff;    // file offset of relocation entries
      std::uint32_t nreloc;    // number of relocation entries
      std::uint32_t flags;     // flags (section type and attributes)
      std::uint32_t reserved[1 + sizeof(WordT) / sizeof(std::uint32_t)];
  };

  using section_32_ = section_template<std::uint32_t>;
  using section_64_ = section_template<std::uint64_t>;
  // Symbol-table load command (LC_SYMTAB_). It tells the reader where the array of
  // symbol entries and the string table holding their names are located in the file.
  struct symtab_command_ {
      std::uint32_t cmd;       // LC_SYMTAB_
      std::uint32_t cmdsize;   // sizeof(struct symtab_command)
      std::uint32_t symoff;    // symbol table offset
      std::uint32_t nsyms;     // number of symbol table entries
      std::uint32_t stroff;    // string table offset
      std::uint32_t strsize;   // string table size in bytes
  };
  // One entry of the symbol table referenced by symtab_command_.
  //
  // `WordT` is std::uint32_t for the 32-bit layout and std::uint64_t for the 64-bit
  // one; only the width of `n_value` depends on it.
  template <typename WordT>
  struct nlist_template {
      std::uint32_t n_strx;    // offset of the name inside the string table; 0 is treated as "no name"
      std::uint8_t n_type;     // type bits; macho_info tests the bits under mask 0x0e
      std::uint8_t n_sect;     // 1-based section ordinal; 0 is treated as "no section"
      std::uint16_t n_desc;
      WordT n_value;
  };

  using nlist_32_ = nlist_template<std::uint32_t>;
  using nlist_64_ = nlist_template<std::uint64_t>;
  139. template <class AddressOffsetT>
  140. class macho_info {
  141. using header_t = boost::dll::detail::mach_header_template<AddressOffsetT>;
  142. using load_command_t = boost::dll::detail::load_command_;
  143. using segment_t = boost::dll::detail::segment_command_template<AddressOffsetT>;
  144. using section_t = boost::dll::detail::section_template<AddressOffsetT>;
  145. using symbol_header_t = boost::dll::detail::symtab_command_;
  146. using nlist_t = boost::dll::detail::nlist_template<AddressOffsetT>;
  147. static constexpr std::uint32_t SEGMENT_CMD_NUMBER = (sizeof(AddressOffsetT) > 4 ? load_command_types::LC_SEGMENT_64_ : load_command_types::LC_SEGMENT_);
  148. public:
  149. static bool parsing_supported(std::ifstream& fs) {
  150. static const uint32_t magic_bytes = (sizeof(AddressOffsetT) <= sizeof(uint32_t) ? 0xfeedface : 0xfeedfacf);
  151. uint32_t magic;
  152. fs.seekg(0);
  153. fs.read(reinterpret_cast<char*>(&magic), sizeof(magic));
  154. return (magic_bytes == magic);
  155. }
  156. private:
  157. template <class T>
  158. static void read_raw(std::ifstream& fs, T& value, std::size_t size = sizeof(T)) {
  159. fs.read(reinterpret_cast<char*>(&value), size);
  160. }
  161. template <class F>
  162. static void command_finder(std::ifstream& fs, uint32_t cmd_num, F callback_f) {
  163. const header_t h = header(fs);
  164. load_command_t command;
  165. fs.seekg(sizeof(header_t));
  166. for (std::size_t i = 0; i < h.ncmds; ++i) {
  167. const std::ifstream::pos_type pos = fs.tellg();
  168. read_raw(fs, command);
  169. if (command.cmd != cmd_num) {
  170. fs.seekg(pos + static_cast<std::ifstream::pos_type>(command.cmdsize));
  171. continue;
  172. }
  173. fs.seekg(pos);
  174. callback_f(fs);
  175. fs.seekg(pos + static_cast<std::ifstream::pos_type>(command.cmdsize));
  176. }
  177. }
  178. struct section_names_gather {
  179. std::vector<std::string>& ret;
  180. void operator()(std::ifstream& fs) const {
  181. segment_t segment;
  182. read_raw(fs, segment);
  183. section_t section;
  184. ret.reserve(ret.size() + segment.nsects);
  185. for (std::size_t j = 0; j < segment.nsects; ++j) {
  186. read_raw(fs, section);
  187. // `segname` goes right after the `sectname`.
  188. // Forcing `sectname` to end on '\0'
  189. section.segname[0] = '\0';
  190. ret.push_back(section.sectname);
  191. if (ret.back().empty()) {
  192. ret.pop_back(); // Do not show empty names
  193. }
  194. }
  195. }
  196. };
  197. struct symbol_names_gather {
  198. std::vector<std::string>& ret;
  199. std::size_t section_index;
  200. void operator()(std::ifstream& fs) const {
  201. symbol_header_t symbh;
  202. read_raw(fs, symbh);
  203. ret.reserve(ret.size() + symbh.nsyms);
  204. nlist_t symbol;
  205. std::string symbol_name;
  206. for (std::size_t j = 0; j < symbh.nsyms; ++j) {
  207. fs.seekg(symbh.symoff + j * sizeof(nlist_t));
  208. read_raw(fs, symbol);
  209. if (!symbol.n_strx) {
  210. continue; // Symbol has no name
  211. }
  212. if ((symbol.n_type & 0x0e) != 0xe || !symbol.n_sect) {
  213. continue; // Symbol has no section
  214. }
  215. if (section_index && section_index != symbol.n_sect) {
  216. continue; // Not in the required section
  217. }
  218. fs.seekg(symbh.stroff + symbol.n_strx);
  219. std::getline(fs, symbol_name, '\0');
  220. if (symbol_name.empty()) {
  221. continue;
  222. }
  223. if (symbol_name[0] == '_') {
  224. // Linker adds additional '_' symbol. Could not find official docs for that case.
  225. ret.push_back(symbol_name.c_str() + 1);
  226. } else {
  227. ret.push_back(symbol_name);
  228. }
  229. }
  230. }
  231. };
  232. public:
  233. static std::vector<std::string> sections(std::ifstream& fs) {
  234. std::vector<std::string> ret;
  235. section_names_gather f = { ret };
  236. command_finder(fs, SEGMENT_CMD_NUMBER, f);
  237. return ret;
  238. }
  239. private:
  240. static header_t header(std::ifstream& fs) {
  241. header_t h;
  242. fs.seekg(0);
  243. read_raw(fs, h);
  244. return h;
  245. }
  246. public:
  247. static std::vector<std::string> symbols(std::ifstream& fs) {
  248. std::vector<std::string> ret;
  249. symbol_names_gather f = { ret, 0 };
  250. command_finder(fs, load_command_types::LC_SYMTAB_, f);
  251. return ret;
  252. }
  253. static std::vector<std::string> symbols(std::ifstream& fs, const char* section_name) {
  254. // Not very optimal solution
  255. std::vector<std::string> ret = sections(fs);
  256. std::vector<std::string>::iterator it = std::find(ret.begin(), ret.end(), section_name);
  257. if (it == ret.end()) {
  258. // No section with such name
  259. ret.clear();
  260. return ret;
  261. }
  262. // section indexes start from 1
  263. symbol_names_gather f = { ret, static_cast<std::size_t>(1 + (it - ret.begin())) };
  264. ret.clear();
  265. command_finder(fs, load_command_types::LC_SYMTAB_, f);
  266. return ret;
  267. }
  268. };
  269. using macho_info32 = macho_info<std::uint32_t>;
  270. using macho_info64 = macho_info<std::uint64_t>;
  271. }}} // namespace boost::dll::detail
  272. #endif // BOOST_DLL_DETAIL_MACHO_INFO_HPP