/*
 * Copyright (c) 2016-present The ZLMediaKit project authors. All Rights Reserved.
 *
 * This file is part of ZLMediaKit(https://github.com/ZLMediaKit/ZLMediaKit).
 *
 * Use of this source code is governed by MIT-like license that can be found in the
 * LICENSE file in the root of the source tree. All contributing project authors
 * may be found in the AUTHORS file in the root of the source tree.
 */

#ifndef ZLMEDIAKIT_SHELLPARSER_H
#define ZLMEDIAKIT_SHELLPARSER_H

#include <cctype>
#include <cstddef>
#include <string>
#include <utility>
#include <vector>

// Shell-like command line parser.
// Features:
//  - Whitespace splitting (space, tab, newline)
//  - Quotes: single ('...') and double ("...")
//  - An empty quoted string ('' or "") yields an empty argument
//  - Backslash escapes outside quotes and inside double quotes:
//      * C-style escapes: \n \t \r \a \b \f \v, \xH / \xHH (hex) and
//        octal sequences that start with 0 (at most three digits in total)
//      * any other escaped character is taken literally (e.g. "\ " or "\$")
//  - In single quotes a backslash is literal, with one exception: a backslash
//    immediately followed by a newline is still removed (see next item)
//  - Line continuation: backslash followed by newline is dropped in every state
//  - make_argv() produces argv pointers backed by the parsed std::string storage
//
// Notes:
//  - This is NOT a full shell (no variable expansion, no globbing, no command substitution).
//  - Behavior aims to be practical and safe for exec* arguments building.

// Outcome of parse_shell_like().
struct ParseResult {
    ParseResult(bool ok, const char *err, size_t pos, std::vector<std::string> args)
        : ok(ok)
        , error_msg(err)
        , error_pos(pos)
        , args(std::move(args)) {}

    bool ok;                       // true when the whole input was parsed
    std::string error_msg;         // human readable reason; empty on success
    size_t error_pos = 0;          // index in input when error happens
    std::vector<std::string> args; // parsed arguments (empty on failure)
};

namespace detail {

// Characters that separate arguments outside of quotes.
inline bool is_space(char c) {
    return c == ' ' || c == '\t' || c == '\n';
}

// Returns true if it handled a line continuation (backslash + newline) at s[i];
// in that case i is advanced past both characters.
inline bool handle_line_continuation(const std::string &s, size_t &i) {
    if (i + 1 < s.size() && s[i] == '\\' && s[i + 1] == '\n') {
        i += 2; // consume both and do nothing
        return true;
    }
    return false;
}

inline bool hex_digit(char c) {
    // Cast first: passing a negative char to std::isxdigit is undefined.
    return std::isxdigit(static_cast<unsigned char>(c)) != 0;
}

// Numeric value of a hexadecimal digit; returns 0 for anything else.
inline int hex_val(char c) {
    if (c >= '0' && c <= '9') return c - '0';
    if (c >= 'a' && c <= 'f') return 10 + (c - 'a');
    if (c >= 'A' && c <= 'F') return 10 + (c - 'A');
    return 0;
}

// Parse the C-style escape whose first character (the one right after the
// backslash) is s[i].
//
// On success returns {true, decoded char} and advances i past the escape.
// On failure returns {false, '\0'} and leaves i untouched, so the caller can
// fall back to treating s[i] as a literal character.
inline std::pair<bool, char> c_style_escape(const std::string &s, size_t &i) {
    if (i >= s.size()) {
        return std::make_pair(false, '\0');
    }
    switch (s[i]) {
        case 'n': ++i; return std::make_pair(true, '\n');
        case 't': ++i; return std::make_pair(true, '\t');
        case 'r': ++i; return std::make_pair(true, '\r');
        case 'a': ++i; return std::make_pair(true, '\a');
        case 'b': ++i; return std::make_pair(true, '\b');
        case 'f': ++i; return std::make_pair(true, '\f');
        case 'v': ++i; return std::make_pair(true, '\v');
        case '\\': ++i; return std::make_pair(true, '\\');
        case '"': ++i; return std::make_pair(true, '"');
        case '\'': ++i; return std::make_pair(true, '\'');
        case '0': {
            // Octal sequence: up to 3 octal digits, the leading '0' included.
            int val = 0;
            int cnt = 0;
            while (i < s.size() && cnt < 3 && s[i] >= '0' && s[i] <= '7') {
                val = (val << 3) + (s[i] - '0');
                ++i;
                ++cnt;
            }
            return std::make_pair(true, static_cast<char>(val & 0xFF));
        }
        case 'x': {
            // Scan with a scratch index and commit only when at least one hex
            // digit follows; otherwise the caller would lose the 'x' (or read
            // past the end of the input) in its literal fallback.
            size_t j = i + 1;
            int val = 0;
            int cnt = 0;
            while (j < s.size() && cnt < 2 && hex_digit(s[j])) {
                val = (val << 4) + hex_val(s[j]);
                ++j;
                ++cnt;
            }
            if (cnt == 0) {
                return std::make_pair(false, '\0'); // not actually a hex escape
            }
            i = j;
            return std::make_pair(true, static_cast<char>(val & 0xFF));
        }
        default:
            return std::make_pair(false, '\0');
    }
}

} // namespace detail

// Split `input` into arguments using the shell-like rules described at the top
// of this file.
//
// Returns a ParseResult with ok == true and the arguments on success. On failure
// (trailing backslash, unterminated quote) ok is false, args is empty, and
// error_msg / error_pos describe the problem.
inline ParseResult parse_shell_like(const std::string &input) {
    using namespace detail;

    std::vector<std::string> args;
    std::string cur;
    // True once the current argument has started, even if it is still empty
    // (e.g. right after an opening quote). Needed so '' and "" are kept.
    bool has_token = false;

    enum class State { Normal, InSingle, InDouble };
    State st = State::Normal;

    size_t i = 0;
    const size_t N = input.size();

    while (i < N) {
        // line continuation check (backslash + newline) applies in all states
        if (handle_line_continuation(input, i)) continue;
        if (i >= N) break;

        char c = input[i];
        switch (st) {
            case State::Normal: {
                if (is_space(c)) {
                    if (has_token) {
                        args.emplace_back(std::move(cur));
                        cur.clear();
                        has_token = false;
                    }
                    ++i;
                } else if (c == '\'') {
                    st = State::InSingle;
                    has_token = true;
                    ++i;
                } else if (c == '"') {
                    st = State::InDouble;
                    has_token = true;
                    ++i;
                } else if (c == '\\') {
                    ++i; // consume backslash
                    if (i >= N) {
                        return {false, "结尾处孤立的反斜杠(未转义任何字符)", i, {}};
                    }
                    // Try C-style escapes first
                    auto esc = c_style_escape(input, i);
                    if (esc.first) {
                        cur.push_back(esc.second);
                    } else {
                        // Not a known C escape: take the next char literally
                        cur.push_back(input[i]);
                        ++i;
                    }
                    has_token = true;
                } else {
                    cur.push_back(c);
                    has_token = true;
                    ++i;
                }
            } break;

            case State::InSingle: {
                if (c == '\'') {
                    st = State::Normal;
                } else {
                    cur.push_back(c);
                }
                ++i;
            } break;

            case State::InDouble: {
                if (c == '"') {
                    st = State::Normal;
                    ++i;
                } else if (c == '\\') {
                    ++i; // consume backslash
                    if (i >= N) {
                        return {false, "双引号内以反斜杠结尾,缺少被转义字符", i, {}};
                    }
                    // In POSIX shell, within double quotes, only certain escapes are special.
                    // Here we support both POSIX subset and common C-style escapes for practicality.
                    auto esc = c_style_escape(input, i);
                    if (esc.first) {
                        cur.push_back(esc.second);
                    } else {
                        // If not a C-style escape, allow escaping one char literally (e.g., $ `)
                        cur.push_back(input[i]);
                        ++i;
                    }
                } else {
                    cur.push_back(c);
                    ++i;
                }
            } break;
        }
    }

    if (st == State::InSingle) {
        return {false, "缺少配对的单引号(')", i, {}};
    }
    if (st == State::InDouble) {
        return {false, "缺少配对的双引号(\")", i, {}};
    }

    if (has_token) args.emplace_back(std::move(cur));
    return {true, "", 0, std::move(args)};
}

// Helper: build argv pointers backed by the strings' storage.
// The returned vector includes a trailing nullptr, suitable for execv*.
// The pointers refer to `args`, which must stay alive and unmodified while the
// returned vector is in use.
inline std::vector<const char *> make_argv(const std::vector<std::string> &args) {
    std::vector<const char *> argv;
    argv.reserve(args.size() + 1);
    for (const auto &s : args) argv.push_back(s.c_str());
    argv.push_back(nullptr);
    return argv;
}

#endif // ZLMEDIAKIT_SHELLPARSER_H