208 lines
7.7 KiB
C++
208 lines
7.7 KiB
C++
|
|
/*
|
|||
|
|
* Copyright (c) 2016-present The ZLMediaKit project authors. All Rights Reserved.
|
|||
|
|
*
|
|||
|
|
* This file is part of ZLMediaKit(https://github.com/ZLMediaKit/ZLMediaKit).
|
|||
|
|
*
|
|||
|
|
* Use of this source code is governed by MIT-like license that can be found in the
|
|||
|
|
* LICENSE file in the root of the source tree. All contributing project authors
|
|||
|
|
* may be found in the AUTHORS file in the root of the source tree.
|
|||
|
|
*/
|
|||
|
|
|
|||
|
|
#ifndef ZLMEDIAKIT_SHELLPARSER_H
|
|||
|
|
#define ZLMEDIAKIT_SHELLPARSER_H
|
|||
|
|
|
|||
|
|
#include <iostream>
|
|||
|
|
#include <string>
|
|||
|
|
#include <vector>
|
|||
|
|
#include <cctype>
|
|||
|
|
|
|||
|
|
// Shell-like command line parser.
// Features:
// - Whitespace splitting (space, tab, newline)
// - Quotes: single ('...') and double ("...")
// - Escapes with backslash (\) outside quotes: the escaped character is taken literally
// - In single quotes: backslash is literal (like POSIX shell)
// - In double quotes: backslash can escape " $ ` \ and newline (line continuation);
//   any other escaped character is taken literally unless it is a C-style escape
// - C-style escapes, outside quotes and inside double quotes:
//   \n \t \r \a \b \f \v, octal (\0 followed by up to two more octal digits), hex (\xH or \xHH)
// - Line continuation: backslash followed by newline is ignored
// - Produces argv pointers (see make_argv) backed by a std::vector<std::string>;
//   the pointers stay valid while that vector is alive and unmodified
//
// Notes:
// - This is NOT a full shell (no variable expansion, no globbing, no command substitution).
// - Behavior aims to be practical and safe for exec* arguments building.
|
|||
|
|
|
|||
|
|
// Outcome of parse_shell_like(): either the list of parsed arguments, or an
// error message together with the input offset at which parsing stopped.
struct ParseResult {
    // ok   - whether parsing succeeded.
    // err  - error description; a null pointer is accepted and treated as an
    //        empty message (constructing std::string from nullptr is undefined).
    // pos  - offset into the input associated with the error.
    // args - parsed arguments; taken by value and moved into the result.
    ParseResult(bool ok, const char *err, size_t pos, std::vector<std::string> args)
        : ok(ok)
        , error_msg(err ? err : "")
        , error_pos(pos)
        , args(std::move(args)) {}

    bool ok;                       // true when parsing succeeded
    std::string error_msg;         // human-readable error text; empty on success
    size_t error_pos = 0;          // index in input when error happens
    std::vector<std::string> args; // parsed arguments
};
|
|||
|
|
|
|||
|
|
namespace detail {

// True for the characters that separate arguments: space, tab and newline.
inline bool is_space(char c) {
    return c == ' ' || c == '\t' || c == '\n';
}

// If s[i] begins a line continuation (a backslash immediately followed by a
// newline), skips both characters and returns true. Otherwise returns false
// and leaves i unchanged.
inline bool handle_line_continuation(const std::string &s, size_t &i) {
    if (i + 1 < s.size() && s[i] == '\\' && s[i + 1] == '\n') {
        i += 2; // consume both and do nothing
        return true;
    }
    return false;
}

// True if c is a hexadecimal digit. The cast to unsigned char avoids passing
// a negative value to std::isxdigit.
inline bool hex_digit(char c) { return std::isxdigit(static_cast<unsigned char>(c)) != 0; }

// Numeric value (0..15) of a hexadecimal digit; 0 for any other character.
inline int hex_val(char c) {
    if (c >= '0' && c <= '9') return c - '0';
    if (c >= 'a' && c <= 'f') return 10 + (c - 'a');
    if (c >= 'A' && c <= 'F') return 10 + (c - 'A');
    return 0;
}

// Decodes a C-style escape whose body starts at s[i], i.e. the character
// right after the backslash.
//
// Recognised bodies: n t r a b f v \ " ' ; an octal sequence that starts with
// '0' and has at most three digits in total; and 'x' followed by one or two
// hex digits.
//
// Returns {true, decoded} and advances i past the consumed characters on
// success. Returns {false, '\0'} and leaves i unchanged when s[i] does not
// start a recognised escape (including "x" with no hex digit after it), so
// the caller can still treat s[i] as a literal character.
inline std::pair<bool, char> c_style_escape(const std::string &s, size_t &i) {
    if (i >= s.size()) return std::make_pair(false, '\0');

    switch (s[i]) {
        case 'n': ++i; return std::make_pair(true, '\n');
        case 't': ++i; return std::make_pair(true, '\t');
        case 'r': ++i; return std::make_pair(true, '\r');
        case 'a': ++i; return std::make_pair(true, '\a');
        case 'b': ++i; return std::make_pair(true, '\b');
        case 'f': ++i; return std::make_pair(true, '\f');
        case 'v': ++i; return std::make_pair(true, '\v');
        case '\\': ++i; return std::make_pair(true, '\\');
        case '"': ++i; return std::make_pair(true, '"');
        case '\'': ++i; return std::make_pair(true, '\'');
        case '0': {
            // The leading '0' counts as the first of up to three octal digits.
            int val = 0;
            int cnt = 0;
            while (i < s.size() && cnt < 3 && (s[i] >= '0' && s[i] <= '7')) {
                val = (val << 3) + (s[i] - '0');
                ++i;
                ++cnt;
            }
            return std::make_pair(true, static_cast<char>(val & 0xFF));
        }
        case 'x': {
            // Scan with a lookahead index and commit to i only on success.
            // Advancing i before knowing whether a hex digit follows made the
            // caller lose the 'x' (or read past the end of the input).
            size_t j = i + 1;
            int val = 0;
            int cnt = 0;
            while (j < s.size() && cnt < 2 && hex_digit(s[j])) {
                val = (val << 4) + hex_val(s[j]);
                ++j;
                ++cnt;
            }
            if (cnt == 0) return std::make_pair(false, '\0'); // not actually a hex escape
            i = j;
            return std::make_pair(true, static_cast<char>(val & 0xFF));
        }
        default:
            return std::make_pair(false, '\0');
    }
}

} // namespace detail
|
|||
|
|
|
|||
|
|
// Splits `input` into arguments using shell-like quoting rules.
//
// - Unquoted space, tab and newline separate arguments.
// - '...' takes everything literally up to the closing single quote.
// - "..." allows backslash escapes; outside quotes a backslash escapes the
//   next character. C-style escapes are decoded via detail::c_style_escape,
//   any other escaped character is taken literally.
// - Backslash-newline is a line continuation and is dropped, except inside
//   single quotes where it stays literal.
// - A quoted empty string ('' or "") yields an empty argument.
//
// Returns a ParseResult with ok == true and the arguments on success. On
// failure (trailing backslash, unterminated quote) ok is false, args is
// empty, and error_msg / error_pos describe the problem.
//
// Declared inline because it is defined in a header and would otherwise
// produce multiple-definition link errors when included in several
// translation units.
inline ParseResult parse_shell_like(const std::string &input) {
    using namespace detail;

    enum class State { Normal, InSingle, InDouble };

    std::vector<std::string> args;
    std::string cur;
    // Tracks whether an argument has been started. Needed in addition to
    // cur.empty() so that '' and "" produce an (empty) argument.
    bool in_token = false;
    State st = State::Normal;

    const size_t N = input.size();
    size_t i = 0;

    // Appends the character denoted by the escape body at input[i] (the
    // backslash has already been consumed and i < N). Falls back to taking
    // input[i] literally when it is not a C-style escape; i is restored first
    // so the fallback does not depend on the helper leaving i untouched.
    auto append_escaped = [&]() {
        const size_t start = i;
        const auto esc = c_style_escape(input, i);
        if (esc.first) {
            cur.push_back(esc.second);
        } else {
            i = start;
            cur.push_back(input[i]);
            ++i;
        }
    };

    while (i < N) {
        // Line continuation (backslash-newline) is dropped everywhere except
        // inside single quotes, where a backslash has no special meaning.
        if (st != State::InSingle && handle_line_continuation(input, i)) continue;

        const char c = input[i];
        switch (st) {
            case State::Normal: {
                if (is_space(c)) {
                    if (in_token) {
                        args.emplace_back(std::move(cur));
                        cur.clear();
                        in_token = false;
                    }
                    ++i;
                } else if (c == '\'') {
                    st = State::InSingle;
                    in_token = true;
                    ++i;
                } else if (c == '"') {
                    st = State::InDouble;
                    in_token = true;
                    ++i;
                } else if (c == '\\') {
                    ++i; // consume backslash
                    if (i >= N) {
                        return {false, "结尾处孤立的反斜杠(未转义任何字符)", i, {}};
                    }
                    append_escaped();
                    in_token = true;
                } else {
                    cur.push_back(c);
                    in_token = true;
                    ++i;
                }
            } break;

            case State::InSingle: {
                if (c == '\'') {
                    st = State::Normal;
                } else {
                    cur.push_back(c);
                }
                ++i;
            } break;

            case State::InDouble: {
                if (c == '"') {
                    st = State::Normal;
                    ++i;
                } else if (c == '\\') {
                    ++i; // consume backslash
                    if (i >= N) {
                        return {false, "双引号内以反斜杠结尾,缺少被转义字符", i, {}};
                    }
                    // POSIX only treats a few escapes as special inside double
                    // quotes; C-style escapes are accepted too for practicality.
                    append_escaped();
                } else {
                    cur.push_back(c);
                    ++i;
                }
            } break;
        }
    }

    if (st == State::InSingle) {
        return {false, "缺少配对的单引号(')", i, {}};
    }
    if (st == State::InDouble) {
        return {false, "缺少配对的双引号(\")", i, {}};
    }

    if (in_token) args.emplace_back(std::move(cur));

    return {true, "", 0, std::move(args)};
}
|
|||
|
|
|
|||
|
|
// Builds a null-terminated argv-style pointer array for `args`, suitable for
// execv*. Each pointer is the c_str() of the corresponding string, so the
// array borrows from `args` rather than copying it.
inline std::vector<const char*> make_argv(const std::vector<std::string>& args) {
    const size_t count = args.size();
    // One extra slot, pre-filled with nullptr, serves as the terminator.
    std::vector<const char*> pointers(count + 1, nullptr);
    for (size_t idx = 0; idx < count; ++idx) {
        pointers[idx] = args[idx].c_str();
    }
    return pointers;
}
|
|||
|
|
|
|||
|
|
#endif // ZLMEDIAKIT_SHELLPARSER_H
|