You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

710 lines
18 KiB

/**
* @file sorting.cpp
* Sorts chunks and imports
*
* @author Ben Gardner
* @license GPL v2+
*/
#include "sorting.h"
#include "newlines.h"
#include "prototypes.h"
#include <regex>
#include <unordered_map>
constexpr static auto LCURRENT = LSORT;
using namespace uncrustify;
Option<std::string> *include_category_options[] =
{
&options::include_category_0,
&options::include_category_1,
&options::include_category_2,
};
constexpr static int kIncludeCategoriesCount = 3;
struct include_category
{
include_category(const std::string &pattern)
: regex(pattern)
{
}
std::regex regex;
};
include_category *include_categories[kIncludeCategoriesCount];
unordered_map<Chunk *, int> chunk_priority_cache;
unordered_map<std::string, bool> filename_without_ext_cache;
/**
* Compare two series of chunks, starting with the given ones.
* @param pc1 first instance to compare
* @param pc2 second instance to compare
* @param tcare take care of case (lower case/ upper case) Issue #2091
*
* @retval == 0 both text elements are equal
* @retval > 0
* @retval < 0
*/
static int compare_chunks(Chunk *pc1, Chunk *pc2, bool tcare);
/**
* Sort all of the chunks in O(n log n) time with a maximum of O(n) swaps
*/
static void do_the_sort(Chunk **chunks, size_t num_chunks);
#define MARK_CHANGE() mark_change(__func__, __LINE__)
static void mark_change(const char *func, size_t line)
{
LOG_FUNC_ENTRY();
cpd.changes++;
if (cpd.pass_count == 0)
{
LOG_FMT(LCHANGE, "%s(%d): change %d on %s:%zu\n",
__func__, __LINE__, cpd.changes, func, line);
}
}
static void prepare_categories()
{
for (int i = 0; i < kIncludeCategoriesCount; ++i)
{
const auto &cat_pattern = (*include_category_options[i])();
if (!cat_pattern.empty())
{
include_categories[i] = new include_category(cat_pattern);
}
else
{
include_categories[i] = nullptr;
}
}
}
static void cleanup_categories()
{
chunk_priority_cache.clear();
filename_without_ext_cache.clear();
for (auto &include_category : include_categories)
{
if (include_category == nullptr)
{
continue;
}
delete include_category;
include_category = NULL;
}
}
static int get_chunk_priority(Chunk *pc)
{
if (chunk_priority_cache.count(pc) > 0)
{
return(chunk_priority_cache[pc]);
}
int category = kIncludeCategoriesCount;
for (int i = 0; i < kIncludeCategoriesCount; i++)
{
if (include_categories[i] != nullptr)
{
if (std::regex_match(pc->Text(), include_categories[i]->regex))
{
category = i;
break;
}
}
}
chunk_priority_cache[pc] = category;
return(category);
}
/**
* Returns true if the text contains filename without extension.
*/
static bool text_contains_filename_without_ext(const char *text)
{
if (filename_without_ext_cache.count(text) > 0)
{
return(filename_without_ext_cache[text]);
}
std::string filepath = cpd.filename;
size_t slash_idx = filepath.find_last_of("/\\");
std::string filename_without_ext = filepath;
if ( slash_idx != std::string::npos
&& slash_idx < (filepath.size() - 1))
{
std::string filename = filepath.substr(slash_idx + 1);
size_t dot_idx = filename.find_last_of('.');
filename_without_ext = filename.substr(0, dot_idx);
}
const std::regex special_chars = std::regex(R"([-[\]{}()*+?.,\^$|#\s])");
const std::string sanitized_filename = std::regex_replace(filename_without_ext, special_chars, R"(\$&)");
const std::regex filename_pattern = std::regex("\\S?" + sanitized_filename + "\\b.*");
filename_without_ext_cache[text] = std::regex_match(text, filename_pattern);
return(filename_without_ext_cache[text]);
}
/**
* Get chunk text without the extension.
*/
static UncText get_text_without_ext(const UncText &chunk_text)
{
UncText result = chunk_text;
int idx = result.rfind(".", result.size() - 1);
if (idx == -1)
{
return(result);
}
result.erase(idx, result.size() - idx);
return(result);
}
/**
* Returns true if UncText has "." which implies extension.
*/
static bool has_dot(const UncText &chunk_text)
{
int idx = chunk_text.rfind(".", chunk_text.size() - 1);
return(idx != -1);
}
/**
* Returns chunk string required for sorting.
*/
static UncText chunk_sort_str(Chunk *pc)
{
if (pc->GetParentType() == CT_PP_INCLUDE)
{
return(UncText{ pc->GetStr(), 0, pc->Len() - 1 });
}
return(pc->GetStr());
}
//! Compare two chunks
static int compare_chunks(Chunk *pc1, Chunk *pc2, bool tcare)
{
LOG_FUNC_ENTRY();
LOG_FMT(LSORT, "%s(%d): @begin pc1->len is %zu, line is %zu, column is %zu\n",
__func__, __LINE__, pc1->Len(), pc1->GetOrigLine(), pc1->GetOrigCol());
LOG_FMT(LSORT, "%s(%d): @begin pc2->len is %zu, line is %zu, column is %zu\n",
__func__, __LINE__, pc2->Len(), pc2->GetOrigLine(), pc2->GetOrigCol());
if (pc1 == pc2) // same chunk is always identical thus return 0 differences
{
return(0);
}
while ( pc1->IsNotNullChunk()
&& pc2->IsNotNullChunk())
{
auto const &s1_ext = chunk_sort_str(pc1);
auto const &s2_ext = chunk_sort_str(pc2);
log_rule_B("mod_sort_incl_import_ignore_extension");
auto const &s1 = (options::mod_sort_incl_import_ignore_extension()) ? get_text_without_ext(s1_ext) : s1_ext;
auto const &s2 = (options::mod_sort_incl_import_ignore_extension()) ? get_text_without_ext(s2_ext) : s2_ext;
log_rule_B("mod_sort_incl_import_prioritize_filename");
if (options::mod_sort_incl_import_prioritize_filename())
{
bool s1_contains_filename = text_contains_filename_without_ext(s1.c_str());
bool s2_contains_filename = text_contains_filename_without_ext(s2.c_str());
if ( s1_contains_filename
&& !s2_contains_filename)
{
return(-1);
}
else if ( !s1_contains_filename
&& s2_contains_filename)
{
return(1);
}
}
if (options::mod_sort_incl_import_prioritize_extensionless())
{
log_rule_B("mod_sort_incl_import_prioritize_extensionless");
const bool s1_has_dot = has_dot(s1_ext);
const bool s2_has_dot = has_dot(s2_ext);
if ( s1_has_dot
&& !s2_has_dot)
{
return(1);
}
else if ( !s1_has_dot
&& s2_has_dot)
{
return(-1);
}
}
if (options::mod_sort_incl_import_prioritize_angle_over_quotes())
{
log_rule_B("mod_sort_incl_import_prioritize_angle_over_quotes");
if ( s1.startswith("<")
&& s2.startswith("\""))
{
return(-1);
}
else if ( s1.startswith("\"")
&& s2.startswith("<"))
{
return(1);
}
}
int ppc1 = get_chunk_priority(pc1);
int ppc2 = get_chunk_priority(pc2);
if (ppc1 != ppc2)
{
return(ppc1 - ppc2);
}
LOG_FMT(LSORT, "%s(%d): text is %s, pc1->len is %zu, line is %zu, column is %zu\n",
__func__, __LINE__, pc1->Text(), pc1->Len(), pc1->GetOrigLine(), pc1->GetOrigCol());
LOG_FMT(LSORT, "%s(%d): text is %s, pc2->len is %zu, line is %zu, column is %zu\n",
__func__, __LINE__, pc2->Text(), pc2->Len(), pc2->GetOrigLine(), pc2->GetOrigCol());
int ret_val = UncText::compare(s1, s2, std::min(s1.size(), s2.size()), tcare);
LOG_FMT(LSORT, "%s(%d): ret_val is %d\n",
__func__, __LINE__, ret_val);
if (ret_val != 0)
{
return(ret_val);
}
if (pc1->Len() != pc2->Len())
{
return(pc1->Len() - pc2->Len());
}
// Same word, same length. Step to the next chunk.
pc1 = pc1->GetNext();
LOG_FMT(LSORT, "%s(%d): text is %s, pc1->len is %zu, line is %zu, column is %zu\n",
__func__, __LINE__, pc1->Text(), pc1->Len(), pc1->GetOrigLine(), pc1->GetOrigCol());
if (pc1->Is(CT_MEMBER))
{
pc1 = pc1->GetNext();
LOG_FMT(LSORT, "%s(%d): text is %s, pc1->len is %zu, line is %zu, column is %zu\n",
__func__, __LINE__, pc1->Text(), pc1->Len(), pc1->GetOrigLine(), pc1->GetOrigCol());
}
pc2 = pc2->GetNext();
LOG_FMT(LSORT, "%s(%d): text is %s, pc2->len is %zu, line is %zu, column is %zu\n",
__func__, __LINE__, pc2->Text(), pc2->Len(), pc2->GetOrigLine(), pc2->GetOrigCol());
if (pc2->Is(CT_MEMBER))
{
pc2 = pc2->GetNext();
LOG_FMT(LSORT, "%s(%d): text is %s, pc2->len is %zu, line is %zu, column is %zu\n",
__func__, __LINE__, pc2->Text(), pc2->Len(), pc2->GetOrigLine(), pc2->GetOrigCol());
}
LOG_FMT(LSORT, "%s(%d): >>>text is %s, pc1->len is %zu, line is %zu, column is %zu\n",
__func__, __LINE__, pc1->Text(), pc1->Len(), pc1->GetOrigLine(), pc1->GetOrigCol());
LOG_FMT(LSORT, "%s(%d): >>>text is %s, pc2->len is %zu, line is %zu, column is %zu\n",
__func__, __LINE__, pc2->Text(), pc2->Len(), pc2->GetOrigLine(), pc2->GetOrigCol());
// If we hit a newline or null chuck, we are done
if ( pc1->IsNullChunk()
|| pc1->IsNewline()
|| pc2->IsNullChunk()
|| pc2->IsNewline())
{
break;
}
}
if ( pc1->IsNullChunk()
|| !pc2->IsNewline())
{
return(-1);
}
if (!pc1->IsNewline())
{
return(1);
}
return(0);
} // compare_chunks
/**
* Sort all of the chunks in O(n log n) time with a maximum of O(n) swaps
*/
static void do_the_sort(Chunk **chunks, size_t num_chunks)
{
LOG_FUNC_ENTRY();
LOG_FMT(LSORT, "%s(%d): %zu chunks:",
__func__, __LINE__, num_chunks);
for (size_t idx = 0; idx < num_chunks; idx++)
{
LOG_FMT(LSORT, " [%s]", chunks[idx]->Text());
}
LOG_FMT(LSORT, "\n");
log_rule_B("mod_sort_case_sensitive");
bool take_care = options::mod_sort_case_sensitive(); // Issue #2091
// Sort an array of the chunk positions in order to minimize the number of swaps
std::vector<size_t> chunk_positions(num_chunks);
for (size_t idx = 0; idx < num_chunks; idx++)
{
chunk_positions[idx] = idx;
}
sort(chunk_positions.begin(), chunk_positions.end(), [chunks, take_care](const size_t &l, const size_t &r) {
return(compare_chunks(chunks[l], chunks[r], take_care) < 0);
});
// Swap the chunk positions
for (size_t idx = 0; idx < num_chunks; idx++)
{
if (chunk_positions[idx] != idx)
{
log_rule_B("mod_sort_incl_import_grouping_enabled");
const size_t from = chunk_positions[idx];
const size_t to = chunk_positions[from];
chunks[from]->SwapLines(chunks[to]);
Chunk *pc = chunks[from];
chunks[from] = chunks[to];
chunks[to] = pc;
chunk_positions[from] = from;
chunk_positions[idx] = to;
idx--;
}
}
} // do_the_sort
/**
* Remove blank lines between chunks.
*/
static void remove_blank_lines_between_imports(Chunk **chunks, size_t num_chunks)
{
LOG_FUNC_ENTRY();
if (num_chunks < 2)
{
return;
}
for (size_t idx = 0; idx < (num_chunks - 1); idx++)
{
Chunk *chunk1 = chunks[idx]->GetNextNl();
chunk1->SetNlCount(1);
MARK_CHANGE();
}
}
/**
* Delete chunks on line having chunk.
*/
static void delete_chunks_on_line_having_chunk(Chunk *chunk)
{
LOG_FUNC_ENTRY();
Chunk *pc = chunk->GetFirstChunkOnLine();
while ( pc->IsNotNullChunk()
&& !pc->IsComment())
{
Chunk *next_pc = pc->GetNext();
LOG_FMT(LCHUNK, "%s(%d): Removed '%s' on orig line %zu\n",
__func__, __LINE__, pc->Text(), pc->GetOrigLine());
if (pc->IsNewline())
{
Chunk::Delete(pc);
break;
}
else
{
Chunk::Delete(pc);
}
pc = next_pc;
}
}
/**
* Dedupe import/include directives.
*/
static void dedupe_imports(Chunk **chunks, size_t num_chunks)
{
LOG_FUNC_ENTRY();
log_rule_B("mod_sort_case_sensitive");
for (size_t idx = 1; idx < num_chunks; idx++)
{
auto const &s1 = chunk_sort_str(chunks[idx - 1]);
auto const &s2 = chunk_sort_str(chunks[idx]);
if (s1.size() != s2.size())
{
continue;
}
int ret_val = UncText::compare(s1, s2, std::min(s1.size(), s2.size()), options::mod_sort_case_sensitive());
if (ret_val == 0)
{
delete_chunks_on_line_having_chunk(chunks[idx - 1]);
}
}
}
/**
* Add blank line before the chunk.
*/
static void blankline_add_before(Chunk *pc)
{
Chunk *newline = newline_add_before(pc->GetFirstChunkOnLine());
if (newline->GetNlCount() < 2)
{
double_newline(newline);
}
}
/**
* Group imports.
*/
static void group_imports_by_adding_newlines(Chunk **chunks, size_t num_chunks)
{
LOG_FUNC_ENTRY();
// Group imports based on first character, typically quote or angle.
int c_idx = -1;
int c_idx_last = -1;
for (size_t idx = 0; idx < num_chunks; idx++)
{
if (chunks[idx]->GetStr().size() > 0)
{
c_idx = chunks[idx]->GetStr().at(0);
}
else
{
c_idx = -1;
}
if ( c_idx_last != c_idx
&& idx > 0)
{
blankline_add_before(chunks[idx]);
}
c_idx_last = c_idx;
}
// Group imports based on having extension.
bool chunk_has_dot = false;
bool chunk_last_has_dot = false;
for (size_t idx = 0; idx < num_chunks; idx++)
{
chunk_has_dot = has_dot(chunks[idx]->GetStr());
if ( chunk_last_has_dot != chunk_has_dot
&& idx > 0)
{
blankline_add_before(chunks[idx]);
}
chunk_last_has_dot = chunk_has_dot;
}
// Group imports based on priority defined by config.
int chunk_pri = -1;
int chunk_pri_last = -1;
for (size_t idx = 0; idx < num_chunks; idx++)
{
chunk_pri = get_chunk_priority(chunks[idx]);
if ( chunk_pri_last != chunk_pri
&& idx > 0)
{
blankline_add_before(chunks[idx]);
}
chunk_pri_last = chunk_pri;
}
// Group imports that contain filename pattern.
bool chunk_has_filename = false;
bool last_chunk_has_filename = false;
for (size_t idx = 0; idx < num_chunks; idx++)
{
auto const &chunk_text = chunk_sort_str(chunks[idx]);
chunk_has_filename = text_contains_filename_without_ext(chunk_text.c_str());
if ( !chunk_has_filename
&& last_chunk_has_filename)
{
blankline_add_before(chunks[idx]);
}
last_chunk_has_filename = chunk_has_filename;
}
} // group_imports_by_adding_newlines
void sort_imports()
{
LOG_FUNC_ENTRY();
const int max_number_to_sort = 1024;
const int max_lines_to_check_for_sort_after_include = 128;
const int max_gap_threshold_between_include_to_sort = 32;
Chunk *chunks[max_number_to_sort];
size_t num_chunks = 0;
Chunk *p_last = Chunk::NullChunkPtr;
Chunk *p_imp = Chunk::NullChunkPtr;
Chunk *p_imp_last = Chunk::NullChunkPtr;
prepare_categories();
Chunk *pc = Chunk::GetHead();
log_rule_B("mod_sort_incl_import_grouping_enabled");
while (pc->IsNotNullChunk())
{
// Simple optimization to limit the sorting. Any MAX_LINES_TO_CHECK_AFTER_INCLUDE lines after last
// import is seen are ignore from sorting.
if ( options::mod_sort_incl_import_grouping_enabled()
&& p_imp_last->IsNotNullChunk()
&& (pc->GetOrigLine() - p_imp_last->GetOrigLine()) > max_lines_to_check_for_sort_after_include)
{
break;
}
Chunk *next = pc->GetNext();
if (pc->IsNewline())
{
bool did_import = false;
if ( p_imp->IsNotNullChunk()
&& ( p_last->Is(CT_SEMICOLON)
|| p_imp->TestFlags(PCF_IN_PREPROC)))
{
if (num_chunks < max_number_to_sort)
{
LOG_FMT(LSORT, "%s(%d): p_imp is %s\n",
__func__, __LINE__, p_imp->Text());
chunks[num_chunks++] = p_imp;
}
else
{
fprintf(stderr, "Number of 'import' to be sorted is too big for the current value %d.\n", max_number_to_sort);
fprintf(stderr, "Please make a report.\n");
log_flush(true);
exit(EX_SOFTWARE);
}
did_import = true;
}
log_rule_B("mod_sort_incl_import_grouping_enabled");
if ( !did_import
|| ( !options::mod_sort_incl_import_grouping_enabled()
&& pc->GetNlCount() > 1)
|| ( options::mod_sort_incl_import_grouping_enabled()
&& p_imp_last->IsNotNullChunk()
&& (pc->GetOrigLine() - p_imp_last->GetOrigLine()) > max_gap_threshold_between_include_to_sort)
|| next->IsNullChunk())
{
if (num_chunks > 1)
{
log_rule_B("mod_sort_incl_import_grouping_enabled");
if (options::mod_sort_incl_import_grouping_enabled())
{
remove_blank_lines_between_imports(chunks, num_chunks);
do_the_sort(chunks, num_chunks);
group_imports_by_adding_newlines(chunks, num_chunks);
dedupe_imports(chunks, num_chunks);
}
else
{
do_the_sort(chunks, num_chunks);
}
}
num_chunks = 0;
}
p_imp_last = p_imp;
p_imp = Chunk::NullChunkPtr;
p_last = Chunk::NullChunkPtr;
}
else if (pc->Is(CT_IMPORT))
{
log_rule_B("mod_sort_import");
if (options::mod_sort_import())
{
p_imp = pc->GetNext();
}
}
else if (pc->Is(CT_USING))
{
log_rule_B("mod_sort_using");
if (options::mod_sort_using())
{
p_imp = pc->GetNext();
}
}
else if (pc->Is(CT_PP_INCLUDE))
{
log_rule_B("mod_sort_include");
if (options::mod_sort_include())
{
p_imp = pc->GetNext();
p_last = pc;
}
}
else if (!pc->IsComment())
{
p_last = pc;
}
pc = next;
}
cleanup_categories();
} // sort_imports