فهرست منبع

Expand source stats to roughly cover more tokens. (#3277)

This is pretty rough both in code-quality and what it tries to cover.
The coverage approximation is probably fine as it mostly just gives some
vague idea of the distributions we should aim at when evaluating
performance. We still want performance with unusual distributions to be
good.

Happy to improve the code quality if folks want, and especially with
suggestions on what would help. For this kind of quick scripting, it
seemed "fine" to me.
Chandler Carruth 2 سال پیش
والد
کامیت
0e8e94d8bf
1 فایل‌های تغییر یافته به همراه 125 افزوده شده و 2 حذف شده
  1. 125 2
      scripts/source_stats.py

+ 125 - 2
scripts/source_stats.py

@@ -27,6 +27,12 @@ LINE_RE = re.compile(
     (?P<class_intro>\b(class|struct)\s+(?P<class_name>\w+)\b)|
     (?P<class_intro>\b(class|struct)\s+(?P<class_name>\w+)\b)|
     (?P<end_open_curly>{\s*(?P<open_curly_trailing_comment>//.*)?)|
     (?P<end_open_curly>{\s*(?P<open_curly_trailing_comment>//.*)?)|
     (?P<trailing_comment>//.*)|
     (?P<trailing_comment>//.*)|
+    (?P<internal_comment>/\*.*\*/)|
+    (?P<string_literal>"([^"]|\\")*"|'([^']|\\')*')|
+    (?P<float_literal>\b(0[xb][0-9a-fA-F']*|[0-9][0-9']*)\.[0-9a-fA-F']*([eEpP][0-9a-fA-F']*)?)|
+    (?P<int_literal>\b(0[xb][0-9a-fA-F']+|[0-9][0-9']*)([eEpP][0-9a-fA-F']*)?)|
+    (?P<symbol>[\[\]{}(),.;]|[-+=!@#$%^&*/?|<>]+)|
+    (?P<keyword>\b(auto|bool|break|case|catch|char|class|const|continue|default|do|double|else|enum|explicit|extern|false|float|for|friend|goto|if|inline|int|long|mutable|namespace|new|nullptr|operator|private|protected|public|return|short|signed|sizeof|static|struct|switch|template|this|throw|true|try|typedef|union|unsigned|using|virtual|void|while)\b)|
     (?P<id>\b\w+\b)
     (?P<id>\b\w+\b)
 """,
 """,
     re.X,
     re.X,
@@ -44,6 +50,23 @@ class Stats:
     comment_line_widths: Counter[int] = field(default_factory=lambda: Counter())
     comment_line_widths: Counter[int] = field(default_factory=lambda: Counter())
     lines_with_trailing_comments: int = 0
     lines_with_trailing_comments: int = 0
     classes: int = 0
     classes: int = 0
+    internal_comments: int = 0
+    string_literals: int = 0
+    string_literals_per_line: Counter[int] = field(
+        default_factory=lambda: Counter()
+    )
+    int_literals: int = 0
+    int_literals_per_line: Counter[int] = field(
+        default_factory=lambda: Counter()
+    )
+    float_literals: int = 0
+    float_literals_per_line: Counter[int] = field(
+        default_factory=lambda: Counter()
+    )
+    symbols: int = 0
+    symbols_per_line: Counter[int] = field(default_factory=lambda: Counter())
+    keywords: int = 0
+    keywords_per_line: Counter[int] = field(default_factory=lambda: Counter())
     identifiers: int = 0
     identifiers: int = 0
     identifier_widths: Counter[int] = field(default_factory=lambda: Counter())
     identifier_widths: Counter[int] = field(default_factory=lambda: Counter())
     ids_per_line: Counter[int] = field(default_factory=lambda: Counter())
     ids_per_line: Counter[int] = field(default_factory=lambda: Counter())
@@ -56,6 +79,17 @@ class Stats:
         self.comment_line_widths.update(other.comment_line_widths)
         self.comment_line_widths.update(other.comment_line_widths)
         self.lines_with_trailing_comments += other.lines_with_trailing_comments
         self.lines_with_trailing_comments += other.lines_with_trailing_comments
         self.classes += other.classes
         self.classes += other.classes
+        self.internal_comments += other.internal_comments
+        self.string_literals += other.string_literals
+        self.string_literals_per_line.update(other.string_literals_per_line)
+        self.int_literals += other.int_literals
+        self.int_literals_per_line.update(other.int_literals_per_line)
+        self.float_literals += other.float_literals
+        self.float_literals_per_line.update(other.float_literals_per_line)
+        self.symbols += other.symbols
+        self.symbols_per_line.update(other.symbols_per_line)
+        self.keywords += other.keywords
+        self.keywords_per_line.update(other.keywords_per_line)
         self.identifiers += other.identifiers
         self.identifiers += other.identifiers
         self.identifier_widths.update(other.identifier_widths)
         self.identifier_widths.update(other.identifier_widths)
         self.ids_per_line.update(other.ids_per_line)
         self.ids_per_line.update(other.ids_per_line)
@@ -82,6 +116,11 @@ def scan_file(file: Path) -> Stats:
             else:
             else:
                 stats.comment_line_widths[len(line)] += 1
                 stats.comment_line_widths[len(line)] += 1
             continue
             continue
+        line_string_literals = 0
+        line_int_literals = 0
+        line_float_literals = 0
+        line_symbols = 0
+        line_keywords = 0
         line_identifiers = 0
         line_identifiers = 0
         for m in re.finditer(LINE_RE, line):
         for m in re.finditer(LINE_RE, line):
             if m.group("trailing_comment"):
             if m.group("trailing_comment"):
@@ -89,10 +128,23 @@ def scan_file(file: Path) -> Stats:
                 break
                 break
             if m.group("class_intro"):
             if m.group("class_intro"):
                 stats.classes += 1
                 stats.classes += 1
+                line_keywords += 1
                 line_identifiers += 1
                 line_identifiers += 1
                 stats.identifier_widths[len(m.group("class_name"))] += 1
                 stats.identifier_widths[len(m.group("class_name"))] += 1
             elif m.group("end_open_curly"):
             elif m.group("end_open_curly"):
-                pass
+                line_symbols += 1
+            elif m.group("internal_comment"):
+                stats.internal_comments += 1
+            elif m.group("string_literal"):
+                line_string_literals += 1
+            elif m.group("int_literal"):
+                line_int_literals += 1
+            elif m.group("float_literal"):
+                line_float_literals += 1
+            elif m.group("symbol"):
+                line_symbols += 1
+            elif m.group("keyword"):
+                line_keywords += 1
             else:
             else:
                 assert m.group("id"), "Line is '%s', and match is '%s'" % (
                 assert m.group("id"), "Line is '%s', and match is '%s'" % (
                     line,
                     line,
@@ -100,6 +152,16 @@ def scan_file(file: Path) -> Stats:
                 )
                 )
                 line_identifiers += 1
                 line_identifiers += 1
                 stats.identifier_widths[len(m.group("id"))] += 1
                 stats.identifier_widths[len(m.group("id"))] += 1
+        stats.string_literals += line_string_literals
+        stats.string_literals_per_line[line_string_literals] += 1
+        stats.int_literals += line_int_literals
+        stats.int_literals_per_line[line_int_literals] += 1
+        stats.float_literals += line_float_literals
+        stats.float_literals_per_line[line_float_literals] += 1
+        stats.symbols += line_symbols
+        stats.symbols_per_line[line_symbols] += 1
+        stats.keywords += line_keywords
+        stats.keywords_per_line[line_keywords] += 1
         stats.identifiers += line_identifiers
         stats.identifiers += line_identifiers
         stats.ids_per_line[line_identifiers] += 1
         stats.ids_per_line[line_identifiers] += 1
     return stats
     return stats
@@ -136,19 +198,57 @@ Comment lines: %(comment_lines)d
 Empty comment lines: %(empty_comment_lines)d
 Empty comment lines: %(empty_comment_lines)d
 Lines with trailing comments: %(lines_with_trailing_comments)d
 Lines with trailing comments: %(lines_with_trailing_comments)d
 Classes: %(classes)d
 Classes: %(classes)d
+Internal comments: %(internal_comments)d
+String literals: %(string_literals)d
+Int literals: %(int_literals)d
+Float literals: %(float_literals)d
+Symbols: %(symbols)d
+Keywords: %(keywords)d
 IDs: %(identifiers)d"""
 IDs: %(identifiers)d"""
         % asdict(stats)
         % asdict(stats)
     )
     )
 
 
+    tokens = (
+        stats.string_literals
+        + stats.int_literals
+        + stats.float_literals
+        + stats.symbols
+        + stats.keywords
+        + stats.identifiers
+    )
+    print(
+        f"""
+Fraction of blank lines: {stats.blank_lines / stats.lines}
+Fraction of comment lines: {stats.comment_lines / stats.lines}
+
+Total counted tokens: {tokens}
+Fraction string literals: {stats.string_literals / tokens}
+Fraction int literals: {stats.int_literals / tokens}
+Fraction float literals: {stats.float_literals / tokens}
+Fraction symbols: {stats.symbols / tokens}
+Fraction keywords: {stats.keywords / tokens}
+Fraction IDs: {stats.identifiers / tokens}
+    """
+    )
+
     def print_histogram(
     def print_histogram(
         title: str, data: Dict[int, int], column_format: str
         title: str, data: Dict[int, int], column_format: str
     ) -> None:
     ) -> None:
         print()
         print()
-        print(title)
         key_min = min(data.keys())
         key_min = min(data.keys())
         key_max = max(data.keys()) + 1
         key_max = max(data.keys()) + 1
         values = [data.get(k, 0) for k in range(key_min, key_max)]
         values = [data.get(k, 0) for k in range(key_min, key_max)]
         keys = [column_format % k for k in range(key_min, key_max)]
         keys = [column_format % k for k in range(key_min, key_max)]
+        total = sum(values)
+        median = key_min
+        count = total
+        for k in range(key_min, key_max):
+            count -= data.get(k, 0)
+            if count <= total / 2:
+                median = k
+                break
+
+        print(title + f" (median: {median})")
         fig = tpl.figure()
         fig = tpl.figure()
         fig.barh(values, keys)
         fig.barh(values, keys)
         fig.show()
         fig.show()
@@ -156,6 +256,29 @@ IDs: %(identifiers)d"""
     print_histogram(
     print_histogram(
         "## Comment line widths ##", stats.comment_line_widths, "%d columns"
         "## Comment line widths ##", stats.comment_line_widths, "%d columns"
     )
     )
+
+    print_histogram(
+        "## String literals per line ##",
+        stats.string_literals_per_line,
+        "%d literals",
+    )
+    print_histogram(
+        "## Int literals per line ##",
+        stats.int_literals_per_line,
+        "%d literals",
+    )
+    print_histogram(
+        "## Float literals per line ##",
+        stats.float_literals_per_line,
+        "%d literals",
+    )
+    print_histogram(
+        "## Symbols per line ##", stats.symbols_per_line, "%d symbols"
+    )
+    print_histogram(
+        "## Keywords per line ##", stats.keywords_per_line, "%d keywords"
+    )
+
     print_histogram("## ID widths ##", stats.identifier_widths, "%d characters")
     print_histogram("## ID widths ##", stats.identifier_widths, "%d characters")
     print_histogram("## IDs per line ##", stats.ids_per_line, "%d ids")
     print_histogram("## IDs per line ##", stats.ids_per_line, "%d ids")