A script that analyzes text files or source code to find the most common symbols, bigrams and trigrams. It is useful when designing a keyboard layout, to decide which symbols and combinations of symbols to place on easy-access layers and combos.
Rules:
- Single: Symbols, digits and whitespace; no letters
- Tuple (bigram): Symbols only; no letters, digits or whitespace
- Triple (trigram): Symbols, with whitespace allowed only as the 2nd character; no letters or digits
Flags:
- File types
ext=kt,xml,java (default: kt)
- Exclude folders
ignore=build,tmp (default: build)
- Show top N items
top=10 (default: 25)
To be used with kscript
.
# Run for Kotlin and XML files, ignore build folders
./Bigrams.kt ext=kt,xml ignore=build ./path/to/folder
Analyzed 199 files
Extension 'kt'
+------+------------+------------+-------------+
|Top 25| Single | Tuple | Triple |
+------+------------+------------+-------------+
| 1 | 25037 | () 2098 | ) { 1081 |
| 2 | . 15799 | )) 604 | ()) 340 |
| 3 | ( 6556 | -> 590 | = " 243 |
| 4 | ) 6556 | ). 552 | >() 215 |
| 5 | = 3304 | ), 449 | (), 208 |
| 6 | , 2563 | ", 272 | `() 184 |
| 7 | _ 2307 | >( 262 | ) = 184 |
| 8 | { 2271 | ") 217 | > = 181 |
| 9 | } 2271 | /* 213 | > { 181 |
| 10 | : 1809 | */ 213 | , " 135 |
| 11 | > 1308 | (" 213 | ) } 115 |
| 12 | " 1306 | ?. 191 | /** 109 |
| 13 | * 834 | `( 186 | * © 98 |
| 14 | / 783 | // 159 | ) : 93 |
| 15 | < 713 | >> 146 | = { 92 |
| 16 | - 660 | "" 116 | )). 87 |
| 17 | ? 536 | ** 109 | ? = 80 |
| 18 | ` 380 | == 107 | (). 77 |
| 19 | @ 363 | ): 89 | "", 75 |
| 20 | 0 278 | ?: 74 | "), 63 |
| 21 | 2 268 | ?> 62 | >>( 62 |
| 22 | 1 228 | ?) 61 | = _ 61 |
| 23 | ! 139 | )? 57 | ) - 57 |
| 24 | [ 136 | != 42 | : ( 43 |
| 25 | ] 136 | && 39 | )?. 43 |
+------+------------+------------+-------------+
Extension 'xml'
+------+------------+------------+-------------+
|Top 25| Single | Tuple | Triple |
+------+------------+------------+-------------+
| 1 | 66829 | =" 8336 | <!- 1768 |
| 2 | _ 22234 | </ 6219 | !-- 1768 |
| 3 | " 16820 | "> 6044 | --> 1768 |
| 4 | > 14613 | -- 3536 | .</ 1344 |
| 5 | < 14612 | <! 1856 | ="@ 835 |
| 6 | = 8353 | -> 1769 | "@+ 290 |
| 7 | / 7794 | !- 1768 | " / 169 |
| 8 | - 7788 | .< 1344 | ?</ 135 |
| 9 | . 4146 | "@ 835 | :// 117 |
| 10 | : 2348 | \' 356 | ><! 110 |
| 11 | ! 2067 | @+ 290 | ~ © 97 |
| 12 | , 1516 | /> 223 | "?> 94 |
| 13 | @ 841 | >< 199 | . - 91 |
| 14 | \ 778 | "? 176 | ></ 89 |
| 15 | 2 638 | ?< 135 | ">< 88 |
| 16 | 1 592 | :/ 117 | <![ 88 |
| 17 | ? 511 | // 117 | ]]> 88 |
| 18 | 0 500 | <? 95 | ]>< 88 |
| 19 | ’ 414 | ?> 95 | !</ 83 |
| 20 | % 362 | ![ 88 | ="? 82 |
| 21 | ' 361 | ]] 88 | >]] 75 |
| 22 | + 290 | ]> 88 | "</ 69 |
| 23 | $ 273 | !< 83 | ">" 65 |
| 24 | ; 227 | >] 75 | ">\ 44 |
| 25 | 4 211 | "< 69 | , % 38 |
+------+------------+------------+-------------+
Extension 'Combined'
+------+------------+------------+-------------+
|Top 25| Single | Tuple | Triple |
+------+------------+------------+-------------+
| 1 | 91866 | =" 8336 | <!- 1768 |
| 2 | _ 24541 | </ 6219 | !-- 1768 |
| 3 | . 19945 | "> 6044 | --> 1768 |
| 4 | " 18126 | -- 3536 | .</ 1344 |
| 5 | > 15921 | -> 2359 | ) { 1081 |
| 6 | < 15325 | () 2101 | ="@ 835 |
| 7 | = 11657 | <! 1856 | ()) 340 |
| 8 | / 8577 | !- 1768 | "@+ 290 |
| 9 | - 8448 | .< 1344 | = " 246 |
| 10 | ( 6664 | "@ 836 | >() 215 |
| 11 | ) 6664 | )) 604 | (), 208 |
| 12 | : 4157 | ). 592 | `() 184 |
| 13 | , 4079 | ), 449 | ) = 184 |
| 14 | { 2286 | \' 356 | > = 181 |
| 15 | } 2286 | @+ 290 | > { 181 |
| 16 | ! 2206 | // 276 | " / 169 |
| 17 | @ 1204 | ", 272 | , " 136 |
| 18 | ? 1047 | >( 262 | ?</ 135 |
| 19 | 2 906 | /> 223 | :// 126 |
| 20 | * 838 | ") 217 | ) } 115 |
| 21 | 1 820 | /* 213 | ><! 110 |
| 22 | 0 778 | */ 213 | /** 109 |
| 23 | \ 778 | (" 213 | * © 98 |
| 24 | ’ 414 | >< 199 | ~ © 97 |
| 25 | ` 380 | ?. 191 | = { 96 |
+------+------------+------------+-------------+
Analyzes took 1 seconds.