################################################################# # Simulates the UNIX commands: sort | uniq -c | sort -r # # Discard all but one of identical lines from input # # and count the number of occurrences # # # # This script should be included (as a procedure) in a script, # # that finds the strings of concern. This is done with # # # # include uniq_proc.psc # # # # All the strings you want to uniq are parsed through # # the procedure # # # # uniq (call uniq 'string$') # # # # which take only one argument. (Special characters can cause # # the procedure to crash, although some are replaced with dots) # # # # The uniq lines and the number of occurrences are printed # # with the procedure call # # print_uniq (call print_uniq) # # # # which takes no arguments # # # # E.g.: # # include uniq_proc.psc # # numbOfInt = Get number of intervals # # for y to numbOfInt # # lab$ = Get label of interval... y 1 # # call uniq 'lab$' # # endfor # # call print_uniq # # # # Most of the vars are declared as local, but the following # # vars are global: # # numbOfDifTypes # # numbOfThisType_'x' (x = name of string, array) # # type_'y'$ (y = 1..n, array) # # total_input # # returnWhitoutWhitespace$ # # # # 26.05.2006 John Tøndering # # Troubles, errors, questions: Find my e-mail address at # # http://www.cphling.dk/pers/johtnd # ################################################################# # initialization numbOfDifTypes = 0 numbOfThisType = 0 total_input = 0 ## This procedure finds every uniq input and the number of ## occurrences procedure uniq .input$ # First: delete some characters (otherwise the script will crash) call remove_whitespace '.input$' .input$ = returnWhitoutWhitespace$ total_input += 1 .match = 0 .numbOfDifTypesPresent = numbOfDifTypes for .tester to .numbOfDifTypesPresent .test_equals$ = type_'.tester'$ if .input$ = .test_equals$ and .match = 0 .match = 1 numbOfThisType_'.input$' += 1 endif endfor if .match = 0 numbOfDifTypes += 1 type_'numbOfDifTypes'$ = .input$ numbOfThisType_'.input$' = 1 endif endproc ## Printing, but first I will sort by number of occurrrences procedure print_uniq call sort_uniq printline ----------------------------------------------- for .q to numbOfDifTypes .type$ = type_'.q'$ .count = numbOfThisType_'.type$' printline 'tab$''.count''tab$''.type$' endfor printline ----------------------------------------------- printline 'tab$''total_input''tab$'('numbOfDifTypes' different strings) endproc procedure sort_uniq .stopklods = 0 .repeat = 1 repeat .old_count = 0 .stopklods += 1 if .stopklods > 10000 pause Problably gone into an infinite loop (Try raise the value of .stopklods) endif .keepOnSorting = 0 for .q to numbOfDifTypes type$ = type_'.q'$ .count = numbOfThisType_'type$' if .count > .old_count and .old_count > 0 .keepOnSorting = 1 .low_count_type$ = type_'.q'$ .low_count = .count .hq = .q - 1 .high_count_type$ = type_'.hq'$ type_'.q'$ = .high_count_type$ type_'.hq'$ = .low_count_type$ endif .old_count = .count endfor if .keepOnSorting = 1 .repeat = 1 else .repeat = 0 endif until .repeat = 0 endproc # Deletes whitespace and ",=+\:?!()/-" (quotes can not be deleted) # These symbols are replaced with "." (a dot) procedure remove_whitespace .wst$ .laeng = length (.wst$) returnWhitoutWhitespace$ = "" for .q to .laeng .let$ = left$(.wst$, 1) if .let$ = "," or .let$ = "=" or .let$ = "+" or .let$ = "\" or .let$ = ":" or .let$ = "?" ...or .let$ = "!" or .let$ = "(" or .let$ = ")" or .let$ = "/" or .let$ = "-" returnWhitoutWhitespace$ = returnWhitoutWhitespace$ + "." elsif .let$ = "æ" or .let$ = "Æ" returnWhitoutWhitespace$ = returnWhitoutWhitespace$ + "ae" elsif .let$ = "ø" or .let$ = "Ø" returnWhitoutWhitespace$ = returnWhitoutWhitespace$ + "oe" elsif .let$ = "å" or .let$ = "Å" returnWhitoutWhitespace$ = returnWhitoutWhitespace$ + "aa" elsif .let$ = " " returnWhitoutWhitespace$ = returnWhitoutWhitespace$ + "." else returnWhitoutWhitespace$ = returnWhitoutWhitespace$ + .let$ endif .rest = .laeng - .q .wst$ = right$ (.wst$, .rest) endfor endproc