Häufigkeitsanalyse von Wörtern in einem Text

Häufigkeitsanalyse von Wörtern in einem Text (Kleinprojekte)

Erstellen Sie ein Programm, das eine Textdatei einliest und die hundert häufigsten Wörter ausgibt.

0 Kommentare

Bitte melde dich an, um einen Kommentar abzugeben

4 Lösung(en)

python
javascript
csharp
cpp

from string import punctuation

def sort_items(x, y):
    """Compare two ``(word, count)`` pairs in the style of a cmp function.

    The pairs are ordered by count first. For equal counts the words are
    compared in reverse, so that a descending sort lists equally frequent
    words in ascending alphabetical order.

    Returns a negative number if ``x`` sorts before ``y``, a positive number
    if it sorts after ``y``, and 0 if both pairs are equal.
    """
    # The cmp() builtin exists only in Python 2; (a > b) - (a < b) yields the
    # same -1/0/1 result on every Python version.
    by_count = (x[1] > y[1]) - (x[1] < y[1])
    return by_count or (y[0] > x[0]) - (y[0] < x[0])

from functools import cmp_to_key

# raw_input() is the Python 2 name of what Python 3 calls input().
try:
    read_line = raw_input
except NameError:
    read_line = input

N = 100  # number of most frequent words to report
words = {}  # maps each word to its number of occurrences
filename = read_line("File to analyze: ")

# The with-block closes the file as soon as counting is done.
with open(filename) as text_file:
    for line in text_file:
        for token in line.split():
            word = token.strip(punctuation).lower()
            # A token made up of punctuation only ("--", "...") strips down
            # to an empty string, which must not be counted as a word.
            if word:
                words[word] = words.get(word, 0) + 1

# Highest count first; sort_items decides the order of equal counts.
top_words = sorted(words.items(), key=cmp_to_key(sort_items), reverse=True)[:N]

for word, frequency in top_words:
    print("%-10s:%5d" % (word, frequency))

/*-----------------------------------------------------*\
| um uns allen das gehassel mit dem file-einlesen       |
| hier zu ersparen, wird der text über html geladen und |
| liegt als string $txt vor.                            |
\*-----------------------------------------------------*/

/**
 * Determines the most frequent words of a text.
 *
 * The text is upper-cased (which also turns "ß" into "SS"), whitespace of any
 * kind separates words, and every character other than A-Z, Ä, Ö, Ü is
 * dropped from the words.
 *
 * @param {string} txt             text to analyse
 * @param {number} [nMostFrequent] maximum number of entries returned
 *                                 (100 if omitted or falsy)
 * @returns {Array<[string, number]>} [word, count] pairs, highest count first;
 *                                    equal counts are ordered alphabetically
 */
function wordFrequency(txt, nMostFrequent) {
  nMostFrequent = nMostFrequent || 100;

  // prepare the text and turn it into a list of words
  const words = txt
    .toUpperCase()
    // line breaks and tabs separate words just like blanks do
    .replace(/\s+/g, ' ')
    .replace(/[^A-Z ÄÖÜ]/g, '')
    .split(' ')
    // leading, trailing or repeated blanks produce empty strings
    .filter(word => word.length > 0);

  // count the occurrences of every word
  const counts = new Map();
  for (const word of words) {
    counts.set(word, (counts.get(word) || 0) + 1);
  }

  // sort by frequency; a comparator has to return a number, not a boolean
  return Array.from(counts.entries())
    .sort((a, b) => b[1] - a[1] || (a[0] < b[0] ? -1 : a[0] > b[0] ? 1 : 0))
    .slice(0, nMostFrequent);
}

 // output: show the frequency table on the console
const frequencyTable = wordFrequency(txt);
console.table(frequencyTable);

Lösung von: Lisa Salander (Heidi-Klum-Gymnasium Bottrop)

// NET Core 3.x

using System;
using System.IO;
using System.Linq;
using System.Text.RegularExpressions;

namespace CS_Aufgabe_Haeufigkeit_Woerter
{
    /// <summary>
    /// Console program that prints the most frequent words of a text file.
    /// </summary>
    class Program
    {
        /// <summary>Maximum number of words that are printed.</summary>
        private const int TopCount = 100;

        /// <summary>
        /// Reads a text file, counts its words without regard to case and writes
        /// the most frequent ones to the console, one "[word, count]" pair per line.
        /// </summary>
        /// <param name="args">
        /// Optional command line arguments: <c>args[0]</c> is the path of the file
        /// to analyse. Without arguments the built-in default path is used.
        /// </param>
        static void Main(string[] args)
        {
            string path = args.Length > 0 ? args[0] : @"C:\...\test.txt";

            if (!File.Exists(path))
            {
                // Tell the user why there is no output instead of exiting silently.
                Console.Error.WriteLine($"File not found: {path}");
                return;
            }

            // A word is a run of letters and digits. Matching the words directly
            // (rather than splitting on whitespace) keeps attached punctuation out
            // of the keys, so that "word," and "word" are counted together.
            Regex.Matches(File.ReadAllText(path).ToLowerInvariant(), @"[\p{L}\p{N}]+")
                .Cast<Match>()
                .GroupBy(m => m.Value)
                .ToDictionary(g => g.Key, g => g.Count())
                .OrderByDescending(x => x.Value)
                .Take(TopCount)
                .ToList()
                .ForEach(x => Console.WriteLine(x));
        }
    }
}

Lösung von: Jens Kelm (@JKooP)

// C++ 14 | VS-2022

#include <algorithm>
#include <cctype>
#include <fstream>
#include <iostream>
#include <iterator>
#include <map>
#include <sstream>
#include <string>
#include <vector>

/// Number of entries after which the output of the most frequent words stops.
constexpr size_t MOST_FRQ = 100;

/// Loads the complete content of a file into a string.
///
/// @param path  Location of the file to read.
/// @return      The file content. If the file cannot be opened, a message is
///              written to std::cerr and an empty string is returned.
std::string get_file(const std::string& path) {
    std::ifstream input{ path };
    if (!input.fail()) {
        // Copy the whole stream buffer into memory in one go.
        std::ostringstream content;
        content << input.rdbuf();
        return content.str();
    }
    std::cerr << "Pfad nicht gefunden: " << path << "\n";
    return std::string{};
}

/// Splits a text into words.
///
/// A word is a maximal run of characters for which std::isalnum is true;
/// every other character acts as a separator. Words shorter than two
/// characters are discarded.
///
/// @param str  Text to split.
/// @return     The words in their order of appearance.
std::vector<std::string> get_split_string(const std::string& str) {
    std::vector<std::string> out{};
    std::string wrd{};
    // Stores the pending word if it is long enough, then starts a new one.
    const auto flush = [&out, &wrd] {
        if (wrd.length() > 1)
            out.push_back(wrd);
        wrd.clear();
    };
    for (const char c : str) {
        // std::isalnum has undefined behaviour for negative arguments, which a
        // plain char can hold for non-ASCII bytes, so convert to unsigned char.
        if (std::isalnum(static_cast<unsigned char>(c)))
            wrd.push_back(c);
        else
            flush();
    }
    // The text may end inside a word; the same length filter applies to it.
    flush();
    return out;
}

/// Counts how often each distinct element occurs in a sequence.
///
/// @tparam T  Element type; used as the key of the resulting map.
/// @param v   Elements to count.
/// @return    Map from each distinct element to its number of occurrences.
template<typename T>
std::map<T, int> get_map(const std::vector<T>& v) {
    std::map<T, int> counts;
    for (const auto& element : v) {
        // operator[] creates a missing entry with the value 0 before it is
        // incremented, so a first occurrence ends up with the count 1.
        ++counts[element];
    }
    return counts;
}

/// Creates a copy of a pair with its two members exchanged.
///
/// @param p  Pair to read from.
/// @return   A pair whose first member is p.second and whose second member
///           is p.first.
template<typename T, typename V>
std::pair<V, T> get_swapped_pair(const std::pair<T, V>& p) {
    return { p.second, p.first };
}

/// Builds a multimap that is keyed by the values of the given map.
///
/// Every (key, value) entry of src becomes a (value, key) entry of the
/// result, so iterating the result visits the entries ordered by value.
///
/// @param src  Map to invert.
/// @return     Multimap from the values of src to the keys of src.
template<typename T, typename V>
std::multimap<V, T> get_ordered_map(const std::map<T, V>& src) {
    std::multimap<V, T> dst;
    // Each insertion passes the position right behind the previous one as a
    // hint, which is what an insert iterator starting at dst.begin() does.
    // The hint only influences where entries with equal keys are placed
    // relative to each other.
    auto hint = dst.begin();
    for (const auto& entry : src) {
        hint = dst.insert(hint, get_swapped_pair<T, V>(entry));
        ++hint;
    }
    return dst;
}

/// Prints the most frequent words of a text file together with their counts.
///
/// The file is read, converted to lower case, split into words and counted;
/// at most MOST_FRQ lines of the form "word -> count" are written to
/// std::cout, starting with the highest count.
///
/// @param argc  Number of command line arguments.
/// @param argv  argv[1], if present, is the path of the file to analyse;
///              otherwise the built-in default path is used.
/// @return      0 on success, 1 if there is no text to analyse.
int main(int argc, char* argv[]) {
    const std::string path{ argc > 1 ? argv[1] : "C:\\...\\pseudo.txt" };
    auto file{ get_file(path) };
    // get_file reports an unreadable path itself and returns an empty string;
    // an empty text contains no words either way.
    if (file.empty())
        return 1;
    // std::tolower has undefined behaviour for negative arguments, which a
    // plain char can hold for non-ASCII bytes, so take the character as
    // unsigned char.
    std::transform(file.begin(), file.end(), file.begin(),
        [](unsigned char c) { return static_cast<char>(std::tolower(c)); });
    const auto split{ get_split_string(file) };
    const auto group{ get_map(split) };
    const auto order{ get_ordered_map(group) };
    // The counter has the type of MOST_FRQ to avoid a signed/unsigned comparison.
    size_t printed{ 0 };
    // The multimap is sorted by ascending count, so walk it backwards.
    for (auto it{ order.rbegin() }; it != order.rend() && printed < MOST_FRQ; ++it, ++printed)
        std::cout << it->second << " -> " << it->first << "\n";
    return 0;
}

Lösung von: Jens Kelm (@JKooP)

Zeit:	2-4
Schwierigkeit:	k.A.
Webcode:	8jq4-ci6b
Autor:	Martin Guggisberg (Universität Basel / PH FHNW)

Programmieren lernen

Web-Code:

Häufigkeitsanalyse von Wörtern in einem Text (Kleinprojekte)

0 Kommentare

4 Lösung(en)

Aktionen

Bewertung

Meta