Software Engineering for Smart Data Analytics & Smart Data Analytics for Software Engineering
TFQuarantine
import java.io.*;
import java.util.*;
import java.util.Map.Entry;

/**
 * Term-frequency example written in the "quarantine" style: the pipeline
 * steps are plain functions, and every step that touches the outside world
 * returns an {@link IO} object instead of performing the action itself. Only
 * {@link TFQuarantine#execute()} actually runs those IO objects.
 *
 * Raw types are used on purpose so that heterogeneous functions can be stored
 * in one list and chained; hence the class-level warning suppression.
 */
@SuppressWarnings({ "rawtypes", "unchecked" })
public class TF_24 {

    /** Name of the comma-separated stop word file, resolved against the working directory. */
    private static final String STOP_WORDS_FILE = "stop_words.txt";

    /** Number of entries kept by {@link #first25}. */
    private static final int TOP_COUNT = 25;

    //
    // Interfaces to simulate first class functions and IO-operations
    // as objects of anonymous classes
    //

    /** A deferred IO action producing a value of type R when executed. */
    static interface IO<R> {
        R execute() throws IOException;
    }

    /** A one-argument function from P to R. */
    static interface Function<P, R> {
        R apply(P input);
    }

    /** A function that ignores its argument and always returns the stored value. */
    static class Constant<V> implements Function<Void, V> {
        V value;

        public Constant(V value) {
            this.value = value;
        }

        @Override
        public V apply(Void ignore) {
            return value;
        }
    }

    //
    // The Quarantine class for this example
    //

    /**
     * Collects a chain of functions and runs them in order, executing any
     * {@link IO} object a function returns before handing the result on.
     */
    static class TFQuarantine {
        List<Function> funcs = new ArrayList<Function>();

        /** Starts a chain whose first step yields the given value. */
        public <V> TFQuarantine(V value) {
            funcs.add(new Constant<V>(value));
        }

        /** Appends a function to the chain and returns this object for chaining. */
        public TFQuarantine bind(Function func) {
            funcs.add(func);
            return this;
        }

        /**
         * Executes the value if it is an {@link IO}, otherwise returns it
         * unchanged.
         *
         * @throws RuntimeException wrapping the IOException (kept as the
         *         cause) if the IO action fails
         */
        private Object guardedExecution(Object value) {
            if (!(value instanceof IO)) {
                return value;
            }
            try {
                return ((IO) value).execute();
            } catch (IOException e) {
                // Keep the original exception as the cause so the stack trace is not lost.
                throw new RuntimeException(
                        "Could not complete execution because: " + e.getLocalizedMessage(), e);
            }
        }

        /**
         * Runs the chain: each function receives the (IO-unwrapped) result of
         * the previous one, starting from null. The result of the last
         * function is unwrapped as well, so a trailing IO action is executed.
         */
        public void execute() {
            Object value = null;
            for (Function func : funcs) {
                value = func.apply(guardedExecution(value));
            }
            guardedExecution(value);
        }
    }

    //
    // Primitive File Access
    //
    // Extracted because we read the text file as well as the stop words file
    // but can not chain these reads in a sequence as we need to pass both
    // contents on.
    //

    /**
     * Reads the whole file into a string, decoding the bytes with the
     * platform default charset.
     *
     * @param pathToFile path of the file to read
     * @return the file contents
     * @throws IOException if the file cannot be opened or fully read
     */
    public static String readFile(String pathToFile) throws IOException {
        File file = new File(pathToFile);
        byte[] rawData = new byte[(int) file.length()];
        DataInputStream stream = new DataInputStream(new FileInputStream(file));
        try {
            // A single read() may return fewer bytes than requested; readFully loops until done.
            stream.readFully(rawData);
        } finally {
            stream.close();
        }
        return new String(rawData);
    }

    //
    // The functions
    //

    /**
     * Takes a path to a file and returns an IO action that yields the entire
     * contents of the file as a string
     */
    static Function readText = new Function<String, IO>() {
        @Override
        public IO apply(final String pathToFile) {
            return new IO() {
                @Override
                public String execute() throws IOException {
                    return readFile(pathToFile);
                }
            };
        }
    };

    /**
     * Takes a string and returns a lower-cased copy with every run of
     * nonalphanumeric chars (including underscores) replaced by one space
     */
    static Function filterCharsAndNormalize = new Function<String, String>() {
        @Override
        public String apply(String strData) {
            // Locale.ROOT keeps the result independent of the default locale.
            return strData.replaceAll("[\\W_]+", " ").toLowerCase(Locale.ROOT);
        }
    };

    /**
     * Takes a string and scans for words separated by spaces, returning a
     * list of words. Empty tokens (caused by leading or repeated spaces, or
     * empty input) are not words and are left out.
     */
    static Function scan = new Function<String, List<String>>() {
        @Override
        public List<String> apply(String strData) {
            List<String> words = new ArrayList<String>();
            for (String token : strData.split(" ")) {
                if (token.length() > 0) {
                    words.add(token);
                }
            }
            return words;
        }
    };

    /**
     * Takes a list of words and returns an IO action that yields a copy with
     * all stop words and all single-letter words (a-z) removed. The stop
     * words are read from the stop word file when the action is executed.
     */
    static Function removeStopWords = new Function<List<String>, IO>() {
        @Override
        public IO apply(final List<String> wordList) {
            return new IO<List<String>>() {
                @Override
                public List<String> execute() throws IOException {
                    Set<String> stopWords = new HashSet<String>();
                    for (String stopWord : readFile(STOP_WORDS_FILE).split(",")) {
                        // Trim so that e.g. a trailing newline does not spoil the last entry.
                        stopWords.add(stopWord.trim());
                    }
                    // add single-letter words
                    for (char c = 'a'; c <= 'z'; c++) {
                        stopWords.add(Character.toString(c));
                    }
                    List<String> result = new ArrayList<String>(wordList);
                    result.removeAll(stopWords);
                    return result;
                }
            };
        }
    };

    /**
     * Takes a list of words and returns a dictionary associating words with
     * frequencies of occurrence
     */
    static Function frequencies = new Function<List<String>, Map<String, Integer>>() {
        @Override
        public Map<String, Integer> apply(List<String> wordList) {
            Map<String, Integer> wordFreqs = new HashMap<String, Integer>();
            for (String w : wordList) {
                Integer count = wordFreqs.get(w);
                wordFreqs.put(w, (count == null) ? 1 : count + 1);
            }
            return wordFreqs;
        }
    };

    /**
     * Takes a dictionary of words and their frequencies and returns a list of
     * pairs where the entries are sorted by descending frequency
     */
    static Function sort = new Function<Map<String, Integer>, List<Entry<String, Integer>>>() {
        @Override
        public List<Entry<String, Integer>> apply(Map<String, Integer> wordFreq) {
            List<Entry<String, Integer>> result =
                    new ArrayList<Entry<String, Integer>>(wordFreq.entrySet());
            Collections.sort(result, new Comparator<Entry<String, Integer>>() {
                @Override
                public int compare(Entry<String, Integer> left, Entry<String, Integer> right) {
                    // Operands swapped to get descending order.
                    return right.getValue().compareTo(left.getValue());
                }
            });
            return result;
        }
    };

    /**
     * Takes a list and returns a linked list containing the first 25 entries
     * of the original list (or all of them if there are fewer).
     */
    static Function first25 =
            new Function<List<Entry<String, Integer>>, LinkedList<Entry<String, Integer>>>() {
        @Override
        public LinkedList<Entry<String, Integer>> apply(List<Entry<String, Integer>> list) {
            LinkedList<Entry<String, Integer>> result = new LinkedList<Entry<String, Integer>>();
            Iterator<Entry<String, Integer>> it = list.iterator();
            for (int count = 0; it.hasNext() && (count < TOP_COUNT); count++) {
                result.add(it.next());
            }
            return result;
        }
    };

    /**
     * Takes a list of word/frequency pairs and returns an IO action that
     * prints them to standard output, one "word - count" line per pair.
     */
    static Function printAll = new Function<LinkedList<Entry<String, Integer>>, IO>() {
        @Override
        public IO<Void> apply(final LinkedList<Entry<String, Integer>> wordFreqs) {
            return new IO<Void>() {
                @Override
                public Void execute() {
                    for (Entry<String, Integer> wordFreq : wordFreqs) {
                        System.out.println(wordFreq.getKey() + " - " + wordFreq.getValue());
                    }
                    return null;
                }
            };
        }
    };

    //
    // The main function
    //

    /**
     * Builds the pipeline for the text file named by the first argument and
     * executes it.
     *
     * @param args args[0] is the path of the text file to analyse
     */
    public static void main(String[] args) throws IOException {
        if (args.length < 1) {
            System.err.println("Usage: java TF_24 <path-to-text-file>");
            System.exit(1);
        }
        new TFQuarantine(args[0])
                .bind(readText)
                .bind(filterCharsAndNormalize)
                .bind(scan)
                .bind(removeStopWords)
                .bind(frequencies)
                .bind(sort)
                .bind(first25)
                .bind(printAll)
                .execute();
    }
}