SDA SE Wiki

Software Engineering for Smart Data Analytics & Smart Data Analytics for Software Engineering

User Tools

Site Tools


Quarantine Style

TFQuarantine

import java.io.*;
import java.util.*;
import java.util.Map.Entry;

/**
 * Term-frequency program written in the "Quarantine" style: the pipeline is
 * assembled from functions, and every step that touches the outside world
 * (reading files, printing) returns an {@link IO} action instead of performing
 * the work itself. {@link TFQuarantine#execute()} is the only place where those
 * actions are actually run.
 *
 * Raw types are used on purpose so that functions with different parameter
 * and result types can be chained in one list; hence the class-level
 * warning suppression.
 */
@SuppressWarnings({ "rawtypes", "unchecked" })
public class TF_24 {

	/** Path (relative to the working directory) of the comma-separated stop word file. */
	private static final String STOP_WORDS_FILE = "stop_words.txt";

	/** Maximum number of entries returned by {@link #first25}. */
	private static final int TOP_ENTRIES = 25;

	/** Size of the buffer used by {@link #readFile(String)}. */
	private static final int READ_BUFFER_SIZE = 8192;

	//
	// Interfaces to simulate first class functions and IO-operations
	// as objects of anonymous classes
	//

	/**
	 * A deferred I/O action. Nothing happens until {@link #execute()} is called.
	 *
	 * @param <R> type of the value produced by the action
	 */
	static interface IO<R> {
		/**
		 * Performs the action.
		 *
		 * @return the value produced by the action
		 * @throws IOException if the underlying I/O fails
		 */
		R execute() throws IOException;
	}

	/**
	 * A one-argument function object.
	 *
	 * @param <P> parameter type
	 * @param <R> result type
	 */
	static interface Function<P, R> {
		/**
		 * Applies the function.
		 *
		 * @param input the argument
		 * @return the result of applying the function to {@code input}
		 */
		R apply(P input);
	}

	/**
	 * A function that ignores its argument and always returns the value it
	 * was constructed with. Used as the first step of a quarantine chain.
	 *
	 * @param <V> type of the constant value
	 */
	static class Constant<V> implements Function<Void, V> {

		V value;

		public Constant(V value) {
			this.value = value;
		}

		@Override
		public V apply(Void ignore) {
			return value;
		}

	}

	//
	// The Quarantine class for this example
	//

	/**
	 * Collects functions with {@link #bind(Function)} and runs them in order
	 * with {@link #execute()}. Whenever a step yields an {@link IO} action, the
	 * action is executed and its result (not the action) is handed to the next
	 * step.
	 */
	static class TFQuarantine {

		List<Function> funcs = new ArrayList<Function>();

		/**
		 * Starts a chain whose first step produces {@code value}.
		 *
		 * @param value the initial value fed into the first bound function
		 */
		public <V> TFQuarantine(V value) {
			funcs.add(new Constant<V>(value));
		}

		/**
		 * Appends a function to the chain.
		 *
		 * @param func the function to run after all previously bound ones
		 * @return this instance, to allow chained calls
		 */
		public TFQuarantine bind(Function func) {
			funcs.add(func);
			return this;
		}

		/**
		 * Unwraps a value: an {@link IO} action is executed and its result
		 * returned; any other value is returned unchanged.
		 *
		 * @throws RuntimeException if the action throws an {@link IOException};
		 *         the original exception is attached as the cause
		 */
		private Object guardedExecution(Object value) {
			if (!(value instanceof IO)) {
				return value;
			}
			try {
				return ((IO) value).execute();
			} catch (IOException e) {
				// Keep the original exception as cause so the stack trace is not lost.
				throw new RuntimeException("Could not complete execution because: " + e.getLocalizedMessage(), e);
			}
		}

		/**
		 * Runs all bound functions in binding order, unwrapping IO actions
		 * between steps, and finally unwraps the result of the last step so
		 * that a trailing IO action (e.g. printing) is performed as well.
		 */
		public void execute() {
			Object value = null;
			for (Function func : funcs) {
				value = func.apply(guardedExecution(value));
			}
			guardedExecution(value);
		}

	}

	//
	// Primitive File Access
	//
	// Extracted because we read the text file as well as the stop words file
	// but cannot chain these reads in a sequence as we need to pass both
	// contents on.
	//

	/**
	 * Reads a whole file into a string, decoding the bytes with the platform
	 * default charset.
	 *
	 * @param pathToFile path of the file to read
	 * @return the complete contents of the file
	 * @throws IOException if the file cannot be opened or read
	 */
	public static String readFile(String pathToFile) throws IOException {
		FileInputStream stream = new FileInputStream(new File(pathToFile));
		try {
			ByteArrayOutputStream contents = new ByteArrayOutputStream();
			byte[] chunk = new byte[READ_BUFFER_SIZE];
			int bytesRead;
			// A single read() may return fewer bytes than requested, so loop until EOF.
			while ((bytesRead = stream.read(chunk)) != -1) {
				contents.write(chunk, 0, bytesRead);
			}
			return new String(contents.toByteArray());
		} finally {
			// Close the stream even when reading fails.
			stream.close();
		}
	}

	//
	// The functions
	//

	/**
	 * Takes a path to a file and returns an IO action that, when executed,
	 * yields the entire contents of the file as a string.
	 */
	static Function readText = new Function<String, IO>() {

		@Override
		public IO apply(final String pathToFile) {

			return new IO() {
				@Override
				public String execute() throws IOException {
					return readFile(pathToFile);
				}
			};
		}
	};

	/**
	 * Takes a string and returns a lower-case copy in which every run of
	 * non-alphanumeric characters (including underscores) is replaced by a
	 * single space.
	 */
	static Function filterCharsAndNormalize = new Function<String, String>() {

		@Override
		public String apply(String strData) {
			// Locale.ROOT keeps the result independent of the default locale
			// (e.g. "I" must become "i", not a dotless i under a Turkish locale).
			return strData.replaceAll("[\\W_]+", " ").toLowerCase(Locale.ROOT);
		}
	};

	/**
	 * Takes a string and scans for whitespace-separated words, returning a
	 * list of words. Leading and trailing whitespace is ignored; a blank
	 * string yields an empty list.
	 */
	static Function scan = new Function<String, List<String>>() {

		@Override
		public List<String> apply(String strData) {
			String trimmed = strData.trim();
			if (trimmed.isEmpty()) {
				// split() would return a single empty "word" here.
				return new ArrayList<String>();
			}
			return Arrays.asList(trimmed.split("\\s+"));
		}
	};

	/**
	 * Takes a list of words and returns an IO action that, when executed,
	 * reads the stop word file and yields a copy of the list with all stop
	 * words and all single-letter words (a-z) removed.
	 */
	static Function removeStopWords = new Function<List<String>, IO>() {

		@Override
		public IO apply(final List<String> wordList) {

			return new IO<List<String>>() {
				@Override
				public List<String> execute() throws IOException {
					// A hash set makes each membership test in removeAll constant time.
					Set<String> stopWords = new HashSet<String>();
					for (String stopWord : readFile(STOP_WORDS_FILE).split(",")) {
						// Trim so a trailing newline in the file does not
						// keep the last stop word from matching.
						stopWords.add(stopWord.trim());
					}
					// add single-letter words
					for (char c = 'a'; c <= 'z'; c++) {
						stopWords.add(Character.toString(c));
					}
					List<String> result = new ArrayList<String>(wordList);
					result.removeAll(stopWords);
					return result;
				}
			};
		}
	};

	/**
	 * Takes a list of words and returns a dictionary associating words with
	 * frequencies of occurrence
	 */
	static Function frequencies = new Function<List<String>, Map<String, Integer>>() {

		@Override
		public Map<String, Integer> apply(List<String> wordList) {
			Map<String, Integer> wordFreqs = new HashMap<String, Integer>();
			for (String word : wordList) {
				Integer count = wordFreqs.get(word);
				wordFreqs.put(word, (count == null) ? 1 : count + 1);
			}
			return wordFreqs;
		}
	};

	/**
	 * Takes a dictionary of words and their frequencies and returns a list of
	 * its entries sorted by descending frequency. The order of entries with
	 * equal frequency follows the iteration order of the given map.
	 */
	static Function sort = new Function<Map<String, Integer>, List<Entry<String, Integer>>>() {

		@Override
		public List<Entry<String, Integer>> apply(Map<String, Integer> wordFreq) {
			List<Entry<String, Integer>> result = new ArrayList<Entry<String, Integer>>(wordFreq.entrySet());
			Collections.sort(result, new Comparator<Entry<String, Integer>>() {
				@Override
				public int compare(Entry<String, Integer> left, Entry<String, Integer> right) {
					// Operands swapped to obtain descending order.
					return right.getValue().compareTo(left.getValue());
				}
			});
			return result;
		}
	};

	/**
	 * Takes a list and returns a linked list containing the first 25 entries
	 * of the original list (or all of them if there are fewer).
	 */
	static Function first25 = new Function<List<Entry<String, Integer>>, LinkedList<Entry<String, Integer>>>() {

		@Override
		public LinkedList<Entry<String, Integer>> apply(List<Entry<String, Integer>> list) {
			int limit = Math.min(TOP_ENTRIES, list.size());
			return new LinkedList<Entry<String, Integer>>(list.subList(0, limit));
		}

	};

	/**
	 * Takes a list of pairs where the entries are sorted by frequency and
	 * returns an IO action that, when executed, prints each pair to standard
	 * output as "word - count", one per line.
	 */
	static Function printAll = new Function<LinkedList<Entry<String, Integer>>, IO>() {

		@Override
		public IO<Void> apply(final LinkedList<Entry<String, Integer>> wordFreqs) {

			return new IO<Void>() {
				@Override
				public Void execute() {
					for (Entry<String, Integer> wordFreq : wordFreqs) {
						System.out.println("" + wordFreq.getKey() + " - " + wordFreq.getValue());
					}
					return null;
				}
			};
		}
	};

	//
	// The main function
	//

	/**
	 * Builds and runs the term-frequency pipeline.
	 *
	 * @param args command line arguments; {@code args[0]} is the path of the
	 *        text file to analyse
	 */
	public static void main(String[] args) throws IOException {
		if (args.length < 1) {
			System.err.println("Usage: java TF_24 <path-to-text-file>");
			System.exit(1);
		}
		new TFQuarantine(args[0]) //
				.bind(readText) //
				.bind(filterCharsAndNormalize) //
				.bind(scan) //
				.bind(removeStopWords) //
				.bind(frequencies) //
				.bind(sort) //
				.bind(first25) //
				.bind(printAll) //
				.execute(); //
	}

}
teaching/seminars/style/2014/quarantine.txt · Last modified: 2018/05/24 15:13 by daniel

SEWiki, © 2024