package com.freedville.nlprules;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Locale;
import java.util.regex.Pattern;

import org.apache.commons.io.IOUtils;

/**
 * Minimal rules-based annotator that looks for language names in a text file.
 *
 * <p>The single rule is: in any sentence containing the word "language"
 * (case-insensitive), every capitalized token other than the first token is
 * reported as a language. Sentence detection and tokenization are
 * deliberately simplistic (see the delimiter constants below).
 */
public class ProgrammingLanguageRulesBasedAnnotator {

	/** Sentence boundary: a literal period. Note: overly simplistic sentence detection. */
	private static final Pattern SENTENCE_DELIMITER = Pattern.compile("\\.");

	/**
	 * Token boundary: a single space, comma, colon or parenthesis.
	 * Note: overly simplistic tokenization. Adjacent delimiters (e.g. ", ")
	 * produce empty tokens, which the extraction rule skips.
	 */
	private static final Pattern WORD_DELIMITER = Pattern.compile("[ ,:()]");

	/**
	 * Reads the given file, applies the extraction rule to every sentence and
	 * prints each sentence that yielded at least one match to standard output,
	 * followed by the list of extracted tokens.
	 *
	 * @param filename path of the text file to annotate
	 * @throws Exception if the file cannot be opened or read
	 */
	public void readFile(String filename) throws Exception {
		for(String sentence : getSentences(filename)) {
			List<String> languages = extractLanguages(sentence);
			//Print extracted results with context.
			if(!languages.isEmpty()) {
				System.out.println(sentence.trim() + "\n\t" + languages + "\n");
			}
		}
	}

	/**
	 * Applies the rule to one sentence.
	 *
	 * @param sentence raw sentence text (may have surrounding whitespace)
	 * @return the capitalized tokens found after the first token, in order of
	 *         appearance; empty if the sentence does not mention "language"
	 */
	private List<String> extractLanguages(String sentence) {
		List<String> languages = new ArrayList<>();
		//Start of rule.  In any sentence with the word language...
		if(!sentence.toLowerCase(Locale.ROOT).contains("language")) {
			return languages;
		}
		String[] words = getWords(sentence);
		//Any capitalized word (except the first) is a language.
		//Index 0 is skipped because the first token of the sentence is excluded by the rule.
		for(int i=1; i<words.length; i++) {
			String word = words[i];
			if(!word.isEmpty() && Character.isUpperCase(word.charAt(0))) {
				languages.add(word);
			}
		}
		return languages;
	}

	/**
	 * Reads the file line by line and splits every line into sentences.
	 * Sentences never span lines because each line is split independently.
	 *
	 * <p>The file is decoded as UTF-8 and the underlying stream is always
	 * closed, including when reading fails part-way through.
	 *
	 * @param filename path of the text file to read
	 * @return all sentences of the file, in file order
	 * @throws IOException if the file cannot be opened or read
	 */
	private List<String> getSentences(String filename) throws IOException {
		List<String> sentences = new ArrayList<>();
		try (BufferedReader reader = new BufferedReader(new InputStreamReader(
				new FileInputStream(new File(filename)), StandardCharsets.UTF_8))) {
			String line;
			while((line = reader.readLine()) != null) {
				sentences.addAll(Arrays.asList(SENTENCE_DELIMITER.split(line)));
			}
		}
		return sentences;
	}

	/**
	 * Splits a sentence into tokens after trimming surrounding whitespace.
	 *
	 * @param sentence sentence text to tokenize
	 * @return the tokens; may contain empty strings where delimiters are adjacent
	 */
	private String[] getWords(String sentence) {
		return WORD_DELIMITER.split(sentence.trim());
	}

	/**
	 * Command-line entry point.
	 *
	 * @param args expects the file name to annotate as the first element
	 * @throws IllegalArgumentException if no argument is supplied
	 * @throws Exception if the file cannot be opened or read
	 */
	public static void main(String[] args) throws Exception {
		if(args.length == 0) {
			throw new IllegalArgumentException("Please pass a file name!");
		}

		new ProgrammingLanguageRulesBasedAnnotator().readFile(args[0]);
	}

}