Sentiment Analysis in Java

Sentiment Analysis in Java Homework Sample

The provided code allows for sentiment analysis of film reviews, and has some JUnit tests in it. You need to extend the code and provide additional tests. You can look at the existing tests to find out how the code is used. The code should make a prediction based on the words in the review: words like good, great, and fun are positive words, and words like suck, terrible, and bad are negative. You can search through the reviews and see how many times each word is used in conjunction with each review rating. Some words are used almost exclusively for positive reviews and some are used for negative reviews, but others are used more equally and they contribute little to the overall score. For more Java programming assignments contact us for a quote.

Solution:

ReviewAnalysis.java

package a6;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.util.Scanner;

public class ReviewAnalysis {

    /** Capacity of the word tables; chosen by knowing the number of distinct words in advance. */
    private static final int MAX_WORDS = 16444;

    /** First review index (inclusive) of the sample whose ratings are estimated in main. */
    private static final int SAMPLE_START = 360;

    /** Last review index (exclusive) of the sample whose ratings are estimated in main. */
    private static final int SAMPLE_END = 380;

    /**
     * Goes through the process of - reading the file of movie reviews - getting a
     * score for each word - finding the best scoring word with a count greater than
     * some threshold - scoring a few reviews and comparing the computer-generated
     * score with the actual rating.
     *
     * Lines that do not start with an integer rating (for example blank lines) are
     * skipped instead of aborting the run, and the sample range is clipped to the
     * number of lines actually read.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String[] lines;
        try {
            lines = convertFileToStringArray("src/a6/MovieReviews.txt");
        } catch (FileNotFoundException e) {
            // Report and quit if the file wasn't found.
            System.out.println("File was not found");
            return;
        }

        // Three parallel arrays:
        // 1. words: all the distinct words in the reviews
        // 2. wordScore: a word at index x has a total point value at wordScore[x]
        // 3. wordCount: a word at index x has appeared wordCount[x] times
        String[] words = new String[MAX_WORDS];
        double[] wordScore = new double[MAX_WORDS];
        int[] wordCount = new int[MAX_WORDS];

        // Number of leading slots of the three arrays that hold real data.
        int numberOfSpacesUsedInWords = 0;

        // For each review: pull the rating off the front, then credit that rating
        // to every word in the rest of the line.
        for (String line : lines) {
            try (Scanner s = new Scanner(line)) {
                if (!s.hasNextInt()) {
                    continue; // blank or malformed line: nothing to learn from it
                }
                int lineScore = s.nextInt();
                numberOfSpacesUsedInWords =
                        processWords(s, lineScore, words, wordScore, wordCount, numberOfSpacesUsedInWords);
            }
        }

        // First, find the best word. Words that appear only a few times are not
        // meaningful, so require more than wordCountFilter appearances.
        int wordCountFilter = 15;
        int bestIndex = indexOfBestWord(wordScore, wordCount, wordCountFilter, numberOfSpacesUsedInWords);
        if (bestIndex != -1) {
            System.out.println(
                    "The best scoring word is " + words[bestIndex] + " with a count of " + wordCount[bestIndex]
                            + " and average score of: " + wordScore[bestIndex] / wordCount[bestIndex]);
        } else { // If the filter is too high, no word may match.
            System.out.println("No word found with a word count above " + wordCountFilter);
        }

        // Then, estimate the rating of a sample of reviews from their words and
        // compare each estimate with the actual rating.
        int sampleEnd = Math.min(SAMPLE_END, lines.length);
        for (int index = SAMPLE_START; index < sampleEnd; index++) {
            try (Scanner testScanner = new Scanner(lines[index])) {
                if (!testScanner.hasNextInt()) {
                    continue; // no rating on this line, so nothing to compare against
                }
                int actualScore = testScanner.nextInt();
                String reviewText = testScanner.hasNextLine() ? testScanner.nextLine() : "";
                double estimatedScore =
                        scoreReview(reviewText, words, wordScore, wordCount, numberOfSpacesUsedInWords);
                String formattedEstimate = String.format("%.1f", estimatedScore);

                // The prediction is correct when estimate and rating fall on the same side of 2.
                boolean sameSide = (actualScore >= 2.0) == (estimatedScore >= 2.0);
                // A NaN estimate (no known words) is never on either side, so it counts as wrong.
                String sentimentPrediction =
                        (sameSide && !Double.isNaN(estimatedScore)) ? "Correct" : "Wrong";

                System.out.println("estimated score: " + formattedEstimate + " actual score: " + actualScore
                        + " Sentiment Prediction: " + sentimentPrediction + " | Review: " + reviewText);
            }
        }
    }

    /**
     * Given a filename, read every line of the file (a line is what a Scanner
     * nextLine() call produces), convert it to lower-case and return the lines
     * as a String[].
     *
     * The array size must be known up front, so the file is read twice: once by
     * {@link #countLines(String)} to size the array and once by
     * {@link #fillArrayWithLines(String, int)} to fill it.
     *
     * @param filename path of the file to read
     * @return an array of strings with each line of the file an element of the array
     * @throws FileNotFoundException if the file cannot be opened
     */
    public static String[] convertFileToStringArray(String filename) throws FileNotFoundException {
        return fillArrayWithLines(filename, countLines(filename));
    }

    /**
     * Count the number of lines in a file.
     *
     * @param filename path of the file to read
     * @return the number of lines in the file. A line is defined as what a
     *         Scanner nextLine() provides.
     * @throws FileNotFoundException if the file cannot be opened
     */
    public static int countLines(String filename) throws FileNotFoundException {
        int lineCount = 0;
        // try-with-resources closes the underlying file even if reading fails.
        try (Scanner in = new Scanner(new File(filename))) {
            while (in.hasNextLine()) {
                in.nextLine();
                lineCount++;
            }
        }
        return lineCount;
    }

    /**
     * Given a filename and a count of the number of lines in the file, fill a
     * String array with those lines. Each line is converted to lower-case.
     * Punctuation remains.
     *
     * At most lineCount lines are read; extra lines in the file are ignored. If
     * the file has fewer lines than lineCount, the unused trailing elements stay
     * null.
     *
     * @param filename  path of the file to read
     * @param lineCount size of the returned array
     * @return a String[] with each line in the file an element in the array
     * @throws FileNotFoundException if the file cannot be opened
     */
    public static String[] fillArrayWithLines(String filename, int lineCount) throws FileNotFoundException {
        String[] lines = new String[lineCount];
        try (Scanner in = new Scanner(new File(filename))) {
            int index = 0;
            while (index < lineCount && in.hasNextLine()) {
                lines[index] = in.nextLine().toLowerCase();
                index++;
            }
        }
        return lines;
    }

    /**
     * Looks for word in the first numberOfSpacesUsedInWords elements of words.
     * The rest of the array is not inspected because those slots are not filled
     * until later in the word counting process.
     *
     * @param words                     an array of String values; the first
     *                                  numberOfSpacesUsedInWords are filled
     * @param word                      the search word
     * @param numberOfSpacesUsedInWords the number of elements currently used in words
     * @return the index of the search word in words, or -1 if not found
     */
    public static int indexOfWordInArray(String[] words, String word, int numberOfSpacesUsedInWords) {
        for (int i = 0; i < numberOfSpacesUsedInWords; i++) {
            if (words[i].equals(word)) {
                return i;
            }
        }
        return -1;
    }

    /**
     * For a review sentence, estimate the movie rating based on the words in the
     * review. Each word that is present in words contributes its average score
     * (scores[index] / wordCount[index]); the result is the mean of those
     * contributions.
     *
     * Words that are not in the words array are skipped and do not count toward
     * the mean. If no word of the review is known (including an empty review),
     * the result is NaN.
     *
     * @param review                    the text of the review
     * @param words                     the array of words found in all reviews
     * @param scores                    the cumulative score for each word in words
     * @param wordCount                 the number of times each word in words
     *                                  appears in all the reviews
     * @param numberOfSpacesUsedInWords the number of elements in the arrays to be used
     * @return the average score for the known words in review, or NaN if there are none
     */
    public static double scoreReview(String review, String[] words, double[] scores, int[] wordCount,
            int numberOfSpacesUsedInWords) {
        double total = 0;
        int knownWords = 0;
        try (Scanner s = new Scanner(review)) {
            while (s.hasNext()) {
                int index = indexOfWordInArray(words, s.next(), numberOfSpacesUsedInWords);
                if (index == -1) {
                    continue; // never seen in training data: carries no information
                }
                total += scores[index] / wordCount[index];
                knownWords++;
            }
        }
        return total / knownWords; // 0.0 / 0 yields NaN when nothing matched
    }

    /**
     * Search through the first numberOfSpacesUsedInWords elements and find the
     * index of the word with the highest average score (scores[i] / counts[i]).
     * Words whose counts are not greater than countAbove are ignored, as are
     * words with a non-positive count (their average is undefined). On a tie the
     * lowest index wins.
     *
     * @param scores                    an array of cumulative scores for a word
     * @param counts                    an array of times the word appeared in the reviews
     * @param countAbove                words with counts below or equal to
     *                                  countAbove are ignored
     * @param numberOfSpacesUsedInWords the number of valid elements in the arrays
     * @return the index of the best average score or -1 if none satisfy the
     *         countAbove threshold
     */
    public static int indexOfBestWord(double[] scores, int[] counts, int countAbove,
            int numberOfSpacesUsedInWords) {
        int bestIndex = -1;
        double bestAverage = 0;
        for (int i = 0; i < numberOfSpacesUsedInWords; i++) {
            if (counts[i] <= countAbove || counts[i] <= 0) {
                continue;
            }
            double average = scores[i] / counts[i];
            // bestIndex == -1 marks "no candidate yet", so no magic sentinel score is needed.
            if (bestIndex == -1 || average > bestAverage) {
                bestAverage = average;
                bestIndex = i;
            }
        }
        return bestIndex;
    }

    /**
     * Process the words in the scanner s. If a token in s is already in words,
     * add lineScore to wordScore for that word and add 1 to its wordCount. If the
     * token is not in words, store it in the next available spot, set its
     * wordScore to lineScore and its wordCount to 1, and grow
     * numberOfSpacesUsedInWords by 1.
     *
     * @param s                         a Scanner with the text part of a movie review
     * @param lineScore                 the integer movie rating taken from the review
     * @param words                     an array to hold the words from the reviews
     * @param wordScore                 an array holding the cumulative (summed)
     *                                  score for each word
     * @param wordCount                 an array holding the number of times a word
     *                                  has been seen in reviews
     * @param numberOfSpacesUsedInWords the number of elements used in the arrays
     * @return the new numberOfSpacesUsedInWords. If no new words are found in s,
     *         it is the same value as the input.
     */
    public static int processWords(Scanner s, int lineScore, String[] words, double[] wordScore,
            int[] wordCount, int numberOfSpacesUsedInWords) {
        while (s.hasNext()) {
            String current = s.next();
            int index = indexOfWordInArray(words, current, numberOfSpacesUsedInWords);
            if (index == -1) {
                words[numberOfSpacesUsedInWords] = current;
                wordScore[numberOfSpacesUsedInWords] = lineScore;
                wordCount[numberOfSpacesUsedInWords] = 1;
                numberOfSpacesUsedInWords++;
            } else {
                wordCount[index]++;
                wordScore[index] += lineScore;
            }
        }
        return numberOfSpacesUsedInWords;
    }

}

ReviewAnalysisTest.java

package a6;

import static org.junit.Assert.*;

import java.io.FileNotFoundException;
import java.util.Scanner;

import org.junit.*;

public class ReviewAnalysisTest {

@Test
public void testScoreReview() {
String review = “this is good”;
String[] words = {“this”, “is”, “good”};
double[] scores = {1.0, 1.5, 3.5};
int[] wordCount = {1, 1, 1};
int numberOfWordsSoFar = 3;
double score = ReviewAnalysis.scoreReview(review, words, scores, wordCount, numberOfWordsSoFar);
assertEquals(“failed normal test”, 2.0, score, 1e-9);
}

@Test
public void testIndexOfBestWord() {
double[] scores = {4.0, 1.5, 3.5};
int[] wordCount = {1, 5, 3};
int numberOfWordsSoFar = 3;
int index = ReviewAnalysis.indexOfBestWord(scores, wordCount, 0, numberOfWordsSoFar);
assertEquals(“failed normal test”, 0, index);
index = ReviewAnalysis.indexOfBestWord(scores, wordCount, 3, numberOfWordsSoFar);
assertEquals(“failed normal test with a cutoff count”, 1, index);
}

@Test
public void testIndexOfBestWordNoWordsAboveCount() {
double[] scores = {4.0, 1.5, 3.5};
int[] wordCount = {1, 5, 3};
int numberOfWordsSoFar = 3;
int index = ReviewAnalysis.indexOfBestWord(scores, wordCount, 10, numberOfWordsSoFar);
assertEquals(“failed no words with high enough count test”, -1, index);
}

@Test
public void testIndexOfWordInArray() {
String[] words = {“this”, “is”, “good”};
int numberOfWordsSoFar = 3;
int index = ReviewAnalysis.indexOfWordInArray(words, “this”, numberOfWordsSoFar);
assertEquals(“failed normal test”, 0, index);
index = ReviewAnalysis.indexOfWordInArray(words, “good”, numberOfWordsSoFar);
assertEquals(“failed normal test”, 2, index);
index = ReviewAnalysis.indexOfWordInArray(words, “boy”, numberOfWordsSoFar);
assertEquals(“failed normal test word not in array”, -1, index);
}

@Test
public void testProcessWords() {
String[] words = {“this”, “is”, “good”, “”};
double[] scores = {1.0, 2.0, 3.0, 0.0};
int[] wordCount = {1, 1, 1, 0};
int numberOfWordsSoFar = 3;
String more = “this a”;
Scanner s = new Scanner(more);
int reviewScore = 3;
numberOfWordsSoFar = ReviewAnalysis.processWords(s, reviewScore, words, scores, wordCount, numberOfWordsSoFar);
assertEquals(“failed to add word to numberOfWordsSoFar”, 4, numberOfWordsSoFar);
assertEquals(“failed to update scores”, 4.0, scores[0], 1e-9);
assertEquals(“failed to update wordCount”, 2, wordCount[0]);
assertEquals(“failed to update wordCount”, 1, wordCount[3]);
}

@Test
public void testConvertFileToStringArray() {
try {
String[] lines = ReviewAnalysis.convertFileToStringArray(“src/a6/testFile.txt”);
assertEquals(“failed to add lines to array”, 2, lines.length);
assertEquals(“failed to add make all text lower case”, “this is a test”, lines[0]);
} catch (FileNotFoundException e) {
fail(“File reported as not found but should have been found.”);
}
}

}