-
Notifications
You must be signed in to change notification settings - Fork 15
/
tf*idf-product.pl
52 lines (46 loc) · 1.04 KB
/
tf*idf-product.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
use strict;
use Text::CSV_XS;
# Tab-separated parser, shared by the IDF loader here and the per-document
# TF loop further down.
my $csv = Text::CSV_XS->new({sep_char => "\t"});

# Read all the IDF values. Each row of output/idf.txt is "<term>\t<idf>";
# the result is %idf, mapping term => IDF value.
my %idf;
my $idf_path = "output/idf.txt";
# Three-argument open with a lexical handle; fail loudly, because without
# the IDF table every TF*IDF product would silently come out as 0.
open(my $idf_fh, '<', $idf_path) or die "Cannot open $idf_path: $!";
while(my $line = <$idf_fh>)
{
    if($csv->parse($line))
    {
        my @cols = $csv->fields();
        # Rows without a value column carry no IDF; leave the term absent
        # (an absent term is treated as IDF 0 when the product is computed).
        next if @cols < 2;
        $idf{$cols[0]} = $cols[1];
    }
    else
    {
        # Report unparseable rows in the same format the TF loop uses,
        # instead of dropping them silently.
        my $error = $csv->error_input;
        print "Error: ".(defined $error ? $error : '')."\n";
    }
}
close($idf_fh);
# Compute TF*IDF for every document.
# Each regular file in output/tf holds "<term>\t<tf>" rows. For each one, a
# file of the same name is written under output/tfidf containing one
# "<term>\t<tf*idf>" row per term, sorted by term. Relies on %idf
# (term => IDF) and the tab-separated parser $csv set up earlier in the file.
my $tf_dir  = "output/tf";
my $out_dir = "output/tfidf";

opendir(my $dh, $tf_dir) or die "Cannot open directory $tf_dir: $!";
my @files = readdir($dh);
closedir($dh);

foreach my $f (@files)
{
    # Skip ".", ".." and hidden files.
    next if $f =~ /^\./;

    my $in_path = "$tf_dir/$f";
    # Only regular files are documents; a subdirectory would otherwise
    # produce a bogus empty output file.
    next unless -f $in_path;

    print "Processing file: ".$f."\n";
    my %tfidf;

    # Three-argument open: the filename comes from the directory listing, so
    # it must never be interpreted as a mode or pipe specification.
    open(my $in_fh, '<', $in_path) or die "Cannot open $in_path: $!";
    while(my $line = <$in_fh>)
    {
        if($csv->parse($line))
        {
            my ($term, $tf) = $csv->fields();
            # A term missing from the IDF table, or a row with no TF column,
            # contributes 0 (made explicit rather than relying on undef
            # numifying to 0).
            my $idf_val = defined $idf{$term} ? $idf{$term} : 0;
            my $tf_val  = defined $tf ? $tf : 0;
            $tfidf{$term} = $tf_val * $idf_val;
        }
        else
        {
            my $error = $csv->error_input;
            print "Error: ".(defined $error ? $error : '')."\n";
        }
    }
    close($in_fh);

    my $out_path = "$out_dir/$f";
    open(my $out_fh, '>', $out_path) or die "Cannot write $out_path: $!";
    foreach my $ti (sort keys %tfidf)
    {
        print {$out_fh} $ti."\t".$tfidf{$ti}."\n";
    }
    # Buffered write errors (e.g. disk full) only surface at close time.
    close($out_fh) or die "Cannot close $out_path: $!";
}