1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
| struct tnode
{
char *word;
int lines[MAXLINES];
int line_count;
struct tnode *left;
struct tnode *right;
};
int line_number = 1;
struct tnode *talloc(void)
{
return (struct tnode *)malloc(sizeof(struct tnode));
}
char *strdup(const char *s)
{
char *p = (char *)malloc(strlen(s) + 1);
if (p != NULL)
strcpy(p, s);
return p;
}
struct tnode *addtree(struct tnode *p, char *w, int lineno)
{
int cond;
if (p == NULL)
{
p = talloc();
p->word = strdup(w);
p->lines[0] = lineno;
p->line_count = 1;
p->left = p->right = NULL;
}
else if ((cond = strcmp(w, p->word)) == 0)
{
if (p->lines[p->line_count - 1] != lineno)
p->lines[p->line_count++] = lineno;
}
else if (cond < 0)
p->left = addtree(p->left, w, lineno);
else
p->right = addtree(p->right, w, lineno);
return p;
}
void treeprint(struct tnode *p)
{
if (p != NULL)
{
treeprint(p->left);
printf("%s: ", p->word);
for (int i = 0; i < p->line_count; i++)
printf("%d ", p->lines[i]);
printf("\n");
treeprint(p->right);
}
}
int getword(char *word, int lim)
{
int c;
char *w = word;
while (isspace(c = getch()))
if (c == '\n')
line_number++;
if (c != EOF)
*w++ = c;
if (!isalpha(c))
{
*w = '\0';
return c;
}
for (; --lim > 0; w++)
if (!isalnum(*w = getch()))
{
ungetch(*w);
break;
}
*w = '\0';
return word[0];
}
int is_noise_word(char *word)
{
static char *noise_words[] = {
"the", "and", "a", "an", "in", "on", "of", "to", "is", "are", "was", "were", "it", "that", "this", "with", "as", "for", "by", "at", "from", "but", "or", "not", "be", "have", "has", "had", "do", "does", "did", "will", "would", "can", "could", "should", "shall", "may", "might", "must", "if", "then", "else", "when", "where", "which", "who", "whom", "whose", "why", "how", NULL};
for (char **p = noise_words; *p != NULL; p++)
if (strcmp(word, *p) == 0)
return 1;
return 0;
}
int main(void)
{
struct tnode *root = NULL;
char word[MAXWORD];
while (getword(word, MAXWORD) != EOF)
if (isalpha(word[0]) && !is_noise_word(word))
root = addtree(root, word, line_number);
treeprint(root);
return 0;
}
|