package main
import (
"./fileio"
"fmt"
"os"
"strconv"
"strings"
)
// Morph is a single morpheme read from one analysis line of the form
// "surface<TAB>pos,pos1,...,base,...".
type Morph struct {
	surface, base string // text as it appears in the sentence; dictionary (base) form
	pos, pos1     string // part of speech; first part-of-speech subcategory
}
// Chunk is one phrase of a sentence together with its dependency links to the
// other chunks of the same sentence.
type Chunk struct {
// morphs holds the morphemes that make up this chunk, in input order.
morphs []Morph
// dst is the index (within the sentence) of the chunk this one depends on;
// a negative value means the chunk has no destination.
dst int
// srcs lists the indices of the chunks whose dst points at this chunk.
srcs []int
}
// parseArticle groups dependency-parser output lines into sentences of chunks.
//
// Three kinds of line are recognised:
//   - "EOS" closes the current sentence;
//   - "* <chunk no> <dst><type> <head>/<func> <score>" opens a new chunk;
//   - "<surface>\t<pos>,<pos1>,...,<base>,..." adds a morpheme to the open chunk.
//
// Empty lines are skipped. Chunks without morphemes and sentences without
// chunks are dropped. When a sentence is closed, initSourceIndex fills in the
// srcs of its chunks. The returned slice is never nil.
//
// It panics if a chunk header or a morpheme line is malformed.
func parseArticle(lines []string) [][]Chunk {
	article := make([][]Chunk, 0)
	var (
		sentence  []Chunk
		morphemes []Morph
		chunk     Chunk
	)

	// flushChunk closes the chunk being built, provided it collected morphemes.
	flushChunk := func() {
		if len(morphemes) == 0 {
			return
		}
		chunk.morphs = morphemes
		sentence = append(sentence, chunk)
		morphemes = nil
	}

	for _, line := range lines {
		switch {
		case line == "":
			// Nothing to parse; indexing into an empty line would panic.
			continue

		case line == "EOS":
			flushChunk()
			if len(sentence) > 0 {
				initSourceIndex(sentence)
				article = append(article, sentence)
				sentence = nil
			}

		case strings.HasPrefix(line, "* "):
			// Header layout:
			// * chunk-number destination-chunk-number(-1 when there is none)
			//   head-morpheme-number/function-word-morpheme-number dependency-score
			//
			// The space in the prefix matters: a morpheme whose surface form is
			// "*" is followed by a tab and must not be taken for a header.
			flushChunk()
			fields := strings.Split(line, " ")
			if len(fields) < 3 || len(fields[2]) < 2 {
				panic(fmt.Sprintf("malformed chunk header: %q", line))
			}
			// Drop the one-character dependency type suffix (e.g. "D").
			dst, err := strconv.Atoi(fields[2][:len(fields[2])-1])
			if err != nil {
				panic(err)
			}
			chunk = initChunk(chunk, dst)

		default:
			// Morpheme layout:
			// surface\tpos,pos-subcategory-1,pos-subcategory-2,pos-subcategory-3,
			//   conjugation-form,conjugation-type,base-form,reading,pronunciation
			cols := strings.Split(line, "\t")
			if len(cols) < 2 {
				panic(fmt.Sprintf("malformed morpheme line (no tab): %q", line))
			}
			features := strings.Split(cols[1], ",")
			if len(features) < 7 {
				panic(fmt.Sprintf("malformed morpheme line (fewer than 7 features): %q", line))
			}
			morphemes = append(morphemes, Morph{
				surface: cols[0],
				base:    features[6],
				pos:     features[0],
				pos1:    features[1],
			})
		}
	}
	return article
}
// initChunk returns chunk reset for reuse: dst is set to the given destination
// index and both morphs and srcs become fresh, empty (non-nil) slices.
// The chunk is received by value, so the caller's copy is left untouched.
func initChunk(chunk Chunk, dst int) Chunk {
	chunk.dst = dst
	chunk.morphs, chunk.srcs = []Morph{}, []int{}
	return chunk
}
// initSourceIndex fills in the reverse dependency links of a sentence: for
// every chunk with a non-negative dst, the chunk's own index is appended to
// the srcs of the chunk at that dst. The slice elements are modified in place.
func initSourceIndex(sentence []Chunk) {
	for src := range sentence {
		dst := sentence[src].dst
		if dst >= 0 {
			sentence[dst].srcs = append(sentence[dst].srcs, src)
		}
	}
}
// containVerb reports whether any morpheme in morphs has the part of speech
// "動詞" (verb).
func containVerb(morphs []Morph) bool {
	for i := range morphs {
		if morphs[i].pos == "動詞" {
			return true
		}
	}
	return false
}
// containParticle reports whether any morpheme in morphs has the part of
// speech "助詞" (particle).
func containParticle(morphs []Morph) bool {
	found := false
	for i := 0; i < len(morphs) && !found; i++ {
		found = morphs[i].pos == "助詞"
	}
	return found
}
// fetchVerb returns the base form of the first morpheme in morphs whose part
// of speech is "動詞" (verb), or "" when there is none.
func fetchVerb(morphs []Morph) string {
	for i := range morphs {
		if m := morphs[i]; m.pos == "動詞" {
			return m.base
		}
	}
	return ""
}
// fetchParticle returns the base form of the first morpheme in morphs whose
// part of speech is "助詞" (particle), or "" when there is none.
func fetchParticle(morphs []Morph) string {
	base := ""
	for _, m := range morphs {
		if m.pos != "助詞" {
			continue
		}
		base = m.base
		break
	}
	return base
}
// printVerbCases writes one line to standard output for every chunk of
// sentence that contains a verb and has at least one source chunk containing
// a particle. The line is "<verb base>\t<particles>", where the particles are
// the first particle of each such source chunk, in srcs order, separated by
// single spaces. Verb chunks without any particle-bearing source print nothing.
func printVerbCases(sentence []Chunk) {
	for _, predicate := range sentence {
		if !containVerb(predicate.morphs) {
			continue
		}

		var cases strings.Builder
		for _, src := range predicate.srcs {
			dependents := sentence[src].morphs
			if !containParticle(dependents) {
				continue
			}
			cases.WriteString(fetchParticle(dependents))
			cases.WriteString(" ")
		}
		if cases.Len() == 0 {
			continue
		}

		// Every particle was followed by a separator; strip the trailing one(s).
		fmt.Printf("%s\t%s\n", fetchVerb(predicate.morphs), strings.TrimRight(cases.String(), " "))
	}
}
func main() {
if len(os.Args) != 2 {
fmt.Println("Usage: main <filepath>")
os.Exit(1)
}
lines := fileio.ReadFileAllLines(os.Args[1])
article := parseArticle(lines)
for _, sentence := range article {
printVerbCases(sentence)
}
}
package fileio
import (
"bufio"
"fmt"
"io"
"os"
)
// WriteFileAllLines writes lines to fileName, each followed by a newline.
// The file is created with mode 0666 (before umask) when it does not exist
// and truncated when it does, so the result contains exactly the given lines.
//
// It panics if the file cannot be opened or if writing fails.
func WriteFileAllLines(fileName string, lines []string) {
	// O_TRUNC is required: without it, overwriting an existing file that is
	// longer than the new content leaves the old tail in place.
	file, err := os.OpenFile(fileName, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0666)
	if err != nil {
		panic(err)
	}
	defer file.Close()

	w := bufio.NewWriter(file)
	for _, line := range lines {
		// Write errors are sticky in bufio.Writer and surface from Flush below.
		fmt.Fprintln(w, line)
	}
	if err := w.Flush(); err != nil {
		panic(err)
	}
}
// WriteFileString replaces the contents of fileName with str, creating the
// file if it does not exist.
//
// It panics if the file cannot be created or if the write fails.
func WriteFileString(fileName string, str string) {
	file, err := os.Create(fileName)
	if err != nil {
		panic(err)
	}
	defer file.Close()

	// A failed or short write must not pass silently: the caller would be left
	// with a truncated file and no indication of it.
	if _, err := file.WriteString(str); err != nil {
		panic(err)
	}
}
// ReadAllLine reads r to the end and returns its lines without their line
// terminators. The result is an empty, non-nil slice when r yields no lines.
//
// It panics if scanning stops because of an error, such as a read failure or
// a line longer than bufio.Scanner's token limit, rather than returning a
// silently truncated result.
func ReadAllLine(r io.Reader) []string {
	sc := bufio.NewScanner(r)
	lines := []string{}
	for sc.Scan() {
		lines = append(lines, sc.Text())
	}
	// Scan returns false both at EOF and on failure; only Err tells them apart.
	if err := sc.Err(); err != nil {
		panic(err)
	}
	return lines
}
// ReadFileAllLines opens fileName and returns its contents as produced by
// ReadAllLine. The file is closed before returning.
//
// It panics if the file cannot be opened.
func ReadFileAllLines(fileName string) []string {
	f, err := os.Open(fileName)
	if err != nil {
		panic(err)
	}
	defer f.Close()

	return ReadAllLine(f)
}
#!/bin/bash
# Puts fileio.go into its own package directory and runs main.go.
# Usage: <this script> <filepath>
# All arguments are forwarded to the program; main.go expects exactly one
# input file path and prints a usage message otherwise.
set -e

# -p: do not fail when the directory is left over from a previous run.
mkdir -p fileio

# After the first run fileio.go already lives in fileio/, so only move it
# while it is still in the current directory.
if [ -f fileio.go ]; then
    mv fileio.go fileio/
fi

go run main.go "$@"