diff --git a/README.md b/README.md index a86b1f2be8f92c9b9101e2c45e90b53b0054e599..fb6f41aa278468b3031399a7eabfbb63fddcda8b 100644 --- a/README.md +++ b/README.md @@ -2,6 +2,19 @@ Create Gitlab issues from RSS Feeds with optional labelling. Created to monitor RSS feeds and bring posts to our attention (Security Releases, Product Updates etc) +## Avoiding Duplication +We try to be as clever as is reasonably possible in terms of not duplicating RSS feed items into Gitlab. +A SQLite DB is used to store the GUID/FeedID combination which is checked when assessing articles for synchronisation. +In addition we also add the RSS feed's item GUID at the bottom of the issue description. Before synchronising an RSS item +we run an issue search in the associated project; if we don't find the GUID in any issue we assume it has not already been created. +This helps to guard against scenarios where you lose the SQLite DB and don't want RSS items reduplicating into Gitlab. +If found in Gitlab, it is marked as synchronised in the local database and a link to the existing issue(s) is printed to stdout. + +## Limiting what is initially synced +Each feed entry in the config file can have an "added_since" property set. This is used to only sync RSS items that have a +Published/Updated date greater than the provided value. This can be useful on RSS feeds where you don't want to import historic items, +just new posts going forward. + ## Config file The config file **MUST** be named config.yaml, an example one is provided [here](config.yaml.example). Below is a brief @@ -35,7 +48,7 @@ Make sure the location of your DATA_DIR environment variable is set to a persist that is contained within it stores the state of which RSS items have already been synced. 
### Run it -```sh +```bash docker run -e GITLAB_API_TOKEN=<INSERT_TOKEN> -e DATA_DIR=/data -e CONFIG_DIR=/app -v <PATH_TO_DATA_DIR>:/data -v <PATH_TO_CONFIG_DIR>/config adamhf/rss-sync:latest ``` diff --git a/main.go b/main.go index 0540196c80f6a02999bd8b1c8fbdefbe3fa3c33a..37e9136bf147a0c7d4c25c8913ea6d3a0e8925ee 100644 --- a/main.go +++ b/main.go @@ -15,6 +15,7 @@ import ( "net/http" "os" "path" + "strings" "time" ) @@ -48,6 +49,33 @@ type EnvValues struct { GitlabAPIKey string } +func hasExistingGitlabIssue(guid string, projectID int, gitlabClient *gitlab.Client) bool{ + searchOptions := &gitlab.SearchOptions{ + Page:1, + PerPage:10, + } + issues, _, err := gitlabClient.Search.IssuesByProject(projectID, guid, searchOptions) + if err != nil { + fmt.Printf("Unable to query Gitlab for existing issues\n") + } + retVal := false + if len(issues) == 1 { + retVal = true + fmt.Printf("Found existing issues for %s in project (%s)\n", guid, issues[0].WebURL) + + } else if len(issues) > 1 { + retVal = true + var urls []string + for _, issue := range issues { + urls = append(urls, issue.WebURL) + } + fmt.Printf("Found multiple existing issues for %s in project (%s)\n", guid, strings.Join(urls, ", ")) + } + + return retVal + +} + func (feed Feed) checkFeed(db *gorm.DB, gitlabClient *gitlab.Client) { fp := gofeed.NewParser() rss, err := fp.ParseURL(feed.FeedURL) @@ -73,14 +101,6 @@ func (feed Feed) checkFeed(db *gorm.DB, gitlabClient *gitlab.Client) { fmt.Printf("New Items: %d\n", len(newArticle)) for _, item := range newArticle { - // Prefer description over content - var body string - if item.Description != "" { - body = item.Description - } else { - body = item.Content - } - var time *time.Time // Prefer updated time to published if item.UpdatedParsed != nil { @@ -94,9 +114,24 @@ func (feed Feed) checkFeed(db *gorm.DB, gitlabClient *gitlab.Client) { continue } + // Check Gitlab to see if we already have a matching issue there + if hasExistingGitlabIssue(item.GUID, 
feed.GitlabProjectID, gitlabClient) { + // We think its new but there is already a matching GUID in Gitlab. Mark as Sync'd + db.Create(&SyncedItems{UUID: item.GUID, Feed: feed.ID}) + continue + } + + // Prefer description over content + var body string + if item.Description != "" { + body = item.Description + } else { + body = item.Content + } + issueOptions := &gitlab.CreateIssueOptions{ Title: gitlab.String(item.Title), - Description: gitlab.String(body), + Description: gitlab.String(body + "\n" + item.GUID), Labels: feed.Labels, CreatedAt: time, }