Commit 4ac16d63 authored by Tuna's avatar Tuna Committed by GitHub
Browse files

Merge pull request #61 from ngtuna/refactor-watcher

Refactor watcher
parents c1ca5b4e 894b227f
Showing with 1508 additions and 3795 deletions
+1508 -3795
This diff is collapsed.
......@@ -41,5 +41,5 @@ func Run(conf *config.Config) {
log.Fatal(err)
}
controller.Controller(conf, eventHandler)
controller.Start(conf, eventHandler)
}
......@@ -17,196 +17,315 @@ limitations under the License.
package controller
import (
"net/http"
"fmt"
"os"
"os/signal"
"syscall"
"time"
"github.com/Sirupsen/logrus"
"github.com/skippbox/kubewatch/config"
"github.com/skippbox/kubewatch/pkg/handlers"
"k8s.io/kubernetes/pkg/api"
"k8s.io/kubernetes/pkg/apis/extensions/v1beta1"
"k8s.io/kubernetes/pkg/client/cache"
client "k8s.io/kubernetes/pkg/client/unversioned"
"k8s.io/kubernetes/pkg/controller/framework"
"k8s.io/kubernetes/pkg/fields"
cmdutil "k8s.io/kubernetes/pkg/kubectl/cmd/util"
"k8s.io/kubernetes/pkg/util/wait"
"github.com/skippbox/kubewatch/pkg/utils"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/watch"
"k8s.io/apimachinery/pkg/util/wait"
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
api_v1 "k8s.io/client-go/pkg/api/v1"
meta_v1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/client-go/tools/cache"
"k8s.io/client-go/util/workqueue"
"k8s.io/client-go/kubernetes"
)
func Controller(conf *config.Config, eventHandler handlers.Handler) {
const maxRetries = 5
factory := cmdutil.NewFactory(nil)
kubeConfig, err := factory.ClientConfig()
if err != nil {
logrus.Fatal(err)
}
// Controller object
type Controller struct {
logger *logrus.Entry
clientset kubernetes.Interface
queue workqueue.RateLimitingInterface
informer cache.SharedIndexInformer
eventHandler handlers.Handler
}
kubeClient := client.NewOrDie(kubeConfig)
kubeExtensionsClient := client.NewExtensionsOrDie(kubeConfig)
func Start(conf *config.Config, eventHandler handlers.Handler) {
kubeClient := utils.GetClientOutOfCluster()
if conf.Resource.Pod {
watchPods(kubeClient, eventHandler)
}
if conf.Resource.Services {
watchServices(kubeClient, eventHandler)
}
c := newControllerPod(kubeClient, eventHandler)
stopCh := make(chan struct{})
defer close(stopCh)
if conf.Resource.ReplicationController {
watchReplicationControllers(kubeClient, eventHandler)
}
if conf.Resource.Deployment {
watchDeployments(kubeExtensionsClient, eventHandler)
}
go c.Run(stopCh)
if conf.Resource.Job {
watchJobs(kubeExtensionsClient, eventHandler)
sigterm := make(chan os.Signal, 1)
signal.Notify(sigterm, syscall.SIGTERM)
signal.Notify(sigterm, syscall.SIGINT)
<-sigterm
}
if conf.Resource.PersistentVolume {
var servicesStore cache.Store
servicesStore = watchPersistenVolumes(kubeClient, servicesStore, eventHandler)
}
logrus.Fatal(http.ListenAndServe(":8081", nil))
//if conf.Resource.Services {
// watchServices(kubeClient, eventHandler)
//}
//
//if conf.Resource.ReplicationController {
// watchReplicationControllers(kubeClient, eventHandler)
//}
//
//if conf.Resource.Deployment {
// watchDeployments(kubeExtensionsClient, eventHandler)
//}
//
//if conf.Resource.Job {
// watchJobs(kubeExtensionsClient, eventHandler)
//}
//
//if conf.Resource.PersistentVolume {
// var servicesStore cache.Store
// servicesStore = watchPersistenVolumes(kubeClient, servicesStore, eventHandler)
//}
//logrus.Fatal(http.ListenAndServe(":8081", nil))
}
func watchPods(client *client.Client, eventHandler handlers.Handler) cache.Store {
//Define what we want to look for (Pods)
watchlist := cache.NewListWatchFromClient(client, "pods", api.NamespaceAll, fields.Everything())
resyncPeriod := 30 * time.Minute
//Setup an informer to call functions when the watchlist changes
eStore, eController := framework.NewInformer(
watchlist,
&api.Pod{},
resyncPeriod,
framework.ResourceEventHandlerFuncs{
AddFunc: eventHandler.ObjectCreated,
DeleteFunc: eventHandler.ObjectDeleted,
func newControllerPod(client kubernetes.Interface, eventHandler handlers.Handler) *Controller {
queue := workqueue.NewRateLimitingQueue(workqueue.DefaultControllerRateLimiter())
informer := cache.NewSharedIndexInformer(
&cache.ListWatch{
ListFunc: func(options meta_v1.ListOptions) (runtime.Object, error) {
return client.CoreV1().Pods(meta_v1.NamespaceAll).List(options)
},
WatchFunc: func(options meta_v1.ListOptions) (watch.Interface, error) {
return client.CoreV1().Pods(meta_v1.NamespaceAll).Watch(options)
},
},
&api_v1.Pod{},
0, //Skip resync
cache.Indexers{},
)
//Run the controller as a goroutine
go eController.Run(wait.NeverStop)
return eStore
}
func watchServices(client *client.Client, eventHandler handlers.Handler) cache.Store {
//Define what we want to look for (Services)
watchlist := cache.NewListWatchFromClient(client, "services", api.NamespaceAll, fields.Everything())
resyncPeriod := 30 * time.Minute
//Setup an informer to call functions when the watchlist changes
eStore, eController := framework.NewInformer(
watchlist,
&api.Service{},
resyncPeriod,
framework.ResourceEventHandlerFuncs{
AddFunc: eventHandler.ObjectCreated,
DeleteFunc: eventHandler.ObjectDeleted,
UpdateFunc: eventHandler.ObjectUpdated,
informer.AddEventHandler(cache.ResourceEventHandlerFuncs{
AddFunc: func(obj interface{}) {
key, err := cache.MetaNamespaceKeyFunc(obj)
if err == nil {
queue.Add(key)
}
},
)
//Run the controller as a goroutine
go eController.Run(wait.NeverStop)
return eStore
}
func watchReplicationControllers(client *client.Client, eventHandler handlers.Handler) cache.Store {
//Define what we want to look for (ReplicationControllers)
watchlist := cache.NewListWatchFromClient(client, "replicationcontrollers", api.NamespaceAll, fields.Everything())
resyncPeriod := 30 * time.Minute
//Setup an informer to call functions when the watchlist changes
eStore, eController := framework.NewInformer(
watchlist,
&api.ReplicationController{},
resyncPeriod,
framework.ResourceEventHandlerFuncs{
AddFunc: eventHandler.ObjectCreated,
DeleteFunc: eventHandler.ObjectDeleted,
UpdateFunc: func(old, new interface{}) {
key, err := cache.MetaNamespaceKeyFunc(new)
if err == nil {
queue.Add(key)
}
},
)
//Run the controller as a goroutine
go eController.Run(wait.NeverStop)
return eStore
DeleteFunc: func(obj interface{}) {
key, err := cache.DeletionHandlingMetaNamespaceKeyFunc(obj)
if err == nil {
queue.Add(key)
}
},
})
return &Controller{
logger: logrus.WithField("pkg", "kubewatch-pod"),
clientset: client,
informer: informer,
queue: queue,
eventHandler: eventHandler,
}
}
func watchDeployments(client *client.ExtensionsClient, eventHandler handlers.Handler) cache.Store {
//Define what we want to look for (Deployments)
watchlist := cache.NewListWatchFromClient(client, "deployments", api.NamespaceAll, fields.Everything())
// Run starts the kubewatch controller
func (c *Controller) Run(stopCh <-chan struct{}) {
defer utilruntime.HandleCrash()
defer c.queue.ShutDown()
resyncPeriod := 30 * time.Minute
c.logger.Info("Starting kubewatch controller")
//Setup an informer to call functions when the watchlist changes
eStore, eController := framework.NewInformer(
watchlist,
&v1beta1.Deployment{},
resyncPeriod,
framework.ResourceEventHandlerFuncs{
AddFunc: eventHandler.ObjectCreated,
DeleteFunc: eventHandler.ObjectDeleted,
},
)
go c.informer.Run(stopCh)
//Run the controller as a goroutine
go eController.Run(wait.NeverStop)
if !cache.WaitForCacheSync(stopCh, c.HasSynced) {
utilruntime.HandleError(fmt.Errorf("Timed out waiting for caches to sync"))
return
}
c.logger.Info("Kubewatch controller synced and ready")
return eStore
wait.Until(c.runWorker, time.Second, stopCh)
}
func watchJobs(client *client.ExtensionsClient, eventHandler handlers.Handler) cache.Store {
//Define what we want to look for (Jobs)
watchlist := cache.NewListWatchFromClient(client, "jobs", api.NamespaceAll, fields.Everything())
// HasSynced is required for the cache.Controller interface.
func (c *Controller) HasSynced() bool {
return c.informer.HasSynced()
}
resyncPeriod := 30 * time.Minute
// LastSyncResourceVersion is required for the cache.Controller interface.
func (c *Controller) LastSyncResourceVersion() string {
return c.informer.LastSyncResourceVersion()
}
//Setup an informer to call functions when the watchlist changes
eStore, eController := framework.NewInformer(
watchlist,
&v1beta1.Job{},
resyncPeriod,
framework.ResourceEventHandlerFuncs{
AddFunc: eventHandler.ObjectCreated,
DeleteFunc: eventHandler.ObjectDeleted,
},
)
func (c *Controller) runWorker() {
for c.processNextItem() {
// continue looping
}
}
//Run the controller as a goroutine
go eController.Run(wait.NeverStop)
func (c *Controller) processNextItem() bool {
key, quit := c.queue.Get()
if quit {
return false
}
defer c.queue.Done(key)
err := c.processItem(key.(string))
if err == nil {
// No error, reset the ratelimit counters
c.queue.Forget(key)
} else if c.queue.NumRequeues(key) < maxRetries {
c.logger.Errorf("Error processing %s (will retry): %v", key, err)
c.queue.AddRateLimited(key)
} else {
// err != nil and too many retries
c.logger.Errorf("Error processing %s (giving up): %v", key, err)
c.queue.Forget(key)
utilruntime.HandleError(err)
}
return eStore
return true
}
func watchPersistenVolumes(client *client.Client, store cache.Store, eventHandler handlers.Handler) cache.Store {
//Define what we want to look for (PersistenVolumes)
watchlist := cache.NewListWatchFromClient(client, "persistentvolumes", api.NamespaceAll, fields.Everything())
resyncPeriod := 30 * time.Minute
func (c *Controller) processItem(key string) error {
c.logger.Infof("Processing change to Pod %s", key)
//Setup an informer to call functions when the watchlist changes
eStore, eController := framework.NewInformer(
watchlist,
&api.PersistentVolume{},
resyncPeriod,
framework.ResourceEventHandlerFuncs{
AddFunc: eventHandler.ObjectCreated,
DeleteFunc: eventHandler.ObjectDeleted,
},
)
obj, exists, err := c.informer.GetIndexer().GetByKey(key)
if err != nil {
return fmt.Errorf("Error fetching object with key %s from store: %v", key, err)
}
//Run the controller as a goroutine
go eController.Run(wait.NeverStop)
if !exists {
c.eventHandler.ObjectDeleted(obj)
return nil
}
return eStore
c.eventHandler.ObjectCreated(obj)
return nil
}
//
//func watchServices(client *client.Client, eventHandler handlers.Handler) cache.Store {
// //Define what we want to look for (Services)
// watchlist := cache.NewListWatchFromClient(client, "services", api.NamespaceAll, fields.Everything())
//
// resyncPeriod := 30 * time.Minute
//
// //Setup an informer to call functions when the watchlist changes
// eStore, eController := framework.NewInformer(
// watchlist,
// &api.Service{},
// resyncPeriod,
// framework.ResourceEventHandlerFuncs{
// AddFunc: eventHandler.ObjectCreated,
// DeleteFunc: eventHandler.ObjectDeleted,
// UpdateFunc: eventHandler.ObjectUpdated,
// },
// )
//
// //Run the controller as a goroutine
// go eController.Run(wait.NeverStop)
//
// return eStore
//}
//
//func watchReplicationControllers(client *client.Client, eventHandler handlers.Handler) cache.Store {
// //Define what we want to look for (ReplicationControllers)
// watchlist := cache.NewListWatchFromClient(client, "replicationcontrollers", api.NamespaceAll, fields.Everything())
//
// resyncPeriod := 30 * time.Minute
//
// //Setup an informer to call functions when the watchlist changes
// eStore, eController := framework.NewInformer(
// watchlist,
// &api.ReplicationController{},
// resyncPeriod,
// framework.ResourceEventHandlerFuncs{
// AddFunc: eventHandler.ObjectCreated,
// DeleteFunc: eventHandler.ObjectDeleted,
// },
// )
//
// //Run the controller as a goroutine
// go eController.Run(wait.NeverStop)
//
// return eStore
//}
//
//func watchDeployments(client *client.ExtensionsClient, eventHandler handlers.Handler) cache.Store {
// //Define what we want to look for (Deployments)
// watchlist := cache.NewListWatchFromClient(client, "deployments", api.NamespaceAll, fields.Everything())
//
// resyncPeriod := 30 * time.Minute
//
// //Setup an informer to call functions when the watchlist changes
// eStore, eController := framework.NewInformer(
// watchlist,
// &v1beta1.Deployment{},
// resyncPeriod,
// framework.ResourceEventHandlerFuncs{
// AddFunc: eventHandler.ObjectCreated,
// DeleteFunc: eventHandler.ObjectDeleted,
// },
// )
//
// //Run the controller as a goroutine
// go eController.Run(wait.NeverStop)
//
// return eStore
//}
//
//func watchJobs(client *client.ExtensionsClient, eventHandler handlers.Handler) cache.Store {
// //Define what we want to look for (Jobs)
// watchlist := cache.NewListWatchFromClient(client, "jobs", api.NamespaceAll, fields.Everything())
//
// resyncPeriod := 30 * time.Minute
//
// //Setup an informer to call functions when the watchlist changes
// eStore, eController := framework.NewInformer(
// watchlist,
// &v1beta1.Job{},
// resyncPeriod,
// framework.ResourceEventHandlerFuncs{
// AddFunc: eventHandler.ObjectCreated,
// DeleteFunc: eventHandler.ObjectDeleted,
// },
// )
//
// //Run the controller as a goroutine
// go eController.Run(wait.NeverStop)
//
// return eStore
//}
//
//func watchPersistenVolumes(client *client.Client, store cache.Store, eventHandler handlers.Handler) cache.Store {
// //Define what we want to look for (PersistenVolumes)
// watchlist := cache.NewListWatchFromClient(client, "persistentvolumes", api.NamespaceAll, fields.Everything())
//
// resyncPeriod := 30 * time.Minute
//
// //Setup an informer to call functions when the watchlist changes
// eStore, eController := framework.NewInformer(
// watchlist,
// &api.PersistentVolume{},
// resyncPeriod,
// framework.ResourceEventHandlerFuncs{
// AddFunc: eventHandler.ObjectCreated,
// DeleteFunc: eventHandler.ObjectDeleted,
// },
// )
//
// //Run the controller as a goroutine
// go eController.Run(wait.NeverStop)
//
// return eStore
//}
......@@ -17,9 +17,9 @@ limitations under the License.
package event
import (
"k8s.io/kubernetes/pkg/api"
"k8s.io/kubernetes/pkg/apis/extensions"
"k8s.io/kubernetes/pkg/apis/extensions/v1beta1"
"k8s.io/client-go/pkg/api/v1"
"k8s.io/client-go/pkg/apis/extensions/v1beta1"
"k8s.io/client-go/pkg/apis/batch"
)
// Event represent an event got from k8s api server
......@@ -44,39 +44,39 @@ var m = map[string]string{
// New create new KubewatchEvent
func New(obj interface{}, action string) Event {
var namespace, kind, component, host, reason, status, name string
if apiService, ok := obj.(*api.Service); ok {
if apiService, ok := obj.(*v1.Service); ok {
namespace = apiService.ObjectMeta.Namespace
name = apiService.Name
kind = "service"
component = string(apiService.Spec.Type)
reason = action
status = m[action]
} else if apiPod, ok := obj.(*api.Pod); ok {
} else if apiPod, ok := obj.(*v1.Pod); ok {
namespace = apiPod.ObjectMeta.Namespace
name = apiPod.Name
kind = "pod"
reason = action
host = apiPod.Spec.NodeName
status = m[action]
} else if apiRC, ok := obj.(*api.ReplicationController); ok {
} else if apiRC, ok := obj.(*v1.ReplicationController); ok {
namespace = apiRC.ObjectMeta.Namespace
name = apiRC.Name
kind = "replication controller"
reason = action
status = m[action]
} else if apiDeployment, ok := obj.(*extensions.Deployment); ok {
} else if apiDeployment, ok := obj.(*v1beta1.Deployment); ok {
namespace = apiDeployment.ObjectMeta.Namespace
name = apiDeployment.Name
kind = "deployment"
reason = action
status = m[action]
} else if apiJob, ok := obj.(*v1beta1.Job); ok {
} else if apiJob, ok := obj.(*batch.Job); ok {
namespace = apiJob.ObjectMeta.Namespace
name = apiJob.Name
kind = "job"
reason = action
status = m[action]
} else if apiPV, ok := obj.(*api.PersistentVolume); ok {
} else if apiPV, ok := obj.(*v1.PersistentVolume); ok {
name = apiPV.Name
kind = "persistent volume"
reason = action
......
package utils
import (
"os"
"github.com/Sirupsen/logrus"
"k8s.io/client-go/kubernetes"
"k8s.io/client-go/rest"
"k8s.io/client-go/tools/clientcmd"
)
// GetClient returns a k8s clientset to the request from inside of cluster
func GetClient() kubernetes.Interface {
config, err := rest.InClusterConfig()
if err != nil {
logrus.Fatalf("Can not get kubernetes config: %v", err)
}
clientset, err := kubernetes.NewForConfig(config)
if err != nil {
logrus.Fatalf("Can not create kubernetes client: %v", err)
}
return clientset
}
func buildOutOfClusterConfig() (*rest.Config, error) {
kubeconfigPath := os.Getenv("KUBECONFIG")
if kubeconfigPath == "" {
kubeconfigPath = os.Getenv("HOME") + "/.kube/config"
}
return clientcmd.BuildConfigFromFlags("", kubeconfigPath)
}
// GetClientOutOfCluster returns a k8s clientset to the request from outside of cluster
func GetClientOutOfCluster() kubernetes.Interface {
config, err := buildOutOfClusterConfig()
if err != nil {
logrus.Fatalf("Can not get kubernetes config: %v", err)
}
clientset, err := kubernetes.NewForConfig(config)
return clientset
}
\ No newline at end of file
*.sublime-*
.DS_Store
*.swp
*.swo
tags
sudo: false
language: go
go:
- 1.4
- 1.5
- 1.6
- tip
Copyright (c) 2012, Martin Angers
All rights reserved.
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
* Neither the name of the author nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# Purell
Purell is a tiny Go library to normalize URLs. It returns a pure URL. Pure-ell. Sanitizer and all. Yeah, I know...
Based on the [wikipedia paper][wiki] and the [RFC 3986 document][rfc].
[![build status](https://secure.travis-ci.org/PuerkitoBio/purell.png)](http://travis-ci.org/PuerkitoBio/purell)
## Install
`go get github.com/PuerkitoBio/purell`
## Changelog
* **2016-07-27 (v1.0.0)** : Normalize IDN to ASCII (thanks to @zenovich).
* **2015-02-08** : Add fix for relative paths issue ([PR #5][pr5]) and add fix for unnecessary encoding of reserved characters ([see issue #7][iss7]).
* **v0.2.0** : Add benchmarks, Attempt IDN support.
* **v0.1.0** : Initial release.
## Examples
From `example_test.go` (note that in your code, you would import "github.com/PuerkitoBio/purell", and would prefix references to its methods and constants with "purell."):
```go
package purell
import (
"fmt"
"net/url"
)
func ExampleNormalizeURLString() {
if normalized, err := NormalizeURLString("hTTp://someWEBsite.com:80/Amazing%3f/url/",
FlagLowercaseScheme|FlagLowercaseHost|FlagUppercaseEscapes); err != nil {
panic(err)
} else {
fmt.Print(normalized)
}
// Output: http://somewebsite.com:80/Amazing%3F/url/
}
func ExampleMustNormalizeURLString() {
normalized := MustNormalizeURLString("hTTpS://someWEBsite.com:443/Amazing%fa/url/",
FlagsUnsafeGreedy)
fmt.Print(normalized)
// Output: http://somewebsite.com/Amazing%FA/url
}
func ExampleNormalizeURL() {
if u, err := url.Parse("Http://SomeUrl.com:8080/a/b/.././c///g?c=3&a=1&b=9&c=0#target"); err != nil {
panic(err)
} else {
normalized := NormalizeURL(u, FlagsUsuallySafeGreedy|FlagRemoveDuplicateSlashes|FlagRemoveFragment)
fmt.Print(normalized)
}
// Output: http://someurl.com:8080/a/c/g?c=3&a=1&b=9&c=0
}
```
## API
As seen in the examples above, purell offers three methods, `NormalizeURLString(string, NormalizationFlags) (string, error)`, `MustNormalizeURLString(string, NormalizationFlags) (string)` and `NormalizeURL(*url.URL, NormalizationFlags) (string)`. They all normalize the provided URL based on the specified flags. Here are the available flags:
```go
const (
// Safe normalizations
FlagLowercaseScheme NormalizationFlags = 1 << iota // HTTP://host -> http://host, applied by default in Go1.1
FlagLowercaseHost // http://HOST -> http://host
FlagUppercaseEscapes // http://host/t%ef -> http://host/t%EF
FlagDecodeUnnecessaryEscapes // http://host/t%41 -> http://host/tA
FlagEncodeNecessaryEscapes // http://host/!"#$ -> http://host/%21%22#$
FlagRemoveDefaultPort // http://host:80 -> http://host
FlagRemoveEmptyQuerySeparator // http://host/path? -> http://host/path
// Usually safe normalizations
FlagRemoveTrailingSlash // http://host/path/ -> http://host/path
FlagAddTrailingSlash // http://host/path -> http://host/path/ (should choose only one of these add/remove trailing slash flags)
FlagRemoveDotSegments // http://host/path/./a/b/../c -> http://host/path/a/c
// Unsafe normalizations
FlagRemoveDirectoryIndex // http://host/path/index.html -> http://host/path/
FlagRemoveFragment // http://host/path#fragment -> http://host/path
FlagForceHTTP // https://host -> http://host
FlagRemoveDuplicateSlashes // http://host/path//a///b -> http://host/path/a/b
FlagRemoveWWW // http://www.host/ -> http://host/
FlagAddWWW // http://host/ -> http://www.host/ (should choose only one of these add/remove WWW flags)
FlagSortQuery // http://host/path?c=3&b=2&a=1&b=1 -> http://host/path?a=1&b=1&b=2&c=3
// Normalizations not in the wikipedia article, required to cover tests cases
// submitted by jehiah
FlagDecodeDWORDHost // http://1113982867 -> http://66.102.7.147
FlagDecodeOctalHost // http://0102.0146.07.0223 -> http://66.102.7.147
FlagDecodeHexHost // http://0x42660793 -> http://66.102.7.147
FlagRemoveUnnecessaryHostDots // http://.host../path -> http://host/path
FlagRemoveEmptyPortSeparator // http://host:/path -> http://host/path
// Convenience set of safe normalizations
FlagsSafe NormalizationFlags = FlagLowercaseHost | FlagLowercaseScheme | FlagUppercaseEscapes | FlagDecodeUnnecessaryEscapes | FlagEncodeNecessaryEscapes | FlagRemoveDefaultPort | FlagRemoveEmptyQuerySeparator
// For convenience sets, "greedy" uses the "remove trailing slash" and "remove www. prefix" flags,
// while "non-greedy" uses the "add (or keep) the trailing slash" and "add www. prefix".
// Convenience set of usually safe normalizations (includes FlagsSafe)
FlagsUsuallySafeGreedy NormalizationFlags = FlagsSafe | FlagRemoveTrailingSlash | FlagRemoveDotSegments
FlagsUsuallySafeNonGreedy NormalizationFlags = FlagsSafe | FlagAddTrailingSlash | FlagRemoveDotSegments
// Convenience set of unsafe normalizations (includes FlagsUsuallySafe)
FlagsUnsafeGreedy NormalizationFlags = FlagsUsuallySafeGreedy | FlagRemoveDirectoryIndex | FlagRemoveFragment | FlagForceHTTP | FlagRemoveDuplicateSlashes | FlagRemoveWWW | FlagSortQuery
FlagsUnsafeNonGreedy NormalizationFlags = FlagsUsuallySafeNonGreedy | FlagRemoveDirectoryIndex | FlagRemoveFragment | FlagForceHTTP | FlagRemoveDuplicateSlashes | FlagAddWWW | FlagSortQuery
// Convenience set of all available flags
FlagsAllGreedy = FlagsUnsafeGreedy | FlagDecodeDWORDHost | FlagDecodeOctalHost | FlagDecodeHexHost | FlagRemoveUnnecessaryHostDots | FlagRemoveEmptyPortSeparator
FlagsAllNonGreedy = FlagsUnsafeNonGreedy | FlagDecodeDWORDHost | FlagDecodeOctalHost | FlagDecodeHexHost | FlagRemoveUnnecessaryHostDots | FlagRemoveEmptyPortSeparator
)
```
For convenience, the set of flags `FlagsSafe`, `FlagsUsuallySafe[Greedy|NonGreedy]`, `FlagsUnsafe[Greedy|NonGreedy]` and `FlagsAll[Greedy|NonGreedy]` are provided for the similarly grouped normalizations on [wikipedia's URL normalization page][wiki]. You can add (using the bitwise OR `|` operator) or remove (using the bitwise AND NOT `&^` operator) individual flags from the sets if required, to build your own custom set.
The [full godoc reference is available on gopkgdoc][godoc].
Some things to note:
* `FlagDecodeUnnecessaryEscapes`, `FlagEncodeNecessaryEscapes`, `FlagUppercaseEscapes` and `FlagRemoveEmptyQuerySeparator` are always implicitly set, because internally, the URL string is parsed as an URL object, which automatically decodes unnecessary escapes, uppercases and encodes necessary ones, and removes empty query separators (an unnecessary `?` at the end of the url). So this operation cannot **not** be done. For this reason, `FlagRemoveEmptyQuerySeparator` (as well as the other three) has been included in the `FlagsSafe` convenience set, instead of `FlagsUnsafe`, where Wikipedia puts it.
* The `FlagDecodeUnnecessaryEscapes` decodes the following escapes (*from -> to*):
- %24 -> $
- %26 -> &
- %2B-%3B -> +,-./0123456789:;
- %3D -> =
- %40-%5A -> @ABCDEFGHIJKLMNOPQRSTUVWXYZ
- %5F -> _
- %61-%7A -> abcdefghijklmnopqrstuvwxyz
- %7E -> ~
* When the `NormalizeURL` function is used (passing an URL object), this source URL object is modified (that is, after the call, the URL object will be modified to reflect the normalization).
* The *replace IP with domain name* normalization (`http://208.77.188.166/ → http://www.example.com/`) is obviously not possible for a library without making some network requests. This is not implemented in purell.
* The *remove unused query string parameters* and *remove default query parameters* are also not implemented, since this is a very case-specific normalization, and it is quite trivial to do with an URL object.
### Safe vs Usually Safe vs Unsafe
Purell allows you to control the level of risk you take while normalizing an URL. You can aggressively normalize, play it totally safe, or anything in between.
Consider the following URL:
`HTTPS://www.RooT.com/toto/t%45%1f///a/./b/../c/?z=3&w=2&a=4&w=1#invalid`
Normalizing with the `FlagsSafe` gives:
`https://www.root.com/toto/tE%1F///a/./b/../c/?z=3&w=2&a=4&w=1#invalid`
With the `FlagsUsuallySafeGreedy`:
`https://www.root.com/toto/tE%1F///a/c?z=3&w=2&a=4&w=1#invalid`
And with `FlagsUnsafeGreedy`:
`http://root.com/toto/tE%1F/a/c?a=4&w=1&w=2&z=3`
## TODOs
* Add a class/default instance to allow specifying custom directory index names? At the moment, removing directory index removes `(^|/)((?:default|index)\.\w{1,4})$`.
## Thanks / Contributions
@rogpeppe
@jehiah
@opennota
@pchristopher1275
@zenovich
## License
The [BSD 3-Clause license][bsd].
[bsd]: http://opensource.org/licenses/BSD-3-Clause
[wiki]: http://en.wikipedia.org/wiki/URL_normalization
[rfc]: http://tools.ietf.org/html/rfc3986#section-6
[godoc]: http://go.pkgdoc.org/github.com/PuerkitoBio/purell
[pr5]: https://github.com/PuerkitoBio/purell/pull/5
[iss7]: https://github.com/PuerkitoBio/purell/issues/7
/*
Package purell offers URL normalization as described on the wikipedia page:
http://en.wikipedia.org/wiki/URL_normalization
*/
package purell
import (
"bytes"
"fmt"
"net/url"
"regexp"
"sort"
"strconv"
"strings"
"github.com/PuerkitoBio/urlesc"
"golang.org/x/net/idna"
"golang.org/x/text/secure/precis"
"golang.org/x/text/unicode/norm"
)
// A set of normalization flags determines how a URL will
// be normalized.
type NormalizationFlags uint
const (
// Safe normalizations
FlagLowercaseScheme NormalizationFlags = 1 << iota // HTTP://host -> http://host, applied by default in Go1.1
FlagLowercaseHost // http://HOST -> http://host
FlagUppercaseEscapes // http://host/t%ef -> http://host/t%EF
FlagDecodeUnnecessaryEscapes // http://host/t%41 -> http://host/tA
FlagEncodeNecessaryEscapes // http://host/!"#$ -> http://host/%21%22#$
FlagRemoveDefaultPort // http://host:80 -> http://host
FlagRemoveEmptyQuerySeparator // http://host/path? -> http://host/path
// Usually safe normalizations
FlagRemoveTrailingSlash // http://host/path/ -> http://host/path
FlagAddTrailingSlash // http://host/path -> http://host/path/ (should choose only one of these add/remove trailing slash flags)
FlagRemoveDotSegments // http://host/path/./a/b/../c -> http://host/path/a/c
// Unsafe normalizations
FlagRemoveDirectoryIndex // http://host/path/index.html -> http://host/path/
FlagRemoveFragment // http://host/path#fragment -> http://host/path
FlagForceHTTP // https://host -> http://host
FlagRemoveDuplicateSlashes // http://host/path//a///b -> http://host/path/a/b
FlagRemoveWWW // http://www.host/ -> http://host/
FlagAddWWW // http://host/ -> http://www.host/ (should choose only one of these add/remove WWW flags)
FlagSortQuery // http://host/path?c=3&b=2&a=1&b=1 -> http://host/path?a=1&b=1&b=2&c=3
// Normalizations not in the wikipedia article, required to cover tests cases
// submitted by jehiah
FlagDecodeDWORDHost // http://1113982867 -> http://66.102.7.147
FlagDecodeOctalHost // http://0102.0146.07.0223 -> http://66.102.7.147
FlagDecodeHexHost // http://0x42660793 -> http://66.102.7.147
FlagRemoveUnnecessaryHostDots // http://.host../path -> http://host/path
FlagRemoveEmptyPortSeparator // http://host:/path -> http://host/path
// Convenience set of safe normalizations
FlagsSafe NormalizationFlags = FlagLowercaseHost | FlagLowercaseScheme | FlagUppercaseEscapes | FlagDecodeUnnecessaryEscapes | FlagEncodeNecessaryEscapes | FlagRemoveDefaultPort | FlagRemoveEmptyQuerySeparator
// For convenience sets, "greedy" uses the "remove trailing slash" and "remove www. prefix" flags,
// while "non-greedy" uses the "add (or keep) the trailing slash" and "add www. prefix".
// Convenience set of usually safe normalizations (includes FlagsSafe)
FlagsUsuallySafeGreedy NormalizationFlags = FlagsSafe | FlagRemoveTrailingSlash | FlagRemoveDotSegments
FlagsUsuallySafeNonGreedy NormalizationFlags = FlagsSafe | FlagAddTrailingSlash | FlagRemoveDotSegments
// Convenience set of unsafe normalizations (includes FlagsUsuallySafe)
FlagsUnsafeGreedy NormalizationFlags = FlagsUsuallySafeGreedy | FlagRemoveDirectoryIndex | FlagRemoveFragment | FlagForceHTTP | FlagRemoveDuplicateSlashes | FlagRemoveWWW | FlagSortQuery
FlagsUnsafeNonGreedy NormalizationFlags = FlagsUsuallySafeNonGreedy | FlagRemoveDirectoryIndex | FlagRemoveFragment | FlagForceHTTP | FlagRemoveDuplicateSlashes | FlagAddWWW | FlagSortQuery
// Convenience set of all available flags
FlagsAllGreedy = FlagsUnsafeGreedy | FlagDecodeDWORDHost | FlagDecodeOctalHost | FlagDecodeHexHost | FlagRemoveUnnecessaryHostDots | FlagRemoveEmptyPortSeparator
FlagsAllNonGreedy = FlagsUnsafeNonGreedy | FlagDecodeDWORDHost | FlagDecodeOctalHost | FlagDecodeHexHost | FlagRemoveUnnecessaryHostDots | FlagRemoveEmptyPortSeparator
)
const (
defaultHttpPort = ":80"
defaultHttpsPort = ":443"
)
// Regular expressions used by the normalizations
var rxPort = regexp.MustCompile(`(:\d+)/?$`)
var rxDirIndex = regexp.MustCompile(`(^|/)((?:default|index)\.\w{1,4})$`)
var rxDupSlashes = regexp.MustCompile(`/{2,}`)
var rxDWORDHost = regexp.MustCompile(`^(\d+)((?:\.+)?(?:\:\d*)?)$`)
var rxOctalHost = regexp.MustCompile(`^(0\d*)\.(0\d*)\.(0\d*)\.(0\d*)((?:\.+)?(?:\:\d*)?)$`)
var rxHexHost = regexp.MustCompile(`^0x([0-9A-Fa-f]+)((?:\.+)?(?:\:\d*)?)$`)
var rxHostDots = regexp.MustCompile(`^(.+?)(:\d+)?$`)
var rxEmptyPort = regexp.MustCompile(`:+$`)
// Map of flags to implementation function.
// FlagDecodeUnnecessaryEscapes has no action, since it is done automatically
// by parsing the string as an URL. Same for FlagUppercaseEscapes and FlagRemoveEmptyQuerySeparator.
// Since maps have undefined traversing order, make a slice of ordered keys
var flagsOrder = []NormalizationFlags{
FlagLowercaseScheme,
FlagLowercaseHost,
FlagRemoveDefaultPort,
FlagRemoveDirectoryIndex,
FlagRemoveDotSegments,
FlagRemoveFragment,
FlagForceHTTP, // Must be after remove default port (because https=443/http=80)
FlagRemoveDuplicateSlashes,
FlagRemoveWWW,
FlagAddWWW,
FlagSortQuery,
FlagDecodeDWORDHost,
FlagDecodeOctalHost,
FlagDecodeHexHost,
FlagRemoveUnnecessaryHostDots,
FlagRemoveEmptyPortSeparator,
FlagRemoveTrailingSlash, // These two (add/remove trailing slash) must be last
FlagAddTrailingSlash,
}
// ... and then the map, where order is unimportant
var flags = map[NormalizationFlags]func(*url.URL){
FlagLowercaseScheme: lowercaseScheme,
FlagLowercaseHost: lowercaseHost,
FlagRemoveDefaultPort: removeDefaultPort,
FlagRemoveDirectoryIndex: removeDirectoryIndex,
FlagRemoveDotSegments: removeDotSegments,
FlagRemoveFragment: removeFragment,
FlagForceHTTP: forceHTTP,
FlagRemoveDuplicateSlashes: removeDuplicateSlashes,
FlagRemoveWWW: removeWWW,
FlagAddWWW: addWWW,
FlagSortQuery: sortQuery,
FlagDecodeDWORDHost: decodeDWORDHost,
FlagDecodeOctalHost: decodeOctalHost,
FlagDecodeHexHost: decodeHexHost,
FlagRemoveUnnecessaryHostDots: removeUnncessaryHostDots,
FlagRemoveEmptyPortSeparator: removeEmptyPortSeparator,
FlagRemoveTrailingSlash: removeTrailingSlash,
FlagAddTrailingSlash: addTrailingSlash,
}
// MustNormalizeURLString returns the normalized string, and panics if an error occurs.
// It takes an URL string as input, as well as the normalization flags.
func MustNormalizeURLString(u string, f NormalizationFlags) string {
result, e := NormalizeURLString(u, f)
if e != nil {
panic(e)
}
return result
}
// NormalizeURLString returns the normalized string, or an error if it can't be parsed into an URL object.
// It takes an URL string as input, as well as the normalization flags.
func NormalizeURLString(u string, f NormalizationFlags) (string, error) {
if parsed, e := url.Parse(u); e != nil {
return "", e
} else {
options := make([]precis.Option, 1, 3)
options[0] = precis.IgnoreCase
if f&FlagLowercaseHost == FlagLowercaseHost {
options = append(options, precis.FoldCase())
}
options = append(options, precis.Norm(norm.NFC))
profile := precis.NewFreeform(options...)
if parsed.Host, e = idna.ToASCII(profile.NewTransformer().String(parsed.Host)); e != nil {
return "", e
}
return NormalizeURL(parsed, f), nil
}
panic("Unreachable code.")
}
// NormalizeURL returns the normalized string.
// It takes a parsed URL object as input, as well as the normalization flags.
func NormalizeURL(u *url.URL, f NormalizationFlags) string {
for _, k := range flagsOrder {
if f&k == k {
flags[k](u)
}
}
return urlesc.Escape(u)
}
func lowercaseScheme(u *url.URL) {
if len(u.Scheme) > 0 {
u.Scheme = strings.ToLower(u.Scheme)
}
}
func lowercaseHost(u *url.URL) {
if len(u.Host) > 0 {
u.Host = strings.ToLower(u.Host)
}
}
func removeDefaultPort(u *url.URL) {
if len(u.Host) > 0 {
scheme := strings.ToLower(u.Scheme)
u.Host = rxPort.ReplaceAllStringFunc(u.Host, func(val string) string {
if (scheme == "http" && val == defaultHttpPort) || (scheme == "https" && val == defaultHttpsPort) {
return ""
}
return val
})
}
}
func removeTrailingSlash(u *url.URL) {
if l := len(u.Path); l > 0 {
if strings.HasSuffix(u.Path, "/") {
u.Path = u.Path[:l-1]
}
} else if l = len(u.Host); l > 0 {
if strings.HasSuffix(u.Host, "/") {
u.Host = u.Host[:l-1]
}
}
}
func addTrailingSlash(u *url.URL) {
if l := len(u.Path); l > 0 {
if !strings.HasSuffix(u.Path, "/") {
u.Path += "/"
}
} else if l = len(u.Host); l > 0 {
if !strings.HasSuffix(u.Host, "/") {
u.Host += "/"
}
}
}
func removeDotSegments(u *url.URL) {
if len(u.Path) > 0 {
var dotFree []string
var lastIsDot bool
sections := strings.Split(u.Path, "/")
for _, s := range sections {
if s == ".." {
if len(dotFree) > 0 {
dotFree = dotFree[:len(dotFree)-1]
}
} else if s != "." {
dotFree = append(dotFree, s)
}
lastIsDot = (s == "." || s == "..")
}
// Special case if host does not end with / and new path does not begin with /
u.Path = strings.Join(dotFree, "/")
if u.Host != "" && !strings.HasSuffix(u.Host, "/") && !strings.HasPrefix(u.Path, "/") {
u.Path = "/" + u.Path
}
// Special case if the last segment was a dot, make sure the path ends with a slash
if lastIsDot && !strings.HasSuffix(u.Path, "/") {
u.Path += "/"
}
}
}
func removeDirectoryIndex(u *url.URL) {
if len(u.Path) > 0 {
u.Path = rxDirIndex.ReplaceAllString(u.Path, "$1")
}
}
func removeFragment(u *url.URL) {
u.Fragment = ""
}
func forceHTTP(u *url.URL) {
if strings.ToLower(u.Scheme) == "https" {
u.Scheme = "http"
}
}
func removeDuplicateSlashes(u *url.URL) {
if len(u.Path) > 0 {
u.Path = rxDupSlashes.ReplaceAllString(u.Path, "/")
}
}
func removeWWW(u *url.URL) {
if len(u.Host) > 0 && strings.HasPrefix(strings.ToLower(u.Host), "www.") {
u.Host = u.Host[4:]
}
}
func addWWW(u *url.URL) {
if len(u.Host) > 0 && !strings.HasPrefix(strings.ToLower(u.Host), "www.") {
u.Host = "www." + u.Host
}
}
func sortQuery(u *url.URL) {
q := u.Query()
if len(q) > 0 {
arKeys := make([]string, len(q))
i := 0
for k, _ := range q {
arKeys[i] = k
i++
}
sort.Strings(arKeys)
buf := new(bytes.Buffer)
for _, k := range arKeys {
sort.Strings(q[k])
for _, v := range q[k] {
if buf.Len() > 0 {
buf.WriteRune('&')
}
buf.WriteString(fmt.Sprintf("%s=%s", k, urlesc.QueryEscape(v)))
}
}
// Rebuild the raw query string
u.RawQuery = buf.String()
}
}
func decodeDWORDHost(u *url.URL) {
if len(u.Host) > 0 {
if matches := rxDWORDHost.FindStringSubmatch(u.Host); len(matches) > 2 {
var parts [4]int64
dword, _ := strconv.ParseInt(matches[1], 10, 0)
for i, shift := range []uint{24, 16, 8, 0} {
parts[i] = dword >> shift & 0xFF
}
u.Host = fmt.Sprintf("%d.%d.%d.%d%s", parts[0], parts[1], parts[2], parts[3], matches[2])
}
}
}
func decodeOctalHost(u *url.URL) {
if len(u.Host) > 0 {
if matches := rxOctalHost.FindStringSubmatch(u.Host); len(matches) > 5 {
var parts [4]int64
for i := 1; i <= 4; i++ {
parts[i-1], _ = strconv.ParseInt(matches[i], 8, 0)
}
u.Host = fmt.Sprintf("%d.%d.%d.%d%s", parts[0], parts[1], parts[2], parts[3], matches[5])
}
}
}
func decodeHexHost(u *url.URL) {
if len(u.Host) > 0 {
if matches := rxHexHost.FindStringSubmatch(u.Host); len(matches) > 2 {
// Conversion is safe because of regex validation
parsed, _ := strconv.ParseInt(matches[1], 16, 0)
// Set host as DWORD (base 10) encoded host
u.Host = fmt.Sprintf("%d%s", parsed, matches[2])
// The rest is the same as decoding a DWORD host
decodeDWORDHost(u)
}
}
}
func removeUnncessaryHostDots(u *url.URL) {
if len(u.Host) > 0 {
if matches := rxHostDots.FindStringSubmatch(u.Host); len(matches) > 1 {
// Trim the leading and trailing dots
u.Host = strings.Trim(matches[1], ".")
if len(matches) > 2 {
u.Host += matches[2]
}
}
}
}
func removeEmptyPortSeparator(u *url.URL) {
if len(u.Host) > 0 {
u.Host = rxEmptyPort.ReplaceAllString(u.Host, "")
}
}
language: go
go:
- 1.4.3
- 1.5.3
- release
- 1.4
- tip
install:
- go build .
script:
- go test -v ./...
- go test -v
urlesc [![Build Status](https://travis-ci.org/PuerkitoBio/urlesc.png?branch=master)](https://travis-ci.org/PuerkitoBio/urlesc) [![GoDoc](http://godoc.org/github.com/PuerkitoBio/urlesc?status.svg)](http://godoc.org/github.com/PuerkitoBio/urlesc)
======
Package urlesc implements query escaping as per RFC 3986.
It contains some parts of the net/url package, modified so as to allow
some reserved characters incorrectly escaped by net/url (see [issue 5684](https://github.com/golang/go/issues/5684)).
## Install
go get github.com/PuerkitoBio/urlesc
## License
Go license (BSD-3-Clause)
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package urlesc implements query escaping as per RFC 3986.
// It contains some parts of the net/url package, modified so as to allow
// some reserved characters incorrectly escaped by net/url.
// See https://github.com/golang/go/issues/5684
package urlesc
import (
"bytes"
"net/url"
"strings"
)
type encoding int
const (
encodePath encoding = 1 + iota
encodeUserPassword
encodeQueryComponent
encodeFragment
)
// Return true if the specified character should be escaped when
// appearing in a URL string, according to RFC 3986.
func shouldEscape(c byte, mode encoding) bool {
// §2.3 Unreserved characters (alphanum)
if 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z' || '0' <= c && c <= '9' {
return false
}
switch c {
case '-', '.', '_', '~': // §2.3 Unreserved characters (mark)
return false
// §2.2 Reserved characters (reserved)
case ':', '/', '?', '#', '[', ']', '@', // gen-delims
'!', '$', '&', '\'', '(', ')', '*', '+', ',', ';', '=': // sub-delims
// Different sections of the URL allow a few of
// the reserved characters to appear unescaped.
switch mode {
case encodePath: // §3.3
// The RFC allows sub-delims and : @.
// '/', '[' and ']' can be used to assign meaning to individual path
// segments. This package only manipulates the path as a whole,
// so we allow those as well. That leaves only ? and # to escape.
return c == '?' || c == '#'
case encodeUserPassword: // §3.2.1
// The RFC allows : and sub-delims in
// userinfo. The parsing of userinfo treats ':' as special so we must escape
// all the gen-delims.
return c == ':' || c == '/' || c == '?' || c == '#' || c == '[' || c == ']' || c == '@'
case encodeQueryComponent: // §3.4
// The RFC allows / and ?.
return c != '/' && c != '?'
case encodeFragment: // §4.1
// The RFC text is silent but the grammar allows
// everything, so escape nothing but #
return c == '#'
}
}
// Everything else must be escaped.
return true
}
// QueryEscape escapes the string so it can be safely placed
// inside a URL query.
func QueryEscape(s string) string {
return escape(s, encodeQueryComponent)
}
func escape(s string, mode encoding) string {
spaceCount, hexCount := 0, 0
for i := 0; i < len(s); i++ {
c := s[i]
if shouldEscape(c, mode) {
if c == ' ' && mode == encodeQueryComponent {
spaceCount++
} else {
hexCount++
}
}
}
if spaceCount == 0 && hexCount == 0 {
return s
}
t := make([]byte, len(s)+2*hexCount)
j := 0
for i := 0; i < len(s); i++ {
switch c := s[i]; {
case c == ' ' && mode == encodeQueryComponent:
t[j] = '+'
j++
case shouldEscape(c, mode):
t[j] = '%'
t[j+1] = "0123456789ABCDEF"[c>>4]
t[j+2] = "0123456789ABCDEF"[c&15]
j += 3
default:
t[j] = s[i]
j++
}
}
return string(t)
}
var uiReplacer = strings.NewReplacer(
"%21", "!",
"%27", "'",
"%28", "(",
"%29", ")",
"%2A", "*",
)
// unescapeUserinfo unescapes some characters that need not to be escaped as per RFC3986.
func unescapeUserinfo(s string) string {
return uiReplacer.Replace(s)
}
// Escape reassembles the URL into a valid URL string.
// The general form of the result is one of:
//
// scheme:opaque
// scheme://userinfo@host/path?query#fragment
//
// If u.Opaque is non-empty, String uses the first form;
// otherwise it uses the second form.
//
// In the second form, the following rules apply:
// - if u.Scheme is empty, scheme: is omitted.
// - if u.User is nil, userinfo@ is omitted.
// - if u.Host is empty, host/ is omitted.
// - if u.Scheme and u.Host are empty and u.User is nil,
// the entire scheme://userinfo@host/ is omitted.
// - if u.Host is non-empty and u.Path begins with a /,
// the form host/path does not add its own /.
// - if u.RawQuery is empty, ?query is omitted.
// - if u.Fragment is empty, #fragment is omitted.
func Escape(u *url.URL) string {
var buf bytes.Buffer
if u.Scheme != "" {
buf.WriteString(u.Scheme)
buf.WriteByte(':')
}
if u.Opaque != "" {
buf.WriteString(u.Opaque)
} else {
if u.Scheme != "" || u.Host != "" || u.User != nil {
buf.WriteString("//")
if ui := u.User; ui != nil {
buf.WriteString(unescapeUserinfo(ui.String()))
buf.WriteByte('@')
}
if h := u.Host; h != "" {
buf.WriteString(h)
}
}
if u.Path != "" && u.Path[0] != '/' && u.Host != "" {
buf.WriteByte('/')
}
buf.WriteString(escape(u.Path, encodePath))
}
if u.RawQuery != "" {
buf.WriteByte('?')
buf.WriteString(u.RawQuery)
}
if u.Fragment != "" {
buf.WriteByte('#')
buf.WriteString(escape(u.Fragment, encodeFragment))
}
return buf.String()
}
Copyright (C) 2013 Blake Mizerany
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
This diff is collapsed.
// Package quantile computes approximate quantiles over an unbounded data
// stream within low memory and CPU bounds.
//
// A small amount of accuracy is traded to achieve the above properties.
//
// Multiple streams can be merged before calling Query to generate a single set
// of results. This is meaningful when the streams represent the same type of
// data. See Merge and Samples.
//
// For more detailed information about the algorithm used, see:
//
// Effective Computation of Biased Quantiles over Data Streams
//
// http://www.cs.rutgers.edu/~muthu/bquant.pdf
package quantile
import (
"math"
"sort"
)
// Sample holds an observed value and meta information for compression. JSON
// tags have been added for convenience.
type Sample struct {
Value float64 `json:",string"`
Width float64 `json:",string"`
Delta float64 `json:",string"`
}
// Samples represents a slice of samples. It implements sort.Interface.
type Samples []Sample
func (a Samples) Len() int { return len(a) }
func (a Samples) Less(i, j int) bool { return a[i].Value < a[j].Value }
func (a Samples) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
type invariant func(s *stream, r float64) float64
// NewLowBiased returns an initialized Stream for low-biased quantiles
// (e.g. 0.01, 0.1, 0.5) where the needed quantiles are not known a priori, but
// error guarantees can still be given even for the lower ranks of the data
// distribution.
//
// The provided epsilon is a relative error, i.e. the true quantile of a value
// returned by a query is guaranteed to be within (1±Epsilon)*Quantile.
//
// See http://www.cs.rutgers.edu/~muthu/bquant.pdf for time, space, and error
// properties.
func NewLowBiased(epsilon float64) *Stream {
ƒ := func(s *stream, r float64) float64 {
return 2 * epsilon * r
}
return newStream(ƒ)
}
// NewHighBiased returns an initialized Stream for high-biased quantiles
// (e.g. 0.01, 0.1, 0.5) where the needed quantiles are not known a priori, but
// error guarantees can still be given even for the higher ranks of the data
// distribution.
//
// The provided epsilon is a relative error, i.e. the true quantile of a value
// returned by a query is guaranteed to be within 1-(1±Epsilon)*(1-Quantile).
//
// See http://www.cs.rutgers.edu/~muthu/bquant.pdf for time, space, and error
// properties.
func NewHighBiased(epsilon float64) *Stream {
ƒ := func(s *stream, r float64) float64 {
return 2 * epsilon * (s.n - r)
}
return newStream(ƒ)
}
// NewTargeted returns an initialized Stream concerned with a particular set of
// quantile values that are supplied a priori. Knowing these a priori reduces
// space and computation time. The targets map maps the desired quantiles to
// their absolute errors, i.e. the true quantile of a value returned by a query
// is guaranteed to be within (Quantile±Epsilon).
//
// See http://www.cs.rutgers.edu/~muthu/bquant.pdf for time, space, and error properties.
func NewTargeted(targets map[float64]float64) *Stream {
ƒ := func(s *stream, r float64) float64 {
var m = math.MaxFloat64
var f float64
for quantile, epsilon := range targets {
if quantile*s.n <= r {
f = (2 * epsilon * r) / quantile
} else {
f = (2 * epsilon * (s.n - r)) / (1 - quantile)
}
if f < m {
m = f
}
}
return m
}
return newStream(ƒ)
}
// Stream computes quantiles for a stream of float64s. It is not thread-safe by
// design. Take care when using across multiple goroutines.
type Stream struct {
*stream
b Samples
sorted bool
}
func newStream(ƒ invariant) *Stream {
x := &stream{ƒ: ƒ}
return &Stream{x, make(Samples, 0, 500), true}
}
// Insert inserts v into the stream.
func (s *Stream) Insert(v float64) {
s.insert(Sample{Value: v, Width: 1})
}
func (s *Stream) insert(sample Sample) {
s.b = append(s.b, sample)
s.sorted = false
if len(s.b) == cap(s.b) {
s.flush()
}
}
// Query returns the computed qth percentiles value. If s was created with
// NewTargeted, and q is not in the set of quantiles provided a priori, Query
// will return an unspecified result.
func (s *Stream) Query(q float64) float64 {
if !s.flushed() {
// Fast path when there hasn't been enough data for a flush;
// this also yields better accuracy for small sets of data.
l := len(s.b)
if l == 0 {
return 0
}
i := int(float64(l) * q)
if i > 0 {
i -= 1
}
s.maybeSort()
return s.b[i].Value
}
s.flush()
return s.stream.query(q)
}
// Merge merges samples into the underlying streams samples. This is handy when
// merging multiple streams from separate threads, database shards, etc.
//
// ATTENTION: This method is broken and does not yield correct results. The
// underlying algorithm is not capable of merging streams correctly.
func (s *Stream) Merge(samples Samples) {
sort.Sort(samples)
s.stream.merge(samples)
}
// Reset reinitializes and clears the list reusing the samples buffer memory.
func (s *Stream) Reset() {
s.stream.reset()
s.b = s.b[:0]
}
// Samples returns stream samples held by s.
func (s *Stream) Samples() Samples {
if !s.flushed() {
return s.b
}
s.flush()
return s.stream.samples()
}
// Count returns the total number of samples observed in the stream
// since initialization.
func (s *Stream) Count() int {
return len(s.b) + s.stream.count()
}
func (s *Stream) flush() {
s.maybeSort()
s.stream.merge(s.b)
s.b = s.b[:0]
}
func (s *Stream) maybeSort() {
if !s.sorted {
s.sorted = true
sort.Sort(s.b)
}
}
func (s *Stream) flushed() bool {
return len(s.stream.l) > 0
}
type stream struct {
n float64
l []Sample
ƒ invariant
}
func (s *stream) reset() {
s.l = s.l[:0]
s.n = 0
}
func (s *stream) insert(v float64) {
s.merge(Samples{{v, 1, 0}})
}
func (s *stream) merge(samples Samples) {
// TODO(beorn7): This tries to merge not only individual samples, but
// whole summaries. The paper doesn't mention merging summaries at
// all. Unittests show that the merging is inaccurate. Find out how to
// do merges properly.
var r float64
i := 0
for _, sample := range samples {
for ; i < len(s.l); i++ {
c := s.l[i]
if c.Value > sample.Value {
// Insert at position i.
s.l = append(s.l, Sample{})
copy(s.l[i+1:], s.l[i:])
s.l[i] = Sample{
sample.Value,
sample.Width,
math.Max(sample.Delta, math.Floor(s.ƒ(s, r))-1),
// TODO(beorn7): How to calculate delta correctly?
}
i++
goto inserted
}
r += c.Width
}
s.l = append(s.l, Sample{sample.Value, sample.Width, 0})
i++
inserted:
s.n += sample.Width
r += sample.Width
}
s.compress()
}
func (s *stream) count() int {
return int(s.n)
}
func (s *stream) query(q float64) float64 {
t := math.Ceil(q * s.n)
t += math.Ceil(s.ƒ(s, t) / 2)
p := s.l[0]
var r float64
for _, c := range s.l[1:] {
r += p.Width
if r+c.Width+c.Delta > t {
return p.Value
}
p = c
}
return p.Value
}
func (s *stream) compress() {
if len(s.l) < 2 {
return
}
x := s.l[len(s.l)-1]
xi := len(s.l) - 1
r := s.n - 1 - x.Width
for i := len(s.l) - 2; i >= 0; i-- {
c := s.l[i]
if c.Width+x.Width+x.Delta <= s.ƒ(s, r) {
x.Width += c.Width
s.l[xi] = x
// Remove element at i.
copy(s.l[i:], s.l[i+1:])
s.l = s.l[:len(s.l)-1]
xi -= 1
} else {
x = c
xi = i
}
r -= c.Width
}
}
func (s *stream) samples() Samples {
samples := make(Samples, len(s.l))
copy(samples, s.l)
return samples
}
The MIT License
Copyright (c) 2014 Benedikt Lang <github at benediktlang.de>
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
semver for golang [![Build Status](https://drone.io/github.com/blang/semver/status.png)](https://drone.io/github.com/blang/semver/latest) [![GoDoc](https://godoc.org/github.com/blang/semver?status.png)](https://godoc.org/github.com/blang/semver) [![Coverage Status](https://img.shields.io/coveralls/blang/semver.svg)](https://coveralls.io/r/blang/semver?branch=master)
======
semver is a [Semantic Versioning](http://semver.org/) library written in golang. It fully covers spec version `2.0.0`.
Usage
-----
```bash
$ go get github.com/blang/semver
```
Note: Always vendor your dependencies or fix on a specific version tag.
```go
import github.com/blang/semver
v1, err := semver.Make("1.0.0-beta")
v2, err := semver.Make("2.0.0-beta")
v1.Compare(v2)
```
Also check the [GoDocs](http://godoc.org/github.com/blang/semver).
Why should I use this lib?
-----
- Fully spec compatible
- No reflection
- No regex
- Fully tested (Coverage >99%)
- Readable parsing/validation errors
- Fast (See [Benchmarks](#benchmarks))
- Only Stdlib
- Uses values instead of pointers
- Many features, see below
Features
-----
- Parsing and validation at all levels
- Comparator-like comparisons
- Compare Helper Methods
- InPlace manipulation
- Ranges `>=1.0.0 <2.0.0 || >=3.0.0 !3.0.1-beta.1`
- Sortable (implements sort.Interface)
- database/sql compatible (sql.Scanner/Valuer)
- encoding/json compatible (json.Marshaler/Unmarshaler)
Ranges
------
A `Range` is a set of conditions which specify which versions satisfy the range.
A condition is composed of an operator and a version. The supported operators are:
- `<1.0.0` Less than `1.0.0`
- `<=1.0.0` Less than or equal to `1.0.0`
- `>1.0.0` Greater than `1.0.0`
- `>=1.0.0` Greater than or equal to `1.0.0`
- `1.0.0`, `=1.0.0`, `==1.0.0` Equal to `1.0.0`
- `!1.0.0`, `!=1.0.0` Not equal to `1.0.0`. Excludes version `1.0.0`.
A `Range` can link multiple `Ranges` separated by space:
Ranges can be linked by logical AND:
- `>1.0.0 <2.0.0` would match between both ranges, so `1.1.1` and `1.8.7` but not `1.0.0` or `2.0.0`
- `>1.0.0 <3.0.0 !2.0.3-beta.2` would match every version between `1.0.0` and `3.0.0` except `2.0.3-beta.2`
Ranges can also be linked by logical OR:
- `<2.0.0 || >=3.0.0` would match `1.x.x` and `3.x.x` but not `2.x.x`
AND has a higher precedence than OR. It's not possible to use brackets.
Ranges can be combined by both AND and OR
- `>1.0.0 <2.0.0 || >3.0.0 !4.2.1` would match `1.2.3`, `1.9.9`, `3.1.1`, but not `4.2.1`, `2.1.1`
Range usage:
```
v, err := semver.Parse("1.2.3")
range, err := semver.ParseRange(">1.0.0 <2.0.0 || >=3.0.0")
if range(v) {
//valid
}
```
Example
-----
Have a look at full examples in [examples/main.go](examples/main.go)
```go
import github.com/blang/semver
v, err := semver.Make("0.0.1-alpha.preview+123.github")
fmt.Printf("Major: %d\n", v.Major)
fmt.Printf("Minor: %d\n", v.Minor)
fmt.Printf("Patch: %d\n", v.Patch)
fmt.Printf("Pre: %s\n", v.Pre)
fmt.Printf("Build: %s\n", v.Build)
// Prerelease versions array
if len(v.Pre) > 0 {
fmt.Println("Prerelease versions:")
for i, pre := range v.Pre {
fmt.Printf("%d: %q\n", i, pre)
}
}
// Build meta data array
if len(v.Build) > 0 {
fmt.Println("Build meta data:")
for i, build := range v.Build {
fmt.Printf("%d: %q\n", i, build)
}
}
v001, err := semver.Make("0.0.1")
// Compare using helpers: v.GT(v2), v.LT, v.GTE, v.LTE
v001.GT(v) == true
v.LT(v001) == true
v.GTE(v) == true
v.LTE(v) == true
// Or use v.Compare(v2) for comparisons (-1, 0, 1):
v001.Compare(v) == 1
v.Compare(v001) == -1
v.Compare(v) == 0
// Manipulate Version in place:
v.Pre[0], err = semver.NewPRVersion("beta")
if err != nil {
fmt.Printf("Error parsing pre release version: %q", err)
}
fmt.Println("\nValidate versions:")
v.Build[0] = "?"
err = v.Validate()
if err != nil {
fmt.Printf("Validation failed: %s\n", err)
}
```
Benchmarks
-----
BenchmarkParseSimple-4 5000000 390 ns/op 48 B/op 1 allocs/op
BenchmarkParseComplex-4 1000000 1813 ns/op 256 B/op 7 allocs/op
BenchmarkParseAverage-4 1000000 1171 ns/op 163 B/op 4 allocs/op
BenchmarkStringSimple-4 20000000 119 ns/op 16 B/op 1 allocs/op
BenchmarkStringLarger-4 10000000 206 ns/op 32 B/op 2 allocs/op
BenchmarkStringComplex-4 5000000 324 ns/op 80 B/op 3 allocs/op
BenchmarkStringAverage-4 5000000 273 ns/op 53 B/op 2 allocs/op
BenchmarkValidateSimple-4 200000000 9.33 ns/op 0 B/op 0 allocs/op
BenchmarkValidateComplex-4 3000000 469 ns/op 0 B/op 0 allocs/op
BenchmarkValidateAverage-4 5000000 256 ns/op 0 B/op 0 allocs/op
BenchmarkCompareSimple-4 100000000 11.8 ns/op 0 B/op 0 allocs/op
BenchmarkCompareComplex-4 50000000 30.8 ns/op 0 B/op 0 allocs/op
BenchmarkCompareAverage-4 30000000 41.5 ns/op 0 B/op 0 allocs/op
BenchmarkSort-4 3000000 419 ns/op 256 B/op 2 allocs/op
BenchmarkRangeParseSimple-4 2000000 850 ns/op 192 B/op 5 allocs/op
BenchmarkRangeParseAverage-4 1000000 1677 ns/op 400 B/op 10 allocs/op
BenchmarkRangeParseComplex-4 300000 5214 ns/op 1440 B/op 30 allocs/op
BenchmarkRangeMatchSimple-4 50000000 25.6 ns/op 0 B/op 0 allocs/op
BenchmarkRangeMatchAverage-4 30000000 56.4 ns/op 0 B/op 0 allocs/op
BenchmarkRangeMatchComplex-4 10000000 153 ns/op 0 B/op 0 allocs/op
See benchmark cases at [semver_test.go](semver_test.go)
Motivation
-----
I simply couldn't find any lib supporting the full spec. Others were just wrong or used reflection and regex which i don't like.
Contribution
-----
Feel free to make a pull request. For bigger changes create a issue first to discuss about it.
License
-----
See [LICENSE](LICENSE) file.
package semver
import (
"encoding/json"
)
// MarshalJSON implements the encoding/json.Marshaler interface.
func (v Version) MarshalJSON() ([]byte, error) {
return json.Marshal(v.String())
}
// UnmarshalJSON implements the encoding/json.Unmarshaler interface.
func (v *Version) UnmarshalJSON(data []byte) (err error) {
var versionString string
if err = json.Unmarshal(data, &versionString); err != nil {
return
}
*v, err = Parse(versionString)
return
}
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment